Shave ~1 sec from RunTests by speeding up slowest test 16 (bfgs) by ~70% (#1181)

(achieved by dropping '-b 20'; termination at pass 13 and the output remain the same)
- Trim trailing spaces from bfgs progress output
  (the trailing space appeared when the time field was removed from the lines; see the printf sketch after this list)
- Trim trailing spaces from bfgs.cc source
- Update bfgs stderr reference files
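
The trailing spaces came from printf conversions that left-justify into a fixed field width: when `%-10.5f` is the last conversion on a line, a short value such as 0.39057 gets padded out to 10 characters. A minimal standalone sketch of the before/after format strings used in the bfgs.cc hunks below (the brackets are only there to make the padding visible):

```cpp
#include <cstdio>

int main()
{ // Before: "%-10.5f" left-justifies in a 10-char field, so 0.39057
  // (7 chars) picks up 3 trailing spaces when it ends the line.
  fprintf(stderr, "[%-10.5f]\n", 0.39057);  // prints [0.39057   ]
  // After: "%-.5f" keeps the 5-digit precision but drops the field
  // width, so nothing is padded and the line ends cleanly.
  fprintf(stderr, "[%-.5f]\n", 0.39057);    // prints [0.39057]
  return 0;
}
```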
arielf authored and JohnLangford committed Jan 30, 2017
1 parent fd08248 commit 33e9da6
Showing 5 changed files with 39 additions and 39 deletions.
test/RunTests (2 changes: 1 addition & 1 deletion)
@@ -897,7 +897,7 @@ __DATA__
train-sets/ref/zero.stderr
# Test 16: LBFGS early termination
-{VW} -k -c -d train-sets/rcv1_small.dat --loss_function=logistic -b 20 --bfgs --mem 7 --passes 20 --termination 0.001 --l2 1.0 --holdout_off
+{VW} -k -c -d train-sets/rcv1_small.dat --loss_function=logistic --bfgs --mem 7 --passes 20 --termination 0.001 --l2 1.0 --holdout_off
train-sets/ref/rcv1_small.stdout
train-sets/ref/rcv1_small.stderr
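
Dropping '-b 20' lets the test fall back to VW's default of 18 weight bits, as the rcv1_small.stderr diff below confirms (Num weight bits 20 -> 18, Allocated 72M -> 18M). The allocation grows as 2^bits times a fixed per-slot footprint; a back-of-the-envelope sketch, assuming the 18 floats per slot implied by those two log lines (72M / 2^20 slots = 72 bytes), plausibly a few per-weight fields plus the 2*m-float L-BFGS history for --mem 7:

```cpp
#include <cstdio>
#include <cstddef>

int main()
{ // 72 bytes = 18 floats per weight slot, inferred from the log lines
  // above rather than stated anywhere in the commit.
  const size_t bytes_per_slot = 18 * sizeof(float);
  const int bits[] = {20, 18};
  for (int b : bits)
  { size_t total = (size_t(1) << b) * bytes_per_slot;
    printf("-b %d -> %zuM for weights and mem\n", b, total >> 20);
  }
  return 0;
}
```

With a quarter of the memory touched on every pass, the same 13 passes finish much faster, which is presumably where the ~70% saving on this test comes from.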
test/train-sets/ref/frank.stderr (16 changes: 8 additions & 8 deletions)
@@ -10,16 +10,16 @@ decay_learning_rate = 1
using l2 regularization
m = 7
Allocated 72M for weights and mem
-## avg. loss der. mag. d. m. cond. wolfe1 wolfe2 mix fraction curvature dir. magnitude step size time
+## avg. loss der. mag. d. m. cond. wolfe1 wolfe2 mix fraction curvature dir. magnitude step size time
1 3.313292e+12 9.499654e+14 1.905397e+13 6.440241e+13 2.698544e+12 2.958580e-01 0.690
-3 4.946559e+11 1.587411e+13 2.431760e+11 0.500000 0.000000 4.134247e+08 1.000000e+00 1.152
-4 4.262257e+11 1.360521e+13 2.087762e+11 0.963277 0.926554 9.978023e+10 1.000000e+00 1.711
-5 5.509095e+09 1.685125e+10 8.165054e+08 0.502727 0.005672 4.928468e+09 1.000000e+00 2.350
-6 2.722915e+09 9.129735e+09 4.115941e+08 0.848424 0.697668 2.678213e+10 1.000000e+00 3.318
-7 5.137694e+06 6.706857e+06 1.018132e+05 0.499525 -0.000686 2.755246e+05 1.000000e+00 4.523
+3 4.946559e+11 1.587411e+13 2.431760e+11 0.500000 0.000000 4.134247e+08 1.000000e+00 1.152
+4 4.262257e+11 1.360521e+13 2.087762e+11 0.963277 0.926554 9.978023e+10 1.000000e+00 1.711
+5 5.509095e+09 1.685125e+10 8.165054e+08 0.502727 0.005672 4.928468e+09 1.000000e+00 2.350
+6 2.722915e+09 9.129735e+09 4.115941e+08 0.848424 0.697668 2.678213e+10 1.000000e+00 3.318
+7 5.137694e+06 6.706857e+06 1.018132e+05 0.499525 -0.000686 2.755246e+05 1.000000e+00 4.523
8 5.995715e+06 3.517040e+07 5.294065e+05 -0.644854 -2.289846 (revise x 0.5) 5.000000e-01 4.767
-9 5.019540e+06 2.805249e+06 4.237483e+04 0.177600 -0.644881 1.362459e+04 1.000000e+00 6.061
-10 4.934764e+06 6.562551e+00 1.255939e-01 0.499927 -0.000122 7.431789e-03 1.000000e+00 7.549
+9 5.019540e+06 2.805249e+06 4.237483e+04 0.177600 -0.644881 1.362459e+04 1.000000e+00 6.061
+10 4.934764e+06 6.562551e+00 1.255939e-01 0.499927 -0.000122 7.431789e-03 1.000000e+00 7.549


finished run
test/train-sets/ref/rcv1_small.stderr (30 changes: 15 additions & 15 deletions)
@@ -1,28 +1,28 @@
using l2 regularization = 1
enabling BFGS based optimization **without** curvature calculation
-Num weight bits = 20
+Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
decay_learning_rate = 1
m = 7
-Allocated 72M for weights and mem
-## avg. loss der. mag. d. m. cond. wolfe1 wolfe2 mix fraction curvature dir. magnitude step size
+Allocated 18M for weights and mem
+## avg. loss der. mag. d. m. cond. wolfe1 wolfe2 mix fraction curvature dir. magnitude step size
creating cache_file = train-sets/rcv1_small.dat.cache
Reading datafile = train-sets/rcv1_small.dat
num sources = 1
-1 0.69315 0.00266 0.87764 2.24708 776.93237 0.39057
-3 0.51357 0.00493 4.93046 0.523903 0.088793 76.25748 1.00000
-4 0.65936 0.04915 49.15202 -0.910622 -2.480116 (revise x 0.5) 0.50000
-5 0.51658 0.00876 8.76105 -0.037665 -0.999616 (revise x 0.5) 0.25000
-6 0.49499 0.00028 0.28254 0.463963 -0.056952 0.51262 1.00000
-7 0.49354 0.00006 0.05641 0.619867 0.244153 0.08545 1.00000
-8 0.49287 0.00005 0.05434 0.870687 0.741762 0.91640 1.00000
-9 0.48978 0.00014 0.13750 0.772760 0.546930 2.01229 1.00000
-10 0.48472 0.00027 0.27437 0.750340 0.501776 3.21399 1.00000
-11 0.47920 0.00017 0.16867 0.671044 0.340515 1.40135 1.00000
-12 0.47707 0.00001 0.00760 0.593376 0.181239 0.09201 1.00000
-13 0.47691 0.00000 0.00168 0.593289 0.185020 0.00955 1.00000
+1 0.69315 0.00266 0.87764 2.24708 776.93237 0.39057
+3 0.51357 0.00493 4.93046 0.523903 0.088793 76.25748 1.00000
+4 0.65936 0.04915 49.15202 -0.910622 -2.480116 (revise x 0.5) 0.50000
+5 0.51658 0.00876 8.76105 -0.037665 -0.999616 (revise x 0.5) 0.25000
+6 0.49499 0.00028 0.28254 0.463963 -0.056952 0.51262 1.00000
+7 0.49354 0.00006 0.05641 0.619867 0.244153 0.08545 1.00000
+8 0.49287 0.00005 0.05434 0.870687 0.741762 0.91640 1.00000
+9 0.48978 0.00014 0.13750 0.772760 0.546930 2.01229 1.00000
+10 0.48472 0.00027 0.27437 0.750340 0.501776 3.21399 1.00000
+11 0.47920 0.00017 0.16867 0.671044 0.340515 1.40135 1.00000
+12 0.47707 0.00001 0.00760 0.593376 0.181239 0.09201 1.00000
+13 0.47691 0.00000 0.00168 0.593278 0.185019 0.00955 1.00000

finished run
number of examples = 13000
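
The "(revise x 0.5)" rows above are the line search rejecting a step that failed its Wolfe checks (note the negative wolfe1 values on those rows) and halving the step size before re-evaluating. A toy backtracking sketch of that halving pattern, using a plain Armijo sufficient-decrease test on a quadratic rather than VW's actual Wolfe bookkeeping:

```cpp
#include <cstdio>

// f(x) = (x - 1)^2 is minimized at x = 1.
static double f(double x) { return (x - 1.0) * (x - 1.0); }

int main()
{ double x = 0.0, dir = 1.0;           // current point and search direction
  double loss0 = f(x), dderiv = -2.0;  // loss and directional derivative at x
  double step = 8.0;                   // deliberately overshoots
  const double c1 = 1e-4;              // sufficient-decrease constant
  // While the step fails the sufficient-decrease test, halve it; this is
  // the analogue of the "(revise x 0.5)" lines in the reference output.
  while (f(x + step * dir) > loss0 + c1 * step * dderiv)
  { step *= 0.5;
    printf("(revise x 0.5)\tstep = %-.5f\n", step);
  }
  printf("accepted step = %-.5f, loss = %-.5f\n", step, f(x + step * dir));
  return 0;
}
```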
test/train-sets/ref/zero.stderr (4 changes: 2 additions & 2 deletions)
@@ -7,11 +7,11 @@ power_t = 0.5
decay_learning_rate = 1
m = 7
Allocated 72M for weights and mem
-## avg. loss der. mag. d. m. cond. wolfe1 wolfe2 mix fraction curvature dir. magnitude step size
+## avg. loss der. mag. d. m. cond. wolfe1 wolfe2 mix fraction curvature dir. magnitude step size
creating cache_file = train-sets/zero.dat.cache
Reading datafile = train-sets/zero.dat
num sources = 1
-1 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
+1 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000

finished run
number of examples = 10
vowpalwabbit/bfgs.cc (26 changes: 13 additions & 13 deletions)
@@ -341,7 +341,7 @@ void bfgs_iter_middle(vw& all, bfgs& b, float* mem, double* rho, double* alpha,
{
coef_j = alpha[j] - rho[j] * y_r;
y_r = 0.;
-for (typename T::iterator w = weights.begin(); w != weights.end(); ++w)
+for (typename T::iterator w = weights.begin(); w != weights.end(); ++w)
{
mem = mem0 + (w.index() >> weights.stride_shift()) * b.mem_stride;
(&(*w))[W_DIR] += (float)coef_j*mem[(2 * j + MEM_ST + origin) % b.mem_stride];
@@ -421,7 +421,7 @@ double wolfe_eval(vw& all, bfgs& b, float* mem, double loss_sum, double previous
template <class T> double add_regularization(vw& all, bfgs& b, float regularization, T& weights)
{ //compute the derivative difference
double ret = 0.;

if (b.regularizers == nullptr)
for (typename T::iterator w = weights.begin(); w != weights.end(); ++w)
{
@@ -451,7 +451,7 @@ template <class T> double add_regularization(vw& all, bfgs& b, float regularizat
ret -= 0.5*b.regularizers[2*i]*delta_weight*delta_weight;
}
}

return ret;
}

@@ -575,7 +575,7 @@ double derivative_in_direction(vw& all, bfgs& b, float* mem, int &origin, T& wei
{
double ret = 0.;
for (typename T::iterator w = weights.begin(); w != weights.end(); ++w)
-{
+{
float* mem1 = mem + (w.index() >> weights.stride_shift()) * b.mem_stride;
ret += mem1[(MEM_GT + origin) % b.mem_stride] * (&(*w))[W_DIR];
}
@@ -614,14 +614,14 @@ int process_pass(vw& all, bfgs& b)
/********************************************************************/
/* A) FIRST PASS FINISHED: INITIALIZE FIRST LINE SEARCH *************/
/********************************************************************/
-if (b.first_pass)
+if (b.first_pass)
{ if(all.all_reduce != nullptr)
{ accumulate(all, all.weights, W_COND); //Accumulate preconditioner
float temp = (float)b.importance_weight_sum;
b.importance_weight_sum = accumulate_scalar(all, temp);
}
//finalize_preconditioner(all, b, all.l2_lambda);
-if(all.all_reduce != nullptr)
+if(all.all_reduce != nullptr)
{ float temp = (float)b.loss_sum;
b.loss_sum = accumulate_scalar(all, temp); //Accumulate loss_sums
accumulate(all, all.weights, 1); //Accumulate gradients from all nodes
@@ -645,7 +645,7 @@ int process_pass(vw& all, bfgs& b)
ftime(&b.t_end_global);
b.net_time = (int) (1000.0 * (b.t_end_global.time - b.t_start_global.time) + (b.t_end_global.millitm - b.t_start_global.millitm));
if (!all.quiet)
fprintf(stderr, "%-10s\t%-10.5f\t%-10.5f\n", "", d_mag, b.step_size);
fprintf(stderr, "%-10s\t%-10.5f\t%-.5f\n", "", d_mag, b.step_size);
b.predictions.erase();
update_weight(all, b.step_size);
}
@@ -695,7 +695,7 @@ int process_pass(vw& all, bfgs& b)
b.net_time = (int) (1000.0 * (b.t_end_global.time - b.t_start_global.time) + (b.t_end_global.millitm - b.t_start_global.millitm));
float ratio = (b.step_size==0.f) ? 0.f : (float)new_step/(float)b.step_size;
if (!all.quiet)
fprintf(stderr, "%-10s\t%-10s\t(revise x %.1f)\t%-10.5f\n",
fprintf(stderr, "%-10s\t%-10s\t(revise x %.1f)\t%-.5f\n",
"","",ratio,
new_step);
b.predictions.erase();
@@ -739,7 +739,7 @@ int process_pass(vw& all, bfgs& b)
ftime(&b.t_end_global);
b.net_time = (int) (1000.0 * (b.t_end_global.time - b.t_start_global.time) + (b.t_end_global.millitm - b.t_start_global.millitm));
if (!all.quiet)
fprintf(stderr, "%-10s\t%-10.5f\t%-10.5f\n", "", d_mag, b.step_size);
fprintf(stderr, "%-10s\t%-10.5f\t%-.5f\n", "", d_mag, b.step_size);
b.predictions.erase();
update_weight(all, b.step_size);
}
@@ -750,7 +750,7 @@ int process_pass(vw& all, bfgs& b)
/* C) NOT FIRST PASS, CURVATURE CALCULATED **************************/
/********************************************************************/
else // just finished all second gradients
-{
+{
if(all.all_reduce != nullptr)
{ float t = (float)b.curvature;
b.curvature = accumulate_scalar(all, t); //Accumulate curvatures
@@ -779,7 +779,7 @@ int process_pass(vw& all, bfgs& b)
b.net_time = (int) (1000.0 * (b.t_end_global.time - b.t_start_global.time) + (b.t_end_global.millitm - b.t_start_global.millitm));

if (!all.quiet)
fprintf(stderr, "%-10.5f\t%-10.5f\t%-10.5f\n", b.curvature / b.importance_weight_sum, d_mag, b.step_size);
fprintf(stderr, "%-10.5f\t%-10.5f\t%-.5f\n", b.curvature / b.importance_weight_sum, d_mag, b.step_size);
b.gradient_pass = true;
}//now start computing derivatives.
b.current_pass++;
@@ -944,7 +944,7 @@ void save_load_regularizer(vw& all, bfgs& b, io_buf& model_file, bool read, bool
i++;
}
while ((!read && i < length) || (read && brw >0));

if (read)
regularizer_to_weight(all, b);
}
@@ -979,7 +979,7 @@ void save_load(bfgs& b, io_buf& model_file, bool read, bool text)
ftime(&b.t_start_global);

if (!all->quiet)
-{ const char * header_fmt = "%2s %-10s\t%-10s\t%-10s\t %-10s\t%-10s\t%-10s\t%-10s\t%-10s\t%-10s\n";
+{ const char * header_fmt = "%2s %-10s\t%-10s\t%-10s\t %-10s\t%-10s\t%-10s\t%-10s\t%-10s\t%-s\n";
fprintf(stderr, header_fmt,
"##", "avg. loss", "der. mag.", "d. m. cond.", "wolfe1", "wolfe2", "mix fraction", "curvature", "dir. magnitude", "step size");
cerr.precision(5);
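
For reference, the b.net_time bookkeeping that surrounds each fprintf above is plain ftime millisecond arithmetic. A standalone sketch of the same computation, with the expression copied from process_pass:

```cpp
#include <cstdio>
#include <sys/timeb.h>

int main()
{ struct timeb t_start_global, t_end_global;
  ftime(&t_start_global);
  // ... one optimization pass would run here ...
  ftime(&t_end_global);
  // Whole seconds contribute 1000 ms each; millitm carries the remainder.
  int net_time = (int)(1000.0 * (t_end_global.time - t_start_global.time)
                       + (t_end_global.millitm - t_start_global.millitm));
  printf("net_time = %d ms\n", net_time);
  return 0;
}
```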