From 33e9da6c1ab7d1be5aca159d5c280df048928252 Mon Sep 17 00:00:00 2001
From: Ariel Faigon
Date: Mon, 30 Jan 2017 09:36:09 -0800
Subject: [PATCH]
- Shave ~1 sec from RunTests by speeding up the slowest test, 16 (bfgs), by ~70% (#1181)
  (achieved by dropping '-b 20'; termination at pass 13 and the output remain the same)
- Trim trailing spaces from the bfgs progress output
  (a trailing space appeared on these lines when the time field was removed)
- Trim trailing spaces from the bfgs.cc source
- Update the bfgs stderr reference files
---
 test/RunTests                         |  2 +-
 test/train-sets/ref/frank.stderr      | 16 +++++++-------
 test/train-sets/ref/rcv1_small.stderr | 30 +++++++++++++--------------
 test/train-sets/ref/zero.stderr       |  4 ++--
 vowpalwabbit/bfgs.cc                  | 26 +++++++++++------------
 5 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/test/RunTests b/test/RunTests
index 3be7c4bf0dc..d7167b750f2 100755
--- a/test/RunTests
+++ b/test/RunTests
@@ -897,7 +897,7 @@ __DATA__
     train-sets/ref/zero.stderr
 
 # Test 16: LBFGS early termination
-{VW} -k -c -d train-sets/rcv1_small.dat --loss_function=logistic -b 20 --bfgs --mem 7 --passes 20 --termination 0.001 --l2 1.0 --holdout_off
+{VW} -k -c -d train-sets/rcv1_small.dat --loss_function=logistic --bfgs --mem 7 --passes 20 --termination 0.001 --l2 1.0 --holdout_off
     train-sets/ref/rcv1_small.stdout
     train-sets/ref/rcv1_small.stderr

diff --git a/test/train-sets/ref/frank.stderr b/test/train-sets/ref/frank.stderr
index 8a9206fafbb..2e3095a7043 100644
--- a/test/train-sets/ref/frank.stderr
+++ b/test/train-sets/ref/frank.stderr
@@ -10,16 +10,16 @@ decay_learning_rate = 1
 using l2 regularization
 m = 7
 Allocated 72M for weights and mem
-## avg. loss der. mag. d. m. cond. wolfe1 wolfe2 mix fraction curvature dir. magnitude step size time
+## avg. loss der. mag. d. m. cond. wolfe1 wolfe2 mix fraction curvature dir. magnitude step size time
  1 3.313292e+12 9.499654e+14 1.905397e+13 6.440241e+13 2.698544e+12 2.958580e-01 0.690
- 3 4.946559e+11 1.587411e+13 2.431760e+11 0.500000 0.000000 4.134247e+08 1.000000e+00 1.152
- 4 4.262257e+11 1.360521e+13 2.087762e+11 0.963277 0.926554 9.978023e+10 1.000000e+00 1.711
- 5 5.509095e+09 1.685125e+10 8.165054e+08 0.502727 0.005672 4.928468e+09 1.000000e+00 2.350
- 6 2.722915e+09 9.129735e+09 4.115941e+08 0.848424 0.697668 2.678213e+10 1.000000e+00 3.318
- 7 5.137694e+06 6.706857e+06 1.018132e+05 0.499525 -0.000686 2.755246e+05 1.000000e+00 4.523
+ 3 4.946559e+11 1.587411e+13 2.431760e+11 0.500000 0.000000 4.134247e+08 1.000000e+00 1.152
+ 4 4.262257e+11 1.360521e+13 2.087762e+11 0.963277 0.926554 9.978023e+10 1.000000e+00 1.711
+ 5 5.509095e+09 1.685125e+10 8.165054e+08 0.502727 0.005672 4.928468e+09 1.000000e+00 2.350
+ 6 2.722915e+09 9.129735e+09 4.115941e+08 0.848424 0.697668 2.678213e+10 1.000000e+00 3.318
+ 7 5.137694e+06 6.706857e+06 1.018132e+05 0.499525 -0.000686 2.755246e+05 1.000000e+00 4.523
  8 5.995715e+06 3.517040e+07 5.294065e+05 -0.644854 -2.289846 (revise x 0.5) 5.000000e-01 4.767
- 9 5.019540e+06 2.805249e+06 4.237483e+04 0.177600 -0.644881 1.362459e+04 1.000000e+00 6.061
-10 4.934764e+06 6.562551e+00 1.255939e-01 0.499927 -0.000122 7.431789e-03 1.000000e+00 7.549
+ 9 5.019540e+06 2.805249e+06 4.237483e+04 0.177600 -0.644881 1.362459e+04 1.000000e+00 6.061
+10 4.934764e+06 6.562551e+00 1.255939e-01 0.499927 -0.000122 7.431789e-03 1.000000e+00 7.549
 
 finished run

diff --git a/test/train-sets/ref/rcv1_small.stderr b/test/train-sets/ref/rcv1_small.stderr
index 96c5cff2bd4..0379f2d9873 100644
--- a/test/train-sets/ref/rcv1_small.stderr
+++ b/test/train-sets/ref/rcv1_small.stderr
@@ -1,28 +1,28 @@
 using l2 regularization = 1
 enabling BFGS based optimization **without** curvature calculation
-Num weight bits = 20
+Num weight bits = 18
 learning rate = 0.5
 initial_t = 0
 power_t = 0.5
 decay_learning_rate = 1
 m = 7
-Allocated 72M for weights and mem
-## avg. loss der. mag. d. m. cond. wolfe1 wolfe2 mix fraction curvature dir. magnitude step size
+Allocated 18M for weights and mem
+## avg. loss der. mag. d. m. cond. wolfe1 wolfe2 mix fraction curvature dir. magnitude step size
 creating cache_file = train-sets/rcv1_small.dat.cache
 Reading datafile = train-sets/rcv1_small.dat
 num sources = 1
- 1 0.69315 0.00266 0.87764 2.24708 776.93237 0.39057
- 3 0.51357 0.00493 4.93046 0.523903 0.088793 76.25748 1.00000
- 4 0.65936 0.04915 49.15202 -0.910622 -2.480116 (revise x 0.5) 0.50000
- 5 0.51658 0.00876 8.76105 -0.037665 -0.999616 (revise x 0.5) 0.25000
- 6 0.49499 0.00028 0.28254 0.463963 -0.056952 0.51262 1.00000
- 7 0.49354 0.00006 0.05641 0.619867 0.244153 0.08545 1.00000
- 8 0.49287 0.00005 0.05434 0.870687 0.741762 0.91640 1.00000
- 9 0.48978 0.00014 0.13750 0.772760 0.546930 2.01229 1.00000
-10 0.48472 0.00027 0.27437 0.750340 0.501776 3.21399 1.00000
-11 0.47920 0.00017 0.16867 0.671044 0.340515 1.40135 1.00000
-12 0.47707 0.00001 0.00760 0.593376 0.181239 0.09201 1.00000
-13 0.47691 0.00000 0.00168 0.593289 0.185020 0.00955 1.00000
+ 1 0.69315 0.00266 0.87764 2.24708 776.93237 0.39057
+ 3 0.51357 0.00493 4.93046 0.523903 0.088793 76.25748 1.00000
+ 4 0.65936 0.04915 49.15202 -0.910622 -2.480116 (revise x 0.5) 0.50000
+ 5 0.51658 0.00876 8.76105 -0.037665 -0.999616 (revise x 0.5) 0.25000
+ 6 0.49499 0.00028 0.28254 0.463963 -0.056952 0.51262 1.00000
+ 7 0.49354 0.00006 0.05641 0.619867 0.244153 0.08545 1.00000
+ 8 0.49287 0.00005 0.05434 0.870687 0.741762 0.91640 1.00000
+ 9 0.48978 0.00014 0.13750 0.772760 0.546930 2.01229 1.00000
+10 0.48472 0.00027 0.27437 0.750340 0.501776 3.21399 1.00000
+11 0.47920 0.00017 0.16867 0.671044 0.340515 1.40135 1.00000
+12 0.47707 0.00001 0.00760 0.593376 0.181239 0.09201 1.00000
+13 0.47691 0.00000 0.00168 0.593278 0.185019 0.00955 1.00000
 
 finished run
 number of examples = 13000

diff --git a/test/train-sets/ref/zero.stderr b/test/train-sets/ref/zero.stderr
index afcf80f11dc..6559d0669f7 100644
--- a/test/train-sets/ref/zero.stderr
+++ b/test/train-sets/ref/zero.stderr
@@ -7,11 +7,11 @@ power_t = 0.5
 decay_learning_rate = 1
 m = 7
 Allocated 72M for weights and mem
-## avg. loss der. mag. d. m. cond. wolfe1 wolfe2 mix fraction curvature dir. magnitude step size
+## avg. loss der. mag. d. m. cond. wolfe1 wolfe2 mix fraction curvature dir. magnitude step size
 creating cache_file = train-sets/zero.dat.cache
 Reading datafile = train-sets/zero.dat
 num sources = 1
- 1 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
+ 1 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
 
 finished run
 number of examples = 10

diff --git a/vowpalwabbit/bfgs.cc b/vowpalwabbit/bfgs.cc
index 85d5d7d117d..076b392dbf1 100644
--- a/vowpalwabbit/bfgs.cc
+++ b/vowpalwabbit/bfgs.cc
@@ -341,7 +341,7 @@ void bfgs_iter_middle(vw& all, bfgs& b, float* mem, double* rho, double* alpha,
   { coef_j = alpha[j] - rho[j] * y_r;
     y_r = 0.;
-    for (typename T::iterator w = weights.begin(); w != weights.end(); ++w)
+    for (typename T::iterator w = weights.begin(); w != weights.end(); ++w)
     { mem = mem0 + (w.index() >> weights.stride_shift()) * b.mem_stride;
       (&(*w))[W_DIR] += (float)coef_j*mem[(2 * j + MEM_ST + origin) % b.mem_stride];
@@ -421,7 +421,7 @@ double wolfe_eval(vw& all, bfgs& b, float* mem, double loss_sum, double previous
 template <class T> double add_regularization(vw& all, bfgs& b, float regularization, T& weights)
 { //compute the derivative difference
   double ret = 0.;
-
+
   if (b.regularizers == nullptr)
     for (typename T::iterator w = weights.begin(); w != weights.end(); ++w)
     {
@@ -451,7 +451,7 @@ template <class T> double add_regularization(vw& all, bfgs& b, float regularizat
       ret -= 0.5*b.regularizers[2*i]*delta_weight*delta_weight;
     }
   }
-
+
   return ret;
 }
@@ -575,7 +575,7 @@ double derivative_in_direction(vw& all, bfgs& b, float* mem, int &origin, T& wei
 { double ret = 0.;
   for (typename T::iterator w = weights.begin(); w != weights.end(); ++w)
-  {
+  {
     float* mem1 = mem + (w.index() >> weights.stride_shift()) * b.mem_stride;
     ret += mem1[(MEM_GT + origin) % b.mem_stride] * (&(*w))[W_DIR];
   }
@@ -614,14 +614,14 @@ int process_pass(vw& all, bfgs& b)
   /********************************************************************/
   /* A) FIRST PASS FINISHED: INITIALIZE FIRST LINE SEARCH *************/
   /********************************************************************/
-  if (b.first_pass)
+  if (b.first_pass)
   { if(all.all_reduce != nullptr)
     { accumulate(all, all.weights, W_COND); //Accumulate preconditioner
       float temp = (float)b.importance_weight_sum;
       b.importance_weight_sum = accumulate_scalar(all, temp);
     }
     //finalize_preconditioner(all, b, all.l2_lambda);
-    if(all.all_reduce != nullptr)
+    if(all.all_reduce != nullptr)
     { float temp = (float)b.loss_sum;
       b.loss_sum = accumulate_scalar(all, temp); //Accumulate loss_sums
       accumulate(all, all.weights, 1); //Accumulate gradients from all nodes
@@ -645,7 +645,7 @@ int process_pass(vw& all, bfgs& b)
       ftime(&b.t_end_global);
       b.net_time = (int) (1000.0 * (b.t_end_global.time - b.t_start_global.time) + (b.t_end_global.millitm - b.t_start_global.millitm));
       if (!all.quiet)
-        fprintf(stderr, "%-10s\t%-10.5f\t%-10.5f\n", "", d_mag, b.step_size);
+        fprintf(stderr, "%-10s\t%-10.5f\t%-.5f\n", "", d_mag, b.step_size);
       b.predictions.erase();
       update_weight(all, b.step_size);
     }
@@ -695,7 +695,7 @@ int process_pass(vw& all, bfgs& b)
       b.net_time = (int) (1000.0 * (b.t_end_global.time - b.t_start_global.time) + (b.t_end_global.millitm - b.t_start_global.millitm));
       float ratio = (b.step_size==0.f) ? 0.f : (float)new_step/(float)b.step_size;
       if (!all.quiet)
-        fprintf(stderr, "%-10s\t%-10s\t(revise x %.1f)\t%-10.5f\n",
+        fprintf(stderr, "%-10s\t%-10s\t(revise x %.1f)\t%-.5f\n",
                 "","",ratio, new_step);
       b.predictions.erase();
@@ -739,7 +739,7 @@ int process_pass(vw& all, bfgs& b)
       ftime(&b.t_end_global);
       b.net_time = (int) (1000.0 * (b.t_end_global.time - b.t_start_global.time) + (b.t_end_global.millitm - b.t_start_global.millitm));
       if (!all.quiet)
-        fprintf(stderr, "%-10s\t%-10.5f\t%-10.5f\n", "", d_mag, b.step_size);
+        fprintf(stderr, "%-10s\t%-10.5f\t%-.5f\n", "", d_mag, b.step_size);
       b.predictions.erase();
       update_weight(all, b.step_size);
     }
@@ -750,7 +750,7 @@ int process_pass(vw& all, bfgs& b)
   /********************************************************************/
   /* C) NOT FIRST PASS, CURVATURE CALCULATED **************************/
   /********************************************************************/
   else // just finished all second gradients
-  {
+  {
     if(all.all_reduce != nullptr)
     { float t = (float)b.curvature;
       b.curvature = accumulate_scalar(all, t); //Accumulate curvatures
@@ -779,7 +779,7 @@ int process_pass(vw& all, bfgs& b)
       b.net_time = (int) (1000.0 * (b.t_end_global.time - b.t_start_global.time) + (b.t_end_global.millitm - b.t_start_global.millitm));
       if (!all.quiet)
-        fprintf(stderr, "%-10.5f\t%-10.5f\t%-10.5f\n", b.curvature / b.importance_weight_sum, d_mag, b.step_size);
+        fprintf(stderr, "%-10.5f\t%-10.5f\t%-.5f\n", b.curvature / b.importance_weight_sum, d_mag, b.step_size);
       b.gradient_pass = true;
     }//now start computing derivatives.
     b.current_pass++;
@@ -944,7 +944,7 @@ void save_load_regularizer(vw& all, bfgs& b, io_buf& model_file, bool read, bool
     i++;
   }
   while ((!read && i < length) || (read && brw >0));
-
+
   if (read)
     regularizer_to_weight(all, b);
 }
@@ -979,7 +979,7 @@ void save_load(bfgs& b, io_buf& model_file, bool read, bool text)
   ftime(&b.t_start_global);
 
   if (!all->quiet)
-  { const char * header_fmt = "%2s %-10s\t%-10s\t%-10s\t %-10s\t%-10s\t%-10s\t%-10s\t%-10s\t%-10s\n";
+  { const char * header_fmt = "%2s %-10s\t%-10s\t%-10s\t %-10s\t%-10s\t%-10s\t%-10s\t%-10s\t%-s\n";
     fprintf(stderr, header_fmt, "##", "avg. loss", "der. mag.", "d. m. cond.", "wolfe1", "wolfe2", "mix fraction", "curvature", "dir. magnitude", "step size");
     cerr.precision(5);