From c891ae820f4cca2874d7e398a8cc165e069f0b78 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Thu, 25 Jan 2018 18:47:51 -0500 Subject: [PATCH 001/127] / --- vowpalwabbit/cb_explore.cc | 186 ++++++++++++++++++++++++++++++++- vowpalwabbit/gen_cs_example.cc | 1 + 2 files changed, 185 insertions(+), 2 deletions(-) diff --git a/vowpalwabbit/cb_explore.cc b/vowpalwabbit/cb_explore.cc index c3b2a07cffd..27d7953efbe 100644 --- a/vowpalwabbit/cb_explore.cc +++ b/vowpalwabbit/cb_explore.cc @@ -20,6 +20,8 @@ struct cb_explore cb_to_cs cbcs; v_array preds; v_array cover_probs; + v_array cost_lambda; + v_array lambdas; CB::label cb_label; COST_SENSITIVE::label cs_label; @@ -32,6 +34,8 @@ struct cb_explore size_t bag_size; size_t cover_size; float psi; + size_t lambda_size; + float n_2; size_t counter; @@ -117,6 +121,146 @@ void predict_or_learn_bag(cb_explore& data, base_learner& base, example& ec) ec.pred.a_s = probs; } +/* +template +void predict_or_learn_lambda(cb_explore& data, base_learner& base, example& ec) +{ + v_array probs = ec.pred.a_s; + probs.erase(); + + for (uint32_t i = 0; i < data.cbcs.num_actions; i++) + probs.push_back({i,0.}); + float prob = 1.f/(float)data.lambda_size; + for (size_t i = 0; i < data.lambda_size; i++) + { + if (is_learn && n_1 > 0) + { + //learn with lambda learning rate + n_1--; + } + else if (is_learn && n_1 <= 0) + { + //learn with 1-lambda learning rate + n_1--; + } + else + { + // predict + // select the lambda that has the minimum cumulative cost + base.predict(ec, i); + uint32_t chosen = ec.pred.multiclass-1; + probs[chosen].score += prob; + } + } + ec.pred.a_s = probs; + +} +*/ +float find_min(v_array arr) +{ + float min_val = FLT_MAX; + uint32_t argmin = -1; + + for (uint32_t i = 0; i < arr.size(); i++) + { + if (arr[i] < min_val) + { + min_val = arr[i]; + argmin = i; + } + } + + return argmin; +} + + + +template +void predict_or_learn_lambda(cb_explore& data, base_learner& base, example& ec) +{ + v_array probs = ec.pred.a_s; + probs.erase(); + + data.cs_label.costs.erase(); + + for (uint32_t j = 0; j < data.cbcs.num_actions; j++) + data.cs_label.costs.push_back({FLT_MAX,j+1,0.,0.}); + + data.cb_label = ec.l.cb; + + ec.l.cs = data.cs_label; + + + // learn + if (is_learn) + { + + // get the cost vector + //data.cs_label.costs.erase(); + //data.cb_label = ec.l.cb; + //data.cbcs.known_cost = get_observed_cost(data.cb_label); + //gen_cs_example(data.cbcs, ec, data.cb_label, data.cs_label); + + ec.l.cb = data.cb_label; + base.learn(ec); + + cout<(data.cbcs, ec, data.cb_label, data.cs_label); + + + for (size_t i = 0; i < data.cbcs.num_actions; i++) + cout<<"action "<() ,"epsilon-greedy exploration") ("bag",po::value() ,"bagging-based exploration") ("cover",po::value() ,"Online cover based exploration") - ("psi", po::value(), "disagreement parameter for cover"); + ("psi", po::value(), "disagreement parameter for cover") + ("lambda",po::value() ,"Online weighting based exploration") + ("n_2", po::value(), "dataset size of source 2"); add_options(all); po::variables_map& vm = all.vm; @@ -388,13 +538,45 @@ base_learner* cb_explore_setup(vw& all) *all.file_options << " --psi " << type_string; l = &init_learner(&data, base, predict_or_learn_cover, predict_or_learn_cover, data.cover_size + 1, prediction_type::action_probs); } + else if (vm.count("lambda")) + { + data.lambda_size = (uint32_t)vm["lambda"].as(); + data.cs = all.cost_sensitive; + data.cost_lambda = v_init(); + for (uint32_t i = 0; i < data.lambda_size; i++) + data.cost_lambda.push_back(0.); + + 
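+      // cost_lambda[i] starts at zero and (per the commented-out
+      // predict_or_learn_lambda above, which selects the lambda with the
+      // minimum cumulative cost) is meant to accumulate an estimate of
+      // sublearner i's cost so the best lambda can be picked online.  The
+      // lambdas themselves are laid out next as a uniform grid:
+      // lambda_i = i / lambda_size over [0, 1).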
data.lambdas = v_init(); + for (uint32_t i = 0; i < data.lambda_size; i++) + data.lambdas.push_back(((float) i )/ data.lambda_size); + + data.second_cs_label.costs.resize(num_actions); + data.second_cs_label.costs.end() = data.second_cs_label.costs.begin()+num_actions; + *all.file_options << " --lambda "<< data.lambda_size; + + if (vm.count("epsilon")) + data.epsilon = vm["epsilon"].as(); + else + data.epsilon = 0.05f; + //cout<<"epsilon = "<(); + //data.preds = v_init(); + //data.preds.resize(data.lambda_size); + //sprintf(type_string, "%f", data.n_2); + //*all.file_options << " --phi " << type_string; + l = &init_learner(&data, base, predict_or_learn_lambda, predict_or_learn_lambda, data.lambda_size + 1, prediction_type::action_probs); + + } else if (vm.count("bag")) { data.bag_size = (uint32_t)vm["bag"].as(); *all.file_options << " --bag "<< data.bag_size; l = &init_learner(&data, base, predict_or_learn_bag, predict_or_learn_bag, data.bag_size, prediction_type::action_probs); } - else if (vm.count("first") ) + else if (vm.count("first")) { data.tau = (uint32_t)vm["first"].as(); *all.file_options << " --first "<< data.tau; diff --git a/vowpalwabbit/gen_cs_example.cc b/vowpalwabbit/gen_cs_example.cc index 4660d8d6a7d..fa2bc21440b 100644 --- a/vowpalwabbit/gen_cs_example.cc +++ b/vowpalwabbit/gen_cs_example.cc @@ -112,6 +112,7 @@ void gen_cs_test_example(v_array examples, COST_SENSITIVE::label& cs_l //single line version void gen_cs_example_ips(cb_to_cs& c, CB::label& ld, COST_SENSITIVE::label& cs_ld) { + //std::cout<<"-------"<action< Date: Mon, 29 Jan 2018 17:45:41 -0500 Subject: [PATCH 002/127] not sure if the cost vector retrieved is correct --- vowpalwabbit/cb_explore.cc | 92 ++++++++++++++++++++++++++++++---- vowpalwabbit/gen_cs_example.cc | 2 + vowpalwabbit/gen_cs_example.h | 1 + 3 files changed, 85 insertions(+), 10 deletions(-) diff --git a/vowpalwabbit/cb_explore.cc b/vowpalwabbit/cb_explore.cc index 27d7953efbe..eb43b392fcc 100644 --- a/vowpalwabbit/cb_explore.cc +++ b/vowpalwabbit/cb_explore.cc @@ -173,7 +173,57 @@ float find_min(v_array arr) return argmin; } +/* + //Randomize over predictions from a base set of predictors + //Use cost sensitive oracle to cover actions to form distribution. + + uint32_t num_actions = data.cbcs.num_actions; + + v_array probs = ec.pred.a_s; + probs.erase(); + data.cs_label.costs.erase(); + + for (uint32_t j = 0; j < num_actions; j++) + data.cs_label.costs.push_back({FLT_MAX,j+1,0.,0.}); + + size_t cover_size = data.cover_size; + size_t counter = data.counter; + v_array& probabilities = data.cover_probs; + v_array& predictions = data.preds; + + float additive_probability = 1.f / (float)cover_size; + + float min_prob = min(1.f / num_actions, 1.f / (float)sqrt(counter * num_actions)); + + data.cb_label = ec.l.cb; + + ec.l.cs = data.cs_label; + get_cover_probabilities(data, base, ec, probs); + + if (is_learn) + { + ec.l.cb = data.cb_label; + base.learn(ec); + + //Now update oracles + + //1. 
Compute loss vector + data.cs_label.costs.erase(); + float norm = min_prob * num_actions; + ec.l.cb = data.cb_label; + + data.cbcs.known_cost = get_observed_cost(data.cb_label); + for (size_t i = 0; i < data.cbcs.num_actions; i++) + cout<<"action "<(data.cbcs, ec, data.cb_label, data.cs_label); + for(uint32_t i = 0; i < num_actions; i++) + probabilities[i] = 0; + + for (size_t i = 0; i < data.cbcs.num_actions; i++) + cout<<"action "< void predict_or_learn_lambda(cb_explore& data, base_learner& base, example& ec) @@ -204,21 +254,23 @@ void predict_or_learn_lambda(cb_explore& data, base_learner& base, example& ec) ec.l.cb = data.cb_label; base.learn(ec); - cout<(data.cbcs, ec, data.cb_label, data.cs_label); - for (size_t i = 0; i < data.cbcs.num_actions; i++) - cout<<"action "<(data.cbcs, ec, data.cb_label, data.cs_label); for(uint32_t i = 0; i < num_actions; i++) probabilities[i] = 0; @@ -547,8 +605,19 @@ base_learner* cb_explore_setup(vw& all) data.cost_lambda.push_back(0.); data.lambdas = v_init(); - for (uint32_t i = 0; i < data.lambda_size; i++) - data.lambdas.push_back(((float) i )/ data.lambda_size); + for (uint32_t i = 0; i < data.lambda_size; i++) + if (i%2 == 0) + { + data.lambdas.push_back(pow(0.5f, floor(i/2) + 1)); + //cout<(); else data.epsilon = 0.05f; + + + //data.lambdas.push_back(((float) i )/ data.lambda_size); //cout<<"epsilon = "<(c.scorer, c.known_cost, ec, action, c.num_actions); + //std::cout<<"wc.x = "< Date: Mon, 29 Jan 2018 18:35:45 -0500 Subject: [PATCH 003/127] not sure if the cost vector retrieved is correct --- vowpalwabbit/cb_explore.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vowpalwabbit/cb_explore.cc b/vowpalwabbit/cb_explore.cc index eb43b392fcc..beac5556048 100644 --- a/vowpalwabbit/cb_explore.cc +++ b/vowpalwabbit/cb_explore.cc @@ -269,8 +269,8 @@ void predict_or_learn_lambda(cb_explore& data, base_learner& base, example& ec) gen_cs_example(data.cbcs, ec, data.cb_label, data.cs_label); - //for (size_t i = 0; i < data.cbcs.num_actions; i++) - // cout<<"action "<(data.cbcs, ec, data.cb_label, data.cs_label); From 539b1e435bb579efd94e77fed65556562b6541ed Mon Sep 17 00:00:00 2001 From: chicheng Date: Thu, 8 Feb 2018 18:21:55 -0500 Subject: [PATCH 004/127] added cbify warm start code --- vowpalwabbit/cbify.cc | 171 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 140 insertions(+), 31 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 25d03db58b7..386d511a5a2 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -52,6 +52,13 @@ struct cbify cbify_adf_data adf_data; float loss0; float loss1; + + size_t choices_lambda; + size_t warm_start_period; + v_array cumulative_costs; + v_array lambdas; + size_t num_actions; + }; vector vw_scorer::Score_Actions(example& ctx) @@ -126,37 +133,109 @@ void copy_example_to_adf(cbify& data, example& ec) } } +uint32_t find_min(v_array arr) +{ + float min_val = FLT_MAX; + uint32_t argmin = 0; + + for (uint32_t i = 0; i < arr.size(); i++) + { + if (arr[i] < min_val) + { + min_val = arr[i]; + argmin = i; + } + } + + return argmin; +} + template void predict_or_learn(cbify& data, base_learner& base, example& ec) { - //Store the multiclass input label - MULTICLASS::label_t ld = ec.l.multi; - data.cb_label.costs.erase(); - ec.l.cb = data.cb_label; - ec.pred.a_s = data.a_s; - - //Call the cb_explore algorithm. 
It returns a vector of probabilities for each action - base.predict(ec); - //data.probs = ec.pred.scalars; - - uint32_t action = data.mwt_explorer->Choose_Action(*data.generic_explorer, StringUtils::to_string(data.example_counter++), ec); - - CB::cb_class cl; - cl.action = action; - cl.probability = ec.pred.a_s[action-1].score; - - if(!cl.action) - THROW("No action with non-zero probability found!"); - cl.cost = loss(data, ld.label, cl.action); - - //Create a new cb label - data.cb_label.costs.push_back(cl); - ec.l.cb = data.cb_label; - base.learn(ec); - data.a_s.erase(); - data.a_s = ec.pred.a_s; - ec.l.multi = ld; - ec.pred.multiclass = action; + bool is_supervised; + + if (data.warm_start_period > 0) + { + is_supervised = true; + data.warm_start_period--; + } + else + is_supervised = false; + + uint32_t argmin; + argmin = find_min(data.cumulative_costs); + if (argmin != 0) + cout<<"argmin is not zero"<cost_sensitive->predict(ec, argmin); + //uint32_t chosen = ec.pred.multiclass-1; + //cout<cost_sensitive->learn(ec, i); + } + ec.l.multi = ld; + } + else //Call the cb_explore algorithm. It returns a vector of probabilities for each action + { + data.cb_label.costs.erase(); + ec.l.cb = data.cb_label; + ec.pred.a_s = data.a_s; + + base.predict(ec, argmin); + //base.predict(ec); + //data.probs = ec.pred.scalars; + + uint32_t action = data.mwt_explorer->Choose_Action(*data.generic_explorer, StringUtils::to_string(data.example_counter++), ec); + + CB::cb_class cl; + cl.action = action; + cl.probability = ec.pred.a_s[action-1].score; + + if(!cl.action) + THROW("No action with non-zero probability found!"); + cl.cost = loss(data, ld.label, cl.action); + + //Create a new cb label + data.cb_label.costs.push_back(cl); + ec.l.cb = data.cb_label; + //base.learn(ec); + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + ec.weight = data.lambdas[i] / (1-data.lambdas[i]); + base.learn(ec, i); + data.cumulative_costs[i] += 0; + } + + data.a_s.erase(); + data.a_s = ec.pred.a_s; + ec.l.multi = ld; + ec.pred.multiclass = action; + } } template @@ -213,6 +292,22 @@ void init_adf_data(cbify& data, const size_t num_actions) adf_data.empty_example->in_use = true; } +void generate_lambdas(v_array& lambdas, size_t lambda_size) +{ + lambdas = v_init(); + for (uint32_t i = 0; i < lambda_size; i++) + if (i%2 == 0) + { + lambdas.push_back(pow(0.5f, floor(i/2) + 1)); + //cout<(), "loss for correct label") - ("loss1", po::value(), "loss for incorrect label"); + ("loss1", po::value(), "loss for incorrect label") + ("warm_start", po::value(), "number of training examples for fully-supervised warm start") + ("choices_lambda", po::value(), "numbers of lambdas importance weights to aggregate"); add_options(all); po::variables_map& vm = all.vm; @@ -238,6 +335,18 @@ base_learner* cbify_setup(vw& all) data.generic_explorer = new GenericExplorer(*data.scorer, (u32)num_actions); data.all = &all; + cout<() : 0; + cout<() : 1; + + generate_lambdas(data.lambdas, data.choices_lambda); + + for (size_t i = 0; i < data.choices_lambda; i++) + data.cumulative_costs.push_back(0.); + + data.num_actions = num_actions; + if (data.use_adf) { init_adf_data(data, num_actions); @@ -263,11 +372,11 @@ base_learner* cbify_setup(vw& all) learner* l; if (data.use_adf) { - l = &init_multiclass_learner(&data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, 1); + l = &init_multiclass_learner(&data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, data.choices_lambda); } else { - l = &init_multiclass_learner(&data, base, 
predict_or_learn, predict_or_learn, all.p, 1); + l = &init_multiclass_learner(&data, base, predict_or_learn, predict_or_learn, all.p, data.choices_lambda); } l->set_finish(finish); From 961a5a583366aa135dcbd60bc99820a4d3f5de2e Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 9 Feb 2018 14:28:52 -0500 Subject: [PATCH 005/127] commented out the multiple lambda code in cbify --- vowpalwabbit/cbify.cc | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 386d511a5a2..2fb6169153b 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -140,12 +140,14 @@ uint32_t find_min(v_array arr) for (uint32_t i = 0; i < arr.size(); i++) { + //cout<cost_sensitive->predict(ec, argmin); + //data.all->cost_sensitive->predict(ec, argmin); + data.all->cost_sensitive->predict(ec); //uint32_t chosen = ec.pred.multiclass-1; //cout<Choose_Action(*data.generic_explorer, StringUtils::to_string(data.example_counter++), ec); @@ -223,13 +227,25 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //Create a new cb label data.cb_label.costs.push_back(cl); ec.l.cb = data.cb_label; - //base.learn(ec); + + //IPS for approximating the cumulative costs for all lambdas + /* + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + example ec2 = ec; + data.all->cost_sensitive->predict(ec2, i); + if (ec2.pred.multiclass == cl.action) + data.cumulative_costs[i] += cl.cost / cl.probability; + //cout<(*data.scorer, (u32)num_actions); data.all = &all; - cout<() : 0; - cout<() : 1; generate_lambdas(data.lambdas, data.choices_lambda); From 0fbc26afb91c980adf31d0183d9818c103400f81 Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 9 Feb 2018 14:30:21 -0500 Subject: [PATCH 006/127] commented out the multiple lambda code in cbify --- vowpalwabbit/cbify.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 2fb6169153b..64474fa739b 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -388,11 +388,11 @@ base_learner* cbify_setup(vw& all) learner* l; if (data.use_adf) { - l = &init_multiclass_learner(&data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, data.choices_lambda); + l = &init_multiclass_learner(&data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, 1); } else { - l = &init_multiclass_learner(&data, base, predict_or_learn, predict_or_learn, all.p, data.choices_lambda); + l = &init_multiclass_learner(&data, base, predict_or_learn, predict_or_learn, all.p, 1); } l->set_finish(finish); From 369b3ea24e9ca2b0b4471c82f0d73f5349544093 Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 9 Feb 2018 15:02:04 -0500 Subject: [PATCH 007/127] the cbexplore approach seems not working, as the first stage cannot prepare multiple copies of weights --- vowpalwabbit/cb_explore.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/vowpalwabbit/cb_explore.cc b/vowpalwabbit/cb_explore.cc index beac5556048..28f7d748811 100644 --- a/vowpalwabbit/cb_explore.cc +++ b/vowpalwabbit/cb_explore.cc @@ -269,8 +269,8 @@ void predict_or_learn_lambda(cb_explore& data, base_learner& base, example& ec) gen_cs_example(data.cbcs, ec, data.cb_label, data.cs_label); - for (size_t i = 0; i < data.cbcs.num_actions; i++) - cout<<"action "<, predict_or_learn_lambda, data.lambda_size + 1, prediction_type::action_probs); + l = &init_learner(&data, base, predict_or_learn_lambda, predict_or_learn_lambda, 
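        // Note: the count argument that follows tells init_learner how many
        // copies of the base weights to allocate -- one per lambda value,
        // addressed by the index passed to learn()/predict().  This revision
        // keeps exactly lambda_size sublearners (the earlier "+ 1" is gone).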
data.lambda_size, prediction_type::action_probs); } else if (vm.count("bag")) From 8f096a5fb78b1c2aae7a870ac33bc90adc512b48 Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 9 Feb 2018 17:30:04 -0500 Subject: [PATCH 008/127] . --- vowpalwabbit/cbify.cc | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 64474fa739b..46e12a28d76 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -165,8 +165,8 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) else is_supervised = false; - //uint32_t argmin; - //argmin = find_min(data.cumulative_costs); + uint32_t argmin; + argmin = find_min(data.cumulative_costs); //cout<cost_sensitive->predict(ec, argmin); - data.all->cost_sensitive->predict(ec); + data.all->cost_sensitive->predict(ec, argmin); //uint32_t chosen = ec.pred.multiclass-1; //cout<Choose_Action(*data.generic_explorer, StringUtils::to_string(data.example_counter++), ec); @@ -229,23 +228,23 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) ec.l.cb = data.cb_label; //IPS for approximating the cumulative costs for all lambdas - /* for (uint32_t i = 0; i < data.choices_lambda; i++) { - example ec2 = ec; - data.all->cost_sensitive->predict(ec2, i); - if (ec2.pred.multiclass == cl.action) + //example ec2 = ec; + data.all->cost_sensitive->predict(ec, i); + //cout<* l; if (data.use_adf) { - l = &init_multiclass_learner(&data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, 1); + l = &init_multiclass_learner(&data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, data.choices_lambda); } else { - l = &init_multiclass_learner(&data, base, predict_or_learn, predict_or_learn, all.p, 1); + l = &init_multiclass_learner(&data, base, predict_or_learn, predict_or_learn, all.p, data.choices_lambda); } l->set_finish(finish); From 904134f0150dfa35c3849282c7bb9b863067e0f3 Mon Sep 17 00:00:00 2001 From: chicheng Date: Mon, 12 Feb 2018 11:40:44 -0500 Subject: [PATCH 009/127] properly store the temp labels --- vowpalwabbit/cbify.cc | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 46e12a28d76..a298df9ba17 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -58,6 +58,7 @@ struct cbify v_array cumulative_costs; v_array lambdas; size_t num_actions; + COST_SENSITIVE::label csl; }; @@ -173,22 +174,24 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; + COST_SENSITIVE::label csd = ec.l.cs; //cout<cost_sensitive->predict(ec, argmin); @@ -202,6 +205,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.all->cost_sensitive->learn(ec, i); } ec.l.multi = ld; + ec.l.cs = csd; } else //Call the cb_explore algorithm. 
It returns a vector of probabilities for each action { @@ -249,6 +253,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.a_s.erase(); data.a_s = ec.pred.a_s; ec.l.multi = ld; + ec.l.cs = csd; ec.pred.multiclass = action; } } From e2713440212d9f7430e067590cf0b0db9ee5a2f2 Mon Sep 17 00:00:00 2001 From: chicheng Date: Mon, 12 Feb 2018 11:48:19 -0500 Subject: [PATCH 010/127] back --- vowpalwabbit/cbify.cc | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index a298df9ba17..44cce764577 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -58,7 +58,6 @@ struct cbify v_array cumulative_costs; v_array lambdas; size_t num_actions; - COST_SENSITIVE::label csl; }; @@ -174,24 +173,22 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; - COST_SENSITIVE::label csd = ec.l.cs; //cout<cost_sensitive->predict(ec, argmin); @@ -205,7 +202,6 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.all->cost_sensitive->learn(ec, i); } ec.l.multi = ld; - ec.l.cs = csd; } else //Call the cb_explore algorithm. It returns a vector of probabilities for each action { @@ -234,12 +230,12 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //IPS for approximating the cumulative costs for all lambdas for (uint32_t i = 0; i < data.choices_lambda; i++) { - //example ec2 = ec; - data.all->cost_sensitive->predict(ec, i); + example ec2 = ec; + data.all->cost_sensitive->predict(ec2, i); //cout< Date: Mon, 12 Feb 2018 18:04:17 -0500 Subject: [PATCH 011/127] . --- data_gen.py | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 data_gen.py diff --git a/data_gen.py b/data_gen.py new file mode 100644 index 00000000000..f1c15ae7716 --- /dev/null +++ b/data_gen.py @@ -0,0 +1,92 @@ +import random +import numpy as np + +classes = 10 +m = 100 + +def gen_keyword(): + + kwperclass = 20 + + keyword = np.zeros((classes, m)) + + for i in range(classes): + shuffled = range(m) + random.shuffle(shuffled) + + for j in range(kwperclass): + keyword[i,shuffled[j]] = 1 + + return keyword + + +def classify(classifier, example): + + result = classifier.dot(example) + + return np.argmax(result) + + + +if __name__ == '__main__': + + + filename = "text_lownoise" + + f = open(filename+".vw", "w") + g = open(filename+"_m.vw", "w") + + keyword = gen_keyword() + + + samples = 10000 + fprob = 0 + + cs = False + + for i in range(samples): + c = random.randint(0, classes-1) + + #generate a pair of datasets (one is cost-sensitive, the other is multiclass) + + for l in range(classes): + f.write(str(l+1)+':') + cost = 1 + if l == c: + cost = 0 + f.write(str(cost)+' ') + + g.write(str(c+1)) + + + + f.write(' | ') + g.write(' | ') + + vec = np.zeros(m) + + for j in range(m): + flip = np.random.choice([False,True],p=[1-fprob, fprob]) + if flip: + vec[j] = 2 * (1-keyword[c][j]) - 1 + else: + vec[j] = 2 * keyword[c][j] - 1 + + for j in range(m): + f.write('w'+str(j)+':') + f.write(str(vec[j])+' ') + g.write('w'+str(j)+':') + g.write(str(vec[j])+' ') + + #print 'Is the prediction equal to the class label? 
', classify(keyword, vec) == c + + f.write('\n') + g.write('\n') + + f.close() + g.close() + + + + + From ced4bbdf559410f1069a52a36b27baf2db333620 Mon Sep 17 00:00:00 2001 From: chicheng Date: Tue, 13 Feb 2018 10:51:56 -0500 Subject: [PATCH 012/127] fixed the bug with assigning cb label before cost sensitive prediction - the ec.l field is anunion --- Makefile | 2 +- vowpalwabbit/cbify.cc | 26 +++++++++++++++----------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index ffa65608520..b01ec7db8cc 100644 --- a/Makefile +++ b/Makefile @@ -71,7 +71,7 @@ FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) $(OPTIM_FLAGS) -D_ #CXX = g++ # for valgrind / gdb debugging -#FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) $(JSON_INCLUDE) -g -O0 -fPIC +FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) $(JSON_INCLUDE) -g -O0 -fPIC # for valgrind profiling: run 'valgrind --tool=callgrind PROGRAM' then 'callgrind_annotate --tree=both --inclusive=yes' #FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) -Wall $(ARCH) -ffast-math -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) $(JSON_INCLUDE) -g -fomit-frame-pointer -ffast-math -fno-strict-aliasing -fPIC diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 44cce764577..be40e8bae24 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -179,7 +179,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) if (is_supervised) // Call the cost-sensitive learner directly { //generate cost-sensitive label - COST_SENSITIVE::label csl; + COST_SENSITIVE::label csl = calloc_or_throw(); csl.costs.resize(data.num_actions); csl.costs.end() = csl.costs.begin()+data.num_actions; for (uint32_t j = 0; j < data.num_actions; j++) @@ -192,6 +192,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //predict data.all->cost_sensitive->predict(ec, argmin); + auto old_pred = ec.pred; //uint32_t chosen = ec.pred.multiclass-1; //cout<cost_sensitive->learn(ec, i); } ec.l.multi = ld; + ec.pred = old_pred; } else //Call the cb_explore algorithm. 
It returns a vector of probabilities for each action { @@ -210,6 +212,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) ec.pred.a_s = data.a_s; base.predict(ec, argmin); + auto old_pred = ec.pred; //base.predict(ec); //data.probs = ec.pred.scalars; @@ -223,23 +226,24 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) THROW("No action with non-zero probability found!"); cl.cost = loss(data, ld.label, cl.action); - //Create a new cb label - data.cb_label.costs.push_back(cl); - ec.l.cb = data.cb_label; - //IPS for approximating the cumulative costs for all lambdas for (uint32_t i = 0; i < data.choices_lambda; i++) { - example ec2 = ec; - data.all->cost_sensitive->predict(ec2, i); + //example ec2 = ec; + //assert(0); + data.all->cost_sensitive->predict(ec, i); //cout< Date: Tue, 13 Feb 2018 14:50:06 -0500 Subject: [PATCH 013/127] the cumulative cost become diverse --- vowpalwabbit/cbify.cc | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index be40e8bae24..855d18154b0 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -192,7 +192,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //predict data.all->cost_sensitive->predict(ec, argmin); - auto old_pred = ec.pred; + //auto old_pred = ec.pred; //uint32_t chosen = ec.pred.multiclass-1; //cout<cost_sensitive->learn(ec, i); } ec.l.multi = ld; - ec.pred = old_pred; + //ec.pred = old_pred; } else //Call the cb_explore algorithm. It returns a vector of probabilities for each action { @@ -229,14 +229,14 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //IPS for approximating the cumulative costs for all lambdas for (uint32_t i = 0; i < data.choices_lambda; i++) { - //example ec2 = ec; //assert(0); data.all->cost_sensitive->predict(ec, i); - //cout< Date: Tue, 13 Feb 2018 18:12:46 -0500 Subject: [PATCH 014/127] modified csoaa so that it can take example weights now. --- vowpalwabbit/cbify.cc | 30 ++++++++++++++++-------------- vowpalwabbit/csoaa.cc | 5 ++++- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 855d18154b0..bb653d69d6c 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -234,9 +234,9 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //cout<& lambdas, size_t lambda_size) { lambdas = v_init(); + uint32_t mid = lambda_size / 2; for (uint32_t i = 0; i < lambda_size; i++) - if (i%2 == 0) - { - lambdas.push_back(pow(0.5f, floor(i/2) + 1)); - //cout< 0; i--) + lambdas[i-1] = lambdas[i] / 2; + + for (uint32_t i = mid+1; i < lambda_size; i++) + lambdas[i] = 1 - (1-lambdas[i-1]) / 2; + } base_learner* cbify_setup(vw& all) diff --git a/vowpalwabbit/csoaa.cc b/vowpalwabbit/csoaa.cc index 0abe97e640e..655b98395de 100644 --- a/vowpalwabbit/csoaa.cc +++ b/vowpalwabbit/csoaa.cc @@ -31,9 +31,12 @@ inline void inner_loop(base_learner& base, example& ec, uint32_t i, float cost, { if (is_learn) { - ec.weight = (cost == FLT_MAX) ? 0.f : 1.f; + float old_weight = ec.weight; + if (cost == FLT_MAX) ec.weight = 0.f; + //ec.weight = (cost == FLT_MAX) ? 0.f : 1.f; ec.l.simple.label = cost; base.learn(ec, i-1); + //ec.weight = old_weight; } else base.predict(ec, i-1); From e295aff2606cd445aad0bcc30af166294bc1d93f Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 16 Feb 2018 12:54:32 -0500 Subject: [PATCH 015/127] . 
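Gate the two kinds of updates behind flags and importance-weight the bandit
updates.  New options --no_supervised and --no_bandit set
data.ind_supervised / data.ind_bandit so either source of examples can be
ablated; each bandit example is replayed to every sublearner i with its
weight scaled by lambda_i / (1 - lambda_i), and csoaa's inner_loop now saves
and restores ec.weight instead of overwriting it, so the scaled weight
survives the cost-sensitive reduction.

A minimal sketch of the weighting scheme (names as in cbify.cc below;
supervised examples keep weight 1):

    // Sublearner i targets (1 - lambda_i) * supervised + lambda_i * bandit
    // loss, so relative to weight-1 supervised examples a bandit example
    // must carry weight lambda_i / (1 - lambda_i).
    float old_weight = ec.weight;
    for (uint32_t i = 0; i < data.choices_lambda; i++)
    {
      ec.weight = old_weight * data.lambdas[i] / (1 - data.lambdas[i]);
      base.learn(ec, i);
    }
    ec.weight = old_weight;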
--- vowpalwabbit/cbify.cc | 37 ++++++++++++++++++++++++------------- vowpalwabbit/csoaa.cc | 2 +- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index bb653d69d6c..5dc8f0acbb7 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -58,6 +58,9 @@ struct cbify v_array cumulative_costs; v_array lambdas; size_t num_actions; + bool ind_bandit; + bool ind_supervised; + }; @@ -196,11 +199,13 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //uint32_t chosen = ec.pred.multiclass-1; //cout<cost_sensitive->learn(ec, i); + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + ec.weight = 1; + data.all->cost_sensitive->learn(ec, i); + } } ec.l.multi = ld; //ec.pred = old_pred; @@ -244,16 +249,17 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //base.learn(ec); ec.pred = old_pred; - - for (uint32_t i = 0; i < data.choices_lambda; i++) + if (data.ind_bandit) { - ec.weight = data.lambdas[i] / (1-data.lambdas[i]); - //ec.l.cb.costs[0].cost = 0; - //cl.cost * data.lambdas[i] / (1-data.lambdas[i]); - - base.learn(ec, i); + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + ec.weight = data.lambdas[i] / (1-data.lambdas[i]); + //ec.l.cb.costs[0].cost = 0; + //cl.cost * data.lambdas[i] / (1-data.lambdas[i]); + + base.learn(ec, i); + } } - data.a_s.erase(); data.a_s = ec.pred.a_s; ec.l.multi = ld; @@ -340,7 +346,9 @@ base_learner* cbify_setup(vw& all) ("loss0", po::value(), "loss for correct label") ("loss1", po::value(), "loss for incorrect label") ("warm_start", po::value(), "number of training examples for fully-supervised warm start") - ("choices_lambda", po::value(), "numbers of lambdas importance weights to aggregate"); + ("choices_lambda", po::value(), "numbers of lambdas importance weights to aggregate") + ("no_supervised", "indicator of using supervised only") + ("no_bandit", "indicator of using bandit only"); add_options(all); po::variables_map& vm = all.vm; @@ -350,6 +358,9 @@ base_learner* cbify_setup(vw& all) data.use_adf = count(all.args.begin(), all.args.end(),"--cb_explore_adf") > 0; data.loss0 = vm.count("loss0") ? vm["loss0"].as() : 0.f; data.loss1 = vm.count("loss1") ? vm["loss1"].as() : 1.f; + data.ind_supervised = vm.count("no_supervised") ? false : true; + data.ind_bandit = vm.count("no_bandit") ? false : true; + data.recorder = new vw_recorder(); data.mwt_explorer = new MwtExplorer("vw",*data.recorder); data.scorer = new vw_scorer(); diff --git a/vowpalwabbit/csoaa.cc b/vowpalwabbit/csoaa.cc index 655b98395de..740f0af7b33 100644 --- a/vowpalwabbit/csoaa.cc +++ b/vowpalwabbit/csoaa.cc @@ -36,7 +36,7 @@ inline void inner_loop(base_learner& base, example& ec, uint32_t i, float cost, //ec.weight = (cost == FLT_MAX) ? 
0.f : 1.f; ec.l.simple.label = cost; base.learn(ec, i-1); - //ec.weight = old_weight; + ec.weight = old_weight; } else base.predict(ec, i-1); From ed2f2bf5d09b8c97645b0282c0015aaba2c47dc2 Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 16 Feb 2018 12:55:16 -0500 Subject: [PATCH 016/127] added some results of warm starting --- results.txt | 178 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 results.txt diff --git a/results.txt b/results.txt new file mode 100644 index 00000000000..5be1452c1b9 --- /dev/null +++ b/results.txt @@ -0,0 +1,178 @@ + +chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_lownoise_m.vw --warm_start 15 --choices_lambda 10 --cb_type ips +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = text_lownoise_m.vw +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 1 1.0 4 1 101 +1.000000 1.000000 2 2.0 10 4 101 +0.750000 0.500000 4 4.0 7 10 101 +0.625000 0.500000 8 8.0 8 8 101 +0.130435 0.026316 16 46.0 10 10 101 +0.629630 1.000000 18 108.0 2 10 101 +0.560345 0.500000 22 232.0 3 7 101 +0.529167 0.500000 30 480.0 9 8 101 +0.355533 0.187500 46 976.0 8 8 101 +0.365346 0.375000 78 1968.0 2 7 101 +0.480010 0.593750 142 3952.0 9 5 101 +0.517424 0.554688 270 7920.0 8 8 101 +0.496973 0.476562 526 15856.0 8 8 101 +0.472107 0.447266 1038 31728.0 2 9 101 +0.441124 0.410156 2062 63472.0 1 1 101 +0.348968 0.256836 4110 126960.0 8 1 101 +0.242348 0.135742 8206 253936.0 8 8 101 + +finished run +number of examples per pass = 10000 +passes used = 1 +weighted example sum = 309550.000000 +weighted label sum = 0.000000 +average loss = 0.209223 +total feature number = 1010000 +chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_lownoise_m.vw --warm_start 15 --choices_lambda 10 --cb_type ips --no_supervised +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = text_lownoise_m.vw +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 1 1.0 4 1 101 +1.000000 1.000000 2 2.0 10 1 101 +1.000000 1.000000 4 4.0 7 1 101 +0.875000 0.750000 8 8.0 8 1 101 +0.978261 1.000000 16 46.0 10 1 101 +0.990741 1.000000 18 108.0 2 3 101 +0.995690 1.000000 22 232.0 3 7 101 +0.933333 0.875000 30 480.0 9 7 101 +0.871926 0.812500 46 976.0 8 8 101 +0.715955 0.562500 78 1968.0 2 2 101 +0.693826 0.671875 142 3952.0 9 3 101 +0.647601 0.601562 270 7920.0 8 8 101 +0.648020 0.648438 526 15856.0 8 8 101 +0.666793 0.685547 1038 31728.0 2 6 101 +0.622936 0.579102 2062 63472.0 1 1 101 +0.513603 0.404297 4110 126960.0 8 1 101 +0.413289 0.312988 8206 253936.0 8 8 101 + +finished run +number of examples per pass = 10000 +passes used = 1 +weighted example sum = 309550.000000 +weighted label sum = 0.000000 +average loss = 0.354960 +total feature number = 1010000 +chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_lownoise_m.vw --warm_start 15 --choices_lambda 10 --cb_type ips --no_bandit +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = text_lownoise_m.vw +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 1 1.0 4 1 101 
+1.000000 1.000000 2 2.0 10 4 101 +0.750000 0.500000 4 4.0 7 10 101 +0.625000 0.500000 8 8.0 8 8 101 +0.375000 0.125000 16 16.0 10 10 101 +0.437500 0.500000 32 32.0 8 8 101 +0.406250 0.375000 64 64.0 3 5 101 +0.476562 0.546875 128 128.0 3 5 101 +0.480469 0.484375 256 256.0 10 10 101 +0.443359 0.406250 512 512.0 2 10 101 +0.445312 0.447266 1024 1024.0 1 1 101 +0.438965 0.432617 2048 2048.0 9 5 101 +0.430176 0.421387 4096 4096.0 4 4 101 +0.423340 0.416504 8192 8192.0 10 10 101 + +finished run +number of examples per pass = 10000 +passes used = 1 +weighted example sum = 10000.000000 +weighted label sum = 0.000000 +average loss = 0.426300 +total feature number = 1010000 + +chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 40 --choices_lambda 20 --cb_type ips +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = text_highnoise_m.vw +num sources = 1 +average since example example current current current +loss last counter weight label predict features +0.000000 0.000000 1 1.0 1 1 34 +0.500000 1.000000 2 2.0 7 1 41 +0.750000 1.000000 4 4.0 7 9 36 +0.750000 0.750000 8 8.0 2 5 38 +0.750000 0.750000 16 16.0 9 9 40 +0.812500 0.875000 32 32.0 8 3 45 +0.991533 0.997090 41 1063.0 1 2 39 +0.668060 0.500000 43 3109.0 5 5 33 +0.714623 0.750000 47 7201.0 7 7 35 +0.600455 0.500000 55 15385.0 10 10 42 +0.516455 0.437500 71 31753.0 9 9 32 +0.587418 0.656250 103 64489.0 7 3 42 +0.629966 0.671875 167 129961.0 6 6 41 +0.678446 0.726562 295 260905.0 2 6 37 +0.684938 0.691406 551 522793.0 6 8 42 +0.706747 0.728516 1063 1046569.0 8 9 43 +0.677090 0.647461 2087 2094121.0 2 2 37 +0.672040 0.666992 4135 4189225.0 1 1 45 +0.663167 0.654297 8231 8379433.0 10 5 33 + +finished run +number of examples per pass = 10000 +passes used = 1 +weighted example sum = 10189120.000000 +weighted label sum = 0.000000 +average loss = 0.663153 +total feature number = 390046 +chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 40 --choices_lambda 20 --cb_type ips --no_supervised +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = text_highnoise_m.vw +num sources = 1 +average since example example current current current +loss last counter weight label predict features +0.000000 0.000000 1 1.0 1 1 34 +0.500000 1.000000 2 2.0 7 1 41 +0.750000 1.000000 4 4.0 7 1 36 +0.875000 1.000000 8 8.0 2 1 38 +0.937500 1.000000 16 16.0 9 1 40 +0.937500 0.937500 32 32.0 8 1 45 +0.035748 0.007759 41 1063.0 1 1 39 +0.670312 1.000000 43 3109.0 5 2 33 +0.715595 0.750000 47 7201.0 7 4 35 +0.866883 1.000000 55 15385.0 10 4 42 +0.903285 0.937500 71 31753.0 9 5 32 +0.888927 0.875000 103 64489.0 7 2 42 +0.874039 0.859375 167 129961.0 6 6 41 +0.913731 0.953125 295 260905.0 2 2 37 +0.876718 0.839844 551 522793.0 6 7 42 +0.864128 0.851562 1063 1046569.0 8 6 43 +0.851980 0.839844 2087 2094121.0 2 4 37 +0.848841 0.845703 4135 4189225.0 1 1 45 +0.837139 0.825439 8231 8379433.0 10 5 33 + +finished run +number of examples per pass = 10000 +passes used = 1 +weighted example sum = 10189120.000000 +weighted label sum = 0.000000 +average loss = 0.834037 +total feature number = 390046 From 0da506a88b74327c66589e1c9697b273febe280c Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 16 Feb 2018 12:57:08 -0500 Subject: [PATCH 017/127] added some results of warm starting --- results.txt | 116 
++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/results.txt b/results.txt index 5be1452c1b9..4c0daef948f 100644 --- a/results.txt +++ b/results.txt @@ -176,3 +176,119 @@ weighted example sum = 10189120.000000 weighted label sum = 0.000000 average loss = 0.834037 total feature number = 390046 + + + + + + + +chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 100 --choices_lambda 20 --cb_type ips --no_supervised +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = text_highnoise_m.vw +num sources = 1 +average since example example current current current +loss last counter weight label predict features +0.000000 0.000000 1 1.0 1 1 34 +0.500000 1.000000 2 2.0 7 1 41 +0.750000 1.000000 4 4.0 7 1 36 +0.875000 1.000000 8 8.0 2 1 38 +0.937500 1.000000 16 16.0 9 1 40 +0.937500 0.937500 32 32.0 8 1 45 +0.921875 0.906250 64 64.0 6 1 37 +0.991095 0.995279 101 1123.0 8 1 31 +0.996844 1.000000 103 3169.0 7 3 42 +0.998623 1.000000 107 7261.0 4 1 40 +0.933118 0.875000 115 15445.0 2 4 40 +0.967529 1.000000 131 31813.0 8 10 42 +0.920603 0.875000 163 64549.0 10 9 46 +0.897640 0.875000 227 130021.0 4 2 32 +0.858839 0.820312 355 260965.0 5 7 42 +0.835629 0.812500 611 522853.0 8 7 34 +0.838716 0.841797 1123 1046629.0 4 9 40 +0.837326 0.835938 2147 2094181.0 9 4 43 +0.831015 0.824707 4195 4189285.0 7 1 39 +0.826152 0.821289 8291 8379493.0 7 5 39 + +finished run +number of examples per pass = 10000 +passes used = 1 +weighted example sum = 10127800.000000 +weighted label sum = 0.000000 +average loss = 0.825455 +total feature number = 390046 +chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 100 --choices_lambda 20 --cb_type ips --no_bandit +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = text_highnoise_m.vw +num sources = 1 +average since example example current current current +loss last counter weight label predict features +0.000000 0.000000 1 1.0 1 1 34 +0.500000 1.000000 2 2.0 7 1 41 +0.750000 1.000000 4 4.0 7 9 36 +0.750000 0.750000 8 8.0 2 5 38 +0.750000 0.750000 16 16.0 9 9 40 +0.812500 0.875000 32 32.0 8 3 45 +0.703125 0.593750 64 64.0 6 6 37 +0.578125 0.453125 128 128.0 10 10 36 +0.488281 0.398438 256 256.0 6 6 37 +0.443359 0.398438 512 512.0 10 10 46 +0.416992 0.390625 1024 1024.0 4 8 37 +0.395020 0.373047 2048 2048.0 9 2 39 +0.382568 0.370117 4096 4096.0 4 8 41 +0.374878 0.367188 8192 8192.0 1 1 40 + +finished run +number of examples per pass = 10000 +passes used = 1 +weighted example sum = 10000.000000 +weighted label sum = 0.000000 +average loss = 0.372700 +total feature number = 390046 +chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 100 --choices_lambda 20 --cb_type ips +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = text_highnoise_m.vw +num sources = 1 +average since example example current current current +loss last counter weight label predict features +0.000000 0.000000 1 1.0 1 1 34 +0.500000 1.000000 2 2.0 7 1 41 +0.750000 1.000000 4 4.0 7 9 36 +0.750000 0.750000 8 8.0 2 5 38 +0.750000 0.750000 16 16.0 9 9 40 +0.812500 0.875000 32 32.0 8 3 45 +0.703125 0.593750 64 64.0 6 6 37 +0.059662 0.020774 101 1123.0 8 8 31 +0.343957 0.500000 103 3169.0 7 7 
42 +0.291007 0.250000 107 7261.0 4 4 40 +0.136808 0.000000 115 15445.0 2 2 40 +0.195046 0.250000 131 31813.0 8 8 42 +0.333855 0.468750 163 64549.0 10 3 46 +0.456857 0.578125 227 130021.0 4 2 32 +0.498105 0.539062 355 260965.0 5 5 42 +0.512750 0.527344 611 522853.0 8 8 34 +0.463363 0.414062 1123 1046629.0 4 6 40 +0.542263 0.621094 2147 2094181.0 9 9 43 +0.562640 0.583008 4195 4189285.0 7 1 39 +0.484681 0.406738 8291 8379493.0 7 7 39 + +finished run +number of examples per pass = 10000 +passes used = 1 +weighted example sum = 10127800.000000 +weighted label sum = 0.000000 +average loss = 0.473638 +total feature number = 390046 + From 122c8a30c4e600a42f74cc92445c96ed18fdaa3f Mon Sep 17 00:00:00 2001 From: chicheng Date: Mon, 19 Feb 2018 16:18:48 -0500 Subject: [PATCH 018/127] before modifying cbify adf code --- vowpalwabbit/cbify.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 5dc8f0acbb7..0911adb2dd3 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -159,6 +159,7 @@ template void predict_or_learn(cbify& data, base_learner& base, example& ec) { bool is_supervised; + float old_weight; if (data.warm_start_period > 0) { @@ -249,11 +250,13 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //base.learn(ec); ec.pred = old_pred; + old_weight = ec.weight; + if (data.ind_bandit) { for (uint32_t i = 0; i < data.choices_lambda; i++) { - ec.weight = data.lambdas[i] / (1-data.lambdas[i]); + ec.weight = old_weight * data.lambdas[i] / (1-data.lambdas[i]); //ec.l.cb.costs[0].cost = 0; //cl.cost * data.lambdas[i] / (1-data.lambdas[i]); @@ -264,6 +267,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.a_s = ec.pred.a_s; ec.l.multi = ld; ec.pred.multiclass = action; + ec.weight = old_weight; } } From c01f8ccc89ae8d2151eb7bce28f304bfaca1044f Mon Sep 17 00:00:00 2001 From: chicheng Date: Mon, 19 Feb 2018 18:30:11 -0500 Subject: [PATCH 019/127] start modifying cbify adf code --- vowpalwabbit/cbify.cc | 145 +++++++++++++++++++++++++++--------------- 1 file changed, 92 insertions(+), 53 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 0911adb2dd3..d4852f959cd 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -155,6 +155,33 @@ uint32_t find_min(v_array arr) return argmin; } +void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) +{ + //IPS for approximating the cumulative costs for all lambdas + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + data.all->cost_sensitive->predict(ec, i); + if (ec.pred.multiclass == cl.action) + data.cumulative_costs[i] += cl.cost / cl.probability; + //cout< void predict_or_learn(cbify& data, base_learner& base, example& ec) { @@ -171,9 +198,6 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) uint32_t argmin; argmin = find_min(data.cumulative_costs); - //cout<cost_sensitive->predict(ec, argmin); - //auto old_pred = ec.pred; - //uint32_t chosen = ec.pred.multiclass-1; - //cout<Choose_Action(*data.generic_explorer, StringUtils::to_string(data.example_counter++), ec); @@ -232,23 +250,13 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) THROW("No action with non-zero probability found!"); cl.cost = loss(data, ld.label, cl.action); - //IPS for approximating the cumulative costs for all lambdas - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - //assert(0); - data.all->cost_sensitive->predict(ec, i); - //cout< void 
predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { + bool is_supervised; + float old_weight; + + if (data.warm_start_period > 0) + { + is_supervised = true; + data.warm_start_period--; + } + else + is_supervised = false; + + uint32_t argmin; + argmin = find_min(data.cumulative_costs); + //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; copy_example_to_adf(data, ec); - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - base.predict(data.adf_data.ecs[a]); - } - base.predict(*data.adf_data.empty_example); - // get output scores - auto& out_ec = data.adf_data.ecs[0]; - uint32_t idx = data.mwt_explorer->Choose_Action( - *data.generic_explorer, - StringUtils::to_string(data.example_counter++), out_ec) - 1; - - CB::cb_class cl; - cl.action = out_ec.pred.a_s[idx].action + 1; - cl.probability = out_ec.pred.a_s[idx].score; - - if(!cl.action) - THROW("No action with non-zero probability found!"); - cl.cost = loss(data, ld.label, cl.action); - - // add cb label to chosen action - auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; - lab.costs.push_back(cl); - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - base.learn(data.adf_data.ecs[a]); - } - base.learn(*data.adf_data.empty_example); - ec.pred.multiclass = cl.action; + + if (is_supervised) // Call the cost-sensitive learner directly + { + + } + else // call the bandit learner + { + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + base.predict(data.adf_data.ecs[a], argmin); + } + base.predict(*data.adf_data.empty_example, argmin); + // get output scores + auto& out_ec = data.adf_data.ecs[0]; + uint32_t idx = data.mwt_explorer->Choose_Action( + *data.generic_explorer, + StringUtils::to_string(data.example_counter++), out_ec) - 1; + + CB::cb_class cl; + cl.action = out_ec.pred.a_s[idx].action + 1; + cl.probability = out_ec.pred.a_s[idx].score; + + if(!cl.action) + THROW("No action with non-zero probability found!"); + cl.cost = loss(data, ld.label, cl.action); + + // accumulate the cumulative costs of lambdas + accumulate_costs_ips_adf(data, base, ec); + + + + // add cb label to chosen action + auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; + lab.costs.push_back(cl); + + + if (data.ind_bandit) + { + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + base.learn(data.adf_data.ecs[a], i); + } + base.learn(*data.adf_data.empty_example, i); + } + } + ec.pred.multiclass = cl.action; + } } void init_adf_data(cbify& data, const size_t num_actions) From 0d4d633efa5637ceece5499961699f547e6b5803 Mon Sep 17 00:00:00 2001 From: chicheng Date: Wed, 21 Feb 2018 14:41:44 -0500 Subject: [PATCH 020/127] unkwown segfault error --- vowpalwabbit/cbify.cc | 90 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 76 insertions(+), 14 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index d4852f959cd..9f663760f35 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -169,14 +169,36 @@ void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) } -void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl) +void accumulate_costs_ips_adf(cbify& data, CB::cb_class& cl) { + float best_score = FLT_MAX; + uint32_t best_action; + + //IPS for approximating the cumulative costs for all lambdas for (uint32_t i = 0; i < data.choices_lambda; i++) { + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + //data.all->cost_sensitive->predict(data.adf_data.ecs[a], i); + + 
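      // Importance-weighted (IPS) bookkeeping: sublearner i is charged
      // cl.cost / cl.probability whenever it would have chosen the logged
      // action.  Since the logged action was drawn with probability
      // cl.probability, this is an unbiased estimate of the expected cost of
      // sublearner i's policy, letting find_min pick the best lambda.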
//data.adf_data.empty_example->in_use = true; + //data.adf_data.empty_example->ft_offset = data.all->cost_sensitive->offset; + //data.all->cost_sensitive->predict(*data.adf_data.empty_example, i); + + if (data.adf_data.ecs[a].partial_prediction < best_score) + { + best_score = data.adf_data.ecs[a].partial_prediction; + best_action = a; + } + } + if (best_action == cl.action - 1) + data.cumulative_costs[i] += cl.cost / cl.probability; + //cout<(); + csl.costs.resize(data.num_actions); + csl.costs.end() = csl.costs.begin()+data.num_actions; + for (uint32_t j = 0; j < data.num_actions; j++) + { + csl.costs[j].class_index = j+1; + csl.costs[j].x = loss(data, ld.label, j+1); + } - } - else // call the bandit learner + ec.l.cs = csl; + */ + //predict + //data.all->cost_sensitive->predict(ec, argmin); + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + //data.all->cost_sensitive->predict(data.adf_data.ecs[a], argmin); + base.predict(data.adf_data.ecs[a], argmin); + } + //data.all->cost_sensitive->predict(*data.adf_data.empty_example, argmin); + base.predict(*data.adf_data.empty_example, argmin); + + + if (data.ind_supervised) + { + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + data.all->cost_sensitive->learn(data.adf_data.ecs[a], i); + } + data.all->cost_sensitive->learn(*data.adf_data.empty_example, i); + } + } + ec.l.multi = ld; + } + else// call the bandit learner { for (size_t a = 0; a < data.adf_data.num_actions; ++a) { @@ -324,27 +381,32 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) cl.cost = loss(data, ld.label, cl.action); // accumulate the cumulative costs of lambdas - accumulate_costs_ips_adf(data, base, ec); - - + accumulate_costs_ips_adf(data, cl); // add cb label to chosen action auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; lab.costs.push_back(cl); - if (data.ind_bandit) { - for (uint32_t i = 0; i < data.choices_lambda; i++) - { + //for (uint32_t i = 0; i < data.choices_lambda; i++) + //{ + //ec.weight = old_weight * data.lambdas[i] / (1-data.lambdas[i]); for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - base.learn(data.adf_data.ecs[a], i); + //old_weight = data.adf_data.ecs[a].weight; + //data.adf_data.ecs[a].weight = data.lambdas[i] / (1 - data.lambdas[i] ); + base.learn(data.adf_data.ecs[a]); + //data.adf_data.ecs[a].weight = old_weight; } - base.learn(*data.adf_data.empty_example, i); - } + + //old_weight = data.adf_data.empty_example->weight; + //data.adf_data.empty_example->weight = data.lambdas[i] / (1 - data.lambdas[i] ); + base.learn(*data.adf_data.empty_example); + //data.adf_data.empty_example->weight = old_weight; + //} } - ec.pred.multiclass = cl.action; + //ec.pred.multiclass = cl.action; } } From ded8f531be386329f6acf0fd05e81573697e1271 Mon Sep 17 00:00:00 2001 From: chicheng Date: Wed, 21 Feb 2018 17:47:16 -0500 Subject: [PATCH 021/127] everything good except for the cost sensitive learn part --- vowpalwabbit/cbify.cc | 115 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 109 insertions(+), 6 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index d4852f959cd..0c969a60c17 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -60,6 +60,7 @@ struct cbify size_t num_actions; bool ind_bandit; bool ind_supervised; + COST_SENSITIVE::label csl; }; @@ -169,15 +170,36 @@ void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) } -void accumulate_costs_ips_adf(cbify& data, example& ec, 
CB::cb_class& cl) +void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_learner& base) { + float best_score; + uint32_t best_action; + example* ecs = data.adf_data.ecs; + + //IPS for approximating the cumulative costs for all lambdas for (uint32_t i = 0; i < data.choices_lambda; i++) { + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + base.predict(ecs[a], i); + + base.predict(*data.adf_data.empty_example, i); + if ( (a == 0) || (ecs[a].partial_prediction < best_score) ) + { + best_action = a + 1; + best_score = ecs[a].partial_prediction; + } + } + + if (best_action == cl.action) + data.cumulative_costs[i] += cl.cost / cl.probability; + cout<(); + COST_SENSITIVE::label& csl = data.csl; csl.costs.resize(data.num_actions); csl.costs.end() = csl.costs.begin()+data.num_actions; for (uint32_t j = 0; j < data.num_actions; j++) @@ -251,7 +273,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) cl.cost = loss(data, ld.label, cl.action); // accumulate the cumulative costs of lambdas - accumulate_costs_ips(data, base, ec); + accumulate_costs_ips(data, ec, cl); //Create a new cb label data.cb_label.costs.push_back(cl); @@ -281,6 +303,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { bool is_supervised; float old_weight; + uint32_t argmin; if (data.warm_start_period > 0) { @@ -290,7 +313,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) else is_supervised = false; - uint32_t argmin; + argmin = find_min(data.cumulative_costs); //Store the multiclass input label @@ -300,7 +323,83 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (is_supervised) // Call the cost-sensitive learner directly { + float best_score; + uint32_t best_action; + example* ecs = data.adf_data.ecs; + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + base.predict(ecs[a], argmin); + } + base.predict(*data.adf_data.empty_example, argmin); + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + if ( (a == 0) || (ecs[a].partial_prediction < best_score) ) + { + best_action = a + 1; + best_score = ecs[a].partial_prediction; + } + } + + + //data.all->cost_sensitive->predict(ec,argmin); + + //generate cost-sensitive label + COST_SENSITIVE::label& csl = data.csl; + csl.costs.resize(data.num_actions); + csl.costs.end() = csl.costs.begin()+data.num_actions; + for (uint32_t j = 0; j < data.num_actions; j++) + { + csl.costs[j].class_index = j+1; + csl.costs[j].x = loss(data, ld.label, j+1); + } + + ec.l.cs = csl; + + + /* + if (data.ind_supervised) + { + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + COST_SENSITIVE::label& lab = ecs[a].l.cs; + lab.costs.erase(); + lab.costs.resize(1); + + lab.costs[0].class_index = a+1; + lab.costs[0].x = loss(data, ld.label, a+1); + + ecs[a].weight = 1; + //base.learn(ecs[a], i); + data.all->cost_sensitive->learn(ecs[a],i); + } + //base.learn(*data.adf_data.empty_example, i); + COST_SENSITIVE::label& lab = data.adf_data.empty_example->l.cs; + lab.costs.erase(); + COST_SENSITIVE::wclass wc = { 0., 0, 0., 0. 
}; + lab.costs.push_back(wc); + + data.all->cost_sensitive->learn(*data.adf_data.empty_example,i); + } + } + + + + if (data.ind_supervised) + { + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + data.all->cost_sensitive->learn(ec,i); + } + } + */ + + ec.pred.multiclass = best_action; + ec.l.multi = ld; } else // call the bandit learner { @@ -324,7 +423,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) cl.cost = loss(data, ld.label, cl.action); // accumulate the cumulative costs of lambdas - accumulate_costs_ips_adf(data, base, ec); + accumulate_costs_ips_adf(data, ec, cl, base); @@ -339,7 +438,10 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) { + old_weight = data.adf_data.ecs[a].weight; + data.adf_data.ecs[a].weight = data.lambdas[i] / (1- data.lambdas[i]); base.learn(data.adf_data.ecs[a], i); + data.adf_data.ecs[a].weight = old_weight; } base.learn(*data.adf_data.empty_example, i); } @@ -411,6 +513,7 @@ base_learner* cbify_setup(vw& all) //data.probs = v_init(); data.generic_explorer = new GenericExplorer(*data.scorer, (u32)num_actions); data.all = &all; + data.csl = calloc_or_throw(); //cout<() : 0; From 68d860068813546434e9fb2405f3e5b849308fb5 Mon Sep 17 00:00:00 2001 From: chicheng Date: Thu, 22 Feb 2018 11:47:41 -0500 Subject: [PATCH 022/127] . --- vowpalwabbit/cbify.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 0c969a60c17..358547ff441 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -196,9 +196,9 @@ void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_l if (best_action == cl.action) data.cumulative_costs[i] += cl.cost / cl.probability; - cout< Date: Thu, 22 Feb 2018 18:32:40 -0500 Subject: [PATCH 023/127] . 
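Flesh out the supervised path of predict_or_learn_adf: the chosen sublearner
scores every action example, the argmin of partial_prediction becomes the
multiclass prediction, and each action example is fed to the cost-sensitive
learner with a single-entry label.  Sketch of the label construction used
below (wclass fields are {x, class_index, partial_prediction, wap_value}):

    COST_SENSITIVE::wclass wc = {0, 0, 0, 0};
    csl.costs.erase();
    csl.costs.push_back(wc);          // one reusable (cost, class) slot
    for (size_t a = 0; a < data.adf_data.num_actions; ++a)
    {
      csl.costs[0].class_index = a + 1;
      csl.costs[0].x = loss(data, ld.label, a + 1);  // loss0 if correct, else loss1
      ecs[a].l.cs = csl;
      ecs[a].weight = 1;
      data.all->cost_sensitive->learn(ecs[a], i);
    }
    // The empty example that terminates the multiline block gets a zero cost.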
--- vowpalwabbit/cbify.cc | 54 ++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 358547ff441..8fb9ac55e8a 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -304,6 +304,9 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) bool is_supervised; float old_weight; uint32_t argmin; + uint32_t best_action; + float best_score; + example* ecs = data.adf_data.ecs; if (data.warm_start_period > 0) { @@ -323,10 +326,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (is_supervised) // Call the cost-sensitive learner directly { - float best_score; - uint32_t best_action; - example* ecs = data.adf_data.ecs; - + for (size_t a = 0; a < data.adf_data.num_actions; ++a) { base.predict(ecs[a], argmin); @@ -341,13 +341,15 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) best_score = ecs[a].partial_prediction; } } - + + //cout<cost_sensitive->predict(ec,argmin); //generate cost-sensitive label - COST_SENSITIVE::label& csl = data.csl; + /*COST_SENSITIVE::label& csl = data.csl; csl.costs.resize(data.num_actions); csl.costs.end() = csl.costs.begin()+data.num_actions; for (uint32_t j = 0; j < data.num_actions; j++) @@ -357,38 +359,41 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) } ec.l.cs = csl; + */ + + COST_SENSITIVE::label& csl = data.csl; + COST_SENSITIVE::wclass wc = {0, 0, 0, 0}; + + csl.costs.erase(); + csl.costs.push_back(wc); - /* if (data.ind_supervised) { for (uint32_t i = 0; i < data.choices_lambda; i++) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - COST_SENSITIVE::label& lab = ecs[a].l.cs; - lab.costs.erase(); - lab.costs.resize(1); + csl.costs[0].class_index = a+1; + csl.costs[0].x = loss(data, ld.label, a+1); - lab.costs[0].class_index = a+1; - lab.costs[0].x = loss(data, ld.label, a+1); + ecs[a].l.cs = csl; ecs[a].weight = 1; //base.learn(ecs[a], i); data.all->cost_sensitive->learn(ecs[a],i); } //base.learn(*data.adf_data.empty_example, i); - COST_SENSITIVE::label& lab = data.adf_data.empty_example->l.cs; - lab.costs.erase(); - COST_SENSITIVE::wclass wc = { 0., 0, 0., 0. 
}; - lab.costs.push_back(wc); - + + csl.costs[0].class_index = 0; + csl.costs[0].x = 0; + data.adf_data.empty_example->l.cs = csl; data.all->cost_sensitive->learn(*data.adf_data.empty_example,i); } } - + /* if (data.ind_supervised) { for (uint32_t i = 0; i < data.choices_lambda; i++) @@ -405,9 +410,20 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - base.predict(data.adf_data.ecs[a], argmin); + base.predict(ecs[a], argmin); } base.predict(*data.adf_data.empty_example, argmin); + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + if ( (a == 0) || (ecs[a].partial_prediction < best_score) ) + { + best_action = a + 1; + best_score = ecs[a].partial_prediction; + } + } + + // get output scores auto& out_ec = data.adf_data.ecs[0]; uint32_t idx = data.mwt_explorer->Choose_Action( From 41127f8de1e1820dd31571a312114433f0f3d913 Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 23 Feb 2018 18:21:35 -0500 Subject: [PATCH 024/127] fixed the bug of empty example cost wrongly set --- vowpalwabbit/cbify.cc | 190 ++++++++++++++++----------------- vowpalwabbit/cost_sensitive.cc | 1 + vowpalwabbit/csoaa.cc | 13 ++- vowpalwabbit/example.h | 1 + 4 files changed, 107 insertions(+), 98 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 8fb9ac55e8a..176152c992a 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -60,7 +60,8 @@ struct cbify size_t num_actions; bool ind_bandit; bool ind_supervised; - COST_SENSITIVE::label csl; + COST_SENSITIVE::label* csls; + COST_SENSITIVE::label* csl_empty; }; @@ -92,6 +93,10 @@ void finish(cbify& data) delete_it(data.mwt_explorer); delete_it(data.recorder); data.a_s.delete_v(); + data.lambdas.delete_v(); + data.cumulative_costs.delete_v(); + free(data.csls); + if (data.use_adf) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) @@ -101,6 +106,8 @@ void finish(cbify& data) VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.empty_example); free(data.adf_data.ecs); free(data.adf_data.empty_example); + + free(data.csl_empty); } } @@ -156,6 +163,49 @@ uint32_t find_min(v_array arr) return argmin; } +uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) +{ + uint32_t best_action, best_action_dir; + float best_score; + + example* ecs = data.adf_data.ecs; + example* empty = data.adf_data.empty_example; + + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + base.predict(ecs[a], i); + //data.all->cost_sensitive->predict(ecs[a], argmin); + } + base.predict(*empty, i); + //data.all->cost_sensitive->predict(*empty, argmin); + + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + if ( (a == 0) || (ecs[a].partial_prediction < best_score) ) + { + best_action = a + 1; + best_score = ecs[a].partial_prediction; + } + } + + /*for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + if ( ecs[a].pred.multiclass != 0 ) + best_action_dir = ecs[a].pred.multiclass; + } + + cout< 0) { @@ -218,7 +255,6 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) else is_supervised = false; - uint32_t argmin; argmin = find_min(data.cumulative_costs); //Store the multiclass input label @@ -229,7 +265,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) if (is_supervised) // Call the cost-sensitive learner directly { //generate cost-sensitive label - COST_SENSITIVE::label& csl = data.csl; + COST_SENSITIVE::label& csl = *data.csls; csl.costs.resize(data.num_actions); 
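// Warm-start label expansion (descriptive note): the loop below turns the
// multiclass label ld.label into a dense cost-sensitive label with one
// entry per class, costing loss0 for the correct class and loss1 for every
// other class (the --loss0/--loss1 options), so the same warm-start example
// can be replayed to the cost-sensitive oracle of each sub-learner.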
csl.costs.end() = csl.costs.begin()+data.num_actions; for (uint32_t j = 0; j < data.num_actions; j++) @@ -298,6 +334,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) } } + template void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { @@ -305,8 +342,8 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) float old_weight; uint32_t argmin; uint32_t best_action; - float best_score; example* ecs = data.adf_data.ecs; + example* empty_example = data.adf_data.empty_example; if (data.warm_start_period > 0) { @@ -326,47 +363,14 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (is_supervised) // Call the cost-sensitive learner directly { + best_action = predict_sublearner(data, base, argmin); - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - base.predict(ecs[a], argmin); - } - base.predict(*data.adf_data.empty_example, argmin); - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - if ( (a == 0) || (ecs[a].partial_prediction < best_score) ) - { - best_action = a + 1; - best_score = ecs[a].partial_prediction; - } - } - //cout<cost_sensitive->predict(ec,argmin); - //generate cost-sensitive label - /*COST_SENSITIVE::label& csl = data.csl; - csl.costs.resize(data.num_actions); - csl.costs.end() = csl.costs.begin()+data.num_actions; - for (uint32_t j = 0; j < data.num_actions; j++) - { - csl.costs[j].class_index = j+1; - csl.costs[j].x = loss(data, ld.label, j+1); - } - - ec.l.cs = csl; - */ - COST_SENSITIVE::label& csl = data.csl; - COST_SENSITIVE::wclass wc = {0, 0, 0, 0}; - - csl.costs.erase(); - csl.costs.push_back(wc); - + COST_SENSITIVE::label* csls = data.csls; if (data.ind_supervised) { @@ -374,35 +378,18 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - csl.costs[0].class_index = a+1; - csl.costs[0].x = loss(data, ld.label, a+1); - - ecs[a].l.cs = csl; + csls[a].costs[0].class_index = a+1; + csls[a].costs[0].x = loss(data, ld.label, a+1); - ecs[a].weight = 1; - //base.learn(ecs[a], i); + ecs[a].l.cs = csls[a]; + ecs[a].weight *= 1; + // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; data.all->cost_sensitive->learn(ecs[a],i); } - //base.learn(*data.adf_data.empty_example, i); - - csl.costs[0].class_index = 0; - csl.costs[0].x = 0; - data.adf_data.empty_example->l.cs = csl; - data.all->cost_sensitive->learn(*data.adf_data.empty_example,i); - } - } - - - /* - if (data.ind_supervised) - { - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - data.all->cost_sensitive->learn(ec,i); + empty_example->l.cs = *data.csl_empty; + data.all->cost_sensitive->learn(*empty_example,i); } } - */ - ec.pred.multiclass = best_action; ec.l.multi = ld; } @@ -412,17 +399,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { base.predict(ecs[a], argmin); } - base.predict(*data.adf_data.empty_example, argmin); - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - if ( (a == 0) || (ecs[a].partial_prediction < best_score) ) - { - best_action = a + 1; - best_score = ecs[a].partial_prediction; - } - } - + base.predict(*empty_example, argmin); // get output scores auto& out_ec = data.adf_data.ecs[0]; @@ -441,8 +418,6 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) // accumulate the cumulative costs of lambdas accumulate_costs_ips_adf(data, ec, cl, base); - - // add cb label to chosen action auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; 
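// Weighting note for the learn loop below: every sub-learner i replays this
// cb-labeled example with its weight scaled by lambdas[i] / (1 - lambdas[i]).
// Warm-start examples were taken at weight 1, so up to a common rescaling
// sub-learner i is effectively minimizing
//   (1 - lambda_i) * (warm-start loss) + lambda_i * (bandit loss),
// the mixture that the cumulative IPS costs then choose among.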
lab.costs.push_back(cl); @@ -454,12 +429,12 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - old_weight = data.adf_data.ecs[a].weight; - data.adf_data.ecs[a].weight = data.lambdas[i] / (1- data.lambdas[i]); - base.learn(data.adf_data.ecs[a], i); - data.adf_data.ecs[a].weight = old_weight; + old_weight = ecs[a].weight; + ecs[a].weight *= data.lambdas[i] / (1- data.lambdas[i]); + base.learn(ecs[a], i); + ecs[a].weight = old_weight; } - base.learn(*data.adf_data.empty_example, i); + base.learn(*empty_example, i); } } ec.pred.multiclass = cl.action; @@ -480,6 +455,21 @@ void init_adf_data(cbify& data, const size_t num_actions) } CB::cb_label.default_label(&adf_data.empty_example->l.cb); adf_data.empty_example->in_use = true; + + data.csls = calloc_or_throw(num_actions); + data.csl_empty = calloc_or_throw(1); + + data.csl_empty->costs.erase(); + data.csl_empty->costs.push_back({0, 0, 0, 0}); + data.csl_empty->costs[0].class_index = 0; + data.csl_empty->costs[0].x = FLT_MAX; + + for (size_t a = 0; a < num_actions; ++a) + { + data.csls[a].costs.erase(); + data.csls[a].costs.push_back({0, 0, 0, 0}); + } + } void generate_lambdas(v_array& lambdas, size_t lambda_size) @@ -529,7 +519,7 @@ base_learner* cbify_setup(vw& all) //data.probs = v_init(); data.generic_explorer = new GenericExplorer(*data.scorer, (u32)num_actions); data.all = &all; - data.csl = calloc_or_throw(); + //cout<() : 0; @@ -542,11 +532,17 @@ base_learner* cbify_setup(vw& all) data.cumulative_costs.push_back(0.); data.num_actions = num_actions; + if (data.use_adf) { init_adf_data(data, num_actions); } + else + { + data.csls = calloc_or_throw(1); + } + if (count(all.args.begin(), all.args.end(),"--cb_explore") == 0 && !data.use_adf) { diff --git a/vowpalwabbit/cost_sensitive.cc b/vowpalwabbit/cost_sensitive.cc index 28414218235..f8c7863fc2a 100644 --- a/vowpalwabbit/cost_sensitive.cc +++ b/vowpalwabbit/cost_sensitive.cc @@ -302,6 +302,7 @@ void output_example(vw& all, example& ec) bool example_is_test(example& ec) { v_array costs = ec.l.cs.costs; + //cout << "is_test " << costs.size() << endl; if (costs.size() == 0) return true; for (size_t j=0; j void do_actual_learning(ldf& data, base_learner& base) { + // cout << "called do_actual_learning" << endl; if (data.ec_seq.size() <= 0) return; // nothing to do /////////////////////// handle label definitions if (ec_seq_is_label_definition(data.ec_seq)) { + // cout << "length is " << data.ec_seq.size() << endl; for (size_t i=0; ifeature_space[data.ec_seq[i]->indices[0]]; @@ -794,7 +798,12 @@ void predict_or_learn(ldf& data, base_learner& base, example &ec) data.ft_offset = ec.ft_offset; bool is_test_ec = COST_SENSITIVE::example_is_test(ec); bool need_to_break = data.ec_seq.size() >= all->p->ring_size - 2; - + /*if (is_learn) + cout << "is_learn "; + else + cout << "predict ";*/ + // cout << "data.ec_seq.size() = " << data.ec_seq.size() << " is_test_ec = " << is_test_ec << endl; + // singleline is used by library/ezexample_predict if (data.is_singleline) { @@ -813,6 +822,7 @@ void predict_or_learn(ldf& data, base_learner& base, example &ec) } else if ((example_is_newline(ec) && is_test_ec) || need_to_break) { + // cout << "newline" << endl; if (need_to_break && data.first_pass) data.all->trace_message << "warning: length of sequence at " << ec.example_counter << " exceeds ring size; breaking apart" << endl; do_actual_learning(data, base); @@ -820,6 +830,7 @@ void predict_or_learn(ldf& data, base_learner& 
base, example &ec) } else { + // cout << "not newline" << endl; if (data.need_to_clear) // should only happen if we're NOT driving { data.ec_seq.erase(); diff --git a/vowpalwabbit/example.h b/vowpalwabbit/example.h index 8641653bbda..1b8e8a1457b 100644 --- a/vowpalwabbit/example.h +++ b/vowpalwabbit/example.h @@ -131,6 +131,7 @@ void free_flatten_example(flat_example* fec); inline int example_is_newline(example& ec) { // if only index is constant namespace or no index + // std::cout << "call e_i_n " << ec.indices.size() << " " << ec.tag.size() << std::endl; if (ec.tag.size() > 0) return false; return ((ec.indices.size() == 0) || ((ec.indices.size() == 1) && From 94c8103575a5ff4763323e07c61c8bc744bea080 Mon Sep 17 00:00:00 2001 From: chicheng Date: Mon, 26 Feb 2018 17:56:02 -0500 Subject: [PATCH 025/127] fixed the bug of empty example cost wrongly set --- vowpalwabbit/cbify.cc | 45 +++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 176152c992a..da43bb885a2 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -62,6 +62,7 @@ struct cbify bool ind_supervised; COST_SENSITIVE::label* csls; COST_SENSITIVE::label* csl_empty; + bool warm_start; }; @@ -99,11 +100,23 @@ void finish(cbify& data) if (data.use_adf) { - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - VW::dealloc_example(CB::cb_label.delete_label, data.adf_data.ecs[a]); - } - VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.empty_example); + if (data.warm_start) + { + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, data.adf_data.ecs[a]); + } + VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, *data.adf_data.empty_example); + } + else + { + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + VW::dealloc_example(CB::cb_label.delete_label, data.adf_data.ecs[a]); + } + VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.empty_example); + } + free(data.adf_data.ecs); free(data.adf_data.empty_example); @@ -243,17 +256,16 @@ void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_l template void predict_or_learn(cbify& data, base_learner& base, example& ec) { - bool is_supervised; float old_weight; uint32_t argmin; if (data.warm_start_period > 0) { - is_supervised = true; + data.warm_start = true; data.warm_start_period--; } else - is_supervised = false; + data.warm_start = false; argmin = find_min(data.cumulative_costs); @@ -262,7 +274,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //cout< void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { - bool is_supervised; float old_weight; uint32_t argmin; uint32_t best_action; @@ -347,11 +358,11 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.warm_start_period > 0) { - is_supervised = true; + data.warm_start = true; data.warm_start_period--; } else - is_supervised = false; + data.warm_start = false; argmin = find_min(data.cumulative_costs); @@ -361,8 +372,14 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) copy_example_to_adf(data, ec); - if (is_supervised) // Call the cost-sensitive learner directly + if (data.warm_start) // Call the cost-sensitive learner directly { + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + ecs[a].l.cs.costs.delete_v(); + } + + best_action = predict_sublearner(data, base, argmin); 
//cout<weight << endl; uint64_t old_offset = ec->ft_offset; ec->ft_offset = offset; if (is_learn) @@ -139,6 +140,7 @@ void predict_or_learn_first(cb_explore_adf& data, base_learner& base, v_array void predict_or_learn_greedy(cb_explore_adf& data, base_learner& base, v_array& examples) { + //cout << "in p_or_l_g" << endl; //Explore uniform random an epsilon fraction of the time. if (is_learn && test_adf_sequence(data.ec_seq) != nullptr) multiline_learn_or_predict(base, examples, data.offset); diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index da43bb885a2..f8ab7062ade 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -63,6 +63,7 @@ struct cbify COST_SENSITIVE::label* csls; COST_SENSITIVE::label* csl_empty; bool warm_start; + float* old_weights; }; @@ -121,6 +122,7 @@ void finish(cbify& data) free(data.adf_data.empty_example); free(data.csl_empty); + free(data.old_weights); } } @@ -178,9 +180,6 @@ uint32_t find_min(v_array arr) uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) { - uint32_t best_action, best_action_dir; - float best_score; - example* ecs = data.adf_data.ecs; example* empty = data.adf_data.empty_example; @@ -194,26 +193,19 @@ uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) //data.all->cost_sensitive->predict(*empty, argmin); - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - if ( (a == 0) || (ecs[a].partial_prediction < best_score) ) - { - best_action = a + 1; - best_score = ecs[a].partial_prediction; - } - } - - /*for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - if ( ecs[a].pred.multiclass != 0 ) - best_action_dir = ecs[a].pred.multiclass; - } - - cout< void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { - float old_weight; uint32_t argmin; uint32_t best_action; example* ecs = data.adf_data.ecs; @@ -374,12 +365,6 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.warm_start) // Call the cost-sensitive learner directly { - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - ecs[a].l.cs.costs.delete_v(); - } - - best_action = predict_sublearner(data, base, argmin); //cout<weight; + //empty_example->weight = data.lambdas[i] / (1- data.lambdas[i]); + //empty_example->weight = old_weight; + //cout << "about to finish in cbify" << endl; + //cout << "finished in cbify" << endl; } } + ec.pred.multiclass = cl.action; } } @@ -476,6 +470,8 @@ void init_adf_data(cbify& data, const size_t num_actions) data.csls = calloc_or_throw(num_actions); data.csl_empty = calloc_or_throw(1); + data.old_weights = calloc_or_throw(num_actions); + data.csl_empty->costs.erase(); data.csl_empty->costs.push_back({0, 0, 0, 0}); data.csl_empty->costs[0].class_index = 0; diff --git a/vowpalwabbit/csoaa.cc b/vowpalwabbit/csoaa.cc index 170dd0de0a6..0bc18af9e02 100644 --- a/vowpalwabbit/csoaa.cc +++ b/vowpalwabbit/csoaa.cc @@ -371,10 +371,12 @@ void do_actual_learning_oaa(ldf& data, base_learner& base, size_t start_K) simple_label.initial = 0.; float old_weight = ec->weight; + //cout << "weight = " << ec->weight << endl; if (!data.treat_as_classifier) // treat like regression simple_label.label = costs[0].x; else // treat like classification { + //cout << "here" << endl; if (costs[0].x <= min_cost) { simple_label.label = -1.; diff --git a/vowpalwabbit/gd.cc b/vowpalwabbit/gd.cc index 0fb5a552094..9d996b30f53 100644 --- a/vowpalwabbit/gd.cc +++ b/vowpalwabbit/gd.cc @@ -646,6 +646,7 @@ void learn(gd& g, base_learner& base, example& ec) 
assert(ec.l.simple.label != FLT_MAX); assert(ec.weight > 0.); g.predict(g,base,ec); + //cout << "iw = " << ec.weight << endl; update(g,base,ec); } diff --git a/vowpalwabbit/gen_cs_example.cc b/vowpalwabbit/gen_cs_example.cc index 363ce97925b..14ce7d43748 100644 --- a/vowpalwabbit/gen_cs_example.cc +++ b/vowpalwabbit/gen_cs_example.cc @@ -49,7 +49,7 @@ void gen_cs_example_ips(v_array examples, COST_SENSITIVE::label& cs_la for (uint32_t i = 0; i < examples.size()-1; i++) { CB::label ld = examples[i]->l.cb; - + //std::cout << "example weight = " << examples[i]->weight << std::endl; COST_SENSITIVE::wclass wc = {0.,i,0.,0.}; if (shared && i > 0) wc.class_index = (uint32_t)i-1; From 46d91c0e6bf9b76d82d22cca8a39332d0a297c82 Mon Sep 17 00:00:00 2001 From: chicheng Date: Wed, 28 Feb 2018 16:22:44 -0500 Subject: [PATCH 027/127] fixed memory leak bug --- vowpalwabbit/cbify.cc | 72 ++++++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 31 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index f8ab7062ade..06aee399a81 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -62,6 +62,8 @@ struct cbify bool ind_supervised; COST_SENSITIVE::label* csls; COST_SENSITIVE::label* csl_empty; + CB::label* cbls; + CB::label* cbl_empty; bool warm_start; float* old_weights; @@ -97,33 +99,36 @@ void finish(cbify& data) data.a_s.delete_v(); data.lambdas.delete_v(); data.cumulative_costs.delete_v(); - free(data.csls); + if (data.use_adf) { - if (data.warm_start) - { - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, data.adf_data.ecs[a]); - } - VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, *data.adf_data.empty_example); - } - else - { - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - VW::dealloc_example(CB::cb_label.delete_label, data.adf_data.ecs[a]); - } - VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.empty_example); - } + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + VW::dealloc_example(CB::cb_label.delete_label, data.adf_data.ecs[a]); + data.adf_data.ecs[a].pred.a_s.delete_v(); + } + VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.empty_example); + data.adf_data.empty_example->pred.a_s.delete_v(); free(data.adf_data.ecs); free(data.adf_data.empty_example); + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.csls[a].costs.delete_v(); + + data.csl_empty->costs.delete_v(); + free(data.csl_empty); + free(data.cbl_empty); + free(data.old_weights); + free(data.cbls); + } + free(data.csls); + + } void copy_example_to_adf(cbify& data, example& ec) @@ -344,7 +349,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { uint32_t argmin; uint32_t best_action; - example* ecs = data.adf_data.ecs; + example* ecs = data.adf_data.ecs; example* empty_example = data.adf_data.empty_example; if (data.warm_start_period > 0) @@ -367,12 +372,16 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { best_action = predict_sublearner(data, base, argmin); - //cout<cost_sensitive->predict(ec,argmin); //generate cost-sensitive label + // ecs[a].weight *= 1; + // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; COST_SENSITIVE::label* csls = data.csls; + COST_SENSITIVE::label* csl_empty = data.csl_empty; + CB::label* cbls = data.cbls; + CB::label* cbl_empty = data.cbl_empty; if (data.ind_supervised) { @@ -383,13 +392,17 @@ void predict_or_learn_adf(cbify& data, base_learner& base, 
example& ec) csls[a].costs[0].class_index = a+1; csls[a].costs[0].x = loss(data, ld.label, a+1); + cbls[a] = ecs[a].l.cb; ecs[a].l.cs = csls[a]; - ecs[a].weight *= 1; - // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; data.all->cost_sensitive->learn(ecs[a],i); } - empty_example->l.cs = *data.csl_empty; + *cbl_empty = empty_example->l.cb; + empty_example->l.cs = *csl_empty; data.all->cost_sensitive->learn(*empty_example,i); + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + ecs[a].l.cb = cbls[a]; + empty_example->l.cb = *cbl_empty; } } ec.pred.multiclass = best_action; @@ -439,12 +452,6 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) for (size_t a = 0; a < data.adf_data.num_actions; ++a) ecs[a].weight = data.old_weights[a]; - - //old_weight = empty_example->weight; - //empty_example->weight = data.lambdas[i] / (1- data.lambdas[i]); - //empty_example->weight = old_weight; - //cout << "about to finish in cbify" << endl; - //cout << "finished in cbify" << endl; } } @@ -466,20 +473,23 @@ void init_adf_data(cbify& data, const size_t num_actions) } CB::cb_label.default_label(&adf_data.empty_example->l.cb); adf_data.empty_example->in_use = true; + adf_data.empty_example->pred.a_s = v_init(); + data.csls = calloc_or_throw(num_actions); data.csl_empty = calloc_or_throw(1); + data.cbls = calloc_or_throw(num_actions); + data.cbl_empty = calloc_or_throw(1); - data.old_weights = calloc_or_throw(num_actions); - data.csl_empty->costs.erase(); + data.old_weights = calloc_or_throw(num_actions); + data.csl_empty->costs.push_back({0, 0, 0, 0}); data.csl_empty->costs[0].class_index = 0; data.csl_empty->costs[0].x = FLT_MAX; for (size_t a = 0; a < num_actions; ++a) { - data.csls[a].costs.erase(); data.csls[a].costs.push_back({0, 0, 0, 0}); } From fad3955543240e2be46a030ae5b50f72a02b50e4 Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 2 Mar 2018 14:55:48 -0500 Subject: [PATCH 028/127] start changing the sample size paramters --- vowpalwabbit/cb_adf.cc | 7 ++++++- vowpalwabbit/cbify.cc | 9 +++++++-- vowpalwabbit/gen_cs_example.h | 3 +++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index ad5f21ed2e4..91fce9bca4c 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc @@ -113,7 +113,9 @@ void learn_MTR(cb_adf& mydata, base_learner& base, v_array& examples) gen_cs_example_mtr(mydata.gen_cs, examples, mydata.cs_labels); uint32_t nf = (uint32_t)examples[mydata.gen_cs.mtr_example]->num_features; float old_weight = examples[mydata.gen_cs.mtr_example]->weight; - examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum); + + //adjust the importance weight to scale by a factor of 1/K (the last term) + examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / mydata.gen_cs.num_actions); GEN_CS::call_cs_ldf(base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset); examples[mydata.gen_cs.mtr_example]->num_features = nf; examples[mydata.gen_cs.mtr_example]->weight = old_weight; @@ -395,6 +397,9 @@ base_learner* cb_adf_setup(vw& all) ld.all = &all; + cb_to_cs_adf& c = ld.gen_cs; + c.num_actions = (uint32_t)(all.vm["cbify"].as()); + // number of weight vectors needed size_t problem_multiplier 
= 1;//default for IPS bool check_baseline_enabled = false; diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 06aee399a81..a959d9bef9b 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -261,8 +261,10 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.warm_start = true; data.warm_start_period--; } - else - data.warm_start = false; + else if (bandit_period > 0) + { + data.bandit = true; + } argmin = find_min(data.cumulative_costs); @@ -546,6 +548,9 @@ base_learner* cbify_setup(vw& all) //cout<() : 0; + data.bandit_period = vm.count("bandit") ? vm["bandit"].as() : UINT32_MAX; //ideally should be the size of the dataset + data.test_period = vm.count("test") ? vm["test"].as() : 0; + //cout<() : 1; diff --git a/vowpalwabbit/gen_cs_example.h b/vowpalwabbit/gen_cs_example.h index b634d04e148..84080c3bfaa 100644 --- a/vowpalwabbit/gen_cs_example.h +++ b/vowpalwabbit/gen_cs_example.h @@ -39,6 +39,9 @@ struct cb_to_cs_adf COST_SENSITIVE::label pred_scores; CB::cb_class known_cost; LEARNER::base_learner* scorer; + + //for scaling the weights of MTR + uint32_t num_actions; }; CB::cb_class* get_observed_cost(CB::label& ld); From 1351a316d371ee86e20ea52f99269cdd3a309ed4 Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 2 Mar 2018 17:15:47 -0500 Subject: [PATCH 029/127] adding the bandit period as an explicit option --- vowpalwabbit/cbify.cc | 55 ++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index a959d9bef9b..b302636e3af 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -55,6 +55,8 @@ struct cbify size_t choices_lambda; size_t warm_start_period; + size_t bandit_period; + v_array cumulative_costs; v_array lambdas; size_t num_actions; @@ -256,16 +258,6 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) float old_weight; uint32_t argmin; - if (data.warm_start_period > 0) - { - data.warm_start = true; - data.warm_start_period--; - } - else if (bandit_period > 0) - { - data.bandit = true; - } - argmin = find_min(data.cumulative_costs); //Store the multiclass input label @@ -273,8 +265,10 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //cout< 0) // Call the cost-sensitive learner directly { + data.warm_start_period--; + //generate cost-sensitive label COST_SENSITIVE::label& csl = *data.csls; csl.costs.resize(data.num_actions); @@ -299,9 +293,12 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) } } ec.l.multi = ld; + ec.weight = 0; } - else //Call the cb_explore algorithm. It returns a vector of probabilities for each action + else if (data.bandit_period > 0)//Call the cb_explore algorithm. 
It returns a vector of probabilities for each action { + data.bandit_period--; + data.cb_label.costs.erase(); ec.l.cb = data.cb_label; ec.pred.a_s = data.a_s; @@ -343,6 +340,16 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) ec.pred.multiclass = action; ec.weight = old_weight; } + else + { + //skipping + //base.predict(ec, argmin); + ec.pred.multiclass = 0; + ec.weight = 0; + + } + + } @@ -353,15 +360,6 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) uint32_t best_action; example* ecs = data.adf_data.ecs; example* empty_example = data.adf_data.empty_example; - - if (data.warm_start_period > 0) - { - data.warm_start = true; - data.warm_start_period--; - } - else - data.warm_start = false; - argmin = find_min(data.cumulative_costs); @@ -370,8 +368,10 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) copy_example_to_adf(data, ec); - if (data.warm_start) // Call the cost-sensitive learner directly + if (data.warm_start_period > 0) // Call the cost-sensitive learner directly { + data.warm_start_period--; + best_action = predict_sublearner(data, base, argmin); //data.all->cost_sensitive->predict(ec,argmin); @@ -410,8 +410,10 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) ec.pred.multiclass = best_action; ec.l.multi = ld; } - else // call the bandit learner + else if (data.bandit_period > 0) // call the bandit learner { + data.bandit_period--; + for (size_t a = 0; a < data.adf_data.num_actions; ++a) { base.predict(ecs[a], argmin); @@ -459,6 +461,11 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) ec.pred.multiclass = cl.action; } + else + { + ec.pred.multiclass = 0; + ec.weight = 0; + } } void init_adf_data(cbify& data, const size_t num_actions) @@ -522,6 +529,7 @@ base_learner* cbify_setup(vw& all) ("loss0", po::value(), "loss for correct label") ("loss1", po::value(), "loss for incorrect label") ("warm_start", po::value(), "number of training examples for fully-supervised warm start") + ("bandit", po::value(), "number of training examples for bandit processing") ("choices_lambda", po::value(), "numbers of lambdas importance weights to aggregate") ("no_supervised", "indicator of using supervised only") ("no_bandit", "indicator of using bandit only"); @@ -549,7 +557,6 @@ base_learner* cbify_setup(vw& all) //cout<() : 0; data.bandit_period = vm.count("bandit") ? vm["bandit"].as() : UINT32_MAX; //ideally should be the size of the dataset - data.test_period = vm.count("test") ? 
vm["test"].as() : 0; //cout<() : 1; From e7384bb63fafcc4b85ec62f204ec507d4f735804 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Tue, 6 Mar 2018 16:10:18 -0500 Subject: [PATCH 030/127] file reorg --- results.txt | 294 ----------------------------- data_gen.py => scripts/data_gen.py | 54 +++--- scripts/plot_warm_start.py | 141 ++++++++++++++ 3 files changed, 166 insertions(+), 323 deletions(-) delete mode 100644 results.txt rename data_gen.py => scripts/data_gen.py (69%) create mode 100644 scripts/plot_warm_start.py diff --git a/results.txt b/results.txt deleted file mode 100644 index 4c0daef948f..00000000000 --- a/results.txt +++ /dev/null @@ -1,294 +0,0 @@ - -chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_lownoise_m.vw --warm_start 15 --choices_lambda 10 --cb_type ips -Num weight bits = 18 -learning rate = 0.5 -initial_t = 0 -power_t = 0.5 -using no cache -Reading datafile = text_lownoise_m.vw -num sources = 1 -average since example example current current current -loss last counter weight label predict features -1.000000 1.000000 1 1.0 4 1 101 -1.000000 1.000000 2 2.0 10 4 101 -0.750000 0.500000 4 4.0 7 10 101 -0.625000 0.500000 8 8.0 8 8 101 -0.130435 0.026316 16 46.0 10 10 101 -0.629630 1.000000 18 108.0 2 10 101 -0.560345 0.500000 22 232.0 3 7 101 -0.529167 0.500000 30 480.0 9 8 101 -0.355533 0.187500 46 976.0 8 8 101 -0.365346 0.375000 78 1968.0 2 7 101 -0.480010 0.593750 142 3952.0 9 5 101 -0.517424 0.554688 270 7920.0 8 8 101 -0.496973 0.476562 526 15856.0 8 8 101 -0.472107 0.447266 1038 31728.0 2 9 101 -0.441124 0.410156 2062 63472.0 1 1 101 -0.348968 0.256836 4110 126960.0 8 1 101 -0.242348 0.135742 8206 253936.0 8 8 101 - -finished run -number of examples per pass = 10000 -passes used = 1 -weighted example sum = 309550.000000 -weighted label sum = 0.000000 -average loss = 0.209223 -total feature number = 1010000 -chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_lownoise_m.vw --warm_start 15 --choices_lambda 10 --cb_type ips --no_supervised -Num weight bits = 18 -learning rate = 0.5 -initial_t = 0 -power_t = 0.5 -using no cache -Reading datafile = text_lownoise_m.vw -num sources = 1 -average since example example current current current -loss last counter weight label predict features -1.000000 1.000000 1 1.0 4 1 101 -1.000000 1.000000 2 2.0 10 1 101 -1.000000 1.000000 4 4.0 7 1 101 -0.875000 0.750000 8 8.0 8 1 101 -0.978261 1.000000 16 46.0 10 1 101 -0.990741 1.000000 18 108.0 2 3 101 -0.995690 1.000000 22 232.0 3 7 101 -0.933333 0.875000 30 480.0 9 7 101 -0.871926 0.812500 46 976.0 8 8 101 -0.715955 0.562500 78 1968.0 2 2 101 -0.693826 0.671875 142 3952.0 9 3 101 -0.647601 0.601562 270 7920.0 8 8 101 -0.648020 0.648438 526 15856.0 8 8 101 -0.666793 0.685547 1038 31728.0 2 6 101 -0.622936 0.579102 2062 63472.0 1 1 101 -0.513603 0.404297 4110 126960.0 8 1 101 -0.413289 0.312988 8206 253936.0 8 8 101 - -finished run -number of examples per pass = 10000 -passes used = 1 -weighted example sum = 309550.000000 -weighted label sum = 0.000000 -average loss = 0.354960 -total feature number = 1010000 -chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_lownoise_m.vw --warm_start 15 --choices_lambda 10 --cb_type ips --no_bandit -Num weight bits = 18 -learning rate = 0.5 -initial_t = 0 -power_t = 0.5 -using no cache -Reading datafile = text_lownoise_m.vw -num sources = 1 -average since example example current current current -loss last counter 
weight label predict features -1.000000 1.000000 1 1.0 4 1 101 -1.000000 1.000000 2 2.0 10 4 101 -0.750000 0.500000 4 4.0 7 10 101 -0.625000 0.500000 8 8.0 8 8 101 -0.375000 0.125000 16 16.0 10 10 101 -0.437500 0.500000 32 32.0 8 8 101 -0.406250 0.375000 64 64.0 3 5 101 -0.476562 0.546875 128 128.0 3 5 101 -0.480469 0.484375 256 256.0 10 10 101 -0.443359 0.406250 512 512.0 2 10 101 -0.445312 0.447266 1024 1024.0 1 1 101 -0.438965 0.432617 2048 2048.0 9 5 101 -0.430176 0.421387 4096 4096.0 4 4 101 -0.423340 0.416504 8192 8192.0 10 10 101 - -finished run -number of examples per pass = 10000 -passes used = 1 -weighted example sum = 10000.000000 -weighted label sum = 0.000000 -average loss = 0.426300 -total feature number = 1010000 - -chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 40 --choices_lambda 20 --cb_type ips -Num weight bits = 18 -learning rate = 0.5 -initial_t = 0 -power_t = 0.5 -using no cache -Reading datafile = text_highnoise_m.vw -num sources = 1 -average since example example current current current -loss last counter weight label predict features -0.000000 0.000000 1 1.0 1 1 34 -0.500000 1.000000 2 2.0 7 1 41 -0.750000 1.000000 4 4.0 7 9 36 -0.750000 0.750000 8 8.0 2 5 38 -0.750000 0.750000 16 16.0 9 9 40 -0.812500 0.875000 32 32.0 8 3 45 -0.991533 0.997090 41 1063.0 1 2 39 -0.668060 0.500000 43 3109.0 5 5 33 -0.714623 0.750000 47 7201.0 7 7 35 -0.600455 0.500000 55 15385.0 10 10 42 -0.516455 0.437500 71 31753.0 9 9 32 -0.587418 0.656250 103 64489.0 7 3 42 -0.629966 0.671875 167 129961.0 6 6 41 -0.678446 0.726562 295 260905.0 2 6 37 -0.684938 0.691406 551 522793.0 6 8 42 -0.706747 0.728516 1063 1046569.0 8 9 43 -0.677090 0.647461 2087 2094121.0 2 2 37 -0.672040 0.666992 4135 4189225.0 1 1 45 -0.663167 0.654297 8231 8379433.0 10 5 33 - -finished run -number of examples per pass = 10000 -passes used = 1 -weighted example sum = 10189120.000000 -weighted label sum = 0.000000 -average loss = 0.663153 -total feature number = 390046 -chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 40 --choices_lambda 20 --cb_type ips --no_supervised -Num weight bits = 18 -learning rate = 0.5 -initial_t = 0 -power_t = 0.5 -using no cache -Reading datafile = text_highnoise_m.vw -num sources = 1 -average since example example current current current -loss last counter weight label predict features -0.000000 0.000000 1 1.0 1 1 34 -0.500000 1.000000 2 2.0 7 1 41 -0.750000 1.000000 4 4.0 7 1 36 -0.875000 1.000000 8 8.0 2 1 38 -0.937500 1.000000 16 16.0 9 1 40 -0.937500 0.937500 32 32.0 8 1 45 -0.035748 0.007759 41 1063.0 1 1 39 -0.670312 1.000000 43 3109.0 5 2 33 -0.715595 0.750000 47 7201.0 7 4 35 -0.866883 1.000000 55 15385.0 10 4 42 -0.903285 0.937500 71 31753.0 9 5 32 -0.888927 0.875000 103 64489.0 7 2 42 -0.874039 0.859375 167 129961.0 6 6 41 -0.913731 0.953125 295 260905.0 2 2 37 -0.876718 0.839844 551 522793.0 6 7 42 -0.864128 0.851562 1063 1046569.0 8 6 43 -0.851980 0.839844 2087 2094121.0 2 4 37 -0.848841 0.845703 4135 4189225.0 1 1 45 -0.837139 0.825439 8231 8379433.0 10 5 33 - -finished run -number of examples per pass = 10000 -passes used = 1 -weighted example sum = 10189120.000000 -weighted label sum = 0.000000 -average loss = 0.834037 -total feature number = 390046 - - - - - - - -chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 100 --choices_lambda 20 --cb_type ips 
--no_supervised -Num weight bits = 18 -learning rate = 0.5 -initial_t = 0 -power_t = 0.5 -using no cache -Reading datafile = text_highnoise_m.vw -num sources = 1 -average since example example current current current -loss last counter weight label predict features -0.000000 0.000000 1 1.0 1 1 34 -0.500000 1.000000 2 2.0 7 1 41 -0.750000 1.000000 4 4.0 7 1 36 -0.875000 1.000000 8 8.0 2 1 38 -0.937500 1.000000 16 16.0 9 1 40 -0.937500 0.937500 32 32.0 8 1 45 -0.921875 0.906250 64 64.0 6 1 37 -0.991095 0.995279 101 1123.0 8 1 31 -0.996844 1.000000 103 3169.0 7 3 42 -0.998623 1.000000 107 7261.0 4 1 40 -0.933118 0.875000 115 15445.0 2 4 40 -0.967529 1.000000 131 31813.0 8 10 42 -0.920603 0.875000 163 64549.0 10 9 46 -0.897640 0.875000 227 130021.0 4 2 32 -0.858839 0.820312 355 260965.0 5 7 42 -0.835629 0.812500 611 522853.0 8 7 34 -0.838716 0.841797 1123 1046629.0 4 9 40 -0.837326 0.835938 2147 2094181.0 9 4 43 -0.831015 0.824707 4195 4189285.0 7 1 39 -0.826152 0.821289 8291 8379493.0 7 5 39 - -finished run -number of examples per pass = 10000 -passes used = 1 -weighted example sum = 10127800.000000 -weighted label sum = 0.000000 -average loss = 0.825455 -total feature number = 390046 -chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 100 --choices_lambda 20 --cb_type ips --no_bandit -Num weight bits = 18 -learning rate = 0.5 -initial_t = 0 -power_t = 0.5 -using no cache -Reading datafile = text_highnoise_m.vw -num sources = 1 -average since example example current current current -loss last counter weight label predict features -0.000000 0.000000 1 1.0 1 1 34 -0.500000 1.000000 2 2.0 7 1 41 -0.750000 1.000000 4 4.0 7 9 36 -0.750000 0.750000 8 8.0 2 5 38 -0.750000 0.750000 16 16.0 9 9 40 -0.812500 0.875000 32 32.0 8 3 45 -0.703125 0.593750 64 64.0 6 6 37 -0.578125 0.453125 128 128.0 10 10 36 -0.488281 0.398438 256 256.0 6 6 37 -0.443359 0.398438 512 512.0 10 10 46 -0.416992 0.390625 1024 1024.0 4 8 37 -0.395020 0.373047 2048 2048.0 9 2 39 -0.382568 0.370117 4096 4096.0 4 8 41 -0.374878 0.367188 8192 8192.0 1 1 40 - -finished run -number of examples per pass = 10000 -passes used = 1 -weighted example sum = 10000.000000 -weighted label sum = 0.000000 -average loss = 0.372700 -total feature number = 390046 -chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 100 --choices_lambda 20 --cb_type ips -Num weight bits = 18 -learning rate = 0.5 -initial_t = 0 -power_t = 0.5 -using no cache -Reading datafile = text_highnoise_m.vw -num sources = 1 -average since example example current current current -loss last counter weight label predict features -0.000000 0.000000 1 1.0 1 1 34 -0.500000 1.000000 2 2.0 7 1 41 -0.750000 1.000000 4 4.0 7 9 36 -0.750000 0.750000 8 8.0 2 5 38 -0.750000 0.750000 16 16.0 9 9 40 -0.812500 0.875000 32 32.0 8 3 45 -0.703125 0.593750 64 64.0 6 6 37 -0.059662 0.020774 101 1123.0 8 8 31 -0.343957 0.500000 103 3169.0 7 7 42 -0.291007 0.250000 107 7261.0 4 4 40 -0.136808 0.000000 115 15445.0 2 2 40 -0.195046 0.250000 131 31813.0 8 8 42 -0.333855 0.468750 163 64549.0 10 3 46 -0.456857 0.578125 227 130021.0 4 2 32 -0.498105 0.539062 355 260965.0 5 5 42 -0.512750 0.527344 611 522853.0 8 8 34 -0.463363 0.414062 1123 1046629.0 4 6 40 -0.542263 0.621094 2147 2094181.0 9 9 43 -0.562640 0.583008 4195 4189285.0 7 1 39 -0.484681 0.406738 8291 8379493.0 7 7 39 - -finished run -number of examples per pass = 10000 -passes used = 1 -weighted example 
sum = 10127800.000000 -weighted label sum = 0.000000 -average loss = 0.473638 -total feature number = 390046 - diff --git a/data_gen.py b/scripts/data_gen.py similarity index 69% rename from data_gen.py rename to scripts/data_gen.py index f1c15ae7716..41bdee73c8f 100644 --- a/data_gen.py +++ b/scripts/data_gen.py @@ -3,11 +3,9 @@ classes = 10 m = 100 +kwperclass = 20 def gen_keyword(): - - kwperclass = 20 - keyword = np.zeros((classes, m)) for i in range(classes): @@ -21,44 +19,26 @@ def gen_keyword(): def classify(classifier, example): - result = classifier.dot(example) - return np.argmax(result) - - -if __name__ == '__main__': - - - filename = "text_lownoise" +def gen_datasets(filename, keyword, num_samples, fprob): f = open(filename+".vw", "w") g = open(filename+"_m.vw", "w") - keyword = gen_keyword() - - - samples = 10000 - fprob = 0 - - cs = False - - for i in range(samples): + for i in range(num_samples): c = random.randint(0, classes-1) #generate a pair of datasets (one is cost-sensitive, the other is multiclass) - for l in range(classes): f.write(str(l+1)+':') cost = 1 if l == c: cost = 0 f.write(str(cost)+' ') - - g.write(str(c+1)) - + g.write(str(c+1)) f.write(' | ') g.write(' | ') @@ -70,8 +50,8 @@ def classify(classifier, example): if flip: vec[j] = 2 * (1-keyword[c][j]) - 1 else: - vec[j] = 2 * keyword[c][j] - 1 - + vec[j] = 2 * keyword[c][j] - 1 + for j in range(m): f.write('w'+str(j)+':') f.write(str(vec[j])+' ') @@ -79,14 +59,30 @@ def classify(classifier, example): g.write(str(vec[j])+' ') #print 'Is the prediction equal to the class label? ', classify(keyword, vec) == c - f.write('\n') g.write('\n') f.close() g.close() - - +if __name__ == '__main__': + + keyword = gen_keyword() + # Remember to generate a pair of datasets at the same time + # so that the class-dependent feature is retained + + + num_samples = 10000 + fprob = 0.1 + filename = "source1"+'_'+str(fprob) + + gen_datasets(filename, keyword, num_samples, fprob) + + + num_samples = 10000 + fprob = 0.1 + filename = "source2"+'_'+str(fprob) + + gen_datasets(filename, keyword, num_samples, fprob) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py new file mode 100644 index 00000000000..c0d06afd20b --- /dev/null +++ b/scripts/plot_warm_start.py @@ -0,0 +1,141 @@ +import matplotlib +import matplotlib.pyplot as plt +import subprocess +import pylab +from itertools import product + +class model: + def __init__(self): + self.no_bandit = False + self.no_supervised = False + +def collect_stats(mod): + + filename = mod.filename + # using progress parameter + # num_rows = mod.bandit / mod.progress + + + + avg_loss = [] + last_loss = [] + wt = [] + end_table = False + + f = open(filename, 'r') + linenumber = 0 + for line in f: + if not line.strip(): + end_table = True + if linenumber >= 9 and (not end_table): + items = line.split() + avg_loss.append(float(items[0])) + last_loss.append(float(items[1])) + wt.append(float(items[3])) + linenumber += 1 + + return avg_loss, last_loss, wt + +def execute_vw(mod): + + alg_option = ' ' + if mod.no_bandit: + alg_option += ' --no_bandit ' + if mod.no_supervised: + alg_option += ' --no_supervised ' + if mod.no_exploration: + alg_option += ' --epsilon 0.0 ' + if mod.cb_type == 'mtr': + mod.adf_on = True; + if mod.adf_on: + alg_option += ' --cb_explore_adf ' + + cmd_catfile = '( head -n ' + str(mod.warm_start) + ' ' + mod.dataset_supervised + ';' + ' head -n ' + str(mod.bandit) + ' ' + mod.dataset_bandit + '; )' + cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' 
--cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option + #+ ' --progress ' + str(mod.progress) + + cmd = cmd_catfile + ' | ' + cmd_vw + + print cmd + + f = open(mod.filename, 'w') + process = subprocess.Popen(cmd, shell=True, stdout=f, stderr=f) + #subprocess.check_call(cmd, shell=True) + process.wait() + f.close() + +def gen_comparison_graph(mod): + + for mod.warm_start in mod.choices_warm_start: + + config_name = str(mod.fprob1)+'_'+str(mod.fprob2)+'_'+str(mod.warm_start)+'_'+str(mod.bandit)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) + + # combined approach + mod.no_bandit = False + mod.no_supervised = False + mod.no_exploration = False + mod.filename = config_name + execute_vw(mod) + avg_loss_comb, last_loss_comb, wt_comb = collect_stats(mod) + line = plt.plot(wt_comb, avg_loss_comb, 'r', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) + + # bandit only approach + mod.no_bandit = False + mod.no_supervised = True + mod.no_exploration = False + mod.filename = config_name+'_no_supervised' + execute_vw(mod) + avg_loss_band_only, last_loss_band_only, wt_band_only = collect_stats(mod) + line = plt.plot(wt_band_only, avg_loss_band_only, 'b', label='Bandit only') + + # supervised only approach + mod.no_bandit = True + mod.no_supervised = False + mod.no_exploration = False + mod.filename = config_name+'_no_bandit' + execute_vw(mod) + avg_loss_sup_only, last_loss_sup_only, wt_sup_only = collect_stats(mod) + line = plt.plot(wt_sup_only, avg_loss_sup_only, 'g', label='Supervised only') + + pylab.legend() + pylab.xlabel('#bandit examples') + pylab.ylabel('Progressive validation error') + pylab.title('Source 1 feature flipping prob = ' + str(mod.fprob1) + '; source 2 feature flipping prob = ' + str(mod.fprob2) + 'cb_type = '+ mod.cb_type ) + pylab.savefig('figs/'+config_name +'.png') + plt.gcf().clear() + print('') + #plt.show() + + + + +if __name__ == '__main__': + + mod = model() + + mod.vw_path = './vowpalwabbit/vw' + #mod.warm_start = 50 + mod.bandit = 4096 + mod.num_classes = 10 + #mod.cb_type = 'mtr' #'ips' + #mod.choices_lambda = 10 + #mod.progress = 25 + mod.adf_on = True + + mod.choices_warm_start = [pow(2,i) for i in range(11)] #put it here in order to plot 2d mesh + # we are implicitly iterating over the bandit sample sizes + #choices_fprob1 = [0.1, 0.2, 0.3] + #choices_fprob2 = [0.1, 0.2, 0.3] + #choices_cb_types = ['mtr', 'ips'] + choices_choices_lambda = [pow(2,i) for i in range(5)] + + #for correctness test + #mod.choices_warm_start = [20] + choices_fprob1 = [0.1] + choices_fprob2 = [0.1] + + + for mod.fprob1, mod.fprob2, mod.cb_type, mod.choices_lambda in product(choices_fprob1, choices_fprob2, choices_cb_types, choices_choices_lambda): + mod.dataset_supervised = './source1_' + str(mod.fprob1) + '_m.vw' + mod.dataset_bandit = './source2_' + str(mod.fprob2) + '_m.vw' + gen_comparison_graph(mod) From 630fd5fa49bf1bfa11e39e896fd7cc21cafa7922 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Fri, 9 Mar 2018 17:10:45 -0500 Subject: [PATCH 031/127] tweak the python script --- scripts/plot_warm_start.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index c0d06afd20b..d3edec6f2dd 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -3,6 +3,7 @@ import subprocess import pylab from itertools import product +import 
os.path class model: def __init__(self): @@ -14,9 +15,6 @@ def collect_stats(mod): filename = mod.filename # using progress parameter # num_rows = mod.bandit / mod.progress - - - avg_loss = [] last_loss = [] wt = [] @@ -51,8 +49,7 @@ def execute_vw(mod): alg_option += ' --cb_explore_adf ' cmd_catfile = '( head -n ' + str(mod.warm_start) + ' ' + mod.dataset_supervised + ';' + ' head -n ' + str(mod.bandit) + ' ' + mod.dataset_bandit + '; )' - cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option - #+ ' --progress ' + str(mod.progress) + cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option + ' --progress ' + str(mod.progress) cmd = cmd_catfile + ' | ' + cmd_vw @@ -74,7 +71,7 @@ def gen_comparison_graph(mod): mod.no_bandit = False mod.no_supervised = False mod.no_exploration = False - mod.filename = config_name + mod.filename = mod.output_dir+'/'+config_name execute_vw(mod) avg_loss_comb, last_loss_comb, wt_comb = collect_stats(mod) line = plt.plot(wt_comb, avg_loss_comb, 'r', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) @@ -83,7 +80,7 @@ def gen_comparison_graph(mod): mod.no_bandit = False mod.no_supervised = True mod.no_exploration = False - mod.filename = config_name+'_no_supervised' + mod.filename = mod.output_dir+'/'+config_name+'_no_supervised' execute_vw(mod) avg_loss_band_only, last_loss_band_only, wt_band_only = collect_stats(mod) line = plt.plot(wt_band_only, avg_loss_band_only, 'b', label='Bandit only') @@ -92,7 +89,7 @@ def gen_comparison_graph(mod): mod.no_bandit = True mod.no_supervised = False mod.no_exploration = False - mod.filename = config_name+'_no_bandit' + mod.filename = mod.output_dir+'/'+config_name+'_no_bandit' execute_vw(mod) avg_loss_sup_only, last_loss_sup_only, wt_sup_only = collect_stats(mod) line = plt.plot(wt_sup_only, avg_loss_sup_only, 'g', label='Supervised only') @@ -101,7 +98,7 @@ def gen_comparison_graph(mod): pylab.xlabel('#bandit examples') pylab.ylabel('Progressive validation error') pylab.title('Source 1 feature flipping prob = ' + str(mod.fprob1) + '; source 2 feature flipping prob = ' + str(mod.fprob2) + 'cb_type = '+ mod.cb_type ) - pylab.savefig('figs/'+config_name +'.png') + pylab.savefig(mod.output_dir+'/'+config_name +'.png') plt.gcf().clear() print('') #plt.show() @@ -113,13 +110,15 @@ def gen_comparison_graph(mod): mod = model() - mod.vw_path = './vowpalwabbit/vw' + mod.vw_path = '../vowpalwabbit/vw' + mod.output_dir = '../figs' + mod.data_dir = '../data' #mod.warm_start = 50 - mod.bandit = 4096 + mod.bandit = 4000 mod.num_classes = 10 #mod.cb_type = 'mtr' #'ips' #mod.choices_lambda = 10 - #mod.progress = 25 + mod.progress = 25 mod.adf_on = True mod.choices_warm_start = [pow(2,i) for i in range(11)] #put it here in order to plot 2d mesh @@ -127,6 +126,7 @@ def gen_comparison_graph(mod): #choices_fprob1 = [0.1, 0.2, 0.3] #choices_fprob2 = [0.1, 0.2, 0.3] #choices_cb_types = ['mtr', 'ips'] + choices_cb_types = ['mtr'] choices_choices_lambda = [pow(2,i) for i in range(5)] #for correctness test @@ -134,8 +134,13 @@ def gen_comparison_graph(mod): choices_fprob1 = [0.1] choices_fprob2 = [0.1] - for mod.fprob1, mod.fprob2, mod.cb_type, mod.choices_lambda in product(choices_fprob1, 
choices_fprob2, choices_cb_types, choices_choices_lambda): - mod.dataset_supervised = './source1_' + str(mod.fprob1) + '_m.vw' - mod.dataset_bandit = './source2_' + str(mod.fprob2) + '_m.vw' + mod.dataset_supervised = mod.data_dir + '/source1_' + str(mod.fprob1) + '_m.vw' + mod.dataset_bandit = mod.data_dir + '/source2_' + str(mod.fprob2) + '_m.vw' + if not os.path.isfile(mod.dataset_supervised): + print 'The supervised dataset does not exist!' + break + if not os.path.isfile(mod.dataset_bandit): + print 'The bandit dataset does not exist!' + break gen_comparison_graph(mod) From a7d5360c9e1d4dbe6efd09178509d4c41c0ec6fb Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Tue, 13 Mar 2018 08:58:26 -0400 Subject: [PATCH 032/127] added scatterplot script --- scripts/alg_comparison.py | 64 +++++++++++ scripts/plot_warm_start.py | 216 +++++++++++++++++++++++++++---------- scripts/run_vw_job.py | 205 +++++++++++++++++++++++++++++++++++ vowpalwabbit/cbify.cc | 45 ++++---- 4 files changed, 450 insertions(+), 80 deletions(-) create mode 100644 scripts/alg_comparison.py create mode 100644 scripts/run_vw_job.py diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py new file mode 100644 index 00000000000..c0556442dc9 --- /dev/null +++ b/scripts/alg_comparison.py @@ -0,0 +1,64 @@ +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +import pylab +import os +import glob + + +def sum_files(result_path): + prevdir = os.getcwd() + os.chdir(result_path) + dss = sorted(glob.glob('*.sum')) + os.chdir(prevdir) + return dss + +def parse_sum_file(sum_filename): + f = open(sum_filename, 'r') + line = f.readline() + num_cols = len(line.split()) + f.seek(0) + results = [[] for i in range(num_cols)] + + for line in f: + splitted = line.split() + for i in range(len(splitted)): + if (i == 0): + results[i].append(splitted[i]) + else: + results[i].append(float(splitted[i])) + return results + + +if __name__ == '__main__': + results_path = '../figs/' + dss = sum_files(results_path) + + all_results = [] + for i in range(len(dss)): + result = parse_sum_file(results_path + dss[i]) + + if (i == 0): + all_results = result + else: + num_cols = len(result) + for j in range(num_cols): + all_results[j] += result[j] + + print all_results + + + + # compare combined w/ supervised + plt.plot([0,1],[0,1]) + plt.scatter(all_results[1], all_results[3]) + plt.title('combined vs supervised only') + pylab.savefig('comb_v_super' +'.png') + plt.gcf().clear() + + # compare combined w/ bandit + plt.plot([0,1],[0,1]) + plt.scatter(all_results[1], all_results[2]) + plt.title('combined vs bandit only') + pylab.savefig('comb_v_bandit' +'.png') + plt.gcf().clear() diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index c0d06afd20b..9b4be0d84c4 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -1,8 +1,16 @@ import matplotlib +matplotlib.use('Agg') import matplotlib.pyplot as plt import subprocess import pylab from itertools import product +import os +import math +import argparse +import time +import glob +import re + class model: def __init__(self): @@ -11,18 +19,17 @@ def __init__(self): def collect_stats(mod): - filename = mod.filename + vw_output_filename = mod.vw_output_filename # using progress parameter # num_rows = mod.bandit / mod.progress - - + #print vw_output_filename avg_loss = [] last_loss = [] wt = [] end_table = False - f = open(filename, 'r') + f = open(vw_output_filename, 'r') linenumber = 0 for line in f: if not line.strip(): @@ -50,15 +57,20 @@ def 
execute_vw(mod): if mod.adf_on: alg_option += ' --cb_explore_adf ' - cmd_catfile = '( head -n ' + str(mod.warm_start) + ' ' + mod.dataset_supervised + ';' + ' head -n ' + str(mod.bandit) + ' ' + mod.dataset_bandit + '; )' - cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option - #+ ' --progress ' + str(mod.progress) + # using two datasets + #cmd_catfile = '( head -n ' + str(mod.warm_start) + ' ' + mod.dataset_supervised + ';' + ' head -n ' + str(mod.bandit) + ' ' + mod.dataset_bandit + '; )' + # using only one dataset + #cmd_catfile = '( head -n ' + str(mod.warm_start + mod.bandit) + ' ' + mod.dataset + '; )' + #cmd_catfile = '( cat ' + mod.ds_path+mod.dataset + '; )' - cmd = cmd_catfile + ' | ' + cmd_vw + cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option + ' --progress ' + str(mod.progress) + ' -d ' + mod.ds_path + mod.dataset + + cmd = cmd_vw + #cmd = cmd_catfile + ' | ' + cmd_vw print cmd - f = open(mod.filename, 'w') + f = open(mod.vw_output_filename, 'w') process = subprocess.Popen(cmd, shell=True, stdout=f, stderr=f) #subprocess.check_call(cmd, shell=True) process.wait() @@ -66,76 +78,164 @@ def execute_vw(mod): def gen_comparison_graph(mod): - for mod.warm_start in mod.choices_warm_start: - - config_name = str(mod.fprob1)+'_'+str(mod.fprob2)+'_'+str(mod.warm_start)+'_'+str(mod.bandit)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) - - # combined approach - mod.no_bandit = False - mod.no_supervised = False - mod.no_exploration = False - mod.filename = config_name - execute_vw(mod) - avg_loss_comb, last_loss_comb, wt_comb = collect_stats(mod) - line = plt.plot(wt_comb, avg_loss_comb, 'r', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) - - # bandit only approach - mod.no_bandit = False - mod.no_supervised = True - mod.no_exploration = False - mod.filename = config_name+'_no_supervised' - execute_vw(mod) - avg_loss_band_only, last_loss_band_only, wt_band_only = collect_stats(mod) - line = plt.plot(wt_band_only, avg_loss_band_only, 'b', label='Bandit only') - - # supervised only approach - mod.no_bandit = True - mod.no_supervised = False - mod.no_exploration = False - mod.filename = config_name+'_no_bandit' - execute_vw(mod) - avg_loss_sup_only, last_loss_sup_only, wt_sup_only = collect_stats(mod) - line = plt.plot(wt_sup_only, avg_loss_sup_only, 'g', label='Supervised only') - - pylab.legend() - pylab.xlabel('#bandit examples') - pylab.ylabel('Progressive validation error') - pylab.title('Source 1 feature flipping prob = ' + str(mod.fprob1) + '; source 2 feature flipping prob = ' + str(mod.fprob2) + 'cb_type = '+ mod.cb_type ) - pylab.savefig('figs/'+config_name +'.png') - plt.gcf().clear() - print('') - #plt.show() - + mod.num_lines = get_num_lines(mod.ds_path+mod.dataset) + mod.warm_start = int(math.floor(mod.warm_start_frac * mod.num_lines)) + mod.bandit = mod.num_lines - mod.warm_start + mod.progress = int(math.floor(mod.bandit / mod.num_checkpoints)) + + #config_name = str(mod.dataset) + '_' + str(mod.fprob1)+'_'+str(mod.fprob2)+'_'+str(mod.warm_start)+'_'+str(mod.bandit)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) + + config_name = str(mod.dataset) + '_'+str(mod.warm_start)+ '_' + 
str(mod.cb_type) + '_' + str(mod.choices_lambda) + + # combined approach + mod.no_bandit = False + mod.no_supervised = False + mod.no_exploration = False + mod.vw_output_filename = mod.results_path+config_name+'.txt' + execute_vw(mod) + ''' + avg_loss_comb, last_loss_comb, wt_comb = collect_stats(mod) + line = plt.plot(wt_comb, avg_loss_comb, 'r', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) + ''' + avg_error_comb = avg_error(mod) + + # bandit only approach + mod.no_bandit = False + mod.no_supervised = True + mod.no_exploration = False + mod.vw_output_filename = mod.results_path+config_name+'_no_supervised'+'.txt' + execute_vw(mod) + ''' + avg_loss_band_only, last_loss_band_only, wt_band_only = collect_stats(mod) + line = plt.plot(wt_band_only, avg_loss_band_only, 'b', label='Bandit only') + ''' + avg_error_band_only = avg_error(mod) + + # supervised only approach + mod.no_bandit = True + mod.no_supervised = False + mod.no_exploration = False + mod.vw_output_filename = mod.results_path+config_name+'_no_bandit'+'.txt' + execute_vw(mod) + ''' + avg_loss_sup_only, last_loss_sup_only, wt_sup_only = collect_stats(mod) + # for supervised only, we simply plot a horizontal line using the last point + len_avg_loss = len(avg_loss_sup_only) + avg_loss = avg_loss_sup_only[len_avg_loss-1] + avg_loss_sup_only = [avg_loss for i in range(len_avg_loss)] + line = plt.plot(wt_sup_only, avg_loss_sup_only, 'g', label='Supervised only') + ''' + avg_error_sup_only = avg_error(mod) + + summary_file = open(mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum', 'a') + summary_file.write(config_name + ' ' + str(avg_error_comb) + ' ' + str(avg_error_band_only) + ' ' + str(avg_error_sup_only) + '\n') + + print('') + + ''' + pylab.legend() + pylab.xlabel('#bandit examples') + pylab.ylabel('Progressive validation error') + pylab.title(mod.dataset + ' warm_start = ' + str(mod.warm_start) + ' cb_type = ' + mod.cb_type) + #pylab.title('Source 1 feature flipping prob = ' + str(mod.fprob1) + '; source 2 feature flipping prob = ' + str(mod.fprob2) + 'cb_type = '+ mod.cb_type ) + pylab.savefig(mod.results_path+config_name +'.png') + plt.gcf().clear() + + #plt.show() + ''' + +def ds_files(ds_path): + prevdir = os.getcwd() + os.chdir(ds_path) + dss = sorted(glob.glob('*.vw.gz')) + os.chdir(prevdir) + return dss + + +def ds_per_task(dss, num_tasks, task_id): + ds_task = [] + for i in range(len(dss)): + if (i % num_tasks == task_id): + ds_task.append(dss[i]) + + return ds_task + +def get_num_lines(dataset_name): + ps = subprocess.Popen(('zcat', dataset_name), stdout=subprocess.PIPE) + output = subprocess.check_output(('wc', '-l'), stdin=ps.stdout) + ps.wait() + return int(output) + +def avg_error(mod): + vw_output = open(mod.vw_output_filename, 'r') + vw_output_text = vw_output.read() + rgx = re.compile('^average loss = (.*)$', flags=re.M) + return float(rgx.findall(vw_output_text)[0]) if __name__ == '__main__': + parser = argparse.ArgumentParser(description='vw job') + parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') + parser.add_argument('num_tasks', type=int) + parser.add_argument('--results_dir', default='../figs/') + args = parser.parse_args() + if args.task_id == 0: + if not os.path.exists(args.results_dir): + os.makedirs(args.results_dir) + import stat + os.chmod(args.results_dir, os.stat(args.results_dir).st_mode | stat.S_IWOTH) + else: + while not os.path.exists(args.results_dir): + time.sleep(1) mod = model() + mod.num_tasks = args.num_tasks + 
mod.task_id = args.task_id + + mod.ds_path = '../data/' + mod.vw_path = '../vowpalwabbit/vw' + mod.results_path = args.results_dir - mod.vw_path = './vowpalwabbit/vw' + #DIR_PATTERN = '../results/cbresults_{}/' + + mod.num_checkpoints = 100 #mod.warm_start = 50 - mod.bandit = 4096 + #mod.bandit = 4096 mod.num_classes = 10 #mod.cb_type = 'mtr' #'ips' #mod.choices_lambda = 10 #mod.progress = 25 mod.adf_on = True - mod.choices_warm_start = [pow(2,i) for i in range(11)] #put it here in order to plot 2d mesh + # use fractions instead of absolute numbers + + mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] + #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] + + #mod.choices_warm_start = [pow(2,i) for i in range(11)] #put it here in order to plot 2d mesh # we are implicitly iterating over the bandit sample sizes #choices_fprob1 = [0.1, 0.2, 0.3] #choices_fprob2 = [0.1, 0.2, 0.3] #choices_cb_types = ['mtr', 'ips'] - choices_choices_lambda = [pow(2,i) for i in range(5)] + choices_cb_types = ['mtr', 'ips'] + #choices_choices_lambda = [pow(2,i) for i in range(10,11)] + choices_choices_lambda = [i for i in range(10,11)] #for correctness test #mod.choices_warm_start = [20] - choices_fprob1 = [0.1] - choices_fprob2 = [0.1] + #choices_fprob1 = [0.1] + #choices_fprob2 = [0.1] + + dss = ds_files(mod.ds_path) + mod.ds_task = ds_per_task(dss, args.num_tasks, args.task_id) + + print mod.ds_task + # we only need to vary the warm start fraction, and there is no need to vary the bandit fraction, + # as each run of vw automatically accumulates the bandit dataset - for mod.fprob1, mod.fprob2, mod.cb_type, mod.choices_lambda in product(choices_fprob1, choices_fprob2, choices_cb_types, choices_choices_lambda): - mod.dataset_supervised = './source1_' + str(mod.fprob1) + '_m.vw' - mod.dataset_bandit = './source2_' + str(mod.fprob2) + '_m.vw' + for mod.cb_type, mod.choices_lambda, mod.dataset, mod.warm_start_frac in product(choices_cb_types, choices_choices_lambda, mod.ds_task, mod.choices_warm_start): + #mod.dataset_supervised = './vw_' + str(mod.fprob1) + '_m.vw' + #mod.dataset_bandit = './source2_' + str(mod.fprob2) + '_m.vw' gen_comparison_graph(mod) diff --git a/scripts/run_vw_job.py b/scripts/run_vw_job.py new file mode 100644 index 00000000000..d2551819f4e --- /dev/null +++ b/scripts/run_vw_job.py @@ -0,0 +1,205 @@ +import argparse +import os +import re +import subprocess +import sys +import time + +USE_ADF = True +USE_CS = False + +VW = '/scratch/clear/abietti/.local/bin/vw' +if USE_CS: + VW_DS_DIR = '/scratch/clear/abietti/cb_eval/vwshuffled_cs/' + DIR_PATTERN = '/scratch/clear/abietti/cb_eval/res_cs/cbresults_{}/' +else: + VW_DS_DIR = '/scratch/clear/abietti/cb_eval/vwshuffled/' + DIR_PATTERN = '/scratch/clear/abietti/cb_eval/res/cbresults_{}/' +# VW_DS_DIR = '/bscratch/b-albiet/vwshuffled/' +# DIR_PATTERN = '/bscratch/b-albiet/cbresults_{}/' + +rgx = re.compile('^average loss = (.*)$', flags=re.M) + + +def expand_cover(policies): + algs = [] + for psi in [0, 0.01, 0.1, 1.0]: + algs.append(('cover', policies, 'psi', psi)) + algs.append(('cover', policies, 'psi', psi, 'nounif', None)) + # algs.append(('cover', policies, 'psi', psi, 'nounifagree', None, 'agree_mellowness', 0.1)) + # algs.append(('cover', policies, 'psi', psi, 'nounifagree', None, 'agree_mellowness', 0.01)) + return algs + +params_old = { + 'alg': [ + ('supervised',), + ('epsilon', 0), + ('epsilon', 0.02), + ('epsilon', 0.05), + ('epsilon', 0.1), + ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1.0), + 
('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-2), + ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-4), + ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-6), + # agree + ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1.0), + ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-2), + ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-4), + ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-6), + ('bag', 2), + ('bag', 4), + ('bag', 8), + ('bag', 16), + ('bag', 2, 'greedify', None), + ('bag', 4, 'greedify', None), + ('bag', 8, 'greedify', None), + ('bag', 16, 'greedify', None), + ] + expand_cover(1) + expand_cover(4) + expand_cover(8) + expand_cover(16), + 'learning_rate': [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0], + 'cb_type': ['dr', 'ips', 'mtr'], + } + +params = { + 'alg': [ + ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1.0), + ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-2), + ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-4), + ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-6), + ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1.0), + ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-2), + ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-4), + ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-6), + ], + 'learning_rate': [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0], + 'cb_type': ['dr', 'ips', 'mtr'], + } + +extra_flags = None +# extra_flags = ['--loss0', '9', '--loss1', '10', '--baseline'] + +def param_grid(): + grid = [{}] + for k in params: + new_grid = [] + for g in grid: + for param in params[k]: + gg = g.copy() + gg[k] = param + new_grid.append(gg) + grid = new_grid + + return sorted(grid) + + +def ds_files(): + import glob + return sorted(glob.glob(os.path.join(VW_DS_DIR, '*.vw.gz'))) + + +def get_task_name(ds, params): + did, n_actions = os.path.basename(ds).split('.')[0].split('_')[1:] + did, n_actions = int(did), int(n_actions) + + task_name = 'ds:{}|na:{}'.format(did, n_actions) + if len(params) > 1: + task_name += '|' + '|'.join('{}:{}'.format(k, v) for k, v in sorted(params.items()) if k != 'alg') + task_name += '|' + ':'.join([str(p) for p in params['alg'] if p is not None]) + return task_name + + +def process(ds, params, results_dir): + print 'processing', ds, params + did, n_actions = os.path.basename(ds).split('.')[0].split('_')[1:] + did, n_actions = int(did), int(n_actions) + + cmd = [VW, ds, '-b', '24'] + for k, v in params.iteritems(): + if k == 'alg': + if v[0] == 'supervised': + cmd += ['--csoaa' if USE_CS else '--oaa', str(n_actions)] + else: + cmd += ['--cbify', str(n_actions)] + if USE_CS: + cmd += ['--cbify_cs'] + if extra_flags: + cmd += extra_flags + if USE_ADF: + cmd += ['--cb_explore_adf'] + assert len(v) % 2 == 0, 'params should be in pairs of (option, value)' + for i in range(len(v) / 2): + cmd += ['--{}'.format(v[2 * i])] + if v[2 * i + 1] is not None: + cmd += [str(v[2 * i + 1])] + else: + if params['alg'][0] == 'supervised' and k == 'cb_type': + pass + else: + cmd += ['--{}'.format(k), str(v)] + + print 'running', cmd + t = time.time() + output = subprocess.check_output(cmd, stderr=subprocess.STDOUT) + sys.stderr.write('\n\n{}, {}, time: {}, output:\n'.format(ds, params, time.time() - t)) + sys.stderr.write(output) + pv_loss = float(rgx.findall(output)[0]) + print 'elapsed time:', time.time() - t, 'pv loss:', pv_loss + + return pv_loss + + +if __name__ == 
'__main__': + parser = argparse.ArgumentParser(description='vw job') + parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') + parser.add_argument('num_tasks', type=int) + parser.add_argument('--task_offset', type=int, default=0, + help='offset for task_id in output filenames') + parser.add_argument('--results_dir', default=DIR_PATTERN.format('agree01')) + parser.add_argument('--name', default=None) + parser.add_argument('--test', action='store_true') + parser.add_argument('--flags', default=None, help='extra flags for cb algorithms') + args = parser.parse_args() + + if args.name is not None: + args.results_dir = DIR_PATTERN.format(args.name) + + if args.flags is not None: + extra_flags = args.flags.split() + grid = param_grid() + dss = ds_files() + tot_jobs = len(grid) * len(dss) + + if args.task_id == 0: + if not os.path.exists(args.results_dir): + os.makedirs(args.results_dir) + import stat + os.chmod(args.results_dir, os.stat(args.results_dir).st_mode | stat.S_IWOTH) + else: + while not os.path.exists(args.results_dir): + time.sleep(1) + if not args.test: + fname = os.path.join(args.results_dir, 'loss{}.txt'.format(args.task_offset + args.task_id)) + done_tasks = set() + if os.path.exists(fname): + done_tasks = set([line.split()[0] for line in open(fname).readlines()]) + loss_file = open(fname, 'a') + idx = args.task_id + while idx < tot_jobs: + ds = dss[idx / len(grid)] + params = grid[idx % len(grid)] + if args.test: + print ds, params + else: + task_name = get_task_name(ds, params) + if task_name not in done_tasks: + try: + pv_loss = process(ds, params, args.results_dir) + loss_file.write('{} {}\n'.format(task_name, pv_loss)) + loss_file.flush() + os.fsync(loss_file.fileno()) + except subprocess.CalledProcessError: + sys.stderr.write('\nERROR: TASK FAILED {} {}\n\n'.format(ds, params)) + print 'ERROR: TASK FAILED', ds, params + idx += args.num_tasks + + if not args.test: + loss_file.close() diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index b302636e3af..2ec09cce29a 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -67,7 +67,7 @@ struct cbify CB::label* cbls; CB::label* cbl_empty; bool warm_start; - float* old_weights; + float* old_weights; }; @@ -101,12 +101,12 @@ void finish(cbify& data) data.a_s.delete_v(); data.lambdas.delete_v(); data.cumulative_costs.delete_v(); - - + + if (data.use_adf) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { + { VW::dealloc_example(CB::cb_label.delete_label, data.adf_data.ecs[a]); data.adf_data.ecs[a].pred.a_s.delete_v(); } @@ -118,7 +118,7 @@ void finish(cbify& data) for (size_t a = 0; a < data.adf_data.num_actions; ++a) data.csls[a].costs.delete_v(); - + data.csl_empty->costs.delete_v(); free(data.csl_empty); @@ -129,7 +129,7 @@ void finish(cbify& data) } free(data.csls); - + } @@ -175,7 +175,7 @@ uint32_t find_min(v_array arr) { //cout<cost_sensitive->predict(*empty, argmin); - + //float best_score; //for (size_t a = 0; a < data.adf_data.num_actions; ++a) @@ -225,7 +225,7 @@ void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) { data.all->cost_sensitive->predict(ec, i); if (ec.pred.multiclass == cl.action) - data.cumulative_costs[i] += cl.cost / cl.probability; + data.cumulative_costs[i] += cl.cost / cl.probability; //cout< 0) // Call the cost-sensitive learner directly { data.warm_start_period--; @@ -302,7 +302,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.cb_label.costs.erase(); ec.l.cb = data.cb_label; ec.pred.a_s = 
data.a_s; - + base.predict(ec, argmin); auto old_pred = ec.pred; @@ -360,7 +360,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) uint32_t best_action; example* ecs = data.adf_data.ecs; example* empty_example = data.adf_data.empty_example; - + argmin = find_min(data.cumulative_costs); //Store the multiclass input label @@ -384,7 +384,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) COST_SENSITIVE::label* csl_empty = data.csl_empty; CB::label* cbls = data.cbls; CB::label* cbl_empty = data.cbl_empty; - + if (data.ind_supervised) { for (uint32_t i = 0; i < data.choices_lambda; i++) @@ -393,13 +393,13 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { csls[a].costs[0].class_index = a+1; csls[a].costs[0].x = loss(data, ld.label, a+1); - + cbls[a] = ecs[a].l.cb; ecs[a].l.cs = csls[a]; data.all->cost_sensitive->learn(ecs[a],i); } *cbl_empty = empty_example->l.cb; - empty_example->l.cs = *csl_empty; + empty_example->l.cs = *csl_empty; data.all->cost_sensitive->learn(*empty_example,i); for (size_t a = 0; a < data.adf_data.num_actions; ++a) @@ -409,6 +409,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) } ec.pred.multiclass = best_action; ec.l.multi = ld; + ec.weight = 0; } else if (data.bandit_period > 0) // call the bandit learner { @@ -441,7 +442,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; lab.costs.push_back(cl); - + if (data.ind_bandit) { for (uint32_t i = 0; i < data.choices_lambda; i++) @@ -492,14 +493,14 @@ void init_adf_data(cbify& data, const size_t num_actions) data.old_weights = calloc_or_throw(num_actions); - + data.csl_empty->costs.push_back({0, 0, 0, 0}); data.csl_empty->costs[0].class_index = 0; data.csl_empty->costs[0].x = FLT_MAX; for (size_t a = 0; a < num_actions; ++a) { - data.csls[a].costs.push_back({0, 0, 0, 0}); + data.csls[a].costs.push_back({0, 0, 0, 0}); } } @@ -552,7 +553,7 @@ base_learner* cbify_setup(vw& all) //data.probs = v_init(); data.generic_explorer = new GenericExplorer(*data.scorer, (u32)num_actions); data.all = &all; - + //cout<() : 0; @@ -567,7 +568,7 @@ base_learner* cbify_setup(vw& all) data.cumulative_costs.push_back(0.); data.num_actions = num_actions; - + if (data.use_adf) { From f2f9bb6e56df2cafe1f7c3d34317e0ac530cf00c Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Thu, 15 Mar 2018 13:11:37 -0400 Subject: [PATCH 033/127] retracted the matplotlib inclusion --- scripts/plot_warm_start.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 9b4be0d84c4..53e20114b15 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -1,6 +1,6 @@ -import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt +#import matplotlib +#matplotlib.use('Agg') +#import matplotlib.pyplot as plt import subprocess import pylab from itertools import product From 8298ec6a890bb7ded411fdaf4279d97f89d589df Mon Sep 17 00:00:00 2001 From: chicheng zhang Date: Thu, 15 Mar 2018 22:30:04 +0000 Subject: [PATCH 034/127] . 
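This commit points the scripts at the shuffled-dataset layout
(../../vwshuffled, ../../figs), rounds --progress up instead of down, and
adds debug prints around avg_error. For reference, a minimal standalone
version of the loss extraction avg_error performs, assuming vw's stderr
was captured to a file the way execute_vw does; the helper name and the
filename below are illustrative only:

    import re

    def read_avg_loss(path):
        # Illustrative helper, not part of the scripts. vw ends its
        # stderr report with a line of the form
        #   average loss = 0.123456
        # so pick out the first token after the '=' and parse it.
        text = open(path).read()
        match = re.search(r'^average loss = (\S+)', text, flags=re.M)
        if match is None:
            raise ValueError('no "average loss" line in ' + path)
        return float(match.group(1))

    print read_avg_loss('ds_223_63.vw.gz_100_mtr.txt')  # placeholder path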
--- scripts/plot_warm_start.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 53e20114b15..d7e69147fcb 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -2,7 +2,7 @@ #matplotlib.use('Agg') #import matplotlib.pyplot as plt import subprocess -import pylab +#import pylab from itertools import product import os import math @@ -81,7 +81,7 @@ def gen_comparison_graph(mod): mod.num_lines = get_num_lines(mod.ds_path+mod.dataset) mod.warm_start = int(math.floor(mod.warm_start_frac * mod.num_lines)) mod.bandit = mod.num_lines - mod.warm_start - mod.progress = int(math.floor(mod.bandit / mod.num_checkpoints)) + mod.progress = int(math.ceil(mod.bandit / mod.num_checkpoints)) #config_name = str(mod.dataset) + '_' + str(mod.fprob1)+'_'+str(mod.fprob2)+'_'+str(mod.warm_start)+'_'+str(mod.bandit)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) @@ -167,8 +167,10 @@ def get_num_lines(dataset_name): return int(output) def avg_error(mod): + print mod.vw_output_filename vw_output = open(mod.vw_output_filename, 'r') vw_output_text = vw_output.read() + print vw_output_text rgx = re.compile('^average loss = (.*)$', flags=re.M) return float(rgx.findall(vw_output_text)[0]) @@ -178,7 +180,7 @@ def avg_error(mod): parser = argparse.ArgumentParser(description='vw job') parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') parser.add_argument('num_tasks', type=int) - parser.add_argument('--results_dir', default='../figs/') + parser.add_argument('--results_dir', default='../../figs/') args = parser.parse_args() if args.task_id == 0: if not os.path.exists(args.results_dir): @@ -193,7 +195,7 @@ def avg_error(mod): mod.num_tasks = args.num_tasks mod.task_id = args.task_id - mod.ds_path = '../data/' + mod.ds_path = '../../vwshuffled/' mod.vw_path = '../vowpalwabbit/vw' mod.results_path = args.results_dir From 543bab9676f9f0f83f2b4d947b0b5c69dd500317 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Fri, 16 Mar 2018 18:00:55 -0400 Subject: [PATCH 035/127] . 
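Re-enables matplotlib/pylab and the per-run plots, closes the summary
file after each write, and fixes the checkpoint interval: under Python 2
the previous expression divided two ints, so math.ceil never saw a
fraction and small datasets ended up with --progress 0. A reduced sketch
of the corrected rounding (the function name is illustrative; the
argument names mirror the script's):

    import math

    def progress_interval(bandit, num_checkpoints=100):
        # Illustrative helper. Force float division before rounding up,
        # so vw prints at most num_checkpoints progress rows and the
        # interval can never collapse to zero.
        return int(math.ceil(float(bandit) / float(num_checkpoints)))

    assert progress_interval(250) == 3   # int division would give 2
    assert progress_interval(50) == 1    # int division would give 0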
--- scripts/plot_warm_start.py | 39 ++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index d7e69147fcb..ca9e897044e 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -1,8 +1,8 @@ -#import matplotlib -#matplotlib.use('Agg') -#import matplotlib.pyplot as plt +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt import subprocess -#import pylab +import pylab from itertools import product import os import math @@ -81,7 +81,7 @@ def gen_comparison_graph(mod): mod.num_lines = get_num_lines(mod.ds_path+mod.dataset) mod.warm_start = int(math.floor(mod.warm_start_frac * mod.num_lines)) mod.bandit = mod.num_lines - mod.warm_start - mod.progress = int(math.ceil(mod.bandit / mod.num_checkpoints)) + mod.progress = int(math.ceil(float(mod.bandit) / float(mod.num_checkpoints))) #config_name = str(mod.dataset) + '_' + str(mod.fprob1)+'_'+str(mod.fprob2)+'_'+str(mod.warm_start)+'_'+str(mod.bandit)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) @@ -93,10 +93,10 @@ def gen_comparison_graph(mod): mod.no_exploration = False mod.vw_output_filename = mod.results_path+config_name+'.txt' execute_vw(mod) - ''' + avg_loss_comb, last_loss_comb, wt_comb = collect_stats(mod) line = plt.plot(wt_comb, avg_loss_comb, 'r', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) - ''' + avg_error_comb = avg_error(mod) # bandit only approach @@ -105,10 +105,10 @@ def gen_comparison_graph(mod): mod.no_exploration = False mod.vw_output_filename = mod.results_path+config_name+'_no_supervised'+'.txt' execute_vw(mod) - ''' + avg_loss_band_only, last_loss_band_only, wt_band_only = collect_stats(mod) line = plt.plot(wt_band_only, avg_loss_band_only, 'b', label='Bandit only') - ''' + avg_error_band_only = avg_error(mod) # supervised only approach @@ -117,22 +117,22 @@ def gen_comparison_graph(mod): mod.no_exploration = False mod.vw_output_filename = mod.results_path+config_name+'_no_bandit'+'.txt' execute_vw(mod) - ''' + avg_loss_sup_only, last_loss_sup_only, wt_sup_only = collect_stats(mod) # for supervised only, we simply plot a horizontal line using the last point len_avg_loss = len(avg_loss_sup_only) avg_loss = avg_loss_sup_only[len_avg_loss-1] avg_loss_sup_only = [avg_loss for i in range(len_avg_loss)] line = plt.plot(wt_sup_only, avg_loss_sup_only, 'g', label='Supervised only') - ''' + avg_error_sup_only = avg_error(mod) summary_file = open(mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum', 'a') summary_file.write(config_name + ' ' + str(avg_error_comb) + ' ' + str(avg_error_band_only) + ' ' + str(avg_error_sup_only) + '\n') - + summary_file.close() print('') - ''' + pylab.legend() pylab.xlabel('#bandit examples') pylab.ylabel('Progressive validation error') @@ -142,7 +142,7 @@ def gen_comparison_graph(mod): plt.gcf().clear() #plt.show() - ''' + def ds_files(ds_path): prevdir = os.getcwd() @@ -167,10 +167,10 @@ def get_num_lines(dataset_name): return int(output) def avg_error(mod): - print mod.vw_output_filename + #print mod.vw_output_filename vw_output = open(mod.vw_output_filename, 'r') vw_output_text = vw_output.read() - print vw_output_text + #print vw_output_text rgx = re.compile('^average loss = (.*)$', flags=re.M) return float(rgx.findall(vw_output_text)[0]) @@ -212,7 +212,8 @@ def avg_error(mod): # use fractions instead of absolute numbers - mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] + mod.choices_warm_start = 
[0.01 * pow(2, i) for i in range(4,5)] + #mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] #mod.choices_warm_start = [pow(2,i) for i in range(11)] #put it here in order to plot 2d mesh @@ -220,7 +221,8 @@ def avg_error(mod): #choices_fprob1 = [0.1, 0.2, 0.3] #choices_fprob2 = [0.1, 0.2, 0.3] #choices_cb_types = ['mtr', 'ips'] - choices_cb_types = ['mtr', 'ips'] + #choices_cb_types = ['mtr', 'ips'] + choices_cb_types = ['mtr'] #choices_choices_lambda = [pow(2,i) for i in range(10,11)] choices_choices_lambda = [i for i in range(10,11)] @@ -232,6 +234,7 @@ def avg_error(mod): dss = ds_files(mod.ds_path) mod.ds_task = ds_per_task(dss, args.num_tasks, args.task_id) + print 'task ' + str(mod.task_id) + ' of ' + str(mod.num_tasks) + ':' print mod.ds_task # we only need to vary the warm start fraction, and there is no need to vary the bandit fraction, From c9beeb00b5b78030b9dca04693e46558588eac8b Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 19 Mar 2018 15:38:56 -0400 Subject: [PATCH 036/127] regexp based line parsing for vw output (not tested yet) --- scripts/plot_warm_start.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index ca9e897044e..183ba0b4200 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -32,9 +32,12 @@ def collect_stats(mod): f = open(vw_output_filename, 'r') linenumber = 0 for line in f: - if not line.strip(): - end_table = True - if linenumber >= 9 and (not end_table): + #if not line.strip(): + # end_table = True + #if linenumber >= 9 and (not end_table): + vw_progress_pattern = '\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+[a-zA-Z0-9]+\s+[a-zA-Z0-9]\s+\d+' + matchobj = re.match(vw_progress_patter, line) + if matchobj: items = line.split() avg_loss.append(float(items[0])) last_loss.append(float(items[1])) From 2343af46f316bd19383d8a133106aa5f04246d89 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 19 Mar 2018 19:12:13 -0400 Subject: [PATCH 037/127] . 
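Fixes the NameError left by the previous commit (vw_progress_patter vs.
vw_progress_pattern), closes the file handles opened by collect_stats and
avg_error, and enumerates task workloads over the full (cb_type,
warm_start, choices_lambda, dataset) grid. For reference, a
self-contained sketch of the progress-table matcher collect_stats now
relies on; the helper name and the sample row are illustrative, not taken
from a real run:

    import re

    # Columns of vw's progress table: average loss, since-last loss,
    # example counter, example weight, label, prediction, feature count.
    VW_PROGRESS = re.compile(
        r'\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+'
        r'[a-zA-Z0-9]+\s+[a-zA-Z0-9]+\s+\d+')

    def parse_progress(line):
        # Illustrative helper. Header, blank and summary lines fail the
        # match and are skipped; otherwise return (avg loss, last loss,
        # example weight), the three fields collect_stats keeps.
        if VW_PROGRESS.match(line):
            f = line.split()
            return float(f[0]), float(f[1]), float(f[3])
        return None

    print parse_progress('0.500000 0.400000   128   128.0   3   3   25')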
--- scripts/plot_warm_start.py | 71 +++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 24 deletions(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 183ba0b4200..91cce14b1e5 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -36,7 +36,7 @@ def collect_stats(mod): # end_table = True #if linenumber >= 9 and (not end_table): vw_progress_pattern = '\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+[a-zA-Z0-9]+\s+[a-zA-Z0-9]\s+\d+' - matchobj = re.match(vw_progress_patter, line) + matchobj = re.match(vw_progress_pattern, line) if matchobj: items = line.split() avg_loss.append(float(items[0])) @@ -44,6 +44,7 @@ def collect_stats(mod): wt.append(float(items[3])) linenumber += 1 + f.close() return avg_loss, last_loss, wt def execute_vw(mod): @@ -85,6 +86,7 @@ def gen_comparison_graph(mod): mod.warm_start = int(math.floor(mod.warm_start_frac * mod.num_lines)) mod.bandit = mod.num_lines - mod.warm_start mod.progress = int(math.ceil(float(mod.bandit) / float(mod.num_checkpoints))) + mod.num_classes = get_num_classes(mod.dataset) #config_name = str(mod.dataset) + '_' + str(mod.fprob1)+'_'+str(mod.fprob2)+'_'+str(mod.warm_start)+'_'+str(mod.bandit)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) @@ -131,7 +133,7 @@ def gen_comparison_graph(mod): avg_error_sup_only = avg_error(mod) summary_file = open(mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum', 'a') - summary_file.write(config_name + ' ' + str(avg_error_comb) + ' ' + str(avg_error_band_only) + ' ' + str(avg_error_sup_only) + '\n') + summary_file.write(config_name + ' ' + str(avg_error_comb) + ' ' + str(avg_error_band_only) + ' ' + str(avg_error_sup_only) + ' ' + str(mod.choices_lambda) + '\n') summary_file.close() print('') @@ -155,13 +157,23 @@ def ds_files(ds_path): return dss -def ds_per_task(dss, num_tasks, task_id): - ds_task = [] - for i in range(len(dss)): - if (i % num_tasks == task_id): - ds_task.append(dss[i]) +def get_num_classes(ds): + did, n_actions = os.path.basename(ds).split('.')[0].split('_')[1:] + did, n_actions = int(did), int(n_actions) + return n_actions + - return ds_task +def ds_per_task(mod): + # put dataset name to the last coordinate so that the task workloads tend to be + # allocated equally + config_all = [item for item in product(mod.choices_cb_types, mod.choices_warm_start_frac, mod.choices_choices_lambda, mod.dss)] + config_task = [] + for i in range(len(config_all)): + if (i % mod.num_tasks == mod.task_id): + config_task.append(config_all[i]) + print config_all[i] + + return config_task def get_num_lines(dataset_name): ps = subprocess.Popen(('zcat', dataset_name), stdout=subprocess.PIPE) @@ -175,15 +187,25 @@ def avg_error(mod): vw_output_text = vw_output.read() #print vw_output_text rgx = re.compile('^average loss = (.*)$', flags=re.M) - return float(rgx.findall(vw_output_text)[0]) + avge = float(rgx.findall(vw_output_text)[0]) + vw_output.close() + return avge + +def main_loop(mod): + + summary_file = open(mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum', 'w') + summary_file.close() + + for mod.cb_type, mod.warm_start_frac, mod.choices_lambda, mod.dataset in mod.config_task: + gen_comparison_graph(mod) if __name__ == '__main__': parser = argparse.ArgumentParser(description='vw job') parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') parser.add_argument('num_tasks', type=int) - parser.add_argument('--results_dir', default='../../figs/') + parser.add_argument('--results_dir', 
default='../../../figs/') args = parser.parse_args() if args.task_id == 0: if not os.path.exists(args.results_dir): @@ -198,7 +220,7 @@ def avg_error(mod): mod.num_tasks = args.num_tasks mod.task_id = args.task_id - mod.ds_path = '../../vwshuffled/' + mod.ds_path = '../../../vwshuffled/' mod.vw_path = '../vowpalwabbit/vw' mod.results_path = args.results_dir @@ -207,7 +229,7 @@ def avg_error(mod): mod.num_checkpoints = 100 #mod.warm_start = 50 #mod.bandit = 4096 - mod.num_classes = 10 + #mod.num_classes = 10 #mod.cb_type = 'mtr' #'ips' #mod.choices_lambda = 10 #mod.progress = 25 @@ -215,7 +237,7 @@ def avg_error(mod): # use fractions instead of absolute numbers - mod.choices_warm_start = [0.01 * pow(2, i) for i in range(4,5)] + mod.choices_warm_start_frac = [0.01 * pow(2, i) for i in range(3,5)] #mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] @@ -224,26 +246,27 @@ def avg_error(mod): #choices_fprob1 = [0.1, 0.2, 0.3] #choices_fprob2 = [0.1, 0.2, 0.3] #choices_cb_types = ['mtr', 'ips'] - #choices_cb_types = ['mtr', 'ips'] - choices_cb_types = ['mtr'] + mod.choices_cb_types = ['mtr', 'ips'] + #mod.choices_cb_types = ['mtr'] #choices_choices_lambda = [pow(2,i) for i in range(10,11)] - choices_choices_lambda = [i for i in range(10,11)] + mod.choices_choices_lambda = [i for i in range(1,3)] + #[i for i in range(10,11)] #for correctness test #mod.choices_warm_start = [20] #choices_fprob1 = [0.1] #choices_fprob2 = [0.1] - dss = ds_files(mod.ds_path) - mod.ds_task = ds_per_task(dss, args.num_tasks, args.task_id) + mod.dss = ds_files(mod.ds_path) + + # here, we are generating the task specific parameter settings + # by first generate all parameter setting and pick every num_tasks of them + mod.config_task = ds_per_task(mod) print 'task ' + str(mod.task_id) + ' of ' + str(mod.num_tasks) + ':' - print mod.ds_task + + #print mod.ds_task # we only need to vary the warm start fraction, and there is no need to vary the bandit fraction, # as each run of vw automatically accumulates the bandit dataset - - for mod.cb_type, mod.choices_lambda, mod.dataset, mod.warm_start_frac in product(choices_cb_types, choices_choices_lambda, mod.ds_task, mod.choices_warm_start): - #mod.dataset_supervised = './vw_' + str(mod.fprob1) + '_m.vw' - #mod.dataset_bandit = './source2_' + str(mod.fprob2) + '_m.vw' - gen_comparison_graph(mod) + main_loop(mod) From 32d33bad964f114749a587cf6346aa81286f3df7 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Tue, 20 Mar 2018 09:50:23 -0400 Subject: [PATCH 038/127] . 
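Moves the .sum aggregation in alg_comparison.py onto pandas so the
per-task files can be stacked and grouped by choices_lambda. A trimmed
sketch of the load step, using the same five columns the tasks write (the
helper name and glob pattern are placeholders); pd.concat also avoids the
quadratic cost of appending one frame at a time:

    import glob
    import pandas as pd

    def load_summaries(pattern='../../../figs/*.sum'):
        # Illustrative helper. One whitespace-separated row per
        # configuration; stacking every task's file restores the full
        # grid of results.
        cols = ['dataset', 'combined', 'bandit_only',
                'supervised_only', 'choices_lambda']
        frames = [pd.read_table(f, sep=' ', header=None, names=cols)
                  for f in sorted(glob.glob(pattern))]
        return pd.concat(frames, ignore_index=True)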
--- scripts/alg_comparison.py | 64 +++++++++++++++++--------------------- scripts/plot_warm_start.py | 11 +++++-- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index c0556442dc9..c22baa8ef17 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -4,6 +4,7 @@ import pylab import os import glob +import pandas as pd def sum_files(result_path): @@ -15,50 +16,43 @@ def sum_files(result_path): def parse_sum_file(sum_filename): f = open(sum_filename, 'r') - line = f.readline() - num_cols = len(line.split()) - f.seek(0) - results = [[] for i in range(num_cols)] - - for line in f: - splitted = line.split() - for i in range(len(splitted)): - if (i == 0): - results[i].append(splitted[i]) - else: - results[i].append(float(splitted[i])) - return results + table = pd.read_table(f, sep=' ', header=None, names=['dataset','combined','bandit_only','supervised_only','choices_lambda'], + lineterminator='\n') + return table if __name__ == '__main__': - results_path = '../figs/' + results_path = '../../../figs/' dss = sum_files(results_path) - all_results = [] + all_results = None for i in range(len(dss)): result = parse_sum_file(results_path + dss[i]) - if (i == 0): all_results = result else: - num_cols = len(result) - for j in range(num_cols): - all_results[j] += result[j] - + all_results = all_results.append(result) print all_results - - - # compare combined w/ supervised - plt.plot([0,1],[0,1]) - plt.scatter(all_results[1], all_results[3]) - plt.title('combined vs supervised only') - pylab.savefig('comb_v_super' +'.png') - plt.gcf().clear() - - # compare combined w/ bandit - plt.plot([0,1],[0,1]) - plt.scatter(all_results[1], all_results[2]) - plt.title('combined vs bandit only') - pylab.savefig('comb_v_bandit' +'.png') - plt.gcf().clear() + #choices_choices_lambda = sorted(all_results['choices_lambda'].unique()) + grouped = all_results.groupby('choices_lambda') + + for cl, results_lambda in grouped: + #results_lambda = all_results[all_results['choices_lambda'] == cl] + # compare combined w/ supervised + results_combined = results_lambda['combined'].tolist() + results_bandit = results_lambda['bandit_only'].tolist() + results_supervised = results_lambda['supervised_only'].tolist() + + # compare combined w/ bandit + plt.plot([0,1],[0,1]) + plt.scatter(results_combined, results_bandit) + plt.title('combined vs bandit only') + pylab.savefig('comb_v_bandit ' + 'choices_lambda=' + str(cl) +'.png') + plt.gcf().clear() + + plt.plot([0,1],[0,1]) + plt.scatter(results_combined, results_supervised) + plt.title('combined vs supervised only') + pylab.savefig('comb_v_supervised ' + 'choices_lambda=' + str(cl) +'.png') + plt.gcf().clear() diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 91cce14b1e5..a9a787f6138 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -168,6 +168,7 @@ def ds_per_task(mod): # allocated equally config_all = [item for item in product(mod.choices_cb_types, mod.choices_warm_start_frac, mod.choices_choices_lambda, mod.dss)] config_task = [] + print len(config_all) for i in range(len(config_all)): if (i % mod.num_tasks == mod.task_id): config_task.append(config_all[i]) @@ -237,7 +238,8 @@ def main_loop(mod): # use fractions instead of absolute numbers - mod.choices_warm_start_frac = [0.01 * pow(2, i) for i in range(3,5)] + #mod.choices_warm_start_frac = [0.01 * pow(2, i) for i in range(1)] + mod.choices_warm_start_frac = [0.01, 0.03, 0.1, 0.3] 
#mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] @@ -246,10 +248,12 @@ def main_loop(mod): #choices_fprob1 = [0.1, 0.2, 0.3] #choices_fprob2 = [0.1, 0.2, 0.3] #choices_cb_types = ['mtr', 'ips'] + #mod.choices_cb_types = ['mtr', 'ips'] mod.choices_cb_types = ['mtr', 'ips'] - #mod.choices_cb_types = ['mtr'] #choices_choices_lambda = [pow(2,i) for i in range(10,11)] - mod.choices_choices_lambda = [i for i in range(1,3)] + #mod.choices_choices_lambda = [i for i in range(1,3)] + #mod.choices_choices_lambda = [i for i in range(1,2)] + mod.choices_choices_lambda = [1, 3, 5, 7] #[i for i in range(10,11)] #for correctness test @@ -258,6 +262,7 @@ def main_loop(mod): #choices_fprob2 = [0.1] mod.dss = ds_files(mod.ds_path) + #mod.dss = mod.dss[:5] # here, we are generating the task specific parameter settings # by first generate all parameter setting and pick every num_tasks of them From f1355b74d092a19e25aeafa2d28242dd89ebd0fc Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 2 Apr 2018 17:39:09 -0400 Subject: [PATCH 039/127] tweaked the scripts --- scripts/alg_comparison.py | 79 +++++++++++++++++++++++++++----------- scripts/plot_warm_start.py | 41 ++++++++++++++------ 2 files changed, 86 insertions(+), 34 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index c22baa8ef17..29a68e136d2 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -5,6 +5,9 @@ import os import glob import pandas as pd +import scipy.stats as stats +from itertools import compress +from math import sqrt def sum_files(result_path): @@ -16,10 +19,45 @@ def sum_files(result_path): def parse_sum_file(sum_filename): f = open(sum_filename, 'r') - table = pd.read_table(f, sep=' ', header=None, names=['dataset','combined','bandit_only','supervised_only','choices_lambda'], + table = pd.read_table(f, sep=' ', header=None, names=['dataset','choices_lambda_1','choices_lambda_5','bandit_only','supervised_only','size'], lineterminator='\n') return table +def get_significance(errors_1, errors_2, sizes): + significance = [] + for i in range(len(errors_1)): + significance.append( significant(errors_1[i], errors_2[i], sizes[i]) ) + return significance + +def significant(err_1, err_2, size): + z = (err_1 - err_2) / sqrt( (err_1*(1 - err_1) + err_2*(1-err_2)) / size ) + + print z + + if (stats.norm.cdf(z) < 0.05) or (stats.norm.cdf(z) > 0.95): + return True + else: + return False + +def plot_comparison(errors_1, errors_2, sizes, title, filename): + print title + + plt.plot([0,1],[0,1]) + significance = get_significance(errors_1, errors_2, sizes) + results_signi_1 = list(compress(errors_1, significance)) + results_signi_2 = list(compress(errors_2, significance)) + plt.scatter(results_signi_1, results_signi_2, s=18, c='r') + + insignificance = [not b for b in significance] + results_insigni_1 = list(compress(errors_1, insignificance)) + results_insigni_2 = list(compress(errors_2, insignificance)) + + plt.scatter(results_insigni_1, results_insigni_2, s=2, c='k') + plt.title(title) + pylab.savefig(filename) + plt.gcf().clear() + + if __name__ == '__main__': results_path = '../../../figs/' @@ -35,24 +73,21 @@ def parse_sum_file(sum_filename): print all_results #choices_choices_lambda = sorted(all_results['choices_lambda'].unique()) - grouped = all_results.groupby('choices_lambda') - - for cl, results_lambda in grouped: - #results_lambda = all_results[all_results['choices_lambda'] == cl] - # compare combined w/ supervised - 
results_combined = results_lambda['combined'].tolist() - results_bandit = results_lambda['bandit_only'].tolist() - results_supervised = results_lambda['supervised_only'].tolist() - - # compare combined w/ bandit - plt.plot([0,1],[0,1]) - plt.scatter(results_combined, results_bandit) - plt.title('combined vs bandit only') - pylab.savefig('comb_v_bandit ' + 'choices_lambda=' + str(cl) +'.png') - plt.gcf().clear() - - plt.plot([0,1],[0,1]) - plt.scatter(results_combined, results_supervised) - plt.title('combined vs supervised only') - pylab.savefig('comb_v_supervised ' + 'choices_lambda=' + str(cl) +'.png') - plt.gcf().clear() + #grouped = all_results.groupby('choices_lambda') + + #for cl, results_lambda in grouped: + #results_lambda = all_results[all_results['choices_lambda'] == cl] + # compare combined w/ supervised + + results_choices_lambda_1 = all_results['choices_lambda_1'].tolist() + results_choices_lambda_5 = all_results['choices_lambda_5'].tolist() + results_bandit = all_results['bandit_only'].tolist() + results_supervised = all_results['supervised_only'].tolist() + dataset_sizes = all_results['size'].tolist() + + # compare combined w/ bandit + plot_comparison(results_choices_lambda_1, results_bandit, dataset_sizes, 'choices_lambda=1 vs bandit only', 'choices_lambda_1_v_bandit_only.png') + plot_comparison(results_choices_lambda_1, results_supervised, dataset_sizes, 'choices_lambda=1 vs supervised only', 'choices_lambda_1_v_supervised_only.png') + plot_comparison(results_choices_lambda_5, results_bandit, dataset_sizes, 'choices_lambda=5 vs bandit only', 'choices_lambda_5_v_bandit_only.png') + plot_comparison(results_choices_lambda_5, results_supervised, dataset_sizes, 'choices_lambda=5 vs supervised only', 'choices_lambda_5_v_supervised_only.png') + plot_comparison(results_choices_lambda_1, results_choices_lambda_5, dataset_sizes, 'choices_lambda=1 vs choices_lambda=5', 'choices_lambda_1_v_choices_lambda_5.png') diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index a9a787f6138..fffbd8c8a5b 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -90,21 +90,36 @@ def gen_comparison_graph(mod): #config_name = str(mod.dataset) + '_' + str(mod.fprob1)+'_'+str(mod.fprob2)+'_'+str(mod.warm_start)+'_'+str(mod.bandit)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) - config_name = str(mod.dataset) + '_'+str(mod.warm_start)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) + config_name = str(mod.dataset) + '_'+str(mod.warm_start)+ '_' + str(mod.cb_type) - # combined approach + # combined approach, lambdas = 1 + mod.choices_lambda = 1 mod.no_bandit = False mod.no_supervised = False mod.no_exploration = False - mod.vw_output_filename = mod.results_path+config_name+'.txt' + mod.vw_output_filename = mod.results_path+config_name+'choices_lambda='+str(mod.choices_lambda)+'.txt' execute_vw(mod) - avg_loss_comb, last_loss_comb, wt_comb = collect_stats(mod) - line = plt.plot(wt_comb, avg_loss_comb, 'r', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) + avg_loss_comb_1, last_loss_comb_1, wt_comb_1 = collect_stats(mod) + line = plt.plot(wt_comb_1, avg_loss_comb_1, 'r', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) - avg_error_comb = avg_error(mod) + avg_error_comb_1 = avg_error(mod) + + # combined approach, lambdas = 5 + mod.choices_lambda = 5 + mod.no_bandit = False + mod.no_supervised = False + mod.no_exploration = False + mod.vw_output_filename = 
mod.results_path+config_name+'choices_lambda='+str(mod.choices_lambda)+'.txt' + execute_vw(mod) + + avg_loss_comb_5, last_loss_comb_5, wt_comb_5 = collect_stats(mod) + line = plt.plot(wt_comb_5, avg_loss_comb_5, 'm', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) + + avg_error_comb_5 = avg_error(mod) # bandit only approach + mod.choices_lambda = 1 mod.no_bandit = False mod.no_supervised = True mod.no_exploration = False @@ -117,6 +132,7 @@ def gen_comparison_graph(mod): avg_error_band_only = avg_error(mod) # supervised only approach + mod.choices_lambda = 1 mod.no_bandit = True mod.no_supervised = False mod.no_exploration = False @@ -133,7 +149,7 @@ def gen_comparison_graph(mod): avg_error_sup_only = avg_error(mod) summary_file = open(mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum', 'a') - summary_file.write(config_name + ' ' + str(avg_error_comb) + ' ' + str(avg_error_band_only) + ' ' + str(avg_error_sup_only) + ' ' + str(mod.choices_lambda) + '\n') + summary_file.write(config_name + ' ' + str(avg_error_comb_1) + ' ' + str(avg_error_comb_5) + ' ' + str(avg_error_band_only) + ' ' + str(avg_error_sup_only) + ' ' + str(mod.bandit) + '\n') summary_file.close() print('') @@ -166,7 +182,7 @@ def get_num_classes(ds): def ds_per_task(mod): # put dataset name to the last coordinate so that the task workloads tend to be # allocated equally - config_all = [item for item in product(mod.choices_cb_types, mod.choices_warm_start_frac, mod.choices_choices_lambda, mod.dss)] + config_all = [item for item in product(mod.choices_cb_types, mod.choices_warm_start_frac, mod.dss)] config_task = [] print len(config_all) for i in range(len(config_all)): @@ -198,7 +214,7 @@ def main_loop(mod): summary_file = open(mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum', 'w') summary_file.close() - for mod.cb_type, mod.warm_start_frac, mod.choices_lambda, mod.dataset in mod.config_task: + for mod.cb_type, mod.warm_start_frac, mod.dataset in mod.config_task: gen_comparison_graph(mod) @@ -239,7 +255,8 @@ def main_loop(mod): # use fractions instead of absolute numbers #mod.choices_warm_start_frac = [0.01 * pow(2, i) for i in range(1)] - mod.choices_warm_start_frac = [0.01, 0.03, 0.1, 0.3] + #mod.choices_warm_start_frac = [0.01, 0.03, 0.1, 0.3] + mod.choices_warm_start_frac = [0.03] #mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] @@ -249,11 +266,11 @@ def main_loop(mod): #choices_fprob2 = [0.1, 0.2, 0.3] #choices_cb_types = ['mtr', 'ips'] #mod.choices_cb_types = ['mtr', 'ips'] - mod.choices_cb_types = ['mtr', 'ips'] + mod.choices_cb_types = ['mtr'] #choices_choices_lambda = [pow(2,i) for i in range(10,11)] #mod.choices_choices_lambda = [i for i in range(1,3)] #mod.choices_choices_lambda = [i for i in range(1,2)] - mod.choices_choices_lambda = [1, 3, 5, 7] + #mod.choices_choices_lambda = [1, 3, 5, 7] #[i for i in range(10,11)] #for correctness test From caac66e264aa4f47a58b5296574d059b92a7d812 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 2 Apr 2018 18:31:18 -0400 Subject: [PATCH 040/127] . 
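Adds a one-off shell pass that shuffles every gzipped dataset into
../vwshuffled before the grid jobs run. A rough Python equivalent of the
bash loop, kept here for reference (in-memory, so it assumes each dataset
fits in RAM; the helper name is illustrative):

    import glob, gzip, os, random

    def shuffle_dataset(src, dst_dir='../vwshuffled'):
        # Illustrative helper mirroring shuffle.sh: zcat | shuf | gzip in
        # one process -- read the example lines, permute them uniformly
        # at random, and rewrite under the same basename.
        with gzip.open(src, 'rb') as f:
            lines = f.readlines()
        random.shuffle(lines)
        dst = os.path.join(dst_dir, os.path.basename(src))
        with gzip.open(dst, 'wb') as f:
            f.writelines(lines)

    for path in sorted(glob.glob('./*.vw.gz')):
        shuffle_dataset(path)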
--- scripts/shuffle.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 scripts/shuffle.sh diff --git a/scripts/shuffle.sh b/scripts/shuffle.sh new file mode 100644 index 00000000000..69aacfc3ee5 --- /dev/null +++ b/scripts/shuffle.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +suffix=".gz" + +for filename in ./*.vw.gz; do + vw_name=$(echo "$filename" | sed -e "s/$suffix$//") + echo $vw_name + zcat $filename | shuf > ../vwshuffled/$vw_name + gzip ../vwshuffled/$vw_name +done From 9a4eef56f80904c15e7be303fa1d4c3c4bbebc42 Mon Sep 17 00:00:00 2001 From: chicheng Date: Mon, 2 Apr 2018 20:54:37 -0400 Subject: [PATCH 041/127] . --- vowpalwabbit/cbify.cc | 62 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 2ec09cce29a..0b3babf5a15 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -5,6 +5,7 @@ #include "bs.h" #include "../explore/cpp/MWTExplorer.h" #include "vw.h" +#include using namespace LEARNER; using namespace MultiWorldTesting; @@ -68,10 +69,45 @@ struct cbify CB::label* cbl_empty; bool warm_start; float* old_weights; + float label_corrupt; }; +float rand_zeroone() +{ + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<> dis(0.0, 1.0); + return dis(gen); +} + + +size_t generate_uar_action(size_t num_actions) +{ + float rand = rand_zeroone(); + for (size_t i = 1; i <= num_actions; i++) + { + if (rand <= float(i) / num_actions) + return i; + } + return num_actions; + +} + +size_t corrupt_action(size_t action, size_t num_actions, float label_corrupt) +{ + float rand = rand_zeroone(); + if (rand < label_corrupt) + return generate_uar_action(num_actions); + else + return action; + +} + + + + vector vw_scorer::Score_Actions(example& ctx) { vector probs_vec; @@ -269,6 +305,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) { data.warm_start_period--; + /* //generate cost-sensitive label COST_SENSITIVE::label& csl = *data.csls; csl.costs.resize(data.num_actions); @@ -283,9 +320,29 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //predict data.all->cost_sensitive->predict(ec, argmin); + */ + + //predict + data.all->cost_sensitive->predict(ec, argmin); + + //first, corrupt fully supervised example ec's label here + size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.label_corrupt); + + //generate cost-sensitive label + COST_SENSITIVE::label& csl = *data.csls; + csl.costs.resize(data.num_actions); + csl.costs.end() = csl.costs.begin()+data.num_actions; + for (uint32_t j = 0; j < data.num_actions; j++) + { + csl.costs[j].class_index = j+1; + csl.costs[j].x = loss(data, corrupted_label, j+1); + } + + ec.l.cs = csl; if (data.ind_supervised) { + for (uint32_t i = 0; i < data.choices_lambda; i++) { ec.weight = 1; @@ -533,7 +590,8 @@ base_learner* cbify_setup(vw& all) ("bandit", po::value(), "number of training examples for bandit processing") ("choices_lambda", po::value(), "numbers of lambdas importance weights to aggregate") ("no_supervised", "indicator of using supervised only") - ("no_bandit", "indicator of using bandit only"); + ("no_bandit", "indicator of using bandit only") + ("label_corrupt", po::value(), "probability of label corruption in the supervised datasets (when corruption happens, the new label is chosen uniformly at random)"); add_options(all); po::variables_map& vm = all.vm; @@ -562,6 +620,8 @@ base_learner* cbify_setup(vw& all) //cout<() : 1; + 
data.label_corrupt = vm.count("label_corrupt") ? vm["label_corrupt"].as() : 0.0; + generate_lambdas(data.lambdas, data.choices_lambda); for (size_t i = 0; i < data.choices_lambda; i++) From b30d98747a8146548d5b87a5dda23cb20090d1ce Mon Sep 17 00:00:00 2001 From: chicheng Date: Mon, 2 Apr 2018 21:22:49 -0400 Subject: [PATCH 042/127] label corruption code --- vowpalwabbit/cbify.cc | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 0b3babf5a15..f92fcdb454a 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -86,6 +86,8 @@ float rand_zeroone() size_t generate_uar_action(size_t num_actions) { float rand = rand_zeroone(); + //cout< vw_scorer::Score_Actions(example& ctx) { vector probs_vec; @@ -322,9 +321,6 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.all->cost_sensitive->predict(ec, argmin); */ - //predict - data.all->cost_sensitive->predict(ec, argmin); - //first, corrupt fully supervised example ec's label here size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.label_corrupt); @@ -338,7 +334,10 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) csl.costs[j].x = loss(data, corrupted_label, j+1); } - ec.l.cs = csl; + ec.l.cs = csl; + + //predict (for vw's internal reason, this step has to be put after ec's cs label is created) + data.all->cost_sensitive->predict(ec, argmin); if (data.ind_supervised) { @@ -405,8 +404,6 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) ec.weight = 0; } - - } @@ -444,12 +441,14 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.ind_supervised) { + size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.label_corrupt); + for (uint32_t i = 0; i < data.choices_lambda; i++) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) { csls[a].costs[0].class_index = a+1; - csls[a].costs[0].x = loss(data, ld.label, a+1); + csls[a].costs[0].x = loss(data, corrupted_label, a+1); cbls[a] = ecs[a].l.cb; ecs[a].l.cs = csls[a]; @@ -507,7 +506,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) for (size_t a = 0; a < data.adf_data.num_actions; ++a) { data.old_weights[a] = ecs[a].weight; - ecs[a].weight *= data.lambdas[i] / (1- data.lambdas[i]); + ecs[a].weight *= data.lambdas[i] / (1-data.lambdas[i]); base.learn(ecs[a], i); } base.learn(*empty_example, i); From 6735a024640200d1d4cd35d4193c005b0f04b43e Mon Sep 17 00:00:00 2001 From: chicheng Date: Tue, 3 Apr 2018 01:37:44 -0400 Subject: [PATCH 043/127] supervised dataset validation --- vowpalwabbit/cbify.cc | 195 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 157 insertions(+), 38 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index f92fcdb454a..523e062e048 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -69,8 +69,15 @@ struct cbify CB::label* cbl_empty; bool warm_start; float* old_weights; - float label_corrupt; + float corrupt_prob_supervised; + float corrupt_prob_bandit; + size_t corrupt_type_supervised; + size_t corrupt_type_bandit; + size_t validation_method; + size_t bandit_iter; + size_t warm_start_iter; + v_array supervised_validation; }; @@ -80,6 +87,7 @@ float rand_zeroone() std::mt19937 gen(rd()); std::uniform_real_distribution<> dis(0.0, 1.0); return dis(gen); + //return 0.5; } @@ -97,11 +105,16 @@ size_t generate_uar_action(size_t num_actions) } -size_t corrupt_action(size_t action, size_t 
num_actions, float label_corrupt) +size_t corrupt_action(size_t action, size_t num_actions, float label_corrupt, size_t type) { float rand = rand_zeroone(); if (rand < label_corrupt) - return generate_uar_action(num_actions); + { + if (type == 1) + return generate_uar_action(num_actions); + else + return (action % num_actions) + 1; + } else return action; @@ -255,33 +268,95 @@ uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) { - //IPS for approximating the cumulative costs for all lambdas - for (uint32_t i = 0; i < data.choices_lambda; i++) + // validation using bandit data + if (data.validation_method == 1) { - data.all->cost_sensitive->predict(ec, i); - if (ec.pred.multiclass == cl.action) - data.cumulative_costs[i] += cl.cost / cl.probability; - //cout<cost_sensitive->predict(ec, i); + if (ec.pred.multiclass == cl.action) + data.cumulative_costs[i] += cl.cost / cl.probability; + //cout<cost_sensitive->predict(ec_valid, i); + + //cout< 0) // Call the cost-sensitive learner directly + if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly { - data.warm_start_period--; - /* //generate cost-sensitive label COST_SENSITIVE::label& csl = *data.csls; @@ -322,10 +395,14 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) */ //first, corrupt fully supervised example ec's label here - size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.label_corrupt); + size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.corrupt_prob_supervised, data.corrupt_type_supervised); + //use this for now; I am not sure if v_array is the same as STL's array where elements are copied when brought in + ld.label = corrupted_label; //generate cost-sensitive label - COST_SENSITIVE::label& csl = *data.csls; + //COST_SENSITIVE::label& csl = *data.csls; + COST_SENSITIVE::label* cslp = calloc_or_throw(1); + COST_SENSITIVE::label csl = *cslp; csl.costs.resize(data.num_actions); csl.costs.end() = csl.costs.begin()+data.num_actions; for (uint32_t j = 0; j < data.num_actions; j++) @@ -348,13 +425,23 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.all->cost_sensitive->learn(ec, i); } } - ec.l.multi = ld; + + //ec.l.multi = ld; ec.weight = 0; + + // This is purely a hack here - need to clean up; I also did not deallocate the label and the copied example in finish() + example* ecp = calloc_or_throw(1); + VW::copy_example_data(false, ecp, &ec); + ecp->l.cs = csl; + + // I am not sure if written this way, ec will be deleted in some other stages and causes error + if (data.validation_method == 2) + data.supervised_validation.push_back(*ecp); + + data.warm_start_iter++; } - else if (data.bandit_period > 0)//Call the cb_explore algorithm. It returns a vector of probabilities for each action + else if (data.bandit_iter < data.bandit_period)//Call the cb_explore algorithm. 
It returns a vector of probabilities for each action { - data.bandit_period--; - data.cb_label.costs.erase(); ec.l.cb = data.cb_label; ec.pred.a_s = data.a_s; @@ -370,7 +457,9 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) if(!cl.action) THROW("No action with non-zero probability found!"); - cl.cost = loss(data, ld.label, cl.action); + + size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.corrupt_prob_bandit, data.corrupt_type_bandit); + cl.cost = loss(data, corrupted_label, cl.action); // accumulate the cumulative costs of lambdas accumulate_costs_ips(data, ec, cl); @@ -395,6 +484,8 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) ec.l.multi = ld; ec.pred.multiclass = action; ec.weight = old_weight; + + data.bandit_iter++; } else { @@ -422,9 +513,8 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) copy_example_to_adf(data, ec); - if (data.warm_start_period > 0) // Call the cost-sensitive learner directly + if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly { - data.warm_start_period--; best_action = predict_sublearner(data, base, argmin); @@ -439,10 +529,10 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) CB::label* cbls = data.cbls; CB::label* cbl_empty = data.cbl_empty; + size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.corrupt_prob_supervised, data.corrupt_type_supervised); + if (data.ind_supervised) { - size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.label_corrupt); - for (uint32_t i = 0; i < data.choices_lambda; i++) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) @@ -466,10 +556,21 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) ec.pred.multiclass = best_action; ec.l.multi = ld; ec.weight = 0; + + //a hack here - allocated memories not deleted + example* ecp = calloc_or_throw(1); + VW::copy_example_data(false, ecp, &ec); + ecp->l.multi.label = corrupted_label; + ecp->l.multi.weight = 1.0; + + if (data.validation_method == 2) + data.supervised_validation.push_back(*ecp); + + data.warm_start_iter++; + } - else if (data.bandit_period > 0) // call the bandit learner + else if (data.bandit_iter < data.bandit_period) // call the bandit learner { - data.bandit_period--; for (size_t a = 0; a < data.adf_data.num_actions; ++a) { @@ -489,7 +590,9 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if(!cl.action) THROW("No action with non-zero probability found!"); - cl.cost = loss(data, ld.label, cl.action); + + size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.corrupt_prob_bandit, data.corrupt_type_bandit); + cl.cost = loss(data, corrupted_label, cl.action); // accumulate the cumulative costs of lambdas accumulate_costs_ips_adf(data, ec, cl, base); @@ -517,6 +620,8 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) } ec.pred.multiclass = cl.action; + + data.bandit_iter++; } else { @@ -563,6 +668,8 @@ void init_adf_data(cbify& data, const size_t num_actions) void generate_lambdas(v_array& lambdas, size_t lambda_size) { + // The lambdas are in fact arranged in ascending order (the middle lambda is 0.5) + lambdas = v_init(); uint32_t mid = lambda_size / 2; for (uint32_t i = 0; i < lambda_size; i++) @@ -590,7 +697,11 @@ base_learner* cbify_setup(vw& all) ("choices_lambda", po::value(), "numbers of lambdas importance weights to aggregate") ("no_supervised", "indicator of using 
supervised only") ("no_bandit", "indicator of using bandit only") - ("label_corrupt", po::value(), "probability of label corruption in the supervised datasets (when corruption happens, the new label is chosen uniformly at random)"); + ("corrupt_prob_supervised", po::value(), "probability of label corruption in the supervised part") + ("corrupt_prob_bandit", po::value(), "probability of label corruption in the bandit part") + ("corrupt_type_supervised", po::value(), "type of label corruption in the supervised part (1 is uar, 2 is circular)") + ("corrupt_type_bandit", po::value(), "probability of label corruption in the bandit part (1 is uar, 2 is circular)") + ("validation_method", po::value(), "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised and amortizing)"); add_options(all); po::variables_map& vm = all.vm; @@ -619,7 +730,15 @@ base_learner* cbify_setup(vw& all) //cout<() : 1; - data.label_corrupt = vm.count("label_corrupt") ? vm["label_corrupt"].as() : 0.0; + data.corrupt_prob_supervised = vm.count("corrupt_prob_supervised") ? vm["corrupt_prob_supervised"].as() : 0.0; + data.corrupt_prob_bandit = vm.count("corrupt_prob_bandit") ? vm["corrupt_prob_bandit"].as() : 0.0; + data.corrupt_type_supervised = vm.count("corrupt_type_supervised") ? vm["corrupt_type_supervised"].as() : 1; + data.corrupt_type_bandit = vm.count("corrupt_type_bandit") ? vm["corrupt_type_bandit"].as() : 1; + data.validation_method = vm.count("validation_method") ? vm["validation_method"].as() : 1; + + data.bandit_iter = 0; + data.warm_start_iter = 0; + generate_lambdas(data.lambdas, data.choices_lambda); From 024d9cc88f1b12aea6be490e733ae541986ac4f4 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Tue, 3 Apr 2018 03:41:14 -0400 Subject: [PATCH 044/127] lambda script --- scripts/plot_warm_start.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index fffbd8c8a5b..3198b3acfc6 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -35,8 +35,9 @@ def collect_stats(mod): #if not line.strip(): # end_table = True #if linenumber >= 9 and (not end_table): - vw_progress_pattern = '\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+[a-zA-Z0-9]+\s+[a-zA-Z0-9]\s+\d+' + vw_progress_pattern = '\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+[a-zA-Z0-9]+\s+[a-zA-Z0-9]+\s+\d+' matchobj = re.match(vw_progress_pattern, line) + if matchobj: items = line.split() avg_loss.append(float(items[0])) @@ -146,6 +147,7 @@ def gen_comparison_graph(mod): avg_loss_sup_only = [avg_loss for i in range(len_avg_loss)] line = plt.plot(wt_sup_only, avg_loss_sup_only, 'g', label='Supervised only') + avg_error_sup_only = avg_error(mod) summary_file = open(mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum', 'a') @@ -279,6 +281,7 @@ def main_loop(mod): #choices_fprob2 = [0.1] mod.dss = ds_files(mod.ds_path) + #mod.dss = ["ds_223_63.vw.gz"] #mod.dss = mod.dss[:5] # here, we are generating the task specific parameter settings From bd5fe57469b7ddce35661eedc107ca816509a5b9 Mon Sep 17 00:00:00 2001 From: chicheng Date: Tue, 3 Apr 2018 05:53:40 -0400 Subject: [PATCH 045/127] weighting scheme --- vowpalwabbit/cbify.cc | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 523e062e048..5fb42fb32a4 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -77,6 +77,7 @@ struct cbify size_t validation_method; size_t bandit_iter; size_t 
+  size_t weighting_scheme;
   v_array<example> supervised_validation;
 };

@@ -475,7 +476,11 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec)
   {
     for (uint32_t i = 0; i < data.choices_lambda; i++)
     {
-      ec.weight = old_weight * data.lambdas[i] / (1-data.lambdas[i]);
+      if (data.weighting_scheme == 1)
+        ec.weight = old_weight * data.lambdas[i] / (1-data.lambdas[i]);
+      else
+        ec.weight = old_weight * data.lambdas[i] / (1-data.lambdas[i]) * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) );
+
       base.learn(ec, i);
     }
   }
@@ -609,7 +614,12 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec)
       for (size_t a = 0; a < data.adf_data.num_actions; ++a)
       {
         data.old_weights[a] = ecs[a].weight;
-        ecs[a].weight *= data.lambdas[i] / (1-data.lambdas[i]);
+
+        if (data.weighting_scheme == 1)
+          ecs[a].weight *= data.lambdas[i] / (1-data.lambdas[i]);
+        else
+          ecs[a].weight *= data.lambdas[i] / (1-data.lambdas[i]) * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) );
+
         base.learn(ecs[a], i);
       }
       base.learn(*empty_example, i);
@@ -701,7 +711,8 @@ base_learner* cbify_setup(vw& all)
     ("corrupt_prob_bandit", po::value<float>(), "probability of label corruption in the bandit part")
     ("corrupt_type_supervised", po::value<size_t>(), "type of label corruption in the supervised part (1 is uar, 2 is circular)")
     ("corrupt_type_bandit", po::value<size_t>(), "type of label corruption in the bandit part (1 is uar, 2 is circular)")
-    ("validation_method", po::value<size_t>(), "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised and amortizing)");
+    ("validation_method", po::value<size_t>(), "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised and amortizing)")
+    ("weighting_scheme", po::value<size_t>(), "weighting scheme (1 is per instance weighting, 2 is per dataset weighting (where we use a diminishing weighting scheme) )");
   add_options(all);

   po::variables_map& vm = all.vm;
@@ -735,6 +746,8 @@ base_learner* cbify_setup(vw& all)
   data.corrupt_type_supervised = vm.count("corrupt_type_supervised") ? vm["corrupt_type_supervised"].as<size_t>() : 1;
   data.corrupt_type_bandit = vm.count("corrupt_type_bandit") ? vm["corrupt_type_bandit"].as<size_t>() : 1;
   data.validation_method = vm.count("validation_method") ? vm["validation_method"].as<size_t>() : 1;
+  data.weighting_scheme = vm.count("weighting_scheme") ? vm["weighting_scheme"].as<size_t>() : 1;
+

   data.bandit_iter = 0;
   data.warm_start_iter = 0;

From 3f64541ccd3f96a663c03bcc9c1a6b9a9e097705 Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Tue, 3 Apr 2018 11:09:14 -0400
Subject: [PATCH 046/127] scripts: compare instance vs. dataset weighting in the plots
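
The comparison scripts are generalized so the two plotted arms are named by
the alg1/alg2 variables (here: instance weighting vs. dataset weighting from
the previous patch), and the warm-start plots gain the cover baseline plus
the corruption and validation flags.

For reference, a minimal standalone sketch of the two bandit-example weight
multipliers these runs compare; bandit_weight_multiplier is a hypothetical
helper written for illustration, not a function in cbify.cc:

    #include <cstddef>

    // Weight put on one bandit example for a given lambda.
    // Scheme 1 (per instance): every bandit example gets lambda/(1-lambda).
    // Scheme 2 (per dataset): the multiplier also decays with the bandit
    // round t; since sum over t of 1/((t+1)(t+2)) telescopes to 1, the whole
    // bandit stream carries total weight comparable to the warm-start set.
    float bandit_weight_multiplier(float lambda, size_t weighting_scheme,
                                   size_t warm_start_period, size_t bandit_iter)
    {
      float base = lambda / (1.f - lambda); // assumes lambda < 1
      if (weighting_scheme == 1)            // per-instance weighting
        return base;
      // per-dataset weighting: diminishing per-round factor
      return base * warm_start_period
                  / ((bandit_iter + 1) * (bandit_iter + 2));
    }
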
--- scripts/alg_comparison.py | 35 ++++++++++++++------ scripts/plot_warm_start.py | 66 +++++++++++++++++++++++++++++++++----- 2 files changed, 83 insertions(+), 18 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index 29a68e136d2..fad7c281aa6 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -9,6 +9,13 @@ from itertools import compress from math import sqrt +# this part is changable +#alg1 = 'epsilon' +#alg2 = 'cover' +#alg1 = 'choices_lambda_1' +#alg2 = 'choices_lambda_5' +alg1 = 'instance weighting' +alg2 = 'dataset weighting' def sum_files(result_path): prevdir = os.getcwd() @@ -19,8 +26,9 @@ def sum_files(result_path): def parse_sum_file(sum_filename): f = open(sum_filename, 'r') - table = pd.read_table(f, sep=' ', header=None, names=['dataset','choices_lambda_1','choices_lambda_5','bandit_only','supervised_only','size'], - lineterminator='\n') + table = pd.read_table(f, sep=' ', header=None, names=['dataset',alg1,alg2,'bandit_only','supervised_only','size'], + lineterminator='\n') + return table def get_significance(errors_1, errors_2, sizes): @@ -60,7 +68,14 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): if __name__ == '__main__': - results_path = '../../../figs/' + #results_path = '../../../lambdas/' + #results_path = '../../../warm_start_frac=0.1/' + #results_path = '../../../cover_vs_epsilon/' + #results_path = '../../../corrupt_supervised_type1_0.3/' + #results_path = '../../../corrupt_supervised_type2_0.3/' + #results_path = '../../../supervised_validation/' + results_path = '../../../weighting_schemes/' + dss = sum_files(results_path) all_results = None @@ -79,15 +94,15 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): #results_lambda = all_results[all_results['choices_lambda'] == cl] # compare combined w/ supervised - results_choices_lambda_1 = all_results['choices_lambda_1'].tolist() - results_choices_lambda_5 = all_results['choices_lambda_5'].tolist() + results_alg1 = all_results[alg1].tolist() + results_alg2 = all_results[alg2].tolist() results_bandit = all_results['bandit_only'].tolist() results_supervised = all_results['supervised_only'].tolist() dataset_sizes = all_results['size'].tolist() # compare combined w/ bandit - plot_comparison(results_choices_lambda_1, results_bandit, dataset_sizes, 'choices_lambda=1 vs bandit only', 'choices_lambda_1_v_bandit_only.png') - plot_comparison(results_choices_lambda_1, results_supervised, dataset_sizes, 'choices_lambda=1 vs supervised only', 'choices_lambda_1_v_supervised_only.png') - plot_comparison(results_choices_lambda_5, results_bandit, dataset_sizes, 'choices_lambda=5 vs bandit only', 'choices_lambda_5_v_bandit_only.png') - plot_comparison(results_choices_lambda_5, results_supervised, dataset_sizes, 'choices_lambda=5 vs supervised only', 'choices_lambda_5_v_supervised_only.png') - plot_comparison(results_choices_lambda_1, results_choices_lambda_5, dataset_sizes, 'choices_lambda=1 vs choices_lambda=5', 'choices_lambda_1_v_choices_lambda_5.png') + plot_comparison(results_alg1, results_bandit, dataset_sizes, alg1 + ' vs ' + 'bandit only', results_path + alg1 + ' vs ' + 'bandit only' + '.png') + plot_comparison(results_alg1, results_supervised, dataset_sizes, alg1 + ' vs ' + 'supervised only', results_path + alg1 + ' vs ' + 'supervised only' + '.png') + plot_comparison(results_alg2, results_bandit, dataset_sizes, alg2 + ' vs ' + 'bandit only', results_path + alg2 + ' vs ' + 'bandit only' + '.png') + plot_comparison(results_alg2, 
results_supervised, dataset_sizes, alg2 + ' vs ' + 'supervised only', results_path + alg2 + ' vs ' + 'supervised only' + '.png') + plot_comparison(results_alg1, results_alg2, dataset_sizes, alg1 + ' vs ' + alg2, results_path+alg1 + ' vs ' + alg2 + '.png') diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 3198b3acfc6..436596cb0d0 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -46,11 +46,21 @@ def collect_stats(mod): linenumber += 1 f.close() + + if len(avg_loss) == 0: + avg_loss = [0] + last_loss = [0] + wt = [0] + return avg_loss, last_loss, wt def execute_vw(mod): alg_option = ' ' + if mod.cover_on: + alg_option += ' --cb_explore ' + str(mod.num_classes) + ' --cover 5 --psi 0.01 ' + mod.cb_type = 'dr' + mod.adf_on = False if mod.no_bandit: alg_option += ' --no_bandit ' if mod.no_supervised: @@ -62,14 +72,21 @@ def execute_vw(mod): if mod.adf_on: alg_option += ' --cb_explore_adf ' + # using two datasets #cmd_catfile = '( head -n ' + str(mod.warm_start) + ' ' + mod.dataset_supervised + ';' + ' head -n ' + str(mod.bandit) + ' ' + mod.dataset_bandit + '; )' # using only one dataset #cmd_catfile = '( head -n ' + str(mod.warm_start + mod.bandit) + ' ' + mod.dataset + '; )' #cmd_catfile = '( cat ' + mod.ds_path+mod.dataset + '; )' - cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option + ' --progress ' + str(mod.progress) + ' -d ' + mod.ds_path + mod.dataset - + cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option + ' --progress ' + str(mod.progress) \ + + ' -d ' + mod.ds_path + mod.dataset \ + + ' --corrupt_type_supervised ' + str(mod.corrupt_type_supervised) \ + + ' --corrupt_prob_supervised ' + str(mod.corrupt_prob_supervised) \ + + ' --corrupt_type_bandit ' + str(mod.corrupt_type_bandit) \ + + ' --corrupt_prob_bandit ' + str(mod.corrupt_prob_bandit) \ + + ' --validation_method ' + str(mod.validation_method) \ + + ' --weighting_scheme ' + str(mod.weighting_scheme) cmd = cmd_vw #cmd = cmd_catfile + ' | ' + cmd_vw @@ -93,37 +110,53 @@ def gen_comparison_graph(mod): config_name = str(mod.dataset) + '_'+str(mod.warm_start)+ '_' + str(mod.cb_type) - # combined approach, lambdas = 1 - mod.choices_lambda = 1 + # combined approach, epsilon + mod.choices_lambda = 5 + mod.weighting_scheme = 1 mod.no_bandit = False mod.no_supervised = False mod.no_exploration = False + mod.cover_on = False mod.vw_output_filename = mod.results_path+config_name+'choices_lambda='+str(mod.choices_lambda)+'.txt' + execute_vw(mod) avg_loss_comb_1, last_loss_comb_1, wt_comb_1 = collect_stats(mod) - line = plt.plot(wt_comb_1, avg_loss_comb_1, 'r', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) + line = plt.plot(wt_comb_1, avg_loss_comb_1, 'r', label=('Combined approach, instance weighting')) + #line = plt.plot(wt_comb_1, avg_loss_comb_1, 'r', label=('Combined approach, lambda=1')) avg_error_comb_1 = avg_error(mod) - # combined approach, lambdas = 5 + # combined approach, cover + # combined approach, per-dataset weighting + #mod.choices_lambda = 1 + #mod.no_bandit = False + #mod.no_supervised = False + #mod.no_exploration = False + #mod.cover_on = True + #mod.vw_output_filename = 
mod.results_path+config_name+'choices_lambda='+str(mod.choices_lambda)+'.txt' mod.choices_lambda = 5 + mod.weighting_scheme = 2 mod.no_bandit = False mod.no_supervised = False mod.no_exploration = False + mod.cover_on = False mod.vw_output_filename = mod.results_path+config_name+'choices_lambda='+str(mod.choices_lambda)+'.txt' execute_vw(mod) avg_loss_comb_5, last_loss_comb_5, wt_comb_5 = collect_stats(mod) - line = plt.plot(wt_comb_5, avg_loss_comb_5, 'm', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) + #line = plt.plot(wt_comb_5, avg_loss_comb_5, 'm', label=('Combined approach, lambda=5')) + line = plt.plot(wt_comb_5, avg_loss_comb_5, 'm', label=('Combined approach, dataset weighting')) avg_error_comb_5 = avg_error(mod) # bandit only approach mod.choices_lambda = 1 + mod.weighting_scheme = 1 mod.no_bandit = False mod.no_supervised = True mod.no_exploration = False + mod.cover_on = False mod.vw_output_filename = mod.results_path+config_name+'_no_supervised'+'.txt' execute_vw(mod) @@ -134,9 +167,11 @@ def gen_comparison_graph(mod): # supervised only approach mod.choices_lambda = 1 + mod.weighting_scheme = 1 mod.no_bandit = True mod.no_supervised = False mod.no_exploration = False + mod.cover_on = False mod.vw_output_filename = mod.results_path+config_name+'_no_bandit'+'.txt' execute_vw(mod) @@ -206,7 +241,13 @@ def avg_error(mod): vw_output_text = vw_output.read() #print vw_output_text rgx = re.compile('^average loss = (.*)$', flags=re.M) - avge = float(rgx.findall(vw_output_text)[0]) + + errs = rgx.findall(vw_output_text) + if not errs: + avge = 0 + else: + avge = float(errs[0]) + vw_output.close() return avge @@ -274,6 +315,15 @@ def main_loop(mod): #mod.choices_choices_lambda = [i for i in range(1,2)] #mod.choices_choices_lambda = [1, 3, 5, 7] #[i for i in range(10,11)] + #mod.corrupt_type_supervised = 2 + #mod.corrupt_prob_supervised = 0.3 + mod.corrupt_type_supervised = 1 + mod.corrupt_prob_supervised = 0.0 + + mod.corrupt_type_bandit = 1 + mod.corrupt_prob_bandit = 0.3 + + mod.validation_method = 2 #for correctness test #mod.choices_warm_start = [20] From 87f9afa57f6c53329351c00b70180be2640910f5 Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 6 Apr 2018 17:05:45 -0400 Subject: [PATCH 047/127] start properly copying the examples --- vowpalwabbit/cbify.cc | 87 +++++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 32 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 5fb42fb32a4..ff1a26837f6 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -5,7 +5,19 @@ #include "bs.h" #include "../explore/cpp/MWTExplorer.h" #include "vw.h" -#include + +#define SUPERVISED 1 +#define BANDIT 2 + +#define UAR 1 +#define CIRCULAR 2 + +#define BANDIT_VALI 1 +#define SUPERVISED_VALI 2 + +#define INSTANCE_WT 1 +#define DATASET_WT 2 + using namespace LEARNER; using namespace MultiWorldTesting; @@ -82,39 +94,51 @@ struct cbify }; -float rand_zeroone() +float rand_zeroone(vw* all) { - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution<> dis(0.0, 1.0); - return dis(gen); - //return 0.5; + float f = merand48(all->random_state); + //cout<cost_sensitive->predict(ec, argmin); */ + //Note: v_array is different STL's array; elements' references are used in v_array //first, corrupt fully supervised example ec's label here - size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.corrupt_prob_supervised, data.corrupt_type_supervised); - //use this for now; I am not sure if v_array is 
the same as STL's array where elements are copied when brought in - ld.label = corrupted_label; + size_t corrupted_label = corrupt_action(ld.label, data, SUPERVISED); //generate cost-sensitive label //COST_SENSITIVE::label& csl = *data.csls; @@ -436,7 +459,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) ecp->l.cs = csl; // I am not sure if written this way, ec will be deleted in some other stages and causes error - if (data.validation_method == 2) + if (data.validation_method == SUPERVISED_VALI) data.supervised_validation.push_back(*ecp); data.warm_start_iter++; @@ -459,7 +482,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) if(!cl.action) THROW("No action with non-zero probability found!"); - size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.corrupt_prob_bandit, data.corrupt_type_bandit); + size_t corrupted_label = corrupt_action(ld.label, data, BANDIT); cl.cost = loss(data, corrupted_label, cl.action); // accumulate the cumulative costs of lambdas @@ -476,7 +499,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) { for (uint32_t i = 0; i < data.choices_lambda; i++) { - if (data.weighting_scheme == 1) + if (data.weighting_scheme == INSTANCE_WT) ec.weight = old_weight * data.lambdas[i] / (1-data.lambdas[i]); else ec.weight = old_weight * data.lambdas[i] / (1-data.lambdas[i]) * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); @@ -534,7 +557,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) CB::label* cbls = data.cbls; CB::label* cbl_empty = data.cbl_empty; - size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.corrupt_prob_supervised, data.corrupt_type_supervised); + size_t corrupted_label = corrupt_action(ld.label, data, SUPERVISED); if (data.ind_supervised) { @@ -568,7 +591,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) ecp->l.multi.label = corrupted_label; ecp->l.multi.weight = 1.0; - if (data.validation_method == 2) + if (data.validation_method == SUPERVISED_VALI) data.supervised_validation.push_back(*ecp); data.warm_start_iter++; @@ -596,7 +619,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if(!cl.action) THROW("No action with non-zero probability found!"); - size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.corrupt_prob_bandit, data.corrupt_type_bandit); + size_t corrupted_label = corrupt_action(ld.label, data, BANDIT); cl.cost = loss(data, corrupted_label, cl.action); // accumulate the cumulative costs of lambdas @@ -615,7 +638,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { data.old_weights[a] = ecs[a].weight; - if (data.weighting_scheme == 1) + if (data.weighting_scheme == INSTANCE_WT) ecs[a].weight *= data.lambdas[i] / (1-data.lambdas[i]); else ecs[a].weight *= data.lambdas[i] / (1-data.lambdas[i]) * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); @@ -743,10 +766,10 @@ base_learner* cbify_setup(vw& all) data.corrupt_prob_supervised = vm.count("corrupt_prob_supervised") ? vm["corrupt_prob_supervised"].as() : 0.0; data.corrupt_prob_bandit = vm.count("corrupt_prob_bandit") ? vm["corrupt_prob_bandit"].as() : 0.0; - data.corrupt_type_supervised = vm.count("corrupt_type_supervised") ? vm["corrupt_type_supervised"].as() : 1; - data.corrupt_type_bandit = vm.count("corrupt_type_bandit") ? vm["corrupt_type_bandit"].as() : 1; - data.validation_method = vm.count("validation_method") ? 
vm["validation_method"].as() : 1; - data.weighting_scheme = vm.count("weighting_scheme") ? vm["weighting_scheme"].as() : 1; + data.corrupt_type_supervised = vm.count("corrupt_type_supervised") ? vm["corrupt_type_supervised"].as() : UAR; // 1 is the default value + data.corrupt_type_bandit = vm.count("corrupt_type_bandit") ? vm["corrupt_type_bandit"].as() : UAR; // 1 is the default value + data.validation_method = vm.count("validation_method") ? vm["validation_method"].as() : BANDIT_VALI; // 1 is the default value + data.weighting_scheme = vm.count("weighting_scheme") ? vm["weighting_scheme"].as() : INSTANCE_WT; // 1 is the default value data.bandit_iter = 0; From 4b54dc0f509525481f552032d097441be7233f2c Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 6 Apr 2018 18:28:12 -0400 Subject: [PATCH 048/127] model is not updating in the supervised phase --- vowpalwabbit/cbify.cc | 63 +++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 35 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index ff1a26837f6..dd2a1cd3543 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -175,6 +175,14 @@ void finish(cbify& data) data.lambdas.delete_v(); data.cumulative_costs.delete_v(); + for (size_t i = 0; i < data.warm_start_period; ++i) + { + VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, data.supervised_validation[i]); + free(&data.supervised_validation[i]); + } + + data.supervised_validation.delete_v(); + if (data.use_adf) { @@ -402,47 +410,30 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly { - /* - //generate cost-sensitive label - COST_SENSITIVE::label& csl = *data.csls; - csl.costs.resize(data.num_actions); - csl.costs.end() = csl.costs.begin()+data.num_actions; - for (uint32_t j = 0; j < data.num_actions; j++) - { - csl.costs[j].class_index = j+1; - csl.costs[j].x = loss(data, ld.label, j+1); - } - - ec.l.cs = csl; - - //predict - data.all->cost_sensitive->predict(ec, argmin); - */ - //Note: v_array is different STL's array; elements' references are used in v_array //first, corrupt fully supervised example ec's label here size_t corrupted_label = corrupt_action(ld.label, data, SUPERVISED); - //generate cost-sensitive label - //COST_SENSITIVE::label& csl = *data.csls; - COST_SENSITIVE::label* cslp = calloc_or_throw(1); - COST_SENSITIVE::label csl = *cslp; + //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) + COST_SENSITIVE::label& csl = *data.csls; + //COST_SENSITIVE::label* cslp = calloc_or_throw(1); + //COST_SENSITIVE::label csl = *cslp; + //csl.costs.end() = csl.costs.begin()+data.num_actions; + csl.costs.resize(data.num_actions); - csl.costs.end() = csl.costs.begin()+data.num_actions; for (uint32_t j = 0; j < data.num_actions; j++) { csl.costs[j].class_index = j+1; csl.costs[j].x = loss(data, corrupted_label, j+1); } - ec.l.cs = csl; + ec.l.cs = csl; //predict (for vw's internal reason, this step has to be put after ec's cs label is created) data.all->cost_sensitive->predict(ec, argmin); if (data.ind_supervised) { - for (uint32_t i = 0; i < data.choices_lambda; i++) { ec.weight = 1; @@ -450,21 +441,23 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) } } - //ec.l.multi = ld; - ec.weight = 0; - - // This is purely a hack here - need to clean up; I also did not deallocate the label and the copied example in finish() - example* ecp = calloc_or_throw(1); - 
VW::copy_example_data(false, ecp, &ec); - ecp->l.cs = csl; - - // I am not sure if written this way, ec will be deleted in some other stages and causes error - if (data.validation_method == SUPERVISED_VALI) + // NOTE WELL: for convenience in supervised validation, we intentionally use a cost-sensitive label as opposed to + // a multiclass label. This is because the csoaa learner needs a cost-sensitive label to predict (not sure why). + // I also did not deallocate the label and the copied example in finish() + if (data.validation_method == SUPERVISED_VALI) + { + example* ecp = calloc_or_throw(1); + VW::copy_example_data(false, ecp, &ec, 0, COST_SENSITIVE::cs_label.copy_label); data.supervised_validation.push_back(*ecp); + } + + //set the label of ec back to a multiclass label + ec.l.multi = ld; + ec.weight = 0; data.warm_start_iter++; } - else if (data.bandit_iter < data.bandit_period)//Call the cb_explore algorithm. It returns a vector of probabilities for each action + else if (data.bandit_iter < data.bandit_period) //Call the cb_explore learner. It returns a vector of probabilities for each action { data.cb_label.costs.erase(); ec.l.cb = data.cb_label; From 5e993af78a0d184b9d9bdbac1d3bdaa78e9a0390 Mon Sep 17 00:00:00 2001 From: chicheng Date: Sat, 7 Apr 2018 17:38:39 -0400 Subject: [PATCH 049/127] change to using proper copy example functions. Memory leak issues persist. --- vowpalwabbit/cbify.cc | 66 +++++++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 18 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index dd2a1cd3543..cb518732608 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -90,7 +90,7 @@ struct cbify size_t bandit_iter; size_t warm_start_iter; size_t weighting_scheme; - v_array supervised_validation; + example* supervised_validation; }; @@ -177,11 +177,9 @@ void finish(cbify& data) for (size_t i = 0; i < data.warm_start_period; ++i) { - VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, data.supervised_validation[i]); - free(&data.supervised_validation[i]); + //VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, data.supervised_validation[i]); } - - data.supervised_validation.delete_v(); + free(data.supervised_validation); if (data.use_adf) @@ -209,6 +207,11 @@ void finish(cbify& data) free(data.cbls); } + else + { + data.csls->costs.delete_v(); + } + free(data.csls); @@ -326,7 +329,7 @@ void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) for (uint32_t i = 0; i < data.choices_lambda; i++) { //go over the supervised validation set - for (uint32_t j = 0; j < data.supervised_validation.size(); j++) + for (uint32_t j = 0; j < data.warm_start_period; j++) { example& ec_valid = data.supervised_validation[j]; data.all->cost_sensitive->predict(ec_valid, i); @@ -337,8 +340,12 @@ void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) //cout<(1); //COST_SENSITIVE::label csl = *cslp; - //csl.costs.end() = csl.costs.begin()+data.num_actions; - + + //Note: these two lines are important, otherwise the cost sensitive vector seems to be unbounded. + //This is crucial for 1. cost-sensitive learn 2. 
label copy csl.costs.resize(data.num_actions); + csl.costs.end() = csl.costs.begin()+data.num_actions; + for (uint32_t j = 0; j < data.num_actions; j++) { csl.costs[j].class_index = j+1; @@ -429,6 +439,10 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) ec.l.cs = csl; + //cout<<"in predict or learn:"<cost_sensitive->predict(ec, argmin); @@ -446,11 +460,20 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) // I also did not deallocate the label and the copied example in finish() if (data.validation_method == SUPERVISED_VALI) { - example* ecp = calloc_or_throw(1); - VW::copy_example_data(false, ecp, &ec, 0, COST_SENSITIVE::cs_label.copy_label); - data.supervised_validation.push_back(*ecp); + example& ec_copy = data.supervised_validation[data.warm_start_iter]; + //why doesn't the following two apporaches leak memory? + VW::copy_example_data(false, &ec_copy, &ec, 0, COST_SENSITIVE::cs_label.copy_label); + //copy_array(ec_copy.l.cs.costs, ec.l.cs.costs); + //VW::copy_example_data(false, &ec_copy, &ec); + //for (uint32_t j = 0; j < data.num_actions; j++) + //{ + // ec_copy.l.cs.costs.push_back(ec.l.cs.costs[j]); + //} + //cout<<"after copying"<(1); - VW::copy_example_data(false, ecp, &ec); - ecp->l.multi.label = corrupted_label; - ecp->l.multi.weight = 1.0; + //example* ecp = calloc_or_throw(1); + //VW::copy_example_data(false, ecp, &ec); + //ecp->l.multi.label = corrupted_label; + //ecp->l.multi.weight = 1.0; + //to be corrected if (data.validation_method == SUPERVISED_VALI) - data.supervised_validation.push_back(*ecp); + VW::copy_example_data(false, &data.supervised_validation[data.warm_start_iter], &ec, 0, MULTICLASS::mc_label.copy_label); data.warm_start_iter++; @@ -765,6 +789,12 @@ base_learner* cbify_setup(vw& all) data.weighting_scheme = vm.count("weighting_scheme") ? vm["weighting_scheme"].as() : INSTANCE_WT; // 1 is the default value + if (data.validation_method == SUPERVISED_VALI) + { + data.supervised_validation = calloc_or_throw(data.warm_start_period); + } + + data.bandit_iter = 0; data.warm_start_iter = 0; From 24c79e88167ce5db9aef7d685f9b37ad7ac1cb16 Mon Sep 17 00:00:00 2001 From: chicheng Date: Mon, 9 Apr 2018 17:15:32 -0400 Subject: [PATCH 050/127] . 
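
This patch threads the cbify state into generate_lambdas and adds a first
cut of minimax_lambda for choosing the center of the lambda grid.

As a reading aid, here is a self-contained sketch of the geometric grid that
generate_lambdas builds (ascending, centered at 0.5: each step halves the
distance to 0 on the left and to 1 on the right); std::vector stands in for
vw's v_array, so this is an illustration rather than the code itself:

    #include <cstddef>
    #include <vector>

    // For lambda_size = 5 this yields {0.125, 0.25, 0.5, 0.75, 0.875}.
    std::vector<float> lambda_grid(size_t lambda_size)
    {
      std::vector<float> lambdas(lambda_size, 0.f);
      size_t mid = lambda_size / 2;
      lambdas[mid] = 0.5f;
      for (size_t i = mid; i > 0; i--)
        lambdas[i - 1] = lambdas[i] / 2.f;               // halve toward 0
      for (size_t i = mid + 1; i < lambda_size; i++)
        lambdas[i] = 1.f - (1.f - lambdas[i - 1]) / 2.f; // halve toward 1
      return lambdas;
    }
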
---
 vowpalwabbit/cbify.cc | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc
index cb518732608..bdeb590d422 100644
--- a/vowpalwabbit/cbify.cc
+++ b/vowpalwabbit/cbify.cc
@@ -716,7 +716,7 @@ void init_adf_data(cbify& data, const size_t num_actions)
 }

-void generate_lambdas(v_array<float>& lambdas, size_t lambda_size)
+void generate_lambdas(cbify& data, v_array<float>& lambdas, size_t lambda_size)
 {
   // The lambdas are in fact arranged in ascending order (the middle lambda is 0.5)

@@ -734,6 +734,23 @@ void generate_lambdas(v_array<float>& lambdas, size_t lambda_size)
 }

+float minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period, size_t bandit_period, size_t dim)
+{
+  if ( (epsilon / num_actions) * bandit_period >= dim )
+    return 1.0;
+  else
+  {
+    float z = sqrt( dim * ( (epsilon / num_actions) * bandit_period + warm_start_period) - (epsilon / num_actions) * bandit_period * warm_start_period );
+
+    float numer = (epsilon / num_actions) + warm_start_period * (epsilon / num_actions) * (1/z);
+    float denom = 1 + (epsilon / num_actions) + (warm_start_period - bandit_period) * (epsilon / num_actions) * (1/z);
+
+    return numer / denom;
+  }
+}
+
 base_learner* cbify_setup(vw& all)
 {
   //parse and set arguments
@@ -752,7 +769,8 @@ base_learner* cbify_setup(vw& all)
     ("corrupt_type_supervised", po::value<size_t>(), "type of label corruption in the supervised part (1 is uar, 2 is circular)")
     ("corrupt_type_bandit", po::value<size_t>(), "type of label corruption in the bandit part (1 is uar, 2 is circular)")
     ("validation_method", po::value<size_t>(), "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised and amortizing)")
-    ("weighting_scheme", po::value<size_t>(), "weighting scheme (1 is per instance weighting, 2 is per dataset weighting (where we use a diminishing weighting scheme) )");
+    ("weighting_scheme", po::value<size_t>(), "weighting scheme (1 is per instance weighting, 2 is per dataset weighting (where we use a diminishing weighting scheme) )")
+    ("lambda_scheme", po::value<size_t>(), "Lambda set scheme (1 is expanding based on center 0.5, 2 is expanding based on center=minimax lambda, 3 is expanding based on center=minimax lambda along with forcing 0,1 in Lambda )");
   add_options(all);

   po::variables_map& vm = all.vm;

From 502d593aef9112aeafe7d02d0496b7559fff5ca6 Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Mon, 9 Apr 2018 21:45:56 -0400
Subject: [PATCH 051/127] updated the lambda tuning scheme

---
 scripts/plot_warm_start.py |   4 +-
 vowpalwabbit/cbify.cc      | 138 +++++++++++++++++++++++--------
 2 files changed, 89 insertions(+), 53 deletions(-)

diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py
index 436596cb0d0..65fabebbaf8 100644
--- a/scripts/plot_warm_start.py
+++ b/scripts/plot_warm_start.py
@@ -320,8 +320,8 @@ def main_loop(mod):
     mod.corrupt_type_supervised = 1
     mod.corrupt_prob_supervised = 0.0

-    mod.corrupt_type_bandit = 1
-    mod.corrupt_prob_bandit = 0.3
+    mod.corrupt_type_bandit = 2
+    mod.corrupt_prob_bandit = 1.0

     mod.validation_method = 2

diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc
index bdeb590d422..cf77a36e249 100644
--- a/vowpalwabbit/cbify.cc
+++ b/vowpalwabbit/cbify.cc
@@ -18,6 +18,10 @@
 #define INSTANCE_WT 1
 #define DATASET_WT 2

+#define ABS_CENTRAL 1
+#define MINIMAX_CENTRAL 2
+#define MINIMAX_CENTRAL_ZEROONE 3
+
 using namespace LEARNER;
 using namespace MultiWorldTesting;

@@ -91,9 +95,61 @@ struct cbify
   size_t warm_start_iter;
   size_t weighting_scheme;
example* supervised_validation; + size_t lambda_scheme; + float epsilon; }; +float minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period, size_t bandit_period, size_t dim) +{ + if ( (epsilon / num_actions) * bandit_period >= dim ) + return 1.0; + else + { + float z = sqrt( dim * ( (epsilon / num_actions) * bandit_period + warm_start_period) - (epsilon / num_actions) * bandit_period * warm_start_period ); + + float numer = (epsilon / num_actions) + warm_start_period * (epsilon / num_actions) * (1/z); + float denom = 1 + (epsilon / num_actions) + (warm_start_period - bandit_period) * (epsilon / num_actions) * (1/z); + + //cout<<"z = "<random_state); @@ -111,7 +167,7 @@ size_t generate_uar_action(cbify& data) { if (rand <= float(i) / data.num_actions) return i; - } + } return data.num_actions; } @@ -137,7 +193,7 @@ size_t corrupt_action(size_t action, cbify& data, size_t data_type) { if (corrupt_type == UAR) return generate_uar_action(data); - else + else return (action % data.num_actions) + 1; } else @@ -355,7 +411,7 @@ void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_learner& base) { - + if (data.validation_method == 1) { uint32_t best_action; @@ -393,9 +449,9 @@ void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_l } //cout<(1); //COST_SENSITIVE::label csl = *cslp; - + //Note: these two lines are important, otherwise the cost sensitive vector seems to be unbounded. //This is crucial for 1. cost-sensitive learn 2. label copy csl.costs.resize(data.num_actions); @@ -454,14 +510,14 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.all->cost_sensitive->learn(ec, i); } } - - // NOTE WELL: for convenience in supervised validation, we intentionally use a cost-sensitive label as opposed to + + // NOTE WELL: for convenience in supervised validation, we intentionally use a cost-sensitive label as opposed to // a multiclass label. This is because the csoaa learner needs a cost-sensitive label to predict (not sure why). // I also did not deallocate the label and the copied example in finish() - if (data.validation_method == SUPERVISED_VALI) + if (data.validation_method == SUPERVISED_VALI) { example& ec_copy = data.supervised_validation[data.warm_start_iter]; - //why doesn't the following two apporaches leak memory? + //why doesn't the following two apporaches leak memory? 
VW::copy_example_data(false, &ec_copy, &ec, 0, COST_SENSITIVE::cs_label.copy_label); //copy_array(ec_copy.l.cs.costs, ec.l.cs.costs); //VW::copy_example_data(false, &ec_copy, &ec); @@ -473,7 +529,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //for (uint32_t j = 0; j < data.num_actions; j++) // cout<(1); + //example* ecp = calloc_or_throw(1); //VW::copy_example_data(false, ecp, &ec); //ecp->l.multi.label = corrupted_label; //ecp->l.multi.weight = 1.0; @@ -611,11 +671,13 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.validation_method == SUPERVISED_VALI) VW::copy_example_data(false, &data.supervised_validation[data.warm_start_iter], &ec, 0, MULTICLASS::mc_label.copy_label); - data.warm_start_iter++; + data.warm_start_iter++; } else if (data.bandit_iter < data.bandit_period) // call the bandit learner { + if (data.bandit_iter == 0) + setup_lambdas(data, ec); for (size_t a = 0; a < data.adf_data.num_actions; ++a) { @@ -658,7 +720,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.weighting_scheme == INSTANCE_WT) ecs[a].weight *= data.lambdas[i] / (1-data.lambdas[i]); else - ecs[a].weight *= data.lambdas[i] / (1-data.lambdas[i]) * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); + ecs[a].weight *= data.lambdas[i] / (1-data.lambdas[i]) * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); base.learn(ecs[a], i); } @@ -670,7 +732,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) } ec.pred.multiclass = cl.action; - + data.bandit_iter++; } else @@ -716,40 +778,6 @@ void init_adf_data(cbify& data, const size_t num_actions) } -void generate_lambdas(cbify& data, v_array& lambdas, size_t lambda_size) -{ - // The lambdas are in fact arranged in ascending order (the middle lambda is 0.5) - - lambdas = v_init(); - uint32_t mid = lambda_size / 2; - for (uint32_t i = 0; i < lambda_size; i++) - lambdas.push_back(0); - - lambdas[mid] = 0.5; - for (uint32_t i = mid; i > 0; i--) - lambdas[i-1] = lambdas[i] / 2; - - for (uint32_t i = mid+1; i < lambda_size; i++) - lambdas[i] = 1 - (1-lambdas[i-1]) / 2; - -} - -void minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period, size_t bandit_period, size_t dim) -{ - if ( (epsilon / num_actions) * bandit_period >= dim ) - return 1.0; - else - { - float z = sqrt( dim * ( (epsilon / num_actions) * bandit_period + warm_start_period) - (epsilon / num_actions) * bandit_period * warm_start_period ); - - float numer = (epsilon / num_actions) + warm_start_period * (epsilon / num_actions) * (1/z); - float denom = 1 + (epsilon / num_actions) + (warm_start_period - bandit_period) * (epsilon / num_actions) * (1/z); - - return numer / denom; - - } - -} base_learner* cbify_setup(vw& all) { @@ -805,7 +833,11 @@ base_learner* cbify_setup(vw& all) data.corrupt_type_bandit = vm.count("corrupt_type_bandit") ? vm["corrupt_type_bandit"].as() : UAR; // 1 is the default value data.validation_method = vm.count("validation_method") ? vm["validation_method"].as() : BANDIT_VALI; // 1 is the default value data.weighting_scheme = vm.count("weighting_scheme") ? vm["weighting_scheme"].as() : INSTANCE_WT; // 1 is the default value + data.lambda_scheme = vm.count("lambda_scheme") ? vm["lambda_scheme"].as() : ABS_CENTRAL; + data.epsilon = vm.count("epsilon") ? 
vm["epsilon"].as() : 0.05; + //cout<<"does epsilon exist?"<set_finish_example(finish_example); return make_base(*l); } - diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index cf77a36e249..10a6e2a2315 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -102,6 +102,7 @@ struct cbify float minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period, size_t bandit_period, size_t dim) { + /* if ( (epsilon / num_actions) * bandit_period >= dim ) return 1.0; else @@ -117,6 +118,8 @@ float minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period return numer / denom; } + */ + return epsilon / (num_actions + epsilon); } void setup_lambdas(cbify& data, example& ec) From 1922659c1b12e0c6ddf38283ce2786502df864d7 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Thu, 12 Apr 2018 17:17:28 -0400 Subject: [PATCH 053/127] fixed bug on zero warm start examples on small datasets --- scripts/alg_comparison.py | 70 +++++++++++++++++++++++++------------- scripts/plot_warm_start.py | 2 +- 2 files changed, 48 insertions(+), 24 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index 703c0adac30..4509260bc06 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -8,6 +8,7 @@ import scipy.stats as stats from itertools import compress from math import sqrt +import argparse # this part is changable #alg1 = 'epsilon' @@ -26,6 +27,7 @@ def sum_files(result_path): def parse_sum_file(sum_filename): f = open(sum_filename, 'r') + #f.seek(0, 0) table = pd.read_table(f, sep=' ',lineterminator='\n') return table @@ -33,6 +35,7 @@ def parse_sum_file(sum_filename): def get_z_scores(errors_1, errors_2, sizes): z_scores = [] for i in range(len(errors_1)): + #print i z_scores.append( z_score(errors_1[i], errors_2[i], sizes[i]) ) return z_scores @@ -73,33 +76,54 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): if __name__ == '__main__': - #results_path = '../../../lambdas/' - #results_path = '../../../warm_start_frac=0.1/' - #results_path = '../../../cover_vs_epsilon/' - #results_path = '../../../corrupt_supervised_type1_0.3/' - #results_path = '../../../expt_0403/corrupt_supervised_type2_0.3/' - #results_path = '../../../expt_0403/supervised_validation/' - #results_path = '../../../weighting_schemes/' - #results_path = '../../../central_lambda/' - #results_path = '../../../central_lambda_naive/' - #results_path = '../../../central_lambda_zeroone/' - #results_path = '../../../type2_0.3/' - #results_path = '../../../type1_0.3/' - #results_path = '../../../type2_1/' - #results_path = '../../../type2_0.65/' - results_path = '../../../type2_0.3/' - - dss = sum_files(results_path) + parser = argparse.ArgumentParser(description='result summary') + parser.add_argument('--results_dir', default='../../../figs/') + args = parser.parse_args() + results_dir = args.results_dir + + #results_dir = '../../../lambdas/' + #results_dir = '../../../warm_start_frac=0.1/' + #results_dir = '../../../cover_vs_epsilon/' + #results_dir = '../../../corrupt_supervised_type1_0.3/' + #results_dir = '../../../expt_0403/corrupt_supervised_type2_0.3/' + #results_dir = '../../../expt_0403/supervised_validation/' + #results_dir = '../../../weighting_schemes/' + #results_dir = '../../../central_lambda/' + #results_dir = '../../../central_lambda_naive/' + #results_dir = '../../../central_lambda_zeroone/' + #results_dir = '../../../type2_0.3/' + #results_dir = '../../../type1_0.3/' + #results_dir = '../../../type2_1/' + #results_dir = 
'../../../type2_0.65/' + #results_dir = '../../../type2_0.3/' + + dss = sum_files(results_dir) + + #print dss[160] all_results = None + for i in range(len(dss)): - result = parse_sum_file(results_path + dss[i]) + print 'dataset name: ', dss[i] + result = parse_sum_file(results_dir + dss[i]) + if (i == 0): all_results = result else: all_results = all_results.append(result) + + + #if i >= 331 and i <= 340: + # print 'result:', result + # print 'all_results:', all_results + print all_results + + + #result = parse_sum_file(results_dir + '400of600.sum') + #print result + #choices_choices_lambda = sorted(all_results['choices_lambda'].unique()) #grouped = all_results.groupby('choices_lambda') @@ -122,8 +146,8 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): print results_alg1 # compare combined w/ bandit - plot_comparison(results_alg1, results_bandit, dataset_sizes, alg1 + ' vs ' + 'bandit only', results_path + alg1 + ' vs ' + 'bandit only' + '.png') - plot_comparison(results_alg1, results_supervised, dataset_sizes, alg1 + ' vs ' + 'supervised only', results_path + alg1 + ' vs ' + 'supervised only' + '.png') - plot_comparison(results_alg2, results_bandit, dataset_sizes, alg2 + ' vs ' + 'bandit only', results_path + alg2 + ' vs ' + 'bandit only' + '.png') - plot_comparison(results_alg2, results_supervised, dataset_sizes, alg2 + ' vs ' + 'supervised only', results_path + alg2 + ' vs ' + 'supervised only' + '.png') - plot_comparison(results_alg1, results_alg2, dataset_sizes, alg1 + ' vs ' + alg2, results_path+alg1 + ' vs ' + alg2 + '.png') + plot_comparison(results_alg1, results_bandit, dataset_sizes, alg1 + ' vs ' + 'bandit only', results_dir + alg1 + ' vs ' + 'bandit only' + '.png') + plot_comparison(results_alg1, results_supervised, dataset_sizes, alg1 + ' vs ' + 'supervised only', results_dir + alg1 + ' vs ' + 'supervised only' + '.png') + plot_comparison(results_alg2, results_bandit, dataset_sizes, alg2 + ' vs ' + 'bandit only', results_dir + alg2 + ' vs ' + 'bandit only' + '.png') + plot_comparison(results_alg2, results_supervised, dataset_sizes, alg2 + ' vs ' + 'supervised only', results_dir + alg2 + ' vs ' + 'supervised only' + '.png') + plot_comparison(results_alg1, results_alg2, dataset_sizes, alg1 + ' vs ' + alg2, results_dir+alg1 + ' vs ' + alg2 + '.png') diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 6dc38741512..c6716d4c889 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -125,7 +125,7 @@ def plot_errors(mod): def gen_comparison_graph(mod): mod.num_lines = get_num_lines(mod.ds_path+mod.dataset) - mod.warm_start = int(math.floor(mod.warm_start_frac * mod.num_lines)) + mod.warm_start = int(math.ceil(mod.warm_start_frac * mod.num_lines)) mod.bandit = mod.num_lines - mod.warm_start mod.progress = int(math.ceil(float(mod.bandit) / float(mod.num_checkpoints))) mod.num_classes = get_num_classes(mod.dataset) From f6539b5700fc53454841fa3e2cb8958a180ba466 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Tue, 24 Apr 2018 01:13:44 -0400 Subject: [PATCH 054/127] added a refined weighting scheme and cumulative var calculation (not tested yet) --- scripts/alg_comparison.py | 4 +- scripts/plot_warm_start.py | 25 +++++---- vowpalwabbit/cbify.cc | 102 ++++++++++++++++++++++++++++++++++--- 3 files changed, 113 insertions(+), 18 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index 4509260bc06..40bdc1b0972 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -35,7 +35,7 @@ def 
parse_sum_file(sum_filename): def get_z_scores(errors_1, errors_2, sizes): z_scores = [] for i in range(len(errors_1)): - #print i + print i z_scores.append( z_score(errors_1[i], errors_2[i], sizes[i]) ) return z_scores @@ -99,7 +99,7 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): dss = sum_files(results_dir) - #print dss[160] + #print dss[168] all_results = None diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index c6716d4c889..1e806f512e3 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -135,9 +135,9 @@ def gen_comparison_graph(mod): config_name = str(mod.dataset) + '_'+str(mod.warm_start)+ '_' + str(mod.cb_type) # combined approach, epsilon - mod.choices_lambda = 5 + mod.choices_lambda = 2 mod.weighting_scheme = 1 - mod.lambda_scheme = 2 + mod.lambda_scheme = 3 mod.no_bandit = False mod.no_supervised = False mod.no_exploration = False @@ -145,8 +145,8 @@ def gen_comparison_graph(mod): mod.epsilon_on = True mod.plot_color = 'r' mod.plot_flat = False - mod.vw_output_filename = mod.results_path+config_name+'central_minimax'+'.txt' - mod.plot_label = 'Central lambda: minimax' + mod.vw_output_filename = mod.results_path+config_name+'zeroone'+'.txt' + mod.plot_label = 'zeroone only' avg_error_comb_1 = plot_errors(mod) # combined approach, cover @@ -275,7 +275,7 @@ def avg_error(mod): def main_loop(mod): mod.summary_file_name = mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum' summary_file = open(mod.summary_file_name, 'w') - summary_file.write('dataset' + ' ' + 'central_minimax' + ' ' + 'central_minimax_zeroone' + ' ' + 'bandit_only' + ' ' + 'supervised_only' + ' ' + 'size' + '\n') + summary_file.write('dataset' + ' ' + 'zeroone_only' + ' ' + 'central_minimax_zeroone' + ' ' + 'bandit_only' + ' ' + 'supervised_only' + ' ' + 'size' + '\n') summary_file.close() for mod.cb_type, mod.warm_start_frac, mod.dataset in mod.config_task: @@ -287,6 +287,11 @@ def main_loop(mod): parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') parser.add_argument('num_tasks', type=int) parser.add_argument('--results_dir', default='../../../figs/') + parser.add_argument('--warm_start_fraction', type=float) + parser.add_argument('--corrupt_prob_supervised', type=float) + parser.add_argument('--corrupt_prob_bandit',type=float) + + args = parser.parse_args() if args.task_id == 0: if not os.path.exists(args.results_dir): @@ -320,7 +325,8 @@ def main_loop(mod): #mod.choices_warm_start_frac = [0.01 * pow(2, i) for i in range(1)] #mod.choices_warm_start_frac = [0.01, 0.03, 0.1, 0.3] - mod.choices_warm_start_frac = [0.03] + #mod.choices_warm_start_frac = [0.03] + mod.choices_warm_start_frac = [args.warm_start_fraction] #mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] @@ -339,10 +345,11 @@ def main_loop(mod): #mod.corrupt_type_supervised = 2 #mod.corrupt_prob_supervised = 0.3 mod.corrupt_type_supervised = 1 - mod.corrupt_prob_supervised = 0.0 + #mod.corrupt_prob_supervised = 0.3 + mod.corrupt_prob_supervised = args.corrupt_prob_supervised - mod.corrupt_type_bandit = 2 - mod.corrupt_prob_bandit = 1 + mod.corrupt_type_bandit = 1 + mod.corrupt_prob_bandit = args.corrupt_prob_bandit mod.validation_method = 2 mod.epsilon = 0.05 diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 10a6e2a2315..e9e80d79a88 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -97,6 +97,7 @@ struct cbify example* supervised_validation; 
size_t lambda_scheme; float epsilon; + float cumulative_variance; }; @@ -143,7 +144,7 @@ void setup_lambdas(cbify& data, example& ec) if (data.lambda_scheme == MINIMAX_CENTRAL_ZEROONE) { lambdas[0] = 0.0; - lambdas[data.choices_lambda-1] = 1.0 - 1e-4; + lambdas[data.choices_lambda-1] = 1.0; } //cout<<"lambdas:"<cost_sensitive->predict(ec, argmin); + + return ec.pred.multiclass; + +} template @@ -509,8 +532,14 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) { for (uint32_t i = 0; i < data.choices_lambda; i++) { - ec.weight = 1; + if (data.lambdas[i] >= 0.5) + ec.weight = (1 - data.lambdas[i]) / data.lambdas[i]; + else + ec.weight = 1; + data.all->cost_sensitive->learn(ec, i); + + ec.weight = 1; } } @@ -541,7 +570,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) } else if (data.bandit_iter < data.bandit_period) //Call the cb_explore learner. It returns a vector of probabilities for each action { - // Need to initilize the lambda vector + // Need to initialize the lambda vector if (data.bandit_iter == 0) setup_lambdas(data, ec); @@ -578,14 +607,24 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) { for (uint32_t i = 0; i < data.choices_lambda; i++) { + float weight_multiplier; + if (data.lambdas[i] >= 0.5) + weight_multiplier = 1; + else + weight_multiplier = data.lambdas[i] / (1-data.lambdas[i]); + if (data.weighting_scheme == INSTANCE_WT) - ec.weight = old_weight * data.lambdas[i] / (1-data.lambdas[i]); + ec.weight = old_weight * weight_multiplier; else - ec.weight = old_weight * data.lambdas[i] / (1-data.lambdas[i]) * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); + ec.weight = old_weight * weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); base.learn(ec, i); } } + + size_t pred_best_approx = predict_cs(data, ec); + data.cumulative_variance += 1.0 / ec.pred.a_s[pred_best_approx-1].score; + data.a_s.erase(); data.a_s = ec.pred.a_s; ec.l.multi = ld; @@ -593,6 +632,12 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) ec.weight = old_weight; data.bandit_iter++; + + if (data.bandit_iter == data.bandit_period) + { + cout<<"Ideal average variance = "<Choose_Action(*data.generic_explorer, StringUtils::to_string(data.example_counter++), ec); + + return action; + +} + +void learn_bandit(cbify& data, base_learner& base, example& ec) +{ + float old_weight = ec.weight; + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + float weight_multiplier; + if (data.lambdas[i] >= 0.5) + weight_multiplier = 1; + else + weight_multiplier = data.lambdas[i] / (1-data.lambdas[i]); + if (data.weighting_scheme == INSTANCE_WT) + ec.weight = old_weight * weight_multiplier; + else + ec.weight = old_weight * weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); + + base.learn(ec, i); + } + ec.weight = old_weight; } template void predict_or_learn(cbify& data, base_learner& base, example& ec) { - float old_weight; - uint32_t argmin; - - argmin = find_min(data.cumulative_costs); + //float old_weight; + //uint32_t argmin; + //argmin = find_min(data.cumulative_costs); //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; //cout<(1); - //COST_SENSITIVE::label csl = *cslp; - - //Note: these two lines are important, otherwise the cost sensitive vector seems to be unbounded. - //This is crucial for 1. cost-sensitive learn 2. 
label copy - csl.costs.resize(data.num_actions); - csl.costs.end() = csl.costs.begin()+data.num_actions; - - for (uint32_t j = 0; j < data.num_actions; j++) - { - csl.costs[j].class_index = j+1; - csl.costs[j].x = loss(data, corrupted_label, j+1); - } - - ec.l.cs = csl; - - //cout<<"in predict or learn:"<cost_sensitive->predict(ec, argmin); + //learn + //first, corrupt fully supervised example ec's label here + generate_corrupted_cs(data, ec, ld); if (data.ind_supervised) - { - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - if (data.lambdas[i] >= 0.5) - ec.weight = (1 - data.lambdas[i]) / data.lambdas[i]; - else - ec.weight = 1; - - data.all->cost_sensitive->learn(ec, i); - - ec.weight = 1; - } - } + learn_cs(data, ec); - // NOTE WELL: for convenience in supervised validation, we intentionally use a cost-sensitive label as opposed to - // a multiclass label. This is because the csoaa learner needs a cost-sensitive label to predict (not sure why). - // I also did not deallocate the label and the copied example in finish() if (data.validation_method == SUPERVISED_VALI) - { - example& ec_copy = data.supervised_validation[data.warm_start_iter]; - //why doesn't the following two apporaches leak memory? - VW::copy_example_data(false, &ec_copy, &ec, 0, COST_SENSITIVE::cs_label.copy_label); - //copy_array(ec_copy.l.cs.costs, ec.l.cs.costs); - //VW::copy_example_data(false, &ec_copy, &ec); - //for (uint32_t j = 0; j < data.num_actions; j++) - //{ - // ec_copy.l.cs.costs.push_back(ec.l.cs.costs[j]); - //} - //cout<<"after copying"<Choose_Action(*data.generic_explorer, StringUtils::to_string(data.example_counter++), ec); + size_t action = predict_bandit(data, base, ec); CB::cb_class cl; cl.action = action; @@ -600,36 +634,24 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.cb_label.costs.push_back(cl); ec.l.cb = data.cb_label; - ec.pred = old_pred; - old_weight = ec.weight; + ec.pred = data.pred; - if (data.ind_bandit) - { - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - float weight_multiplier; - if (data.lambdas[i] >= 0.5) - weight_multiplier = 1; - else - weight_multiplier = data.lambdas[i] / (1-data.lambdas[i]); - if (data.weighting_scheme == INSTANCE_WT) - ec.weight = old_weight * weight_multiplier; - else - ec.weight = old_weight * weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); + if (data.ind_bandit) + learn_bandit(data, base, ec); - base.learn(ec, i); - } - } + data.a_s.erase(); + data.a_s = ec.pred.a_s; size_t pred_best_approx = predict_cs(data, ec); - data.cumulative_variance += 1.0 / ec.pred.a_s[pred_best_approx-1].score; + data.cumulative_variance += 1.0 / data.a_s[pred_best_approx-1].score; + + //cout<<"variance at bandit round "<< data.bandit_iter << " = " << 1.0 / data.a_s[pred_best_approx-1].score << endl; + //cout<l.cb = *cbl_empty; @@ -811,7 +833,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) } } - size_t pred_best_approx = predict_cs(data, ec); + size_t pred_best_approx = predict_cs_adf(data, base, ec); data.cumulative_variance += 1.0 / out_ec.pred.a_s[pred_best_approx-1].score; ec.pred.multiclass = cl.action; @@ -959,6 +981,13 @@ base_learner* cbify_setup(vw& all) else { data.csls = calloc_or_throw(1); + auto& csl = data.csls[0]; + + csl.costs = v_init(); + //Note: these two lines are important, otherwise the cost sensitive vector seems to be unbounded. + //This is crucial for 1. cost-sensitive learn 2. 
label copy + csl.costs.resize(data.num_actions); + csl.costs.end() = csl.costs.begin()+data.num_actions; } From 6259c672e3eeaebd99474647095ffcac85c03e61 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Sun, 29 Apr 2018 13:48:44 -0400 Subject: [PATCH 056/127] fixed the csl label zero problem - now the label is set properly: 1,2,..K --- vowpalwabbit/cbify.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index c9da279db42..98af1430ef5 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -474,7 +474,7 @@ size_t predict_cs(cbify& data, example& ec) data.all->cost_sensitive->predict(ec, argmin); - cout<(); //Note: these two lines are important, otherwise the cost sensitive vector seems to be unbounded. - //This is crucial for 1. cost-sensitive learn 2. label copy - csl.costs.resize(data.num_actions); - csl.costs.end() = csl.costs.begin()+data.num_actions; + + for (size_t a = 0; a < num_actions; ++a) + { + csl.costs.push_back({0, a+1, 0, 0}); + } } From c3304502f5872738c5d7a078e06bcd3ecdd0ee17 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Sun, 29 Apr 2018 14:25:46 -0400 Subject: [PATCH 057/127] . --- vowpalwabbit/cbify.cc | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 98af1430ef5..1d9a36c0fd3 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -536,6 +536,18 @@ void add_to_sup_validation(cbify& data, example& ec) // cout< void predict_or_learn(cbify& data, base_learner& base, example& ec) { - //float old_weight; - //uint32_t argmin; - //argmin = find_min(data.cumulative_costs); - //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; - //cout< Date: Sun, 29 Apr 2018 14:51:27 -0400 Subject: [PATCH 058/127] make the lambda weighting more modular --- vowpalwabbit/cbify.cc | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 1d9a36c0fd3..6f8a8f56a14 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -463,6 +463,25 @@ void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_l } +float compute_weight_multiplier(cbify& data, size_t i, size_t data_type) +{ + if (data_type == SUPERVISED) + { + if (data.lambdas[i] >= 0.5) + return (1 - data.lambdas[i]) / data.lambdas[i]; + else + return 1; + } + else + { + if (data.lambdas[i] >= 0.5) + return 1; + else + return data.lambdas[i] / (1-data.lambdas[i]); + } +} + + size_t predict_cs(cbify& data, example& ec) { uint32_t argmin = find_min(data.cumulative_costs); @@ -482,17 +501,14 @@ size_t predict_cs(cbify& data, example& ec) void learn_cs(cbify& data, example& ec) { + float old_weight = ec.weight; for (uint32_t i = 0; i < data.choices_lambda; i++) { - if (data.lambdas[i] >= 0.5) - ec.weight = (1 - data.lambdas[i]) / data.lambdas[i]; - else - ec.weight = 1; - + float weight_multiplier = compute_weight_multiplier(data, i, SUPERVISED); + ec.weight = old_weight * weight_multiplier; data.all->cost_sensitive->learn(ec, i); - - ec.weight = 1; } + ec.weight = old_weight; } //Requires the csl's cost array to have num_actions elements @@ -569,11 +585,7 @@ void learn_bandit(cbify& data, base_learner& base, example& ec) float old_weight = ec.weight; for (uint32_t i = 0; i < data.choices_lambda; i++) { - float weight_multiplier; - if (data.lambdas[i] >= 0.5) - weight_multiplier = 1; - else - 
weight_multiplier = data.lambdas[i] / (1-data.lambdas[i]); + float weight_multiplier = compute_weight_multiplier(data, i, BANDIT); if (data.weighting_scheme == INSTANCE_WT) ec.weight = old_weight * weight_multiplier; From 7240acb970c572065c87043659f9fa25522cca9b Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 30 Apr 2018 00:40:41 -0400 Subject: [PATCH 059/127] make adf modular --- vowpalwabbit/cbify.cc | 261 +++++++++++++++++++++++++----------------- 1 file changed, 158 insertions(+), 103 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 6f8a8f56a14..1bf09c5c07b 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -249,10 +249,10 @@ void finish(cbify& data) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - VW::dealloc_example(CB::cb_label.delete_label, data.adf_data.ecs[a]); + //VW::dealloc_example(CB::cb_label.delete_label, data.adf_data.ecs[a]); data.adf_data.ecs[a].pred.a_s.delete_v(); } - VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.empty_example); + //VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.empty_example); data.adf_data.empty_example->pred.a_s.delete_v(); free(data.adf_data.ecs); @@ -690,86 +690,185 @@ size_t predict_cs_adf(cbify& data, base_learner& base, example& ec) return best_action; } -template -void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) +size_t predict_bandit_adf(cbify& data, base_learner& base, example& ec) { - uint32_t argmin; - uint32_t best_action; example* ecs = data.adf_data.ecs; example* empty_example = data.adf_data.empty_example; - argmin = find_min(data.cumulative_costs); + uint32_t argmin = find_min(data.cumulative_costs); - //Store the multiclass input label - MULTICLASS::label_t ld = ec.l.multi; + copy_example_to_adf(data, ec); - copy_example_to_adf(data, ec); + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + base.predict(ecs[a], argmin); + } + base.predict(*empty_example, argmin); - if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly + // get output scores + auto& out_ec = data.adf_data.ecs[0]; + uint32_t idx = data.mwt_explorer->Choose_Action( + *data.generic_explorer, + StringUtils::to_string(data.example_counter++), out_ec) - 1; + + return idx; + +} + +void multiclass_to_cs_adf(cbify& data, COST_SENSITIVE::label* csls, size_t corrupted_label) +{ + for (size_t a = 0; a < data.adf_data.num_actions; ++a) { + csls[a].costs[0].class_index = a+1; + csls[a].costs[0].x = loss(data, corrupted_label, a+1); + } - best_action = predict_sublearner(data, base, argmin); +} - //data.all->cost_sensitive->predict(ec,argmin); - //generate cost-sensitive label - // ecs[a].weight *= 1; - // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; +void generate_corrupted_cs_adf(cbify& data, example& ec, MULTICLASS::label_t ld) +{ + //suppose copy_example_data has already been called + example* ecs = data.adf_data.ecs; + example* empty_example = data.adf_data.empty_example; - COST_SENSITIVE::label* csls = data.csls; - COST_SENSITIVE::label* csl_empty = data.csl_empty; - CB::label* cbls = data.cbls; - CB::label* cbl_empty = data.cbl_empty; + size_t corrupted_label = corrupt_action(ld.label, data, SUPERVISED); + + //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) + COST_SENSITIVE::label* csls = data.csls; + COST_SENSITIVE::label* csl_empty = data.csl_empty; - size_t corrupted_label = corrupt_action(ld.label, data, SUPERVISED); + multiclass_to_cs_adf(data, 
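+  // (multiclass_to_cs_adf, defined above, fills csls[a].costs[0] with
+  // class_index a+1 and cost loss(data, corrupted_label, a+1), so the
+  // possibly-corrupted warm-start label reaches the CSOAA oracle as a
+  // full cost vector)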
csls, corrupted_label); - if (data.ind_supervised) + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + ecs[a].l.cs = csls[a]; + + empty_example->l.cs = *csl_empty; + +} + +void learn_cs_adf(cbify& data, example& ec) +{ + example* ecs = data.adf_data.ecs; + example* empty_example = data.adf_data.empty_example; + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.old_weights[a] = ecs[a].weight; + + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + float weight_multiplier = compute_weight_multiplier(data, i, SUPERVISED); + for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - float weight_multiplier; - if (data.lambdas[i] >= 0.5) - weight_multiplier = (1 - data.lambdas[i]) / data.lambdas[i]; - else - weight_multiplier = 1; + ecs[a].weight = data.old_weights[a] * weight_multiplier; + data.all->cost_sensitive->learn(ecs[a],i); + } + data.all->cost_sensitive->learn(*empty_example,i); + } - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - data.old_weights[a] = ecs[a].weight; + //Seems like we don't need to set the weights back as this example will be + //discarded anyway + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + ecs[a].weight = data.old_weights[a]; +} - csls[a].costs[0].class_index = a+1; - csls[a].costs[0].x = loss(data, corrupted_label, a+1); +void generate_corrupt_cb_adf(cbify& data, example& out_ec, CB::cb_class& cl, MULTICLASS::label_t& ld, size_t idx) +{ + cl.action = out_ec.pred.a_s[idx].action + 1; + cl.probability = out_ec.pred.a_s[idx].score; - cbls[a] = ecs[a].l.cb; - ecs[a].l.cs = csls[a]; + if(!cl.action) + THROW("No action with non-zero probability found!"); - ecs[a].weight *= weight_multiplier; + size_t corrupted_label = corrupt_action(ld.label, data, BANDIT); + cl.cost = loss(data, corrupted_label, cl.action); - data.all->cost_sensitive->learn(ecs[a],i); - } - *cbl_empty = empty_example->l.cb; - empty_example->l.cs = *csl_empty; - data.all->cost_sensitive->learn(*empty_example,i); +} - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - ecs[a].l.cb = cbls[a]; - ecs[a].weight = data.old_weights[a]; - } +void learn_bandit_adf(cbify& data, base_learner& base, example& ec) +{ + example* ecs = data.adf_data.ecs; + example* empty_example = data.adf_data.empty_example; - empty_example->l.cb = *cbl_empty; - } + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.old_weights[a] = ecs[a].weight; + + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + float weight_multiplier = compute_weight_multiplier(data, i, BANDIT); + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + if (data.weighting_scheme == INSTANCE_WT) + ecs[a].weight = data.old_weights[a] * weight_multiplier; + else + ecs[a].weight = data.old_weights[a] * weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); + + base.learn(ecs[a], i); } + base.learn(*empty_example, i); + } + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + ecs[a].weight = data.old_weights[a]; +} + +void accumulate_variance_adf(cbify& data, base_learner& base, example& ec) +{ + auto& out_ec = data.adf_data.ecs[0]; + + data.a_s.erase(); + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.a_s.push_back({out_ec.pred.a_s[a].action, out_ec.pred.a_s[a].score}); + + size_t pred_best_approx = predict_cs_adf(data, base, ec); + float temp_variance; + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + if (pred_best_approx == data.a_s[a].action + 1) + 
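+      // data.a_s[a].score is the probability the exploration distribution
+      // assigned to the action that the approximately-best policy
+      // (pred_best_approx) picks; its reciprocal is the per-round variance
+      // proxy accumulated below into data.cumulative_variance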
temp_variance = 1.0 / data.a_s[a].score; + + data.cumulative_variance += temp_variance; + + //cout<<"variance at bandit round "<< data.bandit_iter << " = " << temp_variance << endl; + //cout< +void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) +{ + + //Store the multiclass input label + MULTICLASS::label_t ld = ec.l.multi; + + if (data.warm_start_iter == 0 && data.bandit_iter == 0) + setup_lambdas(data, ec); + + //copy_example_to_adf(data, ec); + + if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly + { + + //best_action = predict_sublearner(data, base, argmin); + uint32_t best_action = predict_cs_adf(data, base, ec); + + //data.all->cost_sensitive->predict(ec,argmin); + + //generate cost-sensitive label + // ecs[a].weight *= 1; + // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; + + generate_corrupted_cs_adf(data, ec, ld); + + if (data.ind_supervised) + learn_cs_adf(data, ec); + ec.pred.multiclass = best_action; ec.l.multi = ld; ec.weight = 0; //a hack here - allocated memories not deleted - //example* ecp = calloc_or_throw(1); - //VW::copy_example_data(false, ecp, &ec); - //ecp->l.multi.label = corrupted_label; - //ecp->l.multi.weight = 1.0; - //to be corrected if (data.validation_method == SUPERVISED_VALI) VW::copy_example_data(false, &data.supervised_validation[data.warm_start_iter], &ec, 0, MULTICLASS::mc_label.copy_label); @@ -779,30 +878,13 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) } else if (data.bandit_iter < data.bandit_period) // call the bandit learner { - if (data.bandit_iter == 0) - setup_lambdas(data, ec); - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - base.predict(ecs[a], argmin); - } - base.predict(*empty_example, argmin); - - // get output scores + //size_t pred_pi = predict_cs_adf(data, base, ec); + uint32_t idx = predict_bandit_adf(data, base, ec); auto& out_ec = data.adf_data.ecs[0]; - uint32_t idx = data.mwt_explorer->Choose_Action( - *data.generic_explorer, - StringUtils::to_string(data.example_counter++), out_ec) - 1; CB::cb_class cl; - cl.action = out_ec.pred.a_s[idx].action + 1; - cl.probability = out_ec.pred.a_s[idx].score; - - if(!cl.action) - THROW("No action with non-zero probability found!"); - size_t corrupted_label = corrupt_action(ld.label, data, BANDIT); - cl.cost = loss(data, corrupted_label, cl.action); + generate_corrupt_cb_adf(data, out_ec, cl, ld, idx); // accumulate the cumulative costs of lambdas accumulate_costs_ips_adf(data, ec, cl, base); @@ -813,36 +895,9 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.ind_bandit) - { - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - float weight_multiplier; - - if (data.lambdas[i] >= 0.5) - weight_multiplier = 1; - else - weight_multiplier = data.lambdas[i] / (1-data.lambdas[i]); - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - data.old_weights[a] = ecs[a].weight; - - if (data.weighting_scheme == INSTANCE_WT) - ecs[a].weight *= weight_multiplier; - else - ecs[a].weight *= weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); - - base.learn(ecs[a], i); - } - base.learn(*empty_example, i); - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - ecs[a].weight = data.old_weights[a]; - } - } + learn_bandit_adf(data, base, ec); - size_t pred_best_approx = predict_cs_adf(data, base, ec); - data.cumulative_variance += 1.0 / out_ec.pred.a_s[pred_best_approx-1].score; + 
accumulate_variance_adf(data, base, ec); ec.pred.multiclass = cl.action; From 621b39202503d58904da880a27ff888800fd6311 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 30 Apr 2018 00:47:02 -0400 Subject: [PATCH 060/127] the version where there is an error on memory free --- vowpalwabbit/cbify.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 1bf09c5c07b..fe61f95787f 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -682,7 +682,6 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) size_t predict_cs_adf(cbify& data, base_learner& base, example& ec) { uint32_t argmin = find_min(data.cumulative_costs); - copy_example_to_adf(data, ec); size_t best_action = predict_sublearner(data, base, argmin); @@ -845,7 +844,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.warm_start_iter == 0 && data.bandit_iter == 0) setup_lambdas(data, ec); - //copy_example_to_adf(data, ec); + copy_example_to_adf(data, ec); if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly { From d1fbfd7d7c0f7c0991596d8e414771a52df2cf5d Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 30 Apr 2018 01:24:56 -0400 Subject: [PATCH 061/127] finished cleanup (need to double check the cb label swap in the adf case) --- vowpalwabbit/cbify.cc | 65 ++++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index fe61f95787f..036afc6355e 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -597,6 +597,16 @@ void learn_bandit(cbify& data, base_learner& base, example& ec) ec.weight = old_weight; } +void accumulate_variance(cbify& data, example& ec) +{ + size_t pred_best_approx = predict_cs(data, ec); + data.cumulative_variance += 1.0 / data.a_s[pred_best_approx-1].score; + + //cout<<"variance at bandit round "<< data.bandit_iter << " = " << 1.0 / data.a_s[pred_best_approx-1].score << endl; + //cout< void predict_or_learn(cbify& data, base_learner& base, example& ec) @@ -651,11 +661,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.a_s.erase(); data.a_s = ec.pred.a_s; - size_t pred_best_approx = predict_cs(data, ec); - data.cumulative_variance += 1.0 / data.a_s[pred_best_approx-1].score; - - //cout<<"variance at bandit round "<< data.bandit_iter << " = " << 1.0 / data.a_s[pred_best_approx-1].score << endl; - //cout<costs = data.adf_data.empty_example->l.cb.costs; + if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly { //best_action = predict_sublearner(data, base, argmin); - uint32_t best_action = predict_cs_adf(data, base, ec); + uint32_t best_action = predict_cs_adf(data, base); //data.all->cost_sensitive->predict(ec,argmin); @@ -858,10 +866,10 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) // ecs[a].weight *= 1; // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; - generate_corrupted_cs_adf(data, ec, ld); + generate_corrupted_cs_adf(data, ld); if (data.ind_supervised) - learn_cs_adf(data, ec); + learn_cs_adf(data); ec.pred.multiclass = best_action; ec.l.multi = ld; @@ -878,12 +886,11 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) else if (data.bandit_iter < data.bandit_period) // call the bandit learner { //size_t pred_pi = predict_cs_adf(data, base, ec); - uint32_t idx = predict_bandit_adf(data, base, ec); 
- auto& out_ec = data.adf_data.ecs[0]; + uint32_t idx = predict_bandit_adf(data, base); CB::cb_class cl; - generate_corrupt_cb_adf(data, out_ec, cl, ld, idx); + generate_corrupt_cb_adf(data, cl, ld, idx); // accumulate the cumulative costs of lambdas accumulate_costs_ips_adf(data, ec, cl, base); @@ -894,9 +901,9 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.ind_bandit) - learn_bandit_adf(data, base, ec); + learn_bandit_adf(data, base); - accumulate_variance_adf(data, base, ec); + accumulate_variance_adf(data, base); ec.pred.multiclass = cl.action; @@ -913,6 +920,10 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) ec.pred.multiclass = 0; ec.weight = 0; } + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.adf_data.ecs[a].l.cb.costs = data.cbls[a].costs; + data.adf_data.empty_example->l.cb.costs = data.cbl_empty->costs; } void init_adf_data(cbify& data, const size_t num_actions) @@ -933,6 +944,8 @@ void init_adf_data(cbify& data, const size_t num_actions) data.csls = calloc_or_throw(num_actions); + + data.csl_empty = calloc_or_throw(1); data.cbls = calloc_or_throw(num_actions); data.cbl_empty = calloc_or_throw(1); @@ -940,13 +953,15 @@ void init_adf_data(cbify& data, const size_t num_actions) data.old_weights = calloc_or_throw(num_actions); + data.csl_empty->costs = v_init(); data.csl_empty->costs.push_back({0, 0, 0, 0}); data.csl_empty->costs[0].class_index = 0; data.csl_empty->costs[0].x = FLT_MAX; for (size_t a = 0; a < num_actions; ++a) { - data.csls[a].costs.push_back({0, 0, 0, 0}); + data.csls[a].costs = v_init(); + data.csls[a].costs.push_back({0, a+1, 0, 0}); } } From 6bddc96aa77516a54ff2d1e492b0be9b0ad42033 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 30 Apr 2018 15:11:05 -0400 Subject: [PATCH 062/127] adjusted the output of the script so that it is more systematic --- scripts/alg_comparison.py | 24 ++++----- scripts/plot_warm_start.py | 103 +++++++++++++++++++++++++------------ 2 files changed, 81 insertions(+), 46 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index 40bdc1b0972..4a20fb48ce1 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -35,7 +35,7 @@ def parse_sum_file(sum_filename): def get_z_scores(errors_1, errors_2, sizes): z_scores = [] for i in range(len(errors_1)): - print i + #print i z_scores.append( z_score(errors_1[i], errors_2[i], sizes[i]) ) return z_scores @@ -104,7 +104,7 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): all_results = None for i in range(len(dss)): - print 'dataset name: ', dss[i] + print 'result file name: ', dss[i] result = parse_sum_file(results_dir + dss[i]) if (i == 0): @@ -112,13 +112,11 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): else: all_results = all_results.append(result) - - #if i >= 331 and i <= 340: - # print 'result:', result - # print 'all_results:', all_results - print all_results + #if i >= 331 and i <= 340: + # print 'result:', result + # print 'all_results:', all_results #result = parse_sum_file(results_dir + '400of600.sum') @@ -142,12 +140,12 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): results_supervised = all_results[supervised_only].tolist() dataset_sizes = all_results[sizes].tolist() - print alg1 - print results_alg1 + #print alg1 + #print results_alg1 # compare combined w/ bandit plot_comparison(results_alg1, results_bandit, dataset_sizes, alg1 + ' vs ' + 'bandit only', results_dir + alg1 + ' vs ' + 'bandit 
only' + '.png') - plot_comparison(results_alg1, results_supervised, dataset_sizes, alg1 + ' vs ' + 'supervised only', results_dir + alg1 + ' vs ' + 'supervised only' + '.png') - plot_comparison(results_alg2, results_bandit, dataset_sizes, alg2 + ' vs ' + 'bandit only', results_dir + alg2 + ' vs ' + 'bandit only' + '.png') - plot_comparison(results_alg2, results_supervised, dataset_sizes, alg2 + ' vs ' + 'supervised only', results_dir + alg2 + ' vs ' + 'supervised only' + '.png') - plot_comparison(results_alg1, results_alg2, dataset_sizes, alg1 + ' vs ' + alg2, results_dir+alg1 + ' vs ' + alg2 + '.png') + #plot_comparison(results_alg1, results_supervised, dataset_sizes, alg1 + ' vs ' + 'supervised only', results_dir + alg1 + ' vs ' + 'supervised only' + '.png') + #plot_comparison(results_alg2, results_bandit, dataset_sizes, alg2 + ' vs ' + 'bandit only', results_dir + alg2 + ' vs ' + 'bandit only' + '.png') + #plot_comparison(results_alg2, results_supervised, dataset_sizes, alg2 + ' vs ' + 'supervised only', results_dir + alg2 + ' vs ' + 'supervised only' + '.png') + #plot_comparison(results_alg1, results_alg2, dataset_sizes, alg1 + ' vs ' + alg2, results_dir+alg1 + ' vs ' + alg2 + '.png') diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 1e806f512e3..31c0880b379 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -116,10 +116,12 @@ def plot_errors(mod): avg_loss = avg_loss[len_avg_loss-1] avg_loss = [avg_loss for i in range(len_avg_loss)] - line = plt.plot(wt, avg_loss, mod.plot_color, label=(mod.plot_label)) + #line = plt.plot(wt, avg_loss, mod.plot_color, label=(mod.plot_label)) avg_error_value = avg_error(mod) + actual_var_value = actual_var(mod) + ideal_var_value = ideal_var(mod) - return avg_error_value + return avg_error_value, actual_var_value, ideal_var_value def gen_comparison_graph(mod): @@ -132,23 +134,27 @@ def gen_comparison_graph(mod): #config_name = str(mod.dataset) + '_' + str(mod.fprob1)+'_'+str(mod.fprob2)+'_'+str(mod.warm_start)+'_'+str(mod.bandit)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) - config_name = str(mod.dataset) + '_'+str(mod.warm_start)+ '_' + str(mod.cb_type) + config_name = str(mod.dataset) + ' ' \ + + str(mod.corrupt_type_supervised) + ' ' +str(mod.corrupt_prob_supervised) \ + + ' ' + str(mod.corrupt_type_bandit) + ' ' + str(mod.corrupt_prob_bandit) \ + + ' ' + str(mod.warm_start) + ' ' + str(mod.bandit) + ' ' + str(mod.cb_type) \ + + ' ' + str(mod.validation_method) + ' ' + str(mod.weighting_scheme) \ + + ' ' + str(mod.lambda_scheme) + ' ' + str(mod.choices_lambda) \ + + ' ' + str(mod.no_supervised) + ' ' + str(mod.no_bandit) # combined approach, epsilon - mod.choices_lambda = 2 - mod.weighting_scheme = 1 - mod.lambda_scheme = 3 - mod.no_bandit = False - mod.no_supervised = False - mod.no_exploration = False - mod.cover_on = False - mod.epsilon_on = True - mod.plot_color = 'r' - mod.plot_flat = False - mod.vw_output_filename = mod.results_path+config_name+'zeroone'+'.txt' - mod.plot_label = 'zeroone only' - avg_error_comb_1 = plot_errors(mod) + mod.vw_output_filename = mod.results_path+config_name+'.txt' + avg_error_value, actual_var_value, ideal_var_value = plot_errors(mod) + + result = str(avg_error_value) + ' ' + str(actual_var_value) + ' ' + str(ideal_var_value) + summary_file = open(mod.summary_file_name, 'a') + summary_file.write(config_name + ' ' + result + '\n') + summary_file.close() + print('') + + + ''' # combined approach, cover # combined approach, per-dataset weighting 
#mod.choices_lambda = 1 @@ -204,13 +210,6 @@ def gen_comparison_graph(mod): mod.plot_label = 'Supervised only' avg_error_sup_only = plot_errors(mod) - - summary_file = open(mod.summary_file_name, 'a') - summary_file.write(config_name + ' ' + str(avg_error_comb_1) + ' ' + str(avg_error_comb_2) + ' ' + str(avg_error_band_only) + ' ' + str(avg_error_sup_only) + ' ' + str(mod.bandit) + '\n') - summary_file.close() - print('') - - pylab.legend() pylab.xlabel('#bandit examples') pylab.ylabel('Progressive validation error') @@ -220,7 +219,7 @@ def gen_comparison_graph(mod): plt.gcf().clear() #plt.show() - + ''' def ds_files(ds_path): prevdir = os.getcwd() @@ -239,7 +238,7 @@ def get_num_classes(ds): def ds_per_task(mod): # put dataset name to the last coordinate so that the task workloads tend to be # allocated equally - config_all = [item for item in product(mod.choices_cb_types, mod.choices_warm_start_frac, mod.dss)] + config_all = [item for item in product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised, mod.choices_cb_types, mod.choices_warm_start_frac, mod.choices_no_supervised, mod.choices_no_bandit, mod.dss)] config_task = [] print len(config_all) for i in range(len(config_all)): @@ -256,11 +255,20 @@ def get_num_lines(dataset_name): return int(output) def avg_error(mod): + return vw_output_extract(mod, 'average loss') + +def actual_var(mod): + return vw_output_extract(mod, 'Measured average variance') + +def ideal_var(mod): + return vw_output_extract(mod, 'Ideal average variance') + +def vw_output_extract(mod, pattern): #print mod.vw_output_filename vw_output = open(mod.vw_output_filename, 'r') vw_output_text = vw_output.read() #print vw_output_text - rgx = re.compile('^average loss = (.*)$', flags=re.M) + rgx = re.compile('^'+pattern+' = (.*)$', flags=re.M) errs = rgx.findall(vw_output_text) if not errs: @@ -275,10 +283,23 @@ def avg_error(mod): def main_loop(mod): mod.summary_file_name = mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum' summary_file = open(mod.summary_file_name, 'w') - summary_file.write('dataset' + ' ' + 'zeroone_only' + ' ' + 'central_minimax_zeroone' + ' ' + 'bandit_only' + ' ' + 'supervised_only' + ' ' + 'size' + '\n') + + summary_header = 'str(mod.dataset)' + ' ' \ + + 'str(mod.corrupt_type_supervised)' + ' ' + 'str(mod.corrupt_prob_supervised)' \ + + ' ' + 'str(mod.corrupt_type_bandit)' + ' ' + 'str(mod.corrupt_prob_bandit)' \ + + ' ' + 'str(mod.warm_start)' + ' ' + 'str(mod.bandit)' + ' ' + 'str(mod.cb_type)' \ + + ' ' + 'str(mod.validation_method)' + ' ' + 'str(mod.weighting_scheme)' \ + + ' ' + 'str(mod.lambda_scheme)' + ' ' + 'str(mod.choices_lambda)' \ + + ' ' + 'str(mod.no_supervised)' + ' ' + 'str(mod.no_bandit)' \ + + ' ' + 'str(avg_error_value)' + ' ' + 'str(actual_var_value)' \ + + ' ' + 'str(ideal_var_value)' + + summary_file.write(summary_header+'\n') + + #summary_file.write('dataset' + ' ' + 'zeroone_only' + ' ' + 'central_minimax_zeroone' + ' ' + 'bandit_only' + ' ' + 'supervised_only' + ' ' + 'size' + '\n') summary_file.close() - for mod.cb_type, mod.warm_start_frac, mod.dataset in mod.config_task: + for mod.corrupt_type_supervised, mod.corrupt_prob_supervised, mod.cb_type, mod.warm_start_frac, mod.no_supervised, mod.no_bandit, mod.dataset in mod.config_task: gen_comparison_graph(mod) @@ -326,7 +347,8 @@ def main_loop(mod): #mod.choices_warm_start_frac = [0.01 * pow(2, i) for i in range(1)] #mod.choices_warm_start_frac = [0.01, 0.03, 0.1, 0.3] #mod.choices_warm_start_frac = [0.03] - mod.choices_warm_start_frac 
= [args.warm_start_fraction] + mod.choices_warm_start_frac = [0.01, 0.02, 0.04, 0.08, 0.16, 0.32] + #mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] @@ -337,6 +359,8 @@ def main_loop(mod): #choices_cb_types = ['mtr', 'ips'] #mod.choices_cb_types = ['mtr', 'ips'] mod.choices_cb_types = ['mtr'] + mod.choices_no_supervised = [False, True] + mod.choices_no_bandit = [False, True] #choices_choices_lambda = [pow(2,i) for i in range(10,11)] #mod.choices_choices_lambda = [i for i in range(1,3)] #mod.choices_choices_lambda = [i for i in range(1,2)] @@ -344,16 +368,29 @@ def main_loop(mod): #[i for i in range(10,11)] #mod.corrupt_type_supervised = 2 #mod.corrupt_prob_supervised = 0.3 - mod.corrupt_type_supervised = 1 + mod.choices_corrupt_type_supervised = [1,2] + #mod.choices_corrupt_type_supervised = [2] #mod.corrupt_prob_supervised = 0.3 - mod.corrupt_prob_supervised = args.corrupt_prob_supervised + mod.choices_corrupt_prob_supervised = [0,0.3] + #mod.choices_corrupt_prob_supervised = [0.3] mod.corrupt_type_bandit = 1 - mod.corrupt_prob_bandit = args.corrupt_prob_bandit + mod.corrupt_prob_bandit = 0 - mod.validation_method = 2 + mod.validation_method = 1 mod.epsilon = 0.05 + mod.choices_lambda = 2 + mod.weighting_scheme = 1 + mod.lambda_scheme = 3 + mod.no_bandit = False + mod.no_supervised = False + mod.no_exploration = False + mod.cover_on = False + mod.epsilon_on = True + mod.plot_color = 'r' + mod.plot_flat = False + #for correctness test #mod.choices_warm_start = [20] #choices_fprob1 = [0.1] From f529db02240c54c6ebf60153f8837b53ca4bd601 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 30 Apr 2018 17:43:19 -0400 Subject: [PATCH 063/127] a more complete summary file --- scripts/plot_warm_start.py | 224 +++++++++++++++---------------------- 1 file changed, 92 insertions(+), 132 deletions(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 31c0880b379..764ed4855b0 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -24,35 +24,60 @@ def collect_stats(mod): # num_rows = mod.bandit / mod.progress #print vw_output_filename + #avg_error_value = avg_error(mod) + mod.actual_var = actual_var(mod) + mod.ideal_var = ideal_var(mod) + avg_loss = [] last_loss = [] wt = [] end_table = False f = open(vw_output_filename, 'r') - linenumber = 0 + #linenumber = 0 + i = 0 for line in f: - #if not line.strip(): - # end_table = True - #if linenumber >= 9 and (not end_table): vw_progress_pattern = '\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+[a-zA-Z0-9]+\s+[a-zA-Z0-9]+\s+\d+' matchobj = re.match(vw_progress_pattern, line) if matchobj: - items = line.split() - avg_loss.append(float(items[0])) - last_loss.append(float(items[1])) - wt.append(float(items[3])) - linenumber += 1 + avg_loss_str, last_loss_str, counter_str, weight_str, curr_label_str, \ + curr_pred_str, curr_feat_str = line.split() + + avg_loss.append(float(avg_loss_str)) + last_loss.append(float(last_loss_str)) + wt.append(float(weight_str)) + + mod.avg_loss = float(avg_loss_str) + mod.bandit = float(weight_str) + + for mod.ratio in mod.critical_size_ratios: + if mod.bandit >= 0.99 * mod.warm_start * mod.ratio and \ + mod.bandit <= 1.01 * mod.warm_start * mod.ratio: + record_result(mod) + + + #linenumber += 1 f.close() - if len(avg_loss) == 0: - avg_loss = [0] - last_loss = [0] - wt = [0] + #if len(avg_loss) == 0: + # avg_loss = [0] + # last_loss = [0] + # wt = [0] + #return avg_loss, last_loss, wt + +def record_result(mod): + 
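+    # Appends one row to this task's .sum file: the shared problem
+    # parameters (dataset, corruption settings, warm-start size), the
+    # number of bandit examples seen so far and its ratio to the warm
+    # start, the algorithm parameters, and the measured statistics
+    # (avg_loss, actual_var, ideal_var). Called from collect_stats each
+    # time the bandit count crosses one of the critical size ratios.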
problem_params_trailer = [mod.bandit, mod.ratio] + config_name = disperse(mod.problem_params + problem_params_trailer + mod.alg_params, ' ') + + list_results = [mod.avg_loss, mod.actual_var, mod.ideal_var] + result = disperse(list_results, ' ') + + summary_file = open(mod.summary_file_name, 'a') + summary_file.write(config_name + ' ' + result + '\n') + summary_file.close() - return avg_loss, last_loss, wt def execute_vw(mod): @@ -64,7 +89,7 @@ def execute_vw(mod): if mod.cover_on: alg_option += ' --cover 5 --psi 0.01 --nounif ' - mod.cb_type = 'dr' + #mod.cb_type = 'dr' if mod.epsilon_on: alg_option += ' --epsilon ' + str(mod.epsilon) + ' ' if mod.no_bandit: @@ -76,14 +101,6 @@ def execute_vw(mod): #if mod.cb_type == 'mtr': # mod.adf_on = True; - - - # using two datasets - #cmd_catfile = '( head -n ' + str(mod.warm_start) + ' ' + mod.dataset_supervised + ';' + ' head -n ' + str(mod.bandit) + ' ' + mod.dataset_bandit + '; )' - # using only one dataset - #cmd_catfile = '( head -n ' + str(mod.warm_start + mod.bandit) + ' ' + mod.dataset + '; )' - #cmd_catfile = '( cat ' + mod.ds_path+mod.dataset + '; )' - cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option + ' --progress ' + str(mod.progress) \ + ' -d ' + mod.ds_path + mod.dataset \ + ' --corrupt_type_supervised ' + str(mod.corrupt_type_supervised) \ @@ -95,8 +112,6 @@ def execute_vw(mod): + ' --lambda_scheme ' + str(mod.lambda_scheme) cmd = cmd_vw - #cmd = cmd_catfile + ' | ' + cmd_vw - print cmd f = open(mod.vw_output_filename, 'w') @@ -105,11 +120,9 @@ def execute_vw(mod): process.wait() f.close() +''' def plot_errors(mod): - - execute_vw(mod) - avg_loss, last_loss, wt = collect_stats(mod) - + #avg_loss, last_loss, wt = if mod.plot_flat: # for supervised only, we simply plot a horizontal line using the last point len_avg_loss = len(avg_loss) @@ -122,105 +135,42 @@ def plot_errors(mod): ideal_var_value = ideal_var(mod) return avg_error_value, actual_var_value, ideal_var_value +''' + +def disperse(l, ch): + s = '' + for item in l: + s += str(item) + s += ch + return s def gen_comparison_graph(mod): mod.num_lines = get_num_lines(mod.ds_path+mod.dataset) - mod.warm_start = int(math.ceil(mod.warm_start_frac * mod.num_lines)) + mod.progress = int(math.ceil(float(mod.num_lines) / float(mod.num_checkpoints))) + mod.warm_start = mod.warm_start_multiplier * mod.progress mod.bandit = mod.num_lines - mod.warm_start - mod.progress = int(math.ceil(float(mod.bandit) / float(mod.num_checkpoints))) mod.num_classes = get_num_classes(mod.dataset) - #config_name = str(mod.dataset) + '_' + str(mod.fprob1)+'_'+str(mod.fprob2)+'_'+str(mod.warm_start)+'_'+str(mod.bandit)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) + mod.problem_params = [mod.dataset, mod.num_classes, mod.num_lines, \ + mod.corrupt_type_supervised, mod.corrupt_prob_supervised, \ + mod.corrupt_type_bandit, mod.corrupt_prob_bandit, \ + mod.warm_start] - config_name = str(mod.dataset) + ' ' \ - + str(mod.corrupt_type_supervised) + ' ' +str(mod.corrupt_prob_supervised) \ - + ' ' + str(mod.corrupt_type_bandit) + ' ' + str(mod.corrupt_prob_bandit) \ - + ' ' + str(mod.warm_start) + ' ' + str(mod.bandit) + ' ' + str(mod.cb_type) \ - + ' ' + str(mod.validation_method) + ' ' + str(mod.weighting_scheme) \ - + ' ' + str(mod.lambda_scheme) + ' ' + str(mod.choices_lambda) \ - + ' ' + str(mod.no_supervised) + ' ' + 
str(mod.no_bandit) + mod.alg_params = [ mod.cb_type, \ + mod.validation_method, mod.weighting_scheme, \ + mod.lambda_scheme, mod.choices_lambda, \ + mod.no_supervised, mod.no_bandit] - # combined approach, epsilon - mod.vw_output_filename = mod.results_path+config_name+'.txt' - avg_error_value, actual_var_value, ideal_var_value = plot_errors(mod) + mod.vw_output_filename = mod.results_path + disperse(mod.problem_params+mod.alg_params, '_') + '.txt' - result = str(avg_error_value) + ' ' + str(actual_var_value) + ' ' + str(ideal_var_value) + #plot_errors(mod) + execute_vw(mod) + collect_stats(mod) - summary_file = open(mod.summary_file_name, 'a') - summary_file.write(config_name + ' ' + result + '\n') - summary_file.close() print('') - - ''' - # combined approach, cover - # combined approach, per-dataset weighting - #mod.choices_lambda = 1 - #mod.no_bandit = False - #mod.no_supervised = False - #mod.no_exploration = False - #mod.cover_on = True - #mod.vw_output_filename = mod.results_path+config_name+'choices_lambda='+str(mod.choices_lambda)+'.txt' - - mod.choices_lambda = 5 - mod.weighting_scheme = 1 - mod.lambda_scheme = 3 - mod.no_bandit = False - mod.no_supervised = False - mod.no_exploration = False - mod.cover_on = False - mod.epsilon_on = True - #'Combined approach, lambda=5' - mod.plot_color = 'm' - mod.plot_flat = False - mod.vw_output_filename = mod.results_path+config_name+'central_minimax_zeroone'+'.txt' - mod.plot_label = 'Central lambda: minimax, forcing zeroone' - avg_error_comb_2 = plot_errors(mod) - - - # bandit only approach - mod.choices_lambda = 1 - mod.weighting_scheme = 1 - mod.lambda_scheme = 1 - mod.no_bandit = False - mod.no_supervised = True - mod.no_exploration = False - mod.cover_on = False - mod.epsilon_on = True - mod.plot_color = 'b' - mod.plot_flat = False - mod.vw_output_filename = mod.results_path+config_name+'_no_supervised'+'.txt' - mod.plot_label = 'Bandit only' - avg_error_band_only = plot_errors(mod) - - # supervised only approach - mod.choices_lambda = 1 - mod.weighting_scheme = 1 - mod.lambda_scheme = 1 - mod.no_bandit = True - mod.no_supervised = False - mod.no_exploration = False - mod.cover_on = False - mod.epsilon_on = True - mod.plot_color = 'g' - mod.plot_flat = True - mod.vw_output_filename = mod.results_path+config_name+'_no_bandit'+'.txt' - mod.plot_label = 'Supervised only' - avg_error_sup_only = plot_errors(mod) - - pylab.legend() - pylab.xlabel('#bandit examples') - pylab.ylabel('Progressive validation error') - pylab.title(mod.dataset + ' warm_start = ' + str(mod.warm_start) + ' cb_type = ' + mod.cb_type) - #pylab.title('Source 1 feature flipping prob = ' + str(mod.fprob1) + '; source 2 feature flipping prob = ' + str(mod.fprob2) + 'cb_type = '+ mod.cb_type ) - pylab.savefig(mod.results_path+config_name +'.png') - plt.gcf().clear() - - #plt.show() - ''' - def ds_files(ds_path): prevdir = os.getcwd() os.chdir(ds_path) @@ -238,7 +188,14 @@ def get_num_classes(ds): def ds_per_task(mod): # put dataset name to the last coordinate so that the task workloads tend to be # allocated equally - config_all = [item for item in product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised, mod.choices_cb_types, mod.choices_warm_start_frac, mod.choices_no_supervised, mod.choices_no_bandit, mod.dss)] + config_baselines_raw = list(product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised, mod.choices_cb_types, mod.dss, mod.warm_start_multipliers, [1], [False, True], [False, True])) + + config_baselines = 
filter(lambda (x1, x2, x3, x4, x5, x6, x7, x8): x7 == True or x8 == True, config_baselines_raw) + + + config_algs = list(product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised, mod.choices_cb_types, mod.dss, mod.warm_start_multipliers, mod.choices_choices_lambda, [False], [False])) + + config_all = config_baselines + config_algs config_task = [] print len(config_all) for i in range(len(config_all)): @@ -284,22 +241,25 @@ def main_loop(mod): mod.summary_file_name = mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum' summary_file = open(mod.summary_file_name, 'w') - summary_header = 'str(mod.dataset)' + ' ' \ - + 'str(mod.corrupt_type_supervised)' + ' ' + 'str(mod.corrupt_prob_supervised)' \ - + ' ' + 'str(mod.corrupt_type_bandit)' + ' ' + 'str(mod.corrupt_prob_bandit)' \ - + ' ' + 'str(mod.warm_start)' + ' ' + 'str(mod.bandit)' + ' ' + 'str(mod.cb_type)' \ - + ' ' + 'str(mod.validation_method)' + ' ' + 'str(mod.weighting_scheme)' \ - + ' ' + 'str(mod.lambda_scheme)' + ' ' + 'str(mod.choices_lambda)' \ - + ' ' + 'str(mod.no_supervised)' + ' ' + 'str(mod.no_bandit)' \ - + ' ' + 'str(avg_error_value)' + ' ' + 'str(actual_var_value)' \ - + ' ' + 'str(ideal_var_value)' + list_header = ['dataset', 'num_classes', 'total_size', \ + 'corrupt_type_supervised', 'corrupt_prob_supervised', \ + 'corrupt_type_bandit', 'corrupt_prob_bandit', \ + 'warm_start_size', 'bandit_size', 'bandit_supervised_size_ratio', \ + 'cb_type', 'validation_method', 'weighting_scheme', \ + 'lambda_scheme', 'choices_lambda', \ + 'no_supervised', 'no_bandit', \ + 'avg_error', 'actual_variance', \ + 'ideal_variance'] - summary_file.write(summary_header+'\n') + summary_header = disperse(list_header, ' ') - #summary_file.write('dataset' + ' ' + 'zeroone_only' + ' ' + 'central_minimax_zeroone' + ' ' + 'bandit_only' + ' ' + 'supervised_only' + ' ' + 'size' + '\n') + summary_file.write(summary_header+'\n') summary_file.close() - for mod.corrupt_type_supervised, mod.corrupt_prob_supervised, mod.cb_type, mod.warm_start_frac, mod.no_supervised, mod.no_bandit, mod.dataset in mod.config_task: + for mod.corrupt_type_supervised, mod.corrupt_prob_supervised, \ + mod.cb_type, mod.dataset, mod.warm_start_multiplier, \ + mod.choices_lambda, \ + mod.no_supervised, mod.no_bandit in mod.config_task: gen_comparison_graph(mod) @@ -343,11 +303,11 @@ def main_loop(mod): mod.adf_on = True # use fractions instead of absolute numbers - + mod.warm_start_multipliers = [pow(2, i) for i in range(6)] #mod.choices_warm_start_frac = [0.01 * pow(2, i) for i in range(1)] #mod.choices_warm_start_frac = [0.01, 0.03, 0.1, 0.3] #mod.choices_warm_start_frac = [0.03] - mod.choices_warm_start_frac = [0.01, 0.02, 0.04, 0.08, 0.16, 0.32] + #mod.choices_warm_start_frac = [0.01, 0.02, 0.04, 0.08, 0.16, 0.32] #mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] @@ -361,7 +321,7 @@ def main_loop(mod): mod.choices_cb_types = ['mtr'] mod.choices_no_supervised = [False, True] mod.choices_no_bandit = [False, True] - #choices_choices_lambda = [pow(2,i) for i in range(10,11)] + mod.choices_choices_lambda = [2*i for i in range(1,5)] #mod.choices_choices_lambda = [i for i in range(1,3)] #mod.choices_choices_lambda = [i for i in range(1,2)] #mod.choices_choices_lambda = [1, 3, 5, 7] @@ -371,7 +331,7 @@ def main_loop(mod): mod.choices_corrupt_type_supervised = [1,2] #mod.choices_corrupt_type_supervised = [2] #mod.corrupt_prob_supervised = 0.3 - mod.choices_corrupt_prob_supervised 
= [0,0.3] + mod.choices_corrupt_prob_supervised = [0.0,0.3] #mod.choices_corrupt_prob_supervised = [0.3] mod.corrupt_type_bandit = 1 @@ -390,6 +350,7 @@ def main_loop(mod): mod.epsilon_on = True mod.plot_color = 'r' mod.plot_flat = False + mod.critical_size_ratios = [pow(2,i) for i in range(-5, 7)] #for correctness test #mod.choices_warm_start = [20] @@ -403,7 +364,6 @@ def main_loop(mod): # here, we are generating the task specific parameter settings # by first generate all parameter setting and pick every num_tasks of them mod.config_task = ds_per_task(mod) - print 'task ' + str(mod.task_id) + ' of ' + str(mod.num_tasks) + ':' #print mod.ds_task From e84c7d9bc685ddacdee55762477d110f3b28e614 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Wed, 2 May 2018 01:04:53 -0400 Subject: [PATCH 064/127] bring back the pairwise comparison plot --- scripts/alg_comparison.py | 185 ++++++++++++++++++++++++++++++++++--- scripts/plot_warm_start.py | 42 +++++---- vowpalwabbit/cbify.cc | 19 ++++ 3 files changed, 215 insertions(+), 31 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index 4a20fb48ce1..27057514b5c 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -9,6 +9,12 @@ from itertools import compress from math import sqrt import argparse +import numpy as np + + +class model: + def __init__(self): + pass # this part is changable #alg1 = 'epsilon' @@ -28,7 +34,7 @@ def sum_files(result_path): def parse_sum_file(sum_filename): f = open(sum_filename, 'r') #f.seek(0, 0) - table = pd.read_table(f, sep=' ',lineterminator='\n') + table = pd.read_table(f, sep='\s+',lineterminator='\n') return table @@ -40,6 +46,11 @@ def get_z_scores(errors_1, errors_2, sizes): return z_scores def z_score(err_1, err_2, size): + if (abs(err_1) < 1e-6 or abs(err_1) > 1-1e-6) and (abs(err_2) < 1e-6 or abs(err_2) > 1-1e-6): + return 0 + + #print err_1, err_2, size, sqrt( (err_1*(1 - err_1) + err_2*(1-err_2)) / size ) + z = (err_1 - err_2) / sqrt( (err_1*(1 - err_1) + err_2*(1-err_2)) / size ) return z #print z @@ -50,14 +61,13 @@ def is_significant(z): else: return False -def plot_comparison(errors_1, errors_2, sizes, title, filename): - print title - +def plot_comparison(errors_1, errors_2, sizes): + #print title plt.plot([0,1],[0,1]) z_scores = get_z_scores(errors_1, errors_2, sizes) sorted_z_scores = sorted(enumerate(z_scores), key=lambda x:x[1]) - for s in sorted_z_scores: - print s, is_significant(s[1]) + #for s in sorted_z_scores: + # print s, is_significant(s[1]) significance = map(is_significant, z_scores) results_signi_1 = list(compress(errors_1, significance)) @@ -69,17 +79,145 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): results_insigni_2 = list(compress(errors_2, insignificance)) plt.scatter(results_insigni_1, results_insigni_2, s=2, c='k') - plt.title(title) - pylab.savefig(filename) - plt.gcf().clear() + + +def normalized_score(lst): + #print lst + l = min(lst) + u = max(lst) + return [ (item - l) / (u - l + 1e-4) for item in lst ] + +def alg_str(alg_name): + if (alg_name[1] == True and alg_name[2] == True): + return 'no_update' + if (alg_name[1] == True and alg_name[2] == False): + return 'bandit_only' + if (alg_name[1] == False and alg_name[2] == True): + return 'supervised_only' + if (alg_name[1] == False and alg_name[2] == False): + return 'combined_choices_lambda='+str(alg_name[0]) + +def problem_str(name_problem): + return 'supervised_corrupt_type='+str(name_problem[0]) \ + +'_supervised_corrupt_prob='+str(name_problem[1]) \ + 
+'_bandit_supervised_size_ratio='+str(name_problem[2]) + + + +def plot_cdf(alg_name, errs): + + plt.step(np.sort(errs), np.linspace(0, 1, len(errs), endpoint=False), label=alg_str(alg_name)) + + print alg_name + print errs + print len(errs) + #raw_input("Press Enter to continue...") + +def plot_all_cdfs(alg_results, mod): + #plot all cdfs: + i = 0 + for alg_name, errs in alg_results.iteritems(): + plot_cdf(alg_name, errs) + + plt.legend() + plt.xlim(0,1) + plt.ylim(0,1) + plt.savefig(mod.fulldir+problem_str(mod.name_problem)+'.png') + plt.clf() + + +def plot_all_pair_comp(alg_results, sizes, mod): + alg_names = alg_results.keys() + + for i in range(len(alg_names)): + for j in range(len(alg_names)): + if i < j: + errs_1 = alg_results[alg_names[i]] + errs_2 = alg_results[alg_names[j]] + + print len(errs_1), len(errs_2), len(sizes) + #raw_input('Press any key to continue..') + + plot_comparison(errs_1, errs_2, sizes) + + plt.title(alg_str(alg_names[i])+' vs '+alg_str(alg_names[j])) + plt.savefig(mod.fulldir+problem_str(mod.name_problem)+'_'+alg_str(alg_names[i])+'_vs_'+alg_str(alg_names[j])+'.png') + plt.clf() + +def init_results(result_table): + alg_results = {} + for idx, row in result_table.iterrows(): + alg_name = (row['choices_lambda'], row['no_supervised'], row['no_bandit']) + alg_results[alg_name] = [] + return alg_results + +def plot_all(mod, all_results): + grouped_by_problem = all_results.groupby(['corrupt_type_supervised', + 'corrupt_prob_supervised','bandit_supervised_size_ratio']) + + #then group by dataset and warm_start size (corresponding to each point in cdf) + for name_problem, group_problem in grouped_by_problem: + normalized_results = None + unnormalized_results = None + sizes = None + mod.name_problem = name_problem + + grouped_by_dataset = group_problem.groupby(['dataset','warm_start_size']) + #then select unique combinations of (no_supervised, no_bandit, choices_lambda) + #e.g. 
(True, True, 1), (True, False, 1), (False, True, 1), (False, False, 2) + #(False, False, 8), and compute a normalized score + + for name_dataset, group_dataset in grouped_by_dataset: + result_table = group_dataset #group_dataset.groupby(['choices_lambda','no_supervised', 'no_bandit']) + + #first time - generate names of algorithms considered + if normalized_results is None: + sizes = [] + normalized_results = init_results(result_table) + unnormalized_results = init_results(result_table) + + #print alg_results + #dummy = input('') + + #in general (including the first time) - record the error rates of all algorithms + errs = [] + for idx, row in result_table.iterrows(): + errs.append(row['avg_error']) + normalized_errs = normalized_score(errs) + + i = 0 + for idx, row in result_table.iterrows(): + if i == 0: + sizes.append(row['total_size']) + alg_name = (row['choices_lambda'], row['no_supervised'], row['no_bandit']) + unnormalized_results[alg_name].append(errs[i]) + normalized_results[alg_name].append(normalized_errs[i]) + i += 1 + + plot_all_pair_comp(unnormalized_results, sizes, mod) + plot_all_cdfs(normalized_results, mod) + + + if __name__ == '__main__': parser = argparse.ArgumentParser(description='result summary') parser.add_argument('--results_dir', default='../../../figs/') + parser.add_argument('--filter', default='1') + parser.add_argument('--plot_subdir', default='expt1/') args = parser.parse_args() - results_dir = args.results_dir + + mod = model() + + mod.results_dir = args.results_dir + mod.filter = args.filter + mod.plot_subdir = args.plot_subdir + + mod.fulldir = mod.results_dir + mod.plot_subdir + if not os.path.exists(mod.fulldir): + os.makedirs(mod.fulldir) #results_dir = '../../../lambdas/' #results_dir = '../../../warm_start_frac=0.1/' @@ -97,7 +235,7 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): #results_dir = '../../../type2_0.65/' #results_dir = '../../../type2_0.3/' - dss = sum_files(results_dir) + dss = sum_files(mod.results_dir) #print dss[168] @@ -105,7 +243,7 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): for i in range(len(dss)): print 'result file name: ', dss[i] - result = parse_sum_file(results_dir + dss[i]) + result = parse_sum_file(mod.results_dir + dss[i]) if (i == 0): all_results = result @@ -114,6 +252,24 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): print all_results + #first group by corruption mode, then corruption prob + #then group by warm start - bandit ratio + #these constitutes all the problem settings we are looking at (corresponding + #to each cdf graph) + + if mod.filter == '1': + pass + elif mod.filter == '2': + #print all_results['warm_start_size'] >= 100 + #raw_input(' ') + all_results = all_results[all_results['warm_start_size'] >= 100] + elif mod.filter == '3': + all_results = all_results[all_results['num_classes'] >= 3] + elif mod.filter == '4': + all_results = all_results[all_results['num_classes'] <= 2] + + plot_all(mod, all_results) + #if i >= 331 and i <= 340: # print 'result:', result # print 'all_results:', all_results @@ -128,6 +284,7 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): #for cl, results_lambda in grouped: #results_lambda = all_results[all_results['choices_lambda'] == cl] # compare combined w/ supervised + ''' alg1 = all_results.columns[1] alg2 = all_results.columns[2] bandit_only = all_results.columns[3] @@ -139,12 +296,12 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): results_bandit = 
all_results[bandit_only].tolist() results_supervised = all_results[supervised_only].tolist() dataset_sizes = all_results[sizes].tolist() - + ''' #print alg1 #print results_alg1 # compare combined w/ bandit - plot_comparison(results_alg1, results_bandit, dataset_sizes, alg1 + ' vs ' + 'bandit only', results_dir + alg1 + ' vs ' + 'bandit only' + '.png') + #plot_comparison(results_alg1, results_bandit, dataset_sizes, alg1 + ' vs ' + 'bandit only', results_dir + alg1 + ' vs ' + 'bandit only' + '.png') #plot_comparison(results_alg1, results_supervised, dataset_sizes, alg1 + ' vs ' + 'supervised only', results_dir + alg1 + ' vs ' + 'supervised only' + '.png') #plot_comparison(results_alg2, results_bandit, dataset_sizes, alg2 + ' vs ' + 'bandit only', results_dir + alg2 + ' vs ' + 'bandit only' + '.png') #plot_comparison(results_alg2, results_supervised, dataset_sizes, alg2 + ' vs ' + 'supervised only', results_dir + alg2 + ' vs ' + 'supervised only' + '.png') diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 764ed4855b0..e3ce7f7212e 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -49,11 +49,11 @@ def collect_stats(mod): wt.append(float(weight_str)) mod.avg_loss = float(avg_loss_str) - mod.bandit = float(weight_str) + mod.bandit_effective = int(float(weight_str)) for mod.ratio in mod.critical_size_ratios: - if mod.bandit >= 0.99 * mod.warm_start * mod.ratio and \ - mod.bandit <= 1.01 * mod.warm_start * mod.ratio: + if mod.bandit_effective >= 0.99 * mod.warm_start * mod.ratio and \ + mod.bandit_effective <= 1.01 * mod.warm_start * mod.ratio: record_result(mod) @@ -68,7 +68,7 @@ def collect_stats(mod): #return avg_loss, last_loss, wt def record_result(mod): - problem_params_trailer = [mod.bandit, mod.ratio] + problem_params_trailer = [mod.bandit_effective, mod.ratio] config_name = disperse(mod.problem_params + problem_params_trailer + mod.alg_params, ' ') list_results = [mod.avg_loss, mod.actual_var, mod.ideal_var] @@ -188,14 +188,22 @@ def get_num_classes(ds): def ds_per_task(mod): # put dataset name to the last coordinate so that the task workloads tend to be # allocated equally - config_baselines_raw = list(product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised, mod.choices_cb_types, mod.dss, mod.warm_start_multipliers, [1], [False, True], [False, True])) - config_baselines = filter(lambda (x1, x2, x3, x4, x5, x6, x7, x8): x7 == True or x8 == True, config_baselines_raw) + # put dataset name to the first coordinate so that the result production order is + # in accordance with dataset order + config_corrupt_sup_raw = product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised) + config_corrupt_sup = filter(lambda (type, prob): type == 1 or abs(prob) > 1e-4, config_corrupt_sup_raw) - config_algs = list(product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised, mod.choices_cb_types, mod.dss, mod.warm_start_multipliers, mod.choices_choices_lambda, [False], [False])) + config_common = product(mod.dss, config_corrupt_sup, mod.choices_cb_types, mod.warm_start_multipliers) + + config_baselines_raw = list(product([1], [True, False], [True, False])) + config_baselines = filter(lambda (x1, x2, x3): x2 == True or x3 == True, config_baselines_raw) + config_algs = list(product(mod.choices_choices_lambda, [False], [False])) + config_all_spec = config_baselines + config_algs + + config_all = list(product(config_common, config_all_spec)) - config_all = config_baselines + config_algs 
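+    # Each config_all entry pairs the shared settings with an algorithm
+    # triple (choices_lambda, no_supervised, no_bandit). For illustration,
+    # with a hypothetical dataset file 'ds1.vw.gz', a bandit-only baseline
+    # entry would look like
+    #   (('ds1.vw.gz', (1, 0.0), 'mtr', 2), (1, True, False))
+    # and a combined-method entry (choices_lambda = 4) like
+    #   (('ds1.vw.gz', (1, 0.0), 'mtr', 2), (4, False, False))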
config_task = [] print len(config_all) for i in range(len(config_all)): @@ -256,10 +264,10 @@ def main_loop(mod): summary_file.write(summary_header+'\n') summary_file.close() - for mod.corrupt_type_supervised, mod.corrupt_prob_supervised, \ - mod.cb_type, mod.dataset, mod.warm_start_multiplier, \ - mod.choices_lambda, \ - mod.no_supervised, mod.no_bandit in mod.config_task: + for ((mod.dataset, (mod.corrupt_type_supervised, mod.corrupt_prob_supervised), \ + mod.cb_type, mod.warm_start_multiplier), \ + (mod.choices_lambda, \ + mod.no_supervised, mod.no_bandit)) in mod.config_task: gen_comparison_graph(mod) @@ -303,7 +311,7 @@ def main_loop(mod): mod.adf_on = True # use fractions instead of absolute numbers - mod.warm_start_multipliers = [pow(2, i) for i in range(6)] + mod.warm_start_multipliers = [2*pow(4, i) for i in range(3)] #mod.choices_warm_start_frac = [0.01 * pow(2, i) for i in range(1)] #mod.choices_warm_start_frac = [0.01, 0.03, 0.1, 0.3] #mod.choices_warm_start_frac = [0.03] @@ -319,9 +327,9 @@ def main_loop(mod): #choices_cb_types = ['mtr', 'ips'] #mod.choices_cb_types = ['mtr', 'ips'] mod.choices_cb_types = ['mtr'] - mod.choices_no_supervised = [False, True] - mod.choices_no_bandit = [False, True] - mod.choices_choices_lambda = [2*i for i in range(1,5)] + #mod.choices_no_supervised = [False, True] + #mod.choices_no_bandit = [False, True] + mod.choices_choices_lambda = [2, 4, 8] #mod.choices_choices_lambda = [i for i in range(1,3)] #mod.choices_choices_lambda = [i for i in range(1,2)] #mod.choices_choices_lambda = [1, 3, 5, 7] @@ -335,7 +343,7 @@ def main_loop(mod): #mod.choices_corrupt_prob_supervised = [0.3] mod.corrupt_type_bandit = 1 - mod.corrupt_prob_bandit = 0 + mod.corrupt_prob_bandit = 0.0 mod.validation_method = 1 mod.epsilon = 0.05 diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 036afc6355e..5fec5f3f233 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -131,6 +131,25 @@ void setup_lambdas(cbify& data, example& ec) // The lambdas are in fact arranged in ascending order (the middle lambda is 0.5) v_array& lambdas = data.lambdas; + //bandit only + if (!data.ind_supervised && data.ind_bandit) + { + for (uint32_t i = 0; i Date: Sat, 5 May 2018 18:06:33 -0400 Subject: [PATCH 065/127] added type 3 noise --- scripts/alg_comparison.py | 27 ++++++-- scripts/plot_warm_start.py | 133 +++++++++++++++++++++++++------------ vowpalwabbit/cbify.cc | 8 ++- 3 files changed, 120 insertions(+), 48 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index 27057514b5c..3826b66b2b8 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -81,9 +81,9 @@ def plot_comparison(errors_1, errors_2, sizes): plt.scatter(results_insigni_1, results_insigni_2, s=2, c='k') -def normalized_score(lst): +def normalized_score(lst, l): #print lst - l = min(lst) + #l = min(lst) u = max(lst) return [ (item - l) / (u - l + 1e-4) for item in lst ] @@ -120,7 +120,7 @@ def plot_all_cdfs(alg_results, mod): plot_cdf(alg_name, errs) plt.legend() - plt.xlim(0,1) + plt.xlim(-1,1) plt.ylim(0,1) plt.savefig(mod.fulldir+problem_str(mod.name_problem)+'.png') plt.clf() @@ -151,6 +151,20 @@ def init_results(result_table): alg_results[alg_name] = [] return alg_results +def get_best_error(best_error_table, name_dataset): + name = name_dataset[0] + best_error_oneline = best_error_table[best_error_table['dataset'] == name] + best_error = best_error_oneline.loc[best_error_oneline.index[0], 'avg_error'] + #print name + #raw_input("...") + #print 
best_error_oneline + #raw_input("...") + #print best_error + #raw_input("...") + return best_error + + + def plot_all(mod, all_results): grouped_by_problem = all_results.groupby(['corrupt_type_supervised', 'corrupt_prob_supervised','bandit_supervised_size_ratio']) @@ -180,10 +194,12 @@ def plot_all(mod, all_results): #dummy = input('') #in general (including the first time) - record the error rates of all algorithms + + err_best = get_best_error(mod.best_error_table, name_dataset) errs = [] for idx, row in result_table.iterrows(): errs.append(row['avg_error']) - normalized_errs = normalized_score(errs) + normalized_errs = normalized_score(errs, err_best) i = 0 for idx, row in result_table.iterrows(): @@ -257,6 +273,9 @@ def plot_all(mod, all_results): #these constitutes all the problem settings we are looking at (corresponding #to each cdf graph) + mod.best_error_table = all_results[all_results['choices_lambda'] == 0] + all_results = all_results[all_results['choices_lambda'] != 0] + if mod.filter == '1': pass elif mod.filter == '2': diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index e3ce7f7212e..1b247c142e2 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -24,29 +24,39 @@ def collect_stats(mod): # num_rows = mod.bandit / mod.progress #print vw_output_filename - #avg_error_value = avg_error(mod) + avg_error_value = avg_error(mod) mod.actual_var = actual_var(mod) mod.ideal_var = ideal_var(mod) - avg_loss = [] - last_loss = [] - wt = [] - end_table = False + #avg_loss = [] + #last_loss = [] + #wt = [] + #end_table = False + + if mod.choices_lambda == 0: + mod.avg_loss = avg_error_value + mod.bandit_effective = 0 + mod.ratio = 0 + record_result(mod) + return f = open(vw_output_filename, 'r') #linenumber = 0 i = 0 for line in f: - vw_progress_pattern = '\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+[a-zA-Z0-9]+\s+[a-zA-Z0-9]+\s+\d+' + vw_progress_pattern = '\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+[a-zA-Z0-9]+\s+[a-zA-Z0-9]+\s+\d+.*' matchobj = re.match(vw_progress_pattern, line) if matchobj: + s = line.split() + if len(s) >= 8: + s = s[:7] avg_loss_str, last_loss_str, counter_str, weight_str, curr_label_str, \ - curr_pred_str, curr_feat_str = line.split() + curr_pred_str, curr_feat_str = s - avg_loss.append(float(avg_loss_str)) - last_loss.append(float(last_loss_str)) - wt.append(float(weight_str)) + #avg_loss.append(float(avg_loss_str)) + #last_loss.append(float(last_loss_str)) + #wt.append(float(weight_str)) mod.avg_loss = float(avg_loss_str) mod.bandit_effective = int(float(weight_str)) @@ -75,7 +85,7 @@ def record_result(mod): result = disperse(list_results, ' ') summary_file = open(mod.summary_file_name, 'a') - summary_file.write(config_name + ' ' + result + '\n') + summary_file.write(config_name + result + '\n') summary_file.close() @@ -101,15 +111,23 @@ def execute_vw(mod): #if mod.cb_type == 'mtr': # mod.adf_on = True; - cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option + ' --progress ' + str(mod.progress) \ - + ' -d ' + mod.ds_path + mod.dataset \ - + ' --corrupt_type_supervised ' + str(mod.corrupt_type_supervised) \ - + ' --corrupt_prob_supervised ' + str(mod.corrupt_prob_supervised) \ - + ' --corrupt_type_bandit ' + str(mod.corrupt_type_bandit) \ - + ' --corrupt_prob_bandit ' + str(mod.corrupt_prob_bandit) \ - + ' --validation_method ' + str(mod.validation_method) \ 
- + ' --weighting_scheme ' + str(mod.weighting_scheme) \ - + ' --lambda_scheme ' + str(mod.lambda_scheme) + if mod.choices_lambda == 0: + cmd_vw = mod.vw_path + ' --oaa ' + str(mod.num_classes) + ' --passes 5 ' \ + + ' --progress ' + str(mod.progress) + ' -d ' \ + + mod.ds_path + mod.dataset \ + + ' --cache_file ' + mod.results_path + mod.dataset + '.cache' + else: + cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option + ' --progress ' + str(mod.progress) \ + + ' -d ' + mod.ds_path + mod.dataset \ + + ' --corrupt_type_supervised ' + str(mod.corrupt_type_supervised) \ + + ' --corrupt_prob_supervised ' + str(mod.corrupt_prob_supervised) \ + + ' --corrupt_type_bandit ' + str(mod.corrupt_type_bandit) \ + + ' --corrupt_prob_bandit ' + str(mod.corrupt_prob_bandit) \ + + ' --validation_method ' + str(mod.validation_method) \ + + ' --weighting_scheme ' + str(mod.weighting_scheme) \ + + ' --lambda_scheme ' + str(mod.lambda_scheme) \ + + ' --learning_rate ' + str(mod.learning_rate) \ + + ' --overwrite_label ' + str(mod.majority_class) cmd = cmd_vw print cmd @@ -148,6 +166,7 @@ def disperse(l, ch): def gen_comparison_graph(mod): mod.num_lines = get_num_lines(mod.ds_path+mod.dataset) + mod.majority_class = get_majority_class(mod.ds_path+mod.dataset) mod.progress = int(math.ceil(float(mod.num_lines) / float(mod.num_checkpoints))) mod.warm_start = mod.warm_start_multiplier * mod.progress mod.bandit = mod.num_lines - mod.warm_start @@ -195,14 +214,31 @@ def ds_per_task(mod): config_corrupt_sup_raw = product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised) config_corrupt_sup = filter(lambda (type, prob): type == 1 or abs(prob) > 1e-4, config_corrupt_sup_raw) - config_common = product(mod.dss, config_corrupt_sup, mod.choices_cb_types, mod.warm_start_multipliers) + config_problem = product(mod.dss, config_corrupt_sup, mod.choices_cb_types, mod.warm_start_multipliers, mod.learning_rates) + - config_baselines_raw = list(product([1], [True, False], [True, False])) - config_baselines = filter(lambda (x1, x2, x3): x2 == True or x3 == True, config_baselines_raw) - config_algs = list(product(mod.choices_choices_lambda, [False], [False])) - config_all_spec = config_baselines + config_algs - config_all = list(product(config_common, config_all_spec)) + if mod.baselines_on: + config_baselines_raw = list(product([1], [True, False], [True, False])) + config_baselines_solution = filter(lambda (x1, x2, x3): x2 == True or x3 == True, config_baselines_raw) + config_baselines = list(product(config_problem, config_baselines_solution)) + else: + config_baselines = [] + + if mod.algs_on: + config_algs_solution = list(product(mod.choices_choices_lambda, [False], [False])) + config_algs = list(product(config_problem, config_algs_solution)) + else: + config_algs = [] + + if mod.optimal_on: + config_optimal_problem = product(mod.dss, [(1, 0)], [1], [1], [0.5]) + config_optimal_solution = [(0, False, False)] + config_optimal = list(product(config_optimal_problem, config_optimal_solution)) + else: + config_optimal = [] + + config_all = config_baselines + config_algs + config_optimal config_task = [] print len(config_all) @@ -219,6 +255,10 @@ def get_num_lines(dataset_name): ps.wait() return int(output) +def get_majority_class(dataset_name): + maj_class = subprocess.check_output(('zcat '+ dataset_name +' | cut -d \' \' -f 1 | sort | uniq 
-c | sort -r | head -1 | xargs | cut -d \' \' -f 2 '), shell=True) + return int(maj_class) + def avg_error(mod): return vw_output_extract(mod, 'average loss') @@ -233,13 +273,17 @@ def vw_output_extract(mod, pattern): vw_output = open(mod.vw_output_filename, 'r') vw_output_text = vw_output.read() #print vw_output_text - rgx = re.compile('^'+pattern+' = (.*)$', flags=re.M) + #rgx_pattern = '^'+pattern+' = (.*)(|\sh)\n.*$' + #print rgx_pattern + rgx_pattern = '.*'+pattern+' = ([\d]*.[\d]*)( h|)\n.*' + rgx = re.compile(rgx_pattern, flags=re.M) errs = rgx.findall(vw_output_text) if not errs: avge = 0 else: - avge = float(errs[0]) + print errs + avge = float(errs[0][0]) vw_output.close() return avge @@ -265,7 +309,7 @@ def main_loop(mod): summary_file.close() for ((mod.dataset, (mod.corrupt_type_supervised, mod.corrupt_prob_supervised), \ - mod.cb_type, mod.warm_start_multiplier), \ + mod.cb_type, mod.warm_start_multiplier, mod.learning_rate), \ (mod.choices_lambda, \ mod.no_supervised, mod.no_bandit)) in mod.config_task: gen_comparison_graph(mod) @@ -292,6 +336,10 @@ def main_loop(mod): time.sleep(1) mod = model() + mod.baselines_on = False + mod.algs_on = False + mod.optimal_on = True + mod.num_tasks = args.num_tasks mod.task_id = args.task_id @@ -301,7 +349,7 @@ def main_loop(mod): #DIR_PATTERN = '../results/cbresults_{}/' - mod.num_checkpoints = 100 + mod.num_checkpoints = 200 #mod.warm_start = 50 #mod.bandit = 4096 #mod.num_classes = 10 @@ -311,11 +359,8 @@ def main_loop(mod): mod.adf_on = True # use fractions instead of absolute numbers - mod.warm_start_multipliers = [2*pow(4, i) for i in range(3)] - #mod.choices_warm_start_frac = [0.01 * pow(2, i) for i in range(1)] - #mod.choices_warm_start_frac = [0.01, 0.03, 0.1, 0.3] - #mod.choices_warm_start_frac = [0.03] - #mod.choices_warm_start_frac = [0.01, 0.02, 0.04, 0.08, 0.16, 0.32] + mod.warm_start_multipliers = [pow(2,i) for i in range(5)] + #mod.warm_start_multipliers = [2*pow(4, i) for i in range(3)]] #mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] @@ -329,17 +374,18 @@ def main_loop(mod): mod.choices_cb_types = ['mtr'] #mod.choices_no_supervised = [False, True] #mod.choices_no_bandit = [False, True] - mod.choices_choices_lambda = [2, 4, 8] + #mod.choices_choices_lambda = [2, 4, 8] + mod.choices_choices_lambda = [] #mod.choices_choices_lambda = [i for i in range(1,3)] #mod.choices_choices_lambda = [i for i in range(1,2)] #mod.choices_choices_lambda = [1, 3, 5, 7] #[i for i in range(10,11)] #mod.corrupt_type_supervised = 2 #mod.corrupt_prob_supervised = 0.3 - mod.choices_corrupt_type_supervised = [1,2] + mod.choices_corrupt_type_supervised = [1,2,3] #mod.choices_corrupt_type_supervised = [2] #mod.corrupt_prob_supervised = 0.3 - mod.choices_corrupt_prob_supervised = [0.0,0.3] + mod.choices_corrupt_prob_supervised = [0.0,0.3,0.6,0.9,1] #mod.choices_corrupt_prob_supervised = [0.3] mod.corrupt_type_bandit = 1 @@ -351,15 +397,16 @@ def main_loop(mod): mod.choices_lambda = 2 mod.weighting_scheme = 1 mod.lambda_scheme = 3 - mod.no_bandit = False - mod.no_supervised = False + mod.no_exploration = False mod.cover_on = False mod.epsilon_on = True - mod.plot_color = 'r' - mod.plot_flat = False - mod.critical_size_ratios = [pow(2,i) for i in range(-5, 7)] + #mod.plot_color = 'r' + #mod.plot_flat = False + mod.critical_size_ratios = [184 * pow(2, -i) for i in range(8) ] + mod.learning_rates = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0] + #pow(2,i) for i in range(-5, 7) 
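# Rough sanity check for the checkpoint arithmetic above (toy numbers only --
# total_size and the warm start multiplier are assumptions, not script values):
import math

total_size, num_checkpoints = 10000, 200
progress = int(math.ceil(float(total_size) / float(num_checkpoints)))  # VW --progress
warm_start = 4 * progress                  # warm_start = multiplier * progress

# a VW progress row is recorded when the effective bandit count falls within
# (1 +/- 1e-7) of warm_start * ratio, for some critical size ratio
for ratio in [184 * pow(2, -i) for i in range(7)]:
    target = warm_start * ratio
    if target <= total_size - warm_start:
        print('checkpoint near bandit size %d (ratio %g)' % (int(target), ratio))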
#for correctness test #mod.choices_warm_start = [20] #choices_fprob1 = [0.1] diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 5fec5f3f233..bec78e99e70 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -11,6 +11,7 @@ #define UAR 1 #define CIRCULAR 2 +#define OVERWRITE 3 #define BANDIT_VALI 1 #define SUPERVISED_VALI 2 @@ -101,6 +102,7 @@ struct cbify size_t lambda_scheme; float epsilon; float cumulative_variance; + size_t overwrite_label; }; @@ -219,6 +221,8 @@ size_t corrupt_action(size_t action, cbify& data, size_t data_type) { if (corrupt_type == UAR) return generate_uar_action(data); + else if (corrupt_type == OVERWRITE) + return data.overwrite_label; else return (action % data.num_actions) + 1; } @@ -1005,7 +1009,8 @@ base_learner* cbify_setup(vw& all) ("corrupt_type_bandit", po::value(), "probability of label corruption in the bandit part (1 is uar, 2 is circular)") ("validation_method", po::value(), "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised and amortizing)") ("weighting_scheme", po::value(), "weighting scheme (1 is per instance weighting, 2 is per dataset weighting (where we use a diminishing weighting scheme) )") - ("lambda_scheme", po::value(), "Lambda set scheme (1 is expanding based on center 0.5, 2 is expanding based on center=minimax lambda, 3 is expanding based on center=minimax lambda along with forcing 0,1 in Lambda )"); + ("lambda_scheme", po::value(), "Lambda set scheme (1 is expanding based on center 0.5, 2 is expanding based on center=minimax lambda, 3 is expanding based on center=minimax lambda along with forcing 0,1 in Lambda )") + ("overwrite_label", po::value(), "the label type 3 corruptions (overwriting) turn to"); add_options(all); po::variables_map& vm = all.vm; @@ -1042,6 +1047,7 @@ base_learner* cbify_setup(vw& all) data.weighting_scheme = vm.count("weighting_scheme") ? vm["weighting_scheme"].as() : INSTANCE_WT; // 1 is the default value data.lambda_scheme = vm.count("lambda_scheme") ? vm["lambda_scheme"].as() : ABS_CENTRAL; data.epsilon = vm.count("epsilon") ? vm["epsilon"].as() : 0.05; + data.overwrite_label = vm.count("overwrite_label") ? 
vm["overwrite_label"].as() : 1; //cout<<"does epsilon exist?"<= 0.5) return (1 - data.lambdas[i]) / data.lambdas[i]; @@ -522,12 +528,12 @@ size_t predict_cs(cbify& data, example& ec) } -void learn_cs(cbify& data, example& ec) +void learn_cs(cbify& data, example& ec, size_t ec_type) { float old_weight = ec.weight; for (uint32_t i = 0; i < data.choices_lambda; i++) { - float weight_multiplier = compute_weight_multiplier(data, i, SUPERVISED); + float weight_multiplier = compute_weight_multiplier(data, i, ec_type); ec.weight = old_weight * weight_multiplier; data.all->cost_sensitive->learn(ec, i); } @@ -544,10 +550,8 @@ void multiclass_to_cs(cbify& data, COST_SENSITIVE::label& csl, size_t corrupted_ } } -void generate_corrupted_cs(cbify& data, example& ec, MULTICLASS::label_t ld) +void generate_corrupted_cs(cbify& data, example& ec, MULTICLASS::label_t ld, size_t corrupted_label) { - size_t corrupted_label = corrupt_action(ld.label, data, SUPERVISED); - //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) COST_SENSITIVE::label& csl = *data.csls; @@ -575,7 +579,7 @@ void add_to_sup_validation(cbify& data, example& ec) // cout< void predict_or_learn(cbify& data, base_learner& base, example& ec) { //Store the multiclass input label - MULTICLASS::label_t ld = ec.l.multi; + //cout< -void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) +void add_to_sup_validation_adf(cbify& data, example& ec) { + example& ec_copy = data.supervised_validation[data.warm_start_iter]; + VW::copy_example_data(false, &ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); +} - //Store the multiclass input label - MULTICLASS::label_t ld = ec.l.multi; - - if (data.warm_start_iter == 0 && data.bandit_iter == 0) - setup_lambdas(data, ec); +void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) +{ + //Store the multiclass input label + MULTICLASS::label_t ld = ec.l.multi; - copy_example_to_adf(data, ec); + copy_example_to_adf(data, ec); for (size_t a = 0; a < data.adf_data.num_actions; ++a) data.cbls[a].costs = data.adf_data.ecs[a].l.cb.costs; data.cbl_empty->costs = data.adf_data.empty_example->l.cb.costs; - if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly - { + //best_action = predict_sublearner(data, base, argmin); + uint32_t best_action = predict_cs_adf(data, base); - //best_action = predict_sublearner(data, base, argmin); - uint32_t best_action = predict_cs_adf(data, base); + //data.all->cost_sensitive->predict(ec,argmin); - //data.all->cost_sensitive->predict(ec,argmin); + //generate cost-sensitive label + // ecs[a].weight *= 1; + // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; + size_t corrupted_label = corrupt_action(ld.label, data, ec_type); + generate_corrupted_cs_adf(data, ld, corrupted_label); - //generate cost-sensitive label - // ecs[a].weight *= 1; - // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; + if (is_update) + learn_cs_adf(data, ec_type); - generate_corrupted_cs_adf(data, ld); + ec.pred.multiclass = best_action; + ec.l.multi = ld; - if (data.ind_supervised) - learn_cs_adf(data); + //a hack here - allocated memories not deleted + //to be corrected + if (data.validation_method == SUPERVISED_VALI) + add_to_sup_validation_adf(data, ec); +} - ec.pred.multiclass = best_action; - ec.l.multi = ld; - ec.weight = 0; - //a hack here - allocated memories not deleted - //to be corrected - if (data.validation_method == SUPERVISED_VALI) - 
VW::copy_example_data(false, &data.supervised_validation[data.warm_start_iter], &ec, 0, MULTICLASS::mc_label.copy_label); +void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) +{ + //Store the multiclass input label + MULTICLASS::label_t ld = ec.l.multi; - data.warm_start_iter++; + copy_example_to_adf(data, ec); - } - else if (data.bandit_iter < data.bandit_period) // call the bandit learner - { - //size_t pred_pi = predict_cs_adf(data, base, ec); - uint32_t idx = predict_bandit_adf(data, base); + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.cbls[a].costs = data.adf_data.ecs[a].l.cb.costs; + data.cbl_empty->costs = data.adf_data.empty_example->l.cb.costs; - CB::cb_class cl; + //size_t pred_pi = predict_cs_adf(data, base, ec); + uint32_t idx = predict_bandit_adf(data, base); - generate_corrupt_cb_adf(data, cl, ld, idx); + CB::cb_class cl; - // accumulate the cumulative costs of lambdas - accumulate_costs_ips_adf(data, ec, cl, base); + size_t corrupted_label = corrupt_action(ld.label, data, ec_type); + generate_corrupted_cb_adf(data, cl, ld, idx, corrupted_label); - // add cb label to chosen action - auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; - lab.costs.push_back(cl); + // accumulate the cumulative costs of lambdas + accumulate_costs_ips_adf(data, ec, cl, base); + // add cb label to chosen action + auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; + lab.costs.push_back(cl); - if (data.ind_bandit) - learn_bandit_adf(data, base); - accumulate_variance_adf(data, base); + if (is_update) + learn_bandit_adf(data, base, ec_type); - ec.pred.multiclass = cl.action; + accumulate_variance_adf(data, base); - data.bandit_iter++; + ec.pred.multiclass = cl.action; +} + +template +void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) +{ + if (data.warm_start_iter == 0 && data.bandit_iter == 0) + setup_lambdas(data, ec); + if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly + { + if (data.warm_start_type == SUPERVISED_WS) + predict_or_learn_cs_adf(data, base, ec, data.ind_supervised, SUPERVISED); + else + predict_or_learn_bandit_adf(data, base, ec, data.ind_supervised, SUPERVISED); + ec.weight = 0; + data.warm_start_iter++; + } + else if (data.bandit_iter < data.bandit_period) // call the bandit learner + { + predict_or_learn_bandit_adf(data, base, ec, data.ind_bandit, BANDIT); + data.bandit_iter++; if (data.bandit_iter == data.bandit_period) { cout<<"Ideal average variance = "<(), "loss for correct label") ("loss1", po::value(), "loss for incorrect label") - ("warm_start", po::value(), "number of training examples for fully-supervised warm start") + ("warm_start", po::value(), "number of training examples for warm start") ("bandit", po::value(), "number of training examples for bandit processing") ("choices_lambda", po::value(), "numbers of lambdas importance weights to aggregate") ("no_supervised", "indicator of using supervised only") @@ -1010,7 +1047,8 @@ base_learner* cbify_setup(vw& all) ("validation_method", po::value(), "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised and amortizing)") ("weighting_scheme", po::value(), "weighting scheme (1 is per instance weighting, 2 is per dataset weighting (where we use a diminishing weighting scheme) )") ("lambda_scheme", po::value(), "Lambda set scheme (1 is expanding based on center 0.5, 2 is expanding based on center=minimax lambda, 3 is expanding based on 
center=minimax lambda along with forcing 0,1 in Lambda )") - ("overwrite_label", po::value(), "the label type 3 corruptions (overwriting) turn to"); + ("overwrite_label", po::value(), "the label type 3 corruptions (overwriting) turn to") + ("warm_start_type", po::value(), "the type of warm start approach (1 is supervised warm start, 2 is contextual bandit warm start)"); add_options(all); po::variables_map& vm = all.vm; @@ -1048,7 +1086,7 @@ base_learner* cbify_setup(vw& all) data.lambda_scheme = vm.count("lambda_scheme") ? vm["lambda_scheme"].as() : ABS_CENTRAL; data.epsilon = vm.count("epsilon") ? vm["epsilon"].as() : 0.05; data.overwrite_label = vm.count("overwrite_label") ? vm["overwrite_label"].as() : 1; - + data.warm_start_type = vm.count("warm_start_type") ? vm["warm_start_type"].as() : SUPERVISED_WS; //cout<<"does epsilon exist?"<= 0.5) return (1 - data.lambdas[i]) / data.lambdas[i]; @@ -528,12 +526,12 @@ size_t predict_cs(cbify& data, example& ec) } -void learn_cs(cbify& data, example& ec, size_t ec_type) +void learn_cs(cbify& data, example& ec) { float old_weight = ec.weight; for (uint32_t i = 0; i < data.choices_lambda; i++) { - float weight_multiplier = compute_weight_multiplier(data, i, ec_type); + float weight_multiplier = compute_weight_multiplier(data, i, SUPERVISED); ec.weight = old_weight * weight_multiplier; data.all->cost_sensitive->learn(ec, i); } @@ -606,12 +604,12 @@ size_t predict_bandit(cbify& data, base_learner& base, example& ec) } -void learn_bandit(cbify& data, base_learner& base, example& ec, size_t ec_type) +void learn_bandit(cbify& data, base_learner& base, example& ec) { float old_weight = ec.weight; for (uint32_t i = 0; i < data.choices_lambda; i++) { - float weight_multiplier = compute_weight_multiplier(data, i, ec_type); + float weight_multiplier = compute_weight_multiplier(data, i, BANDIT); if (data.weighting_scheme == INSTANCE_WT) ec.weight = old_weight * weight_multiplier; @@ -633,7 +631,7 @@ void accumulate_variance(cbify& data, example& ec) } -void predict_or_learn_cs(cbify& data, example& ec, bool is_update, size_t ec_type) +void predict_or_learn_cs(cbify& data, example& ec, bool is_update, size_t data_type) { MULTICLASS::label_t ld = ec.l.multi; //predict @@ -641,11 +639,11 @@ void predict_or_learn_cs(cbify& data, example& ec, bool is_update, size_t ec_typ //learn //first, corrupt fully supervised example ec's label here - size_t corrupted_label = corrupt_action(ld.label, data, ec_type); + size_t corrupted_label = corrupt_action(ld.label, data, data_type); generate_corrupted_cs(data, ec, ld, corrupted_label); if (is_update) - learn_cs(data, ec, ec_type); + learn_cs(data, ec); if (data.validation_method == SUPERVISED_VALI) add_to_sup_validation(data, ec); @@ -654,14 +652,14 @@ void predict_or_learn_cs(cbify& data, example& ec, bool is_update, size_t ec_typ ec.l.multi = ld; } -void predict_or_learn_bandit(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) +void predict_or_learn_bandit(cbify& data, base_learner& base, example& ec, bool is_update, size_t data_type) { MULTICLASS::label_t ld = ec.l.multi; size_t action = predict_bandit(data, base, ec); CB::cb_class cl; - size_t corrupted_label = corrupt_action(ld.label, data, ec_type); + size_t corrupted_label = corrupt_action(ld.label, data, data_type); generate_corrupted_cb(data, ec, cl, ld, action, corrupted_label); // accumulate the cumulative costs of lambdas accumulate_costs_ips(data, ec, cl); @@ -673,7 +671,7 @@ void predict_or_learn_bandit(cbify& data, base_learner& 
base, example& ec, bool ec.pred = data.pred; if (is_update) - learn_bandit(data, base, ec, ec_type); + learn_bandit(data, base, ec); data.a_s.erase(); data.a_s = ec.pred.a_s; @@ -787,7 +785,7 @@ void generate_corrupted_cs_adf(cbify& data, MULTICLASS::label_t ld, size_t corru } -void learn_cs_adf(cbify& data, size_t ec_type) +void learn_cs_adf(cbify& data) { example* ecs = data.adf_data.ecs; example* empty_example = data.adf_data.empty_example; @@ -797,7 +795,7 @@ void learn_cs_adf(cbify& data, size_t ec_type) for (uint32_t i = 0; i < data.choices_lambda; i++) { - float weight_multiplier = compute_weight_multiplier(data, i, ec_type); + float weight_multiplier = compute_weight_multiplier(data, i, SUPERVISED); for (size_t a = 0; a < data.adf_data.num_actions; ++a) { ecs[a].weight = data.old_weights[a] * weight_multiplier; @@ -825,7 +823,7 @@ void generate_corrupted_cb_adf(cbify& data, CB::cb_class& cl, MULTICLASS::label_ } -void learn_bandit_adf(cbify& data, base_learner& base, size_t ec_type) +void learn_bandit_adf(cbify& data, base_learner& base) { example* ecs = data.adf_data.ecs; example* empty_example = data.adf_data.empty_example; @@ -835,7 +833,7 @@ void learn_bandit_adf(cbify& data, base_learner& base, size_t ec_type) for (uint32_t i = 0; i < data.choices_lambda; i++) { - float weight_multiplier = compute_weight_multiplier(data, i, ec_type); + float weight_multiplier = compute_weight_multiplier(data, i, BANDIT); for (size_t a = 0; a < data.adf_data.num_actions; ++a) { @@ -880,7 +878,7 @@ void add_to_sup_validation_adf(cbify& data, example& ec) VW::copy_example_data(false, &ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); } -void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) +void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t data_type) { //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; @@ -899,11 +897,11 @@ void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool //generate cost-sensitive label // ecs[a].weight *= 1; // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; - size_t corrupted_label = corrupt_action(ld.label, data, ec_type); + size_t corrupted_label = corrupt_action(ld.label, data, SUPERVISED); generate_corrupted_cs_adf(data, ld, corrupted_label); if (is_update) - learn_cs_adf(data, ec_type); + learn_cs_adf(data); ec.pred.multiclass = best_action; ec.l.multi = ld; @@ -915,7 +913,7 @@ void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool } -void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) +void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t data_type) { //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; @@ -931,7 +929,7 @@ void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, b CB::cb_class cl; - size_t corrupted_label = corrupt_action(ld.label, data, ec_type); + size_t corrupted_label = corrupt_action(ld.label, data, data_type); generate_corrupted_cb_adf(data, cl, ld, idx, corrupted_label); // accumulate the cumulative costs of lambdas @@ -943,7 +941,7 @@ void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, b if (is_update) - learn_bandit_adf(data, base, ec_type); + learn_bandit_adf(data, base); accumulate_variance_adf(data, base); @@ -969,6 +967,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, 
example& ec) { predict_or_learn_bandit_adf(data, base, ec, data.ind_bandit, BANDIT); data.bandit_iter++; + if (data.bandit_iter == data.bandit_period) { cout<<"Ideal average variance = "< Date: Mon, 7 May 2018 14:36:39 -0400 Subject: [PATCH 068/127] fixed the place of weight multiplier calculation --- vowpalwabbit/cbify.cc | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 9f634ebe73b..bab211f3da2 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -494,20 +494,26 @@ void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_l float compute_weight_multiplier(cbify& data, size_t i, size_t ec_type) { + float weight_multiplier; + if (ec_type == SUPERVISED) { if (data.lambdas[i] >= 0.5) - return (1 - data.lambdas[i]) / data.lambdas[i]; + weight_multiplier = (1 - data.lambdas[i]) / data.lambdas[i]; else - return 1; + weight_multiplier = 1; } else { if (data.lambdas[i] >= 0.5) - return 1; + weight_multiplier = 1; else - return data.lambdas[i] / (1-data.lambdas[i]); + weight_multiplier = data.lambdas[i] / (1-data.lambdas[i]); + + if (data.weighting_scheme == DATASET_WT) + weight_multiplier = weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); } + return weight_multiplier; } @@ -612,12 +618,7 @@ void learn_bandit(cbify& data, base_learner& base, example& ec, size_t ec_type) for (uint32_t i = 0; i < data.choices_lambda; i++) { float weight_multiplier = compute_weight_multiplier(data, i, ec_type); - - if (data.weighting_scheme == INSTANCE_WT) - ec.weight = old_weight * weight_multiplier; - else - ec.weight = old_weight * weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); - + ec.weight = old_weight * weight_multiplier; base.learn(ec, i); } ec.weight = old_weight; @@ -836,14 +837,9 @@ void learn_bandit_adf(cbify& data, base_learner& base, size_t ec_type) for (uint32_t i = 0; i < data.choices_lambda; i++) { float weight_multiplier = compute_weight_multiplier(data, i, ec_type); - for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - if (data.weighting_scheme == INSTANCE_WT) - ecs[a].weight = data.old_weights[a] * weight_multiplier; - else - ecs[a].weight = data.old_weights[a] * weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); - + ecs[a].weight = data.old_weights[a] * weight_multiplier; base.learn(ecs[a], i); } base.learn(*empty_example, i); From bc94f6cb19ba280ec7e63f37aaec964c8a6fd13d Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 7 May 2018 14:52:46 -0400 Subject: [PATCH 069/127] force the changes --- vowpalwabbit/cbify.cc | 45 ++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 2050f3b0ded..bab211f3da2 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -6,6 +6,8 @@ #include "../explore/cpp/MWTExplorer.h" #include "vw.h" +//In the future, the above two's names should be changed to +//WARM_START and INTERACTIVE #define SUPERVISED 1 #define BANDIT 2 @@ -204,12 +206,12 @@ size_t generate_uar_action(cbify& data) } -size_t corrupt_action(size_t action, cbify& data, size_t data_type) +size_t corrupt_action(size_t action, cbify& data, size_t ec_type) { float corrupt_prob; size_t corrupt_type; - if (data_type == SUPERVISED) + if (ec_type == SUPERVISED) { corrupt_prob = data.corrupt_prob_supervised; corrupt_type = 
data.corrupt_type_supervised; @@ -490,7 +492,7 @@ void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_l } -float compute_weight_multiplier(cbify& data, size_t i, size_t data_type) +float compute_weight_multiplier(cbify& data, size_t i, size_t ec_type) { float weight_multiplier; @@ -532,12 +534,12 @@ size_t predict_cs(cbify& data, example& ec) } -void learn_cs(cbify& data, example& ec) +void learn_cs(cbify& data, example& ec, size_t ec_type) { float old_weight = ec.weight; for (uint32_t i = 0; i < data.choices_lambda; i++) { - float weight_multiplier = compute_weight_multiplier(data, i, SUPERVISED); + float weight_multiplier = compute_weight_multiplier(data, i, ec_type); ec.weight = old_weight * weight_multiplier; data.all->cost_sensitive->learn(ec, i); } @@ -610,7 +612,7 @@ size_t predict_bandit(cbify& data, base_learner& base, example& ec) } -void learn_bandit(cbify& data, base_learner& base, example& ec) +void learn_bandit(cbify& data, base_learner& base, example& ec, size_t ec_type) { float old_weight = ec.weight; for (uint32_t i = 0; i < data.choices_lambda; i++) @@ -632,7 +634,7 @@ void accumulate_variance(cbify& data, example& ec) } -void predict_or_learn_cs(cbify& data, example& ec, bool is_update, size_t data_type) +void predict_or_learn_cs(cbify& data, example& ec, bool is_update, size_t ec_type) { MULTICLASS::label_t ld = ec.l.multi; //predict @@ -640,11 +642,11 @@ void predict_or_learn_cs(cbify& data, example& ec, bool is_update, size_t data_t //learn //first, corrupt fully supervised example ec's label here - size_t corrupted_label = corrupt_action(ld.label, data, data_type); + size_t corrupted_label = corrupt_action(ld.label, data, ec_type); generate_corrupted_cs(data, ec, ld, corrupted_label); if (is_update) - learn_cs(data, ec); + learn_cs(data, ec, ec_type); if (data.validation_method == SUPERVISED_VALI) add_to_sup_validation(data, ec); @@ -653,14 +655,14 @@ void predict_or_learn_cs(cbify& data, example& ec, bool is_update, size_t data_t ec.l.multi = ld; } -void predict_or_learn_bandit(cbify& data, base_learner& base, example& ec, bool is_update, size_t data_type) +void predict_or_learn_bandit(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) { MULTICLASS::label_t ld = ec.l.multi; size_t action = predict_bandit(data, base, ec); CB::cb_class cl; - size_t corrupted_label = corrupt_action(ld.label, data, data_type); + size_t corrupted_label = corrupt_action(ld.label, data, ec_type); generate_corrupted_cb(data, ec, cl, ld, action, corrupted_label); // accumulate the cumulative costs of lambdas accumulate_costs_ips(data, ec, cl); @@ -672,7 +674,7 @@ void predict_or_learn_bandit(cbify& data, base_learner& base, example& ec, bool ec.pred = data.pred; if (is_update) - learn_bandit(data, base, ec); + learn_bandit(data, base, ec, ec_type); data.a_s.erase(); data.a_s = ec.pred.a_s; @@ -786,7 +788,7 @@ void generate_corrupted_cs_adf(cbify& data, MULTICLASS::label_t ld, size_t corru } -void learn_cs_adf(cbify& data) +void learn_cs_adf(cbify& data, size_t ec_type) { example* ecs = data.adf_data.ecs; example* empty_example = data.adf_data.empty_example; @@ -796,7 +798,7 @@ void learn_cs_adf(cbify& data) for (uint32_t i = 0; i < data.choices_lambda; i++) { - float weight_multiplier = compute_weight_multiplier(data, i, SUPERVISED); + float weight_multiplier = compute_weight_multiplier(data, i, ec_type); for (size_t a = 0; a < data.adf_data.num_actions; ++a) { ecs[a].weight = data.old_weights[a] * weight_multiplier; @@ -824,7 +826,7 @@ 
void generate_corrupted_cb_adf(cbify& data, CB::cb_class& cl, MULTICLASS::label_ } -void learn_bandit_adf(cbify& data, base_learner& base) +void learn_bandit_adf(cbify& data, base_learner& base, size_t ec_type) { example* ecs = data.adf_data.ecs; example* empty_example = data.adf_data.empty_example; @@ -874,7 +876,7 @@ void add_to_sup_validation_adf(cbify& data, example& ec) VW::copy_example_data(false, &ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); } -void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t data_type) +void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) { //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; @@ -893,11 +895,11 @@ void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool //generate cost-sensitive label // ecs[a].weight *= 1; // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; - size_t corrupted_label = corrupt_action(ld.label, data, SUPERVISED); + size_t corrupted_label = corrupt_action(ld.label, data, ec_type); generate_corrupted_cs_adf(data, ld, corrupted_label); if (is_update) - learn_cs_adf(data); + learn_cs_adf(data, ec_type); ec.pred.multiclass = best_action; ec.l.multi = ld; @@ -909,7 +911,7 @@ void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool } -void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t data_type) +void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) { //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; @@ -925,7 +927,7 @@ void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, b CB::cb_class cl; - size_t corrupted_label = corrupt_action(ld.label, data, data_type); + size_t corrupted_label = corrupt_action(ld.label, data, ec_type); generate_corrupted_cb_adf(data, cl, ld, idx, corrupted_label); // accumulate the cumulative costs of lambdas @@ -937,7 +939,7 @@ void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, b if (is_update) - learn_bandit_adf(data, base); + learn_bandit_adf(data, base, ec_type); accumulate_variance_adf(data, base); @@ -963,7 +965,6 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { predict_or_learn_bandit_adf(data, base, ec, data.ind_bandit, BANDIT); data.bandit_iter++; - if (data.bandit_iter == data.bandit_period) { cout<<"Ideal average variance = "< Date: Tue, 8 May 2018 11:11:36 -0400 Subject: [PATCH 070/127] before modifying the baseline of no update --- scripts/alg_comparison.py | 52 ++++++++++++----- scripts/plot_warm_start.py | 115 ++++++++++++++++++++++++++----------- 2 files changed, 118 insertions(+), 49 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index 3826b66b2b8..3d810373075 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -88,14 +88,16 @@ def normalized_score(lst, l): return [ (item - l) / (u - l + 1e-4) for item in lst ] def alg_str(alg_name): - if (alg_name[1] == True and alg_name[2] == True): + if (alg_name[0] == 2): + return 'supervised_underutil_as_bandit' + if (alg_name[2] == True and alg_name[3] == True): return 'no_update' - if (alg_name[1] == True and alg_name[2] == False): + if (alg_name[2] == True and alg_name[3] == False): return 'bandit_only' - if (alg_name[1] == False and alg_name[2] == True): + if (alg_name[2] == False and alg_name[3] == True): return 
'supervised_only' - if (alg_name[1] == False and alg_name[2] == False): - return 'combined_choices_lambda='+str(alg_name[0]) + if (alg_name[2] == False and alg_name[3] == False): + return 'combined_choices_lambda='+str(alg_name[1]) def problem_str(name_problem): return 'supervised_corrupt_type='+str(name_problem[0]) \ @@ -117,12 +119,15 @@ def plot_all_cdfs(alg_results, mod): #plot all cdfs: i = 0 for alg_name, errs in alg_results.iteritems(): - plot_cdf(alg_name, errs) + if (alg_name[2] == False and alg_name[3] == False and alg_name[1] != 8): + pass + else: + plot_cdf(alg_name, errs) plt.legend() - plt.xlim(-1,1) + plt.xlim(-0.2,1) plt.ylim(0,1) - plt.savefig(mod.fulldir+problem_str(mod.name_problem)+'.png') + plt.savefig(mod.problemdir+'/cdf.png') plt.clf() @@ -141,13 +146,13 @@ def plot_all_pair_comp(alg_results, sizes, mod): plot_comparison(errs_1, errs_2, sizes) plt.title(alg_str(alg_names[i])+' vs '+alg_str(alg_names[j])) - plt.savefig(mod.fulldir+problem_str(mod.name_problem)+'_'+alg_str(alg_names[i])+'_vs_'+alg_str(alg_names[j])+'.png') + plt.savefig(mod.problemdir+'/'+alg_str(alg_names[i])+'_vs_'+alg_str(alg_names[j])+'.png') plt.clf() def init_results(result_table): alg_results = {} for idx, row in result_table.iterrows(): - alg_name = (row['choices_lambda'], row['no_supervised'], row['no_bandit']) + alg_name = (row['warm_start_type'], row['choices_lambda'], row['no_supervised'], row['no_bandit']) alg_results[alg_name] = [] return alg_results @@ -201,16 +206,30 @@ def plot_all(mod, all_results): errs.append(row['avg_error']) normalized_errs = normalized_score(errs, err_best) + #print result_table + i = 0 for idx, row in result_table.iterrows(): if i == 0: - sizes.append(row['total_size']) - alg_name = (row['choices_lambda'], row['no_supervised'], row['no_bandit']) - unnormalized_results[alg_name].append(errs[i]) - normalized_results[alg_name].append(normalized_errs[i]) + temp_size = row['bandit_size'] + sizes.append(row['bandit_size']) + + if row['bandit_size'] == temp_size: + alg_name = (row['warm_start_type'], row['choices_lambda'], row['no_supervised'], row['no_bandit']) + unnormalized_results[alg_name].append(errs[i]) + normalized_results[alg_name].append(normalized_errs[i]) i += 1 - plot_all_pair_comp(unnormalized_results, sizes, mod) + #print 'sizes:' + #print len(sizes) + #for k, v in unnormalized_results.iteritems(): + # print len(v) + + mod.problemdir = mod.fulldir+problem_str(mod.name_problem)+'/' + if not os.path.exists(mod.problemdir): + os.makedirs(mod.problemdir) + + #plot_all_pair_comp(unnormalized_results, sizes, mod) plot_all_cdfs(normalized_results, mod) @@ -251,12 +270,15 @@ def plot_all(mod, all_results): #results_dir = '../../../type2_0.65/' #results_dir = '../../../type2_0.3/' + print 'reading directory..' dss = sum_files(mod.results_dir) + print len(dss) #print dss[168] all_results = None + print 'reading sum tables..' 
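# Sketch of the scoring applied downstream of these tables: per-problem errors
# are rescaled against the best (choices_lambda == 0) error before the CDFs are
# drawn. plot_cdf's body is not shown in this patch, so the empirical CDF below
# is an illustrative guess at its intent; the numbers are toy values.
def normalized_score(lst, l):
    u = max(lst)
    return [(item - l) / (u - l + 1e-4) for item in lst]

errs = [0.12, 0.20, 0.31]          # hypothetical avg_error values for one problem
best = 0.10                        # from the choices_lambda == 0 (optimal) rows
scores = sorted(normalized_score(errs, best))
ecdf = [(s, float(i + 1) / len(scores)) for i, s in enumerate(scores)]
print(ecdf)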
for i in range(len(dss)): print 'result file name: ', dss[i] result = parse_sum_file(mod.results_dir + dss[i]) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 1b247c142e2..5f44d5312ad 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -62,8 +62,8 @@ def collect_stats(mod): mod.bandit_effective = int(float(weight_str)) for mod.ratio in mod.critical_size_ratios: - if mod.bandit_effective >= 0.99 * mod.warm_start * mod.ratio and \ - mod.bandit_effective <= 1.01 * mod.warm_start * mod.ratio: + if mod.bandit_effective >= (1 - 1e-7) * mod.warm_start * mod.ratio and \ + mod.bandit_effective <= (1 + 1e-7) * mod.warm_start * mod.ratio: record_result(mod) @@ -79,13 +79,14 @@ def collect_stats(mod): def record_result(mod): problem_params_trailer = [mod.bandit_effective, mod.ratio] - config_name = disperse(mod.problem_params + problem_params_trailer + mod.alg_params, ' ') + config_name = mod.problem_params + problem_params_trailer + mod.alg_params list_results = [mod.avg_loss, mod.actual_var, mod.ideal_var] - result = disperse(list_results, ' ') + + row = config_name + list_results summary_file = open(mod.summary_file_name, 'a') - summary_file.write(config_name + result + '\n') + summary_file.write( disperse(row, '\t') + '\n') summary_file.close() @@ -127,7 +128,8 @@ def execute_vw(mod): + ' --weighting_scheme ' + str(mod.weighting_scheme) \ + ' --lambda_scheme ' + str(mod.lambda_scheme) \ + ' --learning_rate ' + str(mod.learning_rate) \ - + ' --overwrite_label ' + str(mod.majority_class) + + ' --overwrite_label ' + str(mod.majority_class) \ + + ' --warm_start_type ' + str(mod.warm_start_type) cmd = cmd_vw print cmd @@ -180,7 +182,7 @@ def gen_comparison_graph(mod): mod.alg_params = [ mod.cb_type, \ mod.validation_method, mod.weighting_scheme, \ mod.lambda_scheme, mod.choices_lambda, \ - mod.no_supervised, mod.no_bandit] + mod.no_supervised, mod.no_bandit, mod.warm_start_type, mod.learning_rate] mod.vw_output_filename = mod.results_path + disperse(mod.problem_params+mod.alg_params, '_') + '.txt' @@ -203,6 +205,24 @@ def get_num_classes(ds): did, n_actions = int(did), int(n_actions) return n_actions +def flatten(l): + out = [] + for item in l: + if isinstance(item, (list, tuple)): + out.extend(flatten(item)) + else: + out.append(item) + return tuple(out) + +def flatten_all(l): + out = [] + for item in l: + flattened = flatten(item) + if len(flattened) != 11: + print flattened + out.append(flattened) + return out + def ds_per_task(mod): # put dataset name to the last coordinate so that the task workloads tend to be @@ -211,41 +231,53 @@ def ds_per_task(mod): # put dataset name to the first coordinate so that the result production order is # in accordance with dataset order - config_corrupt_sup_raw = product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised) + config_corrupt_sup_raw = list(product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised)) config_corrupt_sup = filter(lambda (type, prob): type == 1 or abs(prob) > 1e-4, config_corrupt_sup_raw) - config_problem = product(mod.dss, config_corrupt_sup, mod.choices_cb_types, mod.warm_start_multipliers, mod.learning_rates) - - + config_problem = list(product(mod.dss, config_corrupt_sup, mod.choices_cb_types, mod.warm_start_multipliers)) if mod.baselines_on: - config_baselines_raw = list(product([1], [True, False], [True, False])) - config_baselines_solution = filter(lambda (x1, x2, x3): x2 == True or x3 == True, config_baselines_raw) - config_baselines = 
list(product(config_problem, config_baselines_solution)) + #config_baselines_raw = list(product([1], [True, False], [True, False], [1], [3])) + #config_baselines_solution = filter(lambda (x1, x2, x3, x4): x2 == True or x3 == True, config_baselines_raw) + config_baselines_solution = [(1, True, True, 1, 3), (1, True, False, 1, 3), (1, False, True, 1, 3)] + config_baselines = list(product(*[config_problem, config_baselines_solution, mod.learning_rates])) + config_baselines = flatten_all(config_baselines) else: config_baselines = [] if mod.algs_on: - config_algs_solution = list(product(mod.choices_choices_lambda, [False], [False])) - config_algs = list(product(config_problem, config_algs_solution)) + config_algs_solution_1 = list(product(mod.choices_choices_lambda, [False], [False], [1], [3])) + config_algs_solution_2 = [(1, False, False, 2, 1)] + config_algs_solution = config_algs_solution_1 + config_algs_solution_2 + config_algs = list(product(*[config_problem, config_algs_solution, mod.learning_rates])) + config_algs = flatten_all(config_algs) else: config_algs = [] if mod.optimal_on: - config_optimal_problem = product(mod.dss, [(1, 0)], [1], [1], [0.5]) - config_optimal_solution = [(0, False, False)] - config_optimal = list(product(config_optimal_problem, config_optimal_solution)) + config_optimal_problem = product(mod.dss, [(1, 0)], ['mtr'], [1]) + config_optimal_solution = [(0, False, False, 1, 1)] + config_optimal = list(product(*[config_optimal_problem, config_optimal_solution, [0.5]])) + config_optimal = flatten_all(config_optimal) else: config_optimal = [] + #print len(config_problem) + #print len(config_baselines) + #print len(config_algs) + #print len(config_optimal) + #raw_input(' ') + config_all = config_baselines + config_algs + config_optimal + config_all = sorted(config_all) + #config_all = sorted(config_all, key=lambda a: str(a)) config_task = [] print len(config_all) for i in range(len(config_all)): if (i % mod.num_tasks == mod.task_id): config_task.append(config_all[i]) - print config_all[i] + #print config_all[i] return config_task @@ -300,18 +332,19 @@ def main_loop(mod): 'cb_type', 'validation_method', 'weighting_scheme', \ 'lambda_scheme', 'choices_lambda', \ 'no_supervised', 'no_bandit', \ + 'warm_start_type', 'learning_rate', \ 'avg_error', 'actual_variance', \ 'ideal_variance'] - summary_header = disperse(list_header, ' ') + summary_header = disperse(list_header, '\t') summary_file.write(summary_header+'\n') summary_file.close() - for ((mod.dataset, (mod.corrupt_type_supervised, mod.corrupt_prob_supervised), \ - mod.cb_type, mod.warm_start_multiplier, mod.learning_rate), \ - (mod.choices_lambda, \ - mod.no_supervised, mod.no_bandit)) in mod.config_task: + for (mod.dataset, mod.corrupt_type_supervised, mod.corrupt_prob_supervised, \ + mod.cb_type, mod.warm_start_multiplier, \ + mod.choices_lambda, \ + mod.no_supervised, mod.no_bandit, mod.warm_start_type, mod.lambda_scheme, mod.learning_rate) in mod.config_task: gen_comparison_graph(mod) @@ -320,6 +353,8 @@ def main_loop(mod): parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') parser.add_argument('num_tasks', type=int) parser.add_argument('--results_dir', default='../../../figs/') + parser.add_argument('--ds_dir', default='../../../vwshuffled/') + parser.add_argument('--num_learning_rates', type=int) parser.add_argument('--warm_start_fraction', type=float) parser.add_argument('--corrupt_prob_supervised', type=float) parser.add_argument('--corrupt_prob_bandit',type=float) @@ -336,15 
+371,15 @@ def main_loop(mod): time.sleep(1) mod = model() - mod.baselines_on = False - mod.algs_on = False + mod.baselines_on = True + mod.algs_on = True mod.optimal_on = True mod.num_tasks = args.num_tasks mod.task_id = args.task_id - mod.ds_path = '../../../vwshuffled/' mod.vw_path = '../vowpalwabbit/vw' + mod.ds_path = args.ds_dir mod.results_path = args.results_dir #DIR_PATTERN = '../results/cbresults_{}/' @@ -359,7 +394,7 @@ def main_loop(mod): mod.adf_on = True # use fractions instead of absolute numbers - mod.warm_start_multipliers = [pow(2,i) for i in range(5)] + mod.warm_start_multipliers = [pow(2,i) for i in range(4)] #mod.warm_start_multipliers = [2*pow(4, i) for i in range(3)]] #mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] @@ -375,7 +410,7 @@ def main_loop(mod): #mod.choices_no_supervised = [False, True] #mod.choices_no_bandit = [False, True] #mod.choices_choices_lambda = [2, 4, 8] - mod.choices_choices_lambda = [] + mod.choices_choices_lambda = [2,4,8] #mod.choices_choices_lambda = [i for i in range(1,3)] #mod.choices_choices_lambda = [i for i in range(1,2)] #mod.choices_choices_lambda = [1, 3, 5, 7] @@ -385,9 +420,10 @@ def main_loop(mod): mod.choices_corrupt_type_supervised = [1,2,3] #mod.choices_corrupt_type_supervised = [2] #mod.corrupt_prob_supervised = 0.3 - mod.choices_corrupt_prob_supervised = [0.0,0.3,0.6,0.9,1] + mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] #mod.choices_corrupt_prob_supervised = [0.3] + mod.corrupt_type_bandit = 1 mod.corrupt_prob_bandit = 0.0 @@ -396,15 +432,22 @@ def main_loop(mod): mod.choices_lambda = 2 mod.weighting_scheme = 1 - mod.lambda_scheme = 3 + #mod.lambda_scheme = 3 + #mod.warm_start_type = 1 mod.no_exploration = False mod.cover_on = False mod.epsilon_on = True #mod.plot_color = 'r' #mod.plot_flat = False - mod.critical_size_ratios = [184 * pow(2, -i) for i in range(8) ] - mod.learning_rates = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0] + mod.critical_size_ratios = [184 * pow(2, -i) for i in range(7) ] + + if args.num_learning_rates == 1: + mod.learning_rates = [0.5] + elif args.num_learning_rates == 3: + mod.learning_rates = [0.1, 0.3, 1.0] + else: + mod.learning_rates = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0] #pow(2,i) for i in range(-5, 7) #for correctness test @@ -412,14 +455,18 @@ def main_loop(mod): #choices_fprob1 = [0.1] #choices_fprob2 = [0.1] + print 'reading dataset files..' mod.dss = ds_files(mod.ds_path) + print len(mod.dss) #mod.dss = ["ds_223_63.vw.gz"] #mod.dss = mod.dss[:5] + print 'generating tasks..' 
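# The task assignment that follows is a simple round-robin: config i is handled
# by task (i % num_tasks). A self-contained sketch with stand-in configs:
def configs_for_task(config_all, task_id, num_tasks):
    return [c for i, c in enumerate(config_all) if i % num_tasks == task_id]

print(configs_for_task(range(10), 1, 4))   # -> [1, 5, 9]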
# here, we are generating the task specific parameter settings # by first generate all parameter setting and pick every num_tasks of them mod.config_task = ds_per_task(mod) print 'task ' + str(mod.task_id) + ' of ' + str(mod.num_tasks) + ':' + print len(mod.config_task) #print mod.ds_task From 0f6e8dbda76e3dc8b9a13ed9194803fca3e2a4f7 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Wed, 9 May 2018 02:01:53 -0400 Subject: [PATCH 071/127] a new parameter enumeration scheme --- scripts/plot_warm_start.py | 560 ++++++++++++++++++------------------- 1 file changed, 278 insertions(+), 282 deletions(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 5f44d5312ad..5eeecb6acbb 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -18,30 +18,33 @@ def __init__(self): self.no_supervised = False def collect_stats(mod): + avg_error_value = avg_error(mod) + actual_var_value = actual_var(mod) + ideal_var_value = ideal_var(mod) - vw_output_filename = mod.vw_output_filename - # using progress parameter - # num_rows = mod.bandit / mod.progress - #print vw_output_filename + vw_run_results = [] + vw_result_template = { + 'bandit_size': 0, + 'bandit_supervised_size_ratio': 0, + 'avg_error': 0.0, + 'actual_variance': 0.0, + 'ideal_variance': 0.0 + } + + if mod.compute_optimal is True: + vw_result = vw_result_template.copy() + if 'optimal_approx' in mod.param: + # this condition is for computing the optimal error + vw_result['avg_error'] = avg_error_value + else: + # this condition is for computing the majority error + err = 1 - float(mod.result['majority_size']) / mod.result['total_size'] + vw_result['avg_error'] = float('%0.5f' % err) + vw_run_results.append(vw_result) + return vw_run_results + + f = open(mod.vw_output_filename, 'r') - avg_error_value = avg_error(mod) - mod.actual_var = actual_var(mod) - mod.ideal_var = ideal_var(mod) - - #avg_loss = [] - #last_loss = [] - #wt = [] - #end_table = False - - if mod.choices_lambda == 0: - mod.avg_loss = avg_error_value - mod.bandit_effective = 0 - mod.ratio = 0 - record_result(mod) - return - - f = open(vw_output_filename, 'r') - #linenumber = 0 i = 0 for line in f: vw_progress_pattern = '\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+[a-zA-Z0-9]+\s+[a-zA-Z0-9]+\s+\d+.*' @@ -54,84 +57,81 @@ def collect_stats(mod): avg_loss_str, last_loss_str, counter_str, weight_str, curr_label_str, \ curr_pred_str, curr_feat_str = s - #avg_loss.append(float(avg_loss_str)) - #last_loss.append(float(last_loss_str)) - #wt.append(float(weight_str)) - - mod.avg_loss = float(avg_loss_str) - mod.bandit_effective = int(float(weight_str)) - - for mod.ratio in mod.critical_size_ratios: - if mod.bandit_effective >= (1 - 1e-7) * mod.warm_start * mod.ratio and \ - mod.bandit_effective <= (1 + 1e-7) * mod.warm_start * mod.ratio: - record_result(mod) - - - #linenumber += 1 - + avg_loss = float(avg_loss_str) + bandit_effective = int(float(weight_str)) + + for ratio in mod.critical_size_ratios: + if bandit_effective >= (1 - 1e-7) * mod.result['warm_start'] * ratio and \ + bandit_effective <= (1 + 1e-7) * mod.result['warm_start'] * ratio: + vw_result = vw_result_template.copy() + vw_result['bandit_size'] = bandit_effective + vw_result['bandit_supervised_size_ratio'] = ratio + vw_result['avg_error'] = avg_loss + vw_result['actual_variance'] = actual_var_value + vw_result['ideal_variance'] = ideal_var_value + vw_run_results.append(vw_result) f.close() - - #if len(avg_loss) == 0: - # avg_loss = [0] - # last_loss = [0] - # wt = [0] - #return avg_loss, 
last_loss, wt - -def record_result(mod): - problem_params_trailer = [mod.bandit_effective, mod.ratio] - config_name = mod.problem_params + problem_params_trailer + mod.alg_params - - list_results = [mod.avg_loss, mod.actual_var, mod.ideal_var] - - row = config_name + list_results - - summary_file = open(mod.summary_file_name, 'a') - summary_file.write( disperse(row, '\t') + '\n') - summary_file.close() - + return vw_run_results + + +def gen_vw_options_list(vw_options): + vw_options_list = [] + for k, v in vw_options.iteritems(): + vw_options_list.append('--'+str(k)) + vw_options_list.append(str(v)) + return vw_options_list + +def gen_vw_options(mod): + vw_options = {} + vw_options['data'] = mod.data_full_path + vw_options['progress'] = mod.result['progress'] + + if 'optimal_approx' in mod.param: + vw_options['passes'] = 5 + vw_options['oaa'] = mod.result['num_classes'] + vw_options['cache_file'] = mod.param['data'] + '.cache' + elif 'majority_approx' in mod.param: + pass + else: + vw_options['corrupt_type_bandit'] = mod.corrupt_type_bandit + vw_options['corrupt_prob_bandit'] = mod.corrupt_prob_bandit + vw_options['validation_method'] = mod.validation_method + vw_options['weighting_scheme'] = mod.weighting_scheme + vw_options['bandit'] = mod.bandit + + if mod.adf_on is True: + vw_options['cb_explore_adf'] = ' ' + else: + vw_options['cb_explore'] = mod.num_classes + + if mod.epsilon_on is True: + vw_options['epsilon'] = mod.epsilon + + vw_options['cb_type'] = mod.param['cb_type'] + vw_options['choices_lambda'] = mod.param['choices_lambda'] + vw_options['corrupt_type_supervised'] = mod.param['corrupt_type_supervised'] + vw_options['corrupt_prob_supervised'] = mod.param['corrupt_prob_supervised'] + vw_options['lambda_scheme'] = mod.param['lambda_scheme'] + if mod.param['no_supervised'] is True: + vw_options['no_supervised'] = ' ' + if mod.param['no_bandit'] is True: + vw_options['no_bandit'] = ' ' + vw_options['learning_rate'] = mod.param['learning_rate'] + vw_options['warm_start_type'] = mod.param['warm_start_type'] + + vw_options['cbify'] = mod.result['num_classes'] + vw_options['warm_start'] = mod.result['warm_start'] + vw_options['overwrite_label'] = mod.result['majority_class'] + + #if mod.cover_on: + # alg_option += ' --cover 5 --psi 0.01 --nounif ' + #mod.cb_type = 'dr' + return vw_options def execute_vw(mod): - - alg_option = ' ' - if mod.adf_on: - alg_option += ' --cb_explore_adf ' - else: - alg_option += ' --cb_explore ' + str(mod.num_classes) + ' ' - - if mod.cover_on: - alg_option += ' --cover 5 --psi 0.01 --nounif ' - #mod.cb_type = 'dr' - if mod.epsilon_on: - alg_option += ' --epsilon ' + str(mod.epsilon) + ' ' - if mod.no_bandit: - alg_option += ' --no_bandit ' - if mod.no_supervised: - alg_option += ' --no_supervised ' - #if mod.no_exploration: - # alg_option += ' --epsilon 0.0 ' - #if mod.cb_type == 'mtr': - # mod.adf_on = True; - - if mod.choices_lambda == 0: - cmd_vw = mod.vw_path + ' --oaa ' + str(mod.num_classes) + ' --passes 5 ' \ - + ' --progress ' + str(mod.progress) + ' -d ' \ - + mod.ds_path + mod.dataset \ - + ' --cache_file ' + mod.results_path + mod.dataset + '.cache' - else: - cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option + ' --progress ' + str(mod.progress) \ - + ' -d ' + mod.ds_path + mod.dataset \ - + ' --corrupt_type_supervised ' + str(mod.corrupt_type_supervised) \ - + ' 
--corrupt_prob_supervised ' + str(mod.corrupt_prob_supervised) \ - + ' --corrupt_type_bandit ' + str(mod.corrupt_type_bandit) \ - + ' --corrupt_prob_bandit ' + str(mod.corrupt_prob_bandit) \ - + ' --validation_method ' + str(mod.validation_method) \ - + ' --weighting_scheme ' + str(mod.weighting_scheme) \ - + ' --lambda_scheme ' + str(mod.lambda_scheme) \ - + ' --learning_rate ' + str(mod.learning_rate) \ - + ' --overwrite_label ' + str(mod.majority_class) \ - + ' --warm_start_type ' + str(mod.warm_start_type) - - cmd = cmd_vw + vw_options = gen_vw_options(mod) + vw_options_list = gen_vw_options_list(vw_options) + cmd = disperse([mod.vw_path]+vw_options_list, ' ') print cmd f = open(mod.vw_output_filename, 'w') @@ -140,23 +140,6 @@ def execute_vw(mod): process.wait() f.close() -''' -def plot_errors(mod): - #avg_loss, last_loss, wt = - if mod.plot_flat: - # for supervised only, we simply plot a horizontal line using the last point - len_avg_loss = len(avg_loss) - avg_loss = avg_loss[len_avg_loss-1] - avg_loss = [avg_loss for i in range(len_avg_loss)] - - #line = plt.plot(wt, avg_loss, mod.plot_color, label=(mod.plot_label)) - avg_error_value = avg_error(mod) - actual_var_value = actual_var(mod) - ideal_var_value = ideal_var(mod) - - return avg_error_value, actual_var_value, ideal_var_value -''' - def disperse(l, ch): s = '' for item in l: @@ -164,132 +147,169 @@ def disperse(l, ch): s += ch return s +def param_to_str(param): + param_list = [str(k)+'='+str(v) for k,v in param.iteritems() ] + return disperse(param_list, '_') + +def param_to_result(param, result): + for k, v in param.iteritems(): + if k in result: + result[k] = v def gen_comparison_graph(mod): + mod.result = mod.result_template.copy() - mod.num_lines = get_num_lines(mod.ds_path+mod.dataset) - mod.majority_class = get_majority_class(mod.ds_path+mod.dataset) - mod.progress = int(math.ceil(float(mod.num_lines) / float(mod.num_checkpoints))) - mod.warm_start = mod.warm_start_multiplier * mod.progress - mod.bandit = mod.num_lines - mod.warm_start - mod.num_classes = get_num_classes(mod.dataset) + if 'majority_approx' in mod.param or 'optimal_approx' in mod.param: + mod.compute_optimal = True + else: + mod.compute_optimal = False - mod.problem_params = [mod.dataset, mod.num_classes, mod.num_lines, \ - mod.corrupt_type_supervised, mod.corrupt_prob_supervised, \ - mod.corrupt_type_bandit, mod.corrupt_prob_bandit, \ - mod.warm_start] + param_to_result(mod.param, mod.result) + mod.data_full_path = mod.ds_path + mod.param['data'] - mod.alg_params = [ mod.cb_type, \ - mod.validation_method, mod.weighting_scheme, \ - mod.lambda_scheme, mod.choices_lambda, \ - mod.no_supervised, mod.no_bandit, mod.warm_start_type, mod.learning_rate] + mod.result['total_size'] = get_num_lines(mod.data_full_path) + mod.result['num_classes'] = get_num_classes(mod.data_full_path) + mod.result['majority_size'], mod.result['majority_class'] = get_majority_class(mod.data_full_path) + mod.result['progress'] = int(math.ceil(float(mod.result['total_size']) / float(mod.num_checkpoints))) + mod.vw_output_filename = mod.results_path + param_to_str(mod.param) + '.txt' - mod.vw_output_filename = mod.results_path + disperse(mod.problem_params+mod.alg_params, '_') + '.txt' + if mod.compute_optimal is False: + mod.result['warm_start'] = mod.param['warm_start_multiplier'] * mod.result['progress'] + mod.bandit = mod.result['total_size'] - mod.result['warm_start'] #plot_errors(mod) execute_vw(mod) - collect_stats(mod) + vw_run_results = collect_stats(mod) + for vw_result in 
vw_run_results: + result_combined = merge_two_dicts(mod.result, vw_result) + result_formatted = format_result(mod.result_template, result_combined) + record_result(mod, result_formatted) print('') +def format_result(result_template, result): + result_formatted = result_template.copy() + for k, v in result.iteritems(): + result_formatted[k] = v + return result_formatted + +def record_result(mod, result): + result_row = [] + for k in mod.result_header_list: + result_row.append(result[k]) + + summary_file = open(mod.summary_file_name, 'a') + summary_file.write( disperse(result_row, '\t') + '\n') + summary_file.close() + def ds_files(ds_path): prevdir = os.getcwd() os.chdir(ds_path) dss = sorted(glob.glob('*.vw.gz')) + #dss = [ds_path+ds for ds in dss] os.chdir(prevdir) return dss - -def get_num_classes(ds): - did, n_actions = os.path.basename(ds).split('.')[0].split('_')[1:] - did, n_actions = int(did), int(n_actions) - return n_actions - -def flatten(l): - out = [] - for item in l: - if isinstance(item, (list, tuple)): - out.extend(flatten(item)) - else: - out.append(item) - return tuple(out) - -def flatten_all(l): - out = [] - for item in l: - flattened = flatten(item) - if len(flattened) != 11: - print flattened - out.append(flattened) - return out - - -def ds_per_task(mod): - # put dataset name to the last coordinate so that the task workloads tend to be - # allocated equally - - # put dataset name to the first coordinate so that the result production order is - # in accordance with dataset order - - config_corrupt_sup_raw = list(product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised)) - config_corrupt_sup = filter(lambda (type, prob): type == 1 or abs(prob) > 1e-4, config_corrupt_sup_raw) - - config_problem = list(product(mod.dss, config_corrupt_sup, mod.choices_cb_types, mod.warm_start_multipliers)) - +def merge_two_dicts(x, y): + #print 'x = ', x + #print 'y = ', y + z = x.copy() # start with x's keys and values + z.update(y) # modifies z with y's keys and values & returns None + return z + +def param_cartesian(param_set_1, param_set_2): + prod = [] + for param_1 in param_set_1: + for param_2 in param_set_2: + prod.append(merge_two_dicts(param_1, param_2)) + return prod + +def param_cartesian_multi(param_sets): + #print param_sets + prod = [{}] + for param_set in param_sets: + prod = param_cartesian(prod, param_set) + return prod + +def dictify(param_name, param_choices): + result = [] + for param in param_choices: + dic = {} + dic[param_name] = param + result.append(dic) + return result + +def params_per_task(mod): + # Problem parameters + params_corrupt_type_sup = dictify('corrupt_type_supervised', mod.choices_corrupt_type_supervised) + params_corrupt_prob_sup = dictify('corrupt_prob_supervised', mod.choices_corrupt_prob_supervised) + params_warm_start_multiplier = dictify('warm_start_multiplier', mod.warm_start_multipliers) + params_learning_rate = dictify('learning_rate', mod.learning_rates) + + # Algorithm parameters + params_cb_type = dictify('cb_type', mod.choices_cb_type) + + # Common parameters + params_common = param_cartesian_multi([params_corrupt_type_sup, params_corrupt_prob_sup, params_warm_start_multiplier, params_learning_rate, params_cb_type]) + params_common = filter(lambda param: param['corrupt_type_supervised'] == 1 or abs(param['corrupt_prob_supervised']) > 1e-4, params_common) + + # Baseline parameters construction if mod.baselines_on: - #config_baselines_raw = list(product([1], [True, False], [True, False], [1], [3])) - 
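# A minimal sketch (toy values, assuming the dictify / merge_two_dicts /
# param_cartesian_multi helpers defined above) of how the new enumeration
# composes parameter grids:
#
#     lrs = dictify('learning_rate', [0.1, 0.3])
#     cbt = dictify('cb_type', ['mtr'])
#     param_cartesian_multi([lrs, cbt])
#     # -> [{'learning_rate': 0.1, 'cb_type': 'mtr'},
#     #     {'learning_rate': 0.3, 'cb_type': 'mtr'}]
#
# Each output dict is one complete configuration fragment, which is why the
# baseline and algorithm variants below can be expressed as small dict lists.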
#config_baselines_solution = filter(lambda (x1, x2, x3, x4): x2 == True or x3 == True, config_baselines_raw) - config_baselines_solution = [(1, True, True, 1, 3), (1, True, False, 1, 3), (1, False, True, 1, 3)] - config_baselines = list(product(*[config_problem, config_baselines_solution, mod.learning_rates])) - config_baselines = flatten_all(config_baselines) + params_baseline_basic = [ + [{'choices_lambda': 1, 'warm_start_type': 1, 'lambda_scheme': 3}], [{'no_supervised': True}, {'no_supervised': False}], [{'no_bandit': True}, {'no_bandit': False}] + ] + params_baseline = param_cartesian_multi([params_common] + params_baseline_basic) + params_baseline = filter(lambda param: param['no_supervised'] == True or param['no_bandit'] == True, params_baseline) else: - config_baselines = [] + params_baseline = [] + + # Algorithm parameters construction if mod.algs_on: - config_algs_solution_1 = list(product(mod.choices_choices_lambda, [False], [False], [1], [3])) - config_algs_solution_2 = [(1, False, False, 2, 1)] - config_algs_solution = config_algs_solution_1 + config_algs_solution_2 - config_algs = list(product(*[config_problem, config_algs_solution, mod.learning_rates])) - config_algs = flatten_all(config_algs) + params_choices_lambd = dictify('choices_lambda', mod.choices_choices_lambda) + params_algs_1 = param_cartesian(params_choices_lambd, [{'no_supervised': False, 'no_bandit': False, 'warm_start_type': 1, 'lambda_scheme': 3}] ) + params_algs_2 = [{'no_supervised': False, 'no_bandit': False, 'warm_start_type': 2, 'lambda_scheme': 1, 'choices_lambda':1}] + params_algs = param_cartesian( params_common, params_algs_1 + params_algs_2 ) else: - config_algs = [] + params_algs = [] + # Optimal baselines parameter construction if mod.optimal_on: - config_optimal_problem = product(mod.dss, [(1, 0)], ['mtr'], [1]) - config_optimal_solution = [(0, False, False, 1, 1)] - config_optimal = list(product(*[config_optimal_problem, config_optimal_solution, [0.5]])) - config_optimal = flatten_all(config_optimal) + params_optimal = [{ 'optimal_approx': True }, { 'majority_approx': True }] else: - config_optimal = [] + params_optimal = [] - #print len(config_problem) - #print len(config_baselines) - #print len(config_algs) - #print len(config_optimal) - #raw_input(' ') + # Common factor in all 3 groups: dataset + params_dataset = dictify('data', mod.dss) + params_all = param_cartesian( params_dataset, params_baseline + params_algs + params_optimal ) + params_all = sorted(params_all) + print len(params_all) + for row in params_all: + print row + return get_params_task(params_all) - config_all = config_baselines + config_algs + config_optimal - config_all = sorted(config_all) - #config_all = sorted(config_all, key=lambda a: str(a)) - config_task = [] - print len(config_all) - for i in range(len(config_all)): +def get_params_task(params_all): + params_task = [] + for i in range(len(params_all)): if (i % mod.num_tasks == mod.task_id): - config_task.append(config_all[i]) - #print config_all[i] - - return config_task + params_task.append(params_all[i]) + return params_task def get_num_lines(dataset_name): - ps = subprocess.Popen(('zcat', dataset_name), stdout=subprocess.PIPE) - output = subprocess.check_output(('wc', '-l'), stdin=ps.stdout) - ps.wait() - return int(output) + num_lines = subprocess.check_output(('zcat ' + dataset_name + ' | wc -l'), shell=True) + return int(num_lines) + +def get_num_classes(ds): + # could be a bug for including the prefix.. 
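# (For comparison with get_num_lines above: the same count can be taken
#  in-process with the gzip module, avoiding shell=True -- a sketch, assuming
#  the .vw.gz inputs are gzip-compressed:
#      import gzip
#      num_lines = sum(1 for line in gzip.open(dataset_name))
#  )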
+ did, n_actions = os.path.basename(ds).split('.')[0].split('_')[1:] + did, n_actions = int(did), int(n_actions) + return n_actions def get_majority_class(dataset_name): - maj_class = subprocess.check_output(('zcat '+ dataset_name +' | cut -d \' \' -f 1 | sort | uniq -c | sort -r | head -1 | xargs | cut -d \' \' -f 2 '), shell=True) - return int(maj_class) + maj_class_str = subprocess.check_output(('zcat '+ dataset_name +' | cut -d \' \' -f 1 | sort | uniq -c | sort -r | head -1 | xargs '), shell=True) + maj_size, maj_class = maj_class_str.split() + return int(maj_size), int(maj_class) def avg_error(mod): return vw_output_extract(mod, 'average loss') @@ -320,44 +340,56 @@ def vw_output_extract(mod, pattern): vw_output.close() return avge - -def main_loop(mod): - mod.summary_file_name = mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum' +def write_summary_header(mod): summary_file = open(mod.summary_file_name, 'w') - - list_header = ['dataset', 'num_classes', 'total_size', \ - 'corrupt_type_supervised', 'corrupt_prob_supervised', \ - 'corrupt_type_bandit', 'corrupt_prob_bandit', \ - 'warm_start_size', 'bandit_size', 'bandit_supervised_size_ratio', \ - 'cb_type', 'validation_method', 'weighting_scheme', \ - 'lambda_scheme', 'choices_lambda', \ - 'no_supervised', 'no_bandit', \ - 'warm_start_type', 'learning_rate', \ - 'avg_error', 'actual_variance', \ - 'ideal_variance'] - - summary_header = disperse(list_header, '\t') - + summary_header = disperse(mod.result_header_list, '\t') summary_file.write(summary_header+'\n') summary_file.close() - for (mod.dataset, mod.corrupt_type_supervised, mod.corrupt_prob_supervised, \ - mod.cb_type, mod.warm_start_multiplier, \ - mod.choices_lambda, \ - mod.no_supervised, mod.no_bandit, mod.warm_start_type, mod.lambda_scheme, mod.learning_rate) in mod.config_task: +def main_loop(mod): + mod.summary_file_name = mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum' + mod.result_template_list = [ + 'data', 'ds', + 'num_classes', 0, + 'total_size' , 0, + 'majority_size', 0, + 'corrupt_type_supervised', 0, + 'corrupt_prob_supervised', 0.0, + 'corrupt_type_bandit', 0, + 'corrupt_prob_bandit', 0.0, + 'warm_start', 0, + 'bandit_size', 0, + 'bandit_supervised_size_ratio', 0, + 'cb_type', 'mtr', + 'validation_method', 0, + 'weighting_scheme', 0, + 'lambda_scheme', 0, + 'choices_lambda', 0, + 'no_supervised', False, + 'no_bandit', False, + 'warm_start_type', 0, + 'learning_rate', 0.0, + 'optimal_approx', False, + 'majority_approx', False, + 'avg_error', 0.0, + 'actual_variance', 0.0, + 'ideal_variance', 0.0 ] + + num_cols = len(mod.result_template_list)/2 + mod.result_header_list = [ mod.result_template_list[2*i] for i in range(num_cols) ] + mod.result_template = dict([ (mod.result_template_list[2*i], mod.result_template_list[2*i+1]) for i in range(num_cols) ]) + + write_summary_header(mod) + for mod.param in mod.config_task: gen_comparison_graph(mod) - if __name__ == '__main__': parser = argparse.ArgumentParser(description='vw job') parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') parser.add_argument('num_tasks', type=int) parser.add_argument('--results_dir', default='../../../figs/') parser.add_argument('--ds_dir', default='../../../vwshuffled/') - parser.add_argument('--num_learning_rates', type=int) - parser.add_argument('--warm_start_fraction', type=float) - parser.add_argument('--corrupt_prob_supervised', type=float) - parser.add_argument('--corrupt_prob_bandit',type=float) + 
parser.add_argument('--num_learning_rates', type=int, default=1) args = parser.parse_args() @@ -371,9 +403,9 @@ def main_loop(mod): time.sleep(1) mod = model() - mod.baselines_on = True + mod.baselines_on = False mod.algs_on = True - mod.optimal_on = True + mod.optimal_on = False mod.num_tasks = args.num_tasks mod.task_id = args.task_id @@ -382,47 +414,22 @@ def main_loop(mod): mod.ds_path = args.ds_dir mod.results_path = args.results_dir - #DIR_PATTERN = '../results/cbresults_{}/' - mod.num_checkpoints = 200 - #mod.warm_start = 50 - #mod.bandit = 4096 - #mod.num_classes = 10 - #mod.cb_type = 'mtr' #'ips' - #mod.choices_lambda = 10 - #mod.progress = 25 + mod.adf_on = True # use fractions instead of absolute numbers - mod.warm_start_multipliers = [pow(2,i) for i in range(4)] - #mod.warm_start_multipliers = [2*pow(4, i) for i in range(3)]] - - #mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] - #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] - - #mod.choices_warm_start = [pow(2,i) for i in range(11)] #put it here in order to plot 2d mesh - # we are implicitly iterating over the bandit sample sizes - #choices_fprob1 = [0.1, 0.2, 0.3] - #choices_fprob2 = [0.1, 0.2, 0.3] - #choices_cb_types = ['mtr', 'ips'] - #mod.choices_cb_types = ['mtr', 'ips'] - mod.choices_cb_types = ['mtr'] - #mod.choices_no_supervised = [False, True] - #mod.choices_no_bandit = [False, True] - #mod.choices_choices_lambda = [2, 4, 8] - mod.choices_choices_lambda = [2,4,8] - #mod.choices_choices_lambda = [i for i in range(1,3)] - #mod.choices_choices_lambda = [i for i in range(1,2)] - #mod.choices_choices_lambda = [1, 3, 5, 7] - #[i for i in range(10,11)] - #mod.corrupt_type_supervised = 2 - #mod.corrupt_prob_supervised = 0.3 - mod.choices_corrupt_type_supervised = [1,2,3] - #mod.choices_corrupt_type_supervised = [2] - #mod.corrupt_prob_supervised = 0.3 - mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] - #mod.choices_corrupt_prob_supervised = [0.3] + #mod.warm_start_multipliers = [pow(2,i) for i in range(4)] + mod.warm_start_multipliers = [pow(2,i) for i in range(1)] + + mod.choices_cb_type = ['mtr'] + #mod.choices_choices_lambda = [2,4,8] + mod.choices_choices_lambda = [2] + #mod.choices_corrupt_type_supervised = [1,2,3] + #mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] + mod.choices_corrupt_type_supervised = [1,2] + mod.choices_corrupt_prob_supervised = [0.0,0.5] mod.corrupt_type_bandit = 1 mod.corrupt_prob_bandit = 0.0 @@ -432,14 +439,9 @@ def main_loop(mod): mod.choices_lambda = 2 mod.weighting_scheme = 1 - #mod.lambda_scheme = 3 - #mod.warm_start_type = 1 - mod.no_exploration = False - mod.cover_on = False mod.epsilon_on = True - #mod.plot_color = 'r' - #mod.plot_flat = False + mod.critical_size_ratios = [184 * pow(2, -i) for i in range(7) ] if args.num_learning_rates == 1: @@ -449,12 +451,6 @@ def main_loop(mod): else: mod.learning_rates = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0] - #pow(2,i) for i in range(-5, 7) - #for correctness test - #mod.choices_warm_start = [20] - #choices_fprob1 = [0.1] - #choices_fprob2 = [0.1] - print 'reading dataset files..' mod.dss = ds_files(mod.ds_path) print len(mod.dss) @@ -464,7 +460,7 @@ def main_loop(mod): print 'generating tasks..' 
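# Note on the checkpointing math: collect_stats keeps a VW progress row only
# when its cumulative example count hits warm_start * ratio for one of the
# critical_size_ratios, tested with a relative tolerance, e.g.
#     target = warm_start * ratio
#     (1 - 1e-7) * target <= bandit_effective <= (1 + 1e-7) * target
# The 1e-7 band is just a float-safe equality check on the progress weights.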
# here, we are generating the task specific parameter settings # by first generate all parameter setting and pick every num_tasks of them - mod.config_task = ds_per_task(mod) + mod.config_task = params_per_task(mod) print 'task ' + str(mod.task_id) + ' of ' + str(mod.num_tasks) + ':' print len(mod.config_task) From a32c2e7c0925df76ede6addded3630804d64b4f4 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Thu, 10 May 2018 21:55:26 -0400 Subject: [PATCH 072/127] . --- scripts/alg_comparison.py | 251 +++++++++++++++++++++---------------- scripts/plot_warm_start.py | 105 +++++++++++----- 2 files changed, 215 insertions(+), 141 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index 3d810373075..ed6504fbbea 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -16,14 +16,6 @@ class model: def __init__(self): pass -# this part is changable -#alg1 = 'epsilon' -#alg2 = 'cover' -#alg1 = 'choices_lambda_1' -#alg2 = 'choices_lambda_5' -#alg1 = 'instance weighting' -#alg2 = 'dataset weighting' - def sum_files(result_path): prevdir = os.getcwd() os.chdir(result_path) @@ -81,13 +73,9 @@ def plot_comparison(errors_1, errors_2, sizes): plt.scatter(results_insigni_1, results_insigni_2, s=2, c='k') -def normalized_score(lst, l): - #print lst - #l = min(lst) - u = max(lst) - return [ (item - l) / (u - l + 1e-4) for item in lst ] - def alg_str(alg_name): + if (alg_name[0] == 0): + return 'majority_class' if (alg_name[0] == 2): return 'supervised_underutil_as_bandit' if (alg_name[2] == True and alg_name[3] == True): @@ -108,24 +96,27 @@ def problem_str(name_problem): def plot_cdf(alg_name, errs): - plt.step(np.sort(errs), np.linspace(0, 1, len(errs), endpoint=False), label=alg_str(alg_name)) - print alg_name print errs print len(errs) + + plt.step(np.sort(errs), np.linspace(0, 1, len(errs), endpoint=False), label=alg_str(alg_name)) + + #raw_input("Press Enter to continue...") def plot_all_cdfs(alg_results, mod): #plot all cdfs: + print 'printing cdfs..' 
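# The plt.step call above draws an empirical CDF: sorting the per-dataset
# errors and pairing them with np.linspace(0, 1, n, endpoint=False) plots,
# at each error value x, the fraction of datasets with error below x, so
# curves closer to the upper left are better. A self-contained toy sketch:
#     import numpy as np
#     import matplotlib.pyplot as plt
#     errs = [0.40, 0.05, 0.25, 0.12]
#     plt.step(np.sort(errs), np.linspace(0, 1, len(errs), endpoint=False))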
i = 0 for alg_name, errs in alg_results.iteritems(): - if (alg_name[2] == False and alg_name[3] == False and alg_name[1] != 8): - pass - else: - plot_cdf(alg_name, errs) + plot_cdf(alg_name, errs) plt.legend() - plt.xlim(-0.2,1) + if mod.normalize_type == 1: + plt.xlim(-0.2,1) + elif mod.normalize_type == 2: + plt.xlim(-1,1) plt.ylim(0,1) plt.savefig(mod.problemdir+'/cdf.png') plt.clf() @@ -154,8 +145,19 @@ def init_results(result_table): for idx, row in result_table.iterrows(): alg_name = (row['warm_start_type'], row['choices_lambda'], row['no_supervised'], row['no_bandit']) alg_results[alg_name] = [] + + alg_results[(0, 0, False, False)] = [] return alg_results +def normalize_score(unnormalized_result, mod): + if mod.normalize_type == 1: + l = get_best_error(mod.best_error_table, mod.name_dataset) + u = max(unnormalized_result.values()) + return { k : ((v - l) / (u - l + 1e-4)) for k, v in unnormalized_result.iteritems() } + elif mod.normalize_type == 2: + l = unnormalized_result[(1, 1, True, False)] + return { k : ((v - l) / (l + 1e-4)) for k, v in unnormalized_result.iteritems() } + def get_best_error(best_error_table, name_dataset): name = name_dataset[0] best_error_oneline = best_error_table[best_error_table['dataset'] == name] @@ -168,6 +170,36 @@ def get_best_error(best_error_table, name_dataset): #raw_input("...") return best_error +def get_maj_error(maj_error_table, name_dataset): + name = name_dataset[0] + maj_error_oneline = maj_error_table[maj_error_table['data'] == name] + maj_error = maj_error_oneline.loc[maj_error_oneline.index[0], 'avg_error'] + return maj_error + +#normalized_results[alg_name].append(normalized_errs[i]) +#errs = [] +#for idx, row in result_table.iterrows(): +# errs.append(row['avg_error']) + +def get_unnormalized_results(result_table): + new_unnormalized_results = {} + new_size = 0 + + i = 0 + for idx, row in result_table.iterrows(): + if i == 0: + new_size = row['bandit_size'] + + if row['bandit_size'] == new_size: + alg_name = (row['warm_start_type'], row['choices_lambda'], row['no_supervised'], row['no_bandit']) + new_unnormalized_results[alg_name] = row['avg_error'] + i += 1 + + return new_size, new_unnormalized_results + +def update_result_dict(results_dict, new_result): + for k, v in new_result.iteritems(): + results_dict[k].append(v) def plot_all(mod, all_results): @@ -187,7 +219,20 @@ def plot_all(mod, all_results): #(False, False, 8), and compute a normalized score for name_dataset, group_dataset in grouped_by_dataset: - result_table = group_dataset #group_dataset.groupby(['choices_lambda','no_supervised', 'no_bandit']) + result_table = group_dataset + + grouped_by_algorithm = group_dataset.groupby(['warm_start_type', 'choices_lambda', 'no_supervised', 'no_bandit']) + + mod.name_dataset = name_dataset + #The 'learning_rate' would be the only free degree here now. Taking the + #min aggregation will give us the 7 algorithms we are evaluating. 
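# A toy sketch of the groupby-min below (pandas assumed): grouping on the
# algorithm key and taking .min() picks, per algorithm, the smallest value
# of each remaining column -- in particular the best avg_error over the
# learning-rate sweep:
#     df = pd.DataFrame({'alg': ['a', 'a', 'b'],
#                        'learning_rate': [0.1, 0.3, 0.1],
#                        'avg_error': [0.30, 0.20, 0.25]})
#     df.groupby('alg').min().reset_index()   # 'a' keeps avg_error 0.20
# Note that .min() is taken column-wise, so the reported learning_rate is the
# minimum rate, not necessarily the rate that achieved the minimum error.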
+ result_table = grouped_by_algorithm.min() + result_table = result_table.reset_index() + + #print result_table + + + #group_dataset.groupby(['choices_lambda','no_supervised', 'no_bandit']) #first time - generate names of algorithms considered if normalized_results is None: @@ -199,26 +244,16 @@ def plot_all(mod, all_results): #dummy = input('') #in general (including the first time) - record the error rates of all algorithms - - err_best = get_best_error(mod.best_error_table, name_dataset) - errs = [] - for idx, row in result_table.iterrows(): - errs.append(row['avg_error']) - normalized_errs = normalized_score(errs, err_best) - #print result_table - i = 0 - for idx, row in result_table.iterrows(): - if i == 0: - temp_size = row['bandit_size'] - sizes.append(row['bandit_size']) + new_size, new_unnormalized_result = get_unnormalized_results(result_table) + new_unnormalized_result[(0, 0, False, False)] = get_maj_error(mod.maj_error_table, mod.name_dataset) + + new_normalized_result = normalize_score(new_unnormalized_result, mod) - if row['bandit_size'] == temp_size: - alg_name = (row['warm_start_type'], row['choices_lambda'], row['no_supervised'], row['no_bandit']) - unnormalized_results[alg_name].append(errs[i]) - normalized_results[alg_name].append(normalized_errs[i]) - i += 1 + update_result_dict(unnormalized_results, new_unnormalized_result) + update_result_dict(normalized_results, new_normalized_result) + sizes.append(new_size) #print 'sizes:' #print len(sizes) @@ -229,47 +264,28 @@ def plot_all(mod, all_results): if not os.path.exists(mod.problemdir): os.makedirs(mod.problemdir) - #plot_all_pair_comp(unnormalized_results, sizes, mod) - plot_all_cdfs(normalized_results, mod) - - - - - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='result summary') - parser.add_argument('--results_dir', default='../../../figs/') - parser.add_argument('--filter', default='1') - parser.add_argument('--plot_subdir', default='expt1/') - args = parser.parse_args() - - mod = model() + print 'best_errors', mod.best_error_table + print 'unnormalized_results', unnormalized_results + print 'normalized_results', normalized_results - mod.results_dir = args.results_dir - mod.filter = args.filter - mod.plot_subdir = args.plot_subdir + if mod.pair_comp_on is True: + plot_all_pair_comp(unnormalized_results, sizes, mod) + if mod.cdf_on is True: + plot_all_cdfs(normalized_results, mod) - mod.fulldir = mod.results_dir + mod.plot_subdir - if not os.path.exists(mod.fulldir): - os.makedirs(mod.fulldir) +def save_to_hdf(mod): + print 'saving to hdf..' + store = pd.HDFStore('store.h5') + store['result_table'] = mod.all_results + store.close() - #results_dir = '../../../lambdas/' - #results_dir = '../../../warm_start_frac=0.1/' - #results_dir = '../../../cover_vs_epsilon/' - #results_dir = '../../../corrupt_supervised_type1_0.3/' - #results_dir = '../../../expt_0403/corrupt_supervised_type2_0.3/' - #results_dir = '../../../expt_0403/supervised_validation/' - #results_dir = '../../../weighting_schemes/' - #results_dir = '../../../central_lambda/' - #results_dir = '../../../central_lambda_naive/' - #results_dir = '../../../central_lambda_zeroone/' - #results_dir = '../../../type2_0.3/' - #results_dir = '../../../type1_0.3/' - #results_dir = '../../../type2_1/' - #results_dir = '../../../type2_0.65/' - #results_dir = '../../../type2_0.3/' +def load_from_hdf(mod): + print 'reading from hdf..' 
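# pd.HDFStore (backed by PyTables) serves as an on-disk cache of the parsed
# result table, so later plotting runs can skip re-reading every .sum file;
# the whole round trip is dict-style, e.g.
#     store = pd.HDFStore('store.h5')
#     store['result_table'] = all_results   # save
#     cached = store['result_table']        # load
#     store.close()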
+ store = pd.HDFStore('store.h5') + mod.all_results = store['result_table'] + store.close() +def load_from_sum(mod): print 'reading directory..' dss = sum_files(mod.results_dir) print len(dss) @@ -289,15 +305,65 @@ def plot_all(mod, all_results): all_results = all_results.append(result) print all_results + mod.all_results = all_results + + +# This is a hack - need to do this systematically in the future +def load_maj_error(mod): + maj_error_table = parse_sum_file(mod.maj_error_dir) + return maj_error_table + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='result summary') + parser.add_argument('--results_dir', default='../../../figs/') + parser.add_argument('--filter', default='1') + parser.add_argument('--plot_subdir', default='expt1/') + parser.add_argument('--from_hdf', action='store_true') + parser.add_argument('--normalize_type', type=int) + args = parser.parse_args() + + mod = model() + + mod.results_dir = args.results_dir + mod.filter = args.filter + mod.plot_subdir = args.plot_subdir + mod.normalize_type = args.normalize_type + mod.pair_comp_on = False + mod.cdf_on = True + mod.maj_error_dir = '../../../figs_maj_errors/0of1.sum' + + mod.fulldir = mod.results_dir + mod.plot_subdir + if not os.path.exists(mod.fulldir): + os.makedirs(mod.fulldir) + + #print args.from_hdf + #raw_input(' ') + if args.from_hdf is True: + load_from_hdf(mod) + else: + load_from_sum(mod) + save_to_hdf(mod) #first group by corruption mode, then corruption prob #then group by warm start - bandit ratio #these constitutes all the problem settings we are looking at (corresponding #to each cdf graph) + all_results = mod.all_results mod.best_error_table = all_results[all_results['choices_lambda'] == 0] all_results = all_results[all_results['choices_lambda'] != 0] + #ignore the no update row: + all_results = all_results[(all_results['no_supervised'] == False) | (all_results['no_bandit'] == False)] + + #filter choices_lambdas = 2,4,8? 
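# Typical invocations (paths illustrative): a first run parses the .sum files
# and caches them in store.h5, later runs reuse the cache via --from_hdf:
#     python alg_comparison.py --results_dir ../../../figs/ --normalize_type 1
#     python alg_comparison.py --from_hdf --normalize_type 1 --filter 3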
+ #if (alg_name[2] == False and alg_name[3] == False and alg_name[1] != 8): + # pass + #else: + + mod.maj_error_table = load_maj_error(mod) + if mod.filter == '1': pass elif mod.filter == '2': @@ -314,36 +380,3 @@ def plot_all(mod, all_results): #if i >= 331 and i <= 340: # print 'result:', result # print 'all_results:', all_results - - - #result = parse_sum_file(results_dir + '400of600.sum') - #print result - - #choices_choices_lambda = sorted(all_results['choices_lambda'].unique()) - #grouped = all_results.groupby('choices_lambda') - - #for cl, results_lambda in grouped: - #results_lambda = all_results[all_results['choices_lambda'] == cl] - # compare combined w/ supervised - ''' - alg1 = all_results.columns[1] - alg2 = all_results.columns[2] - bandit_only = all_results.columns[3] - supervised_only = all_results.columns[4] - sizes = all_results.columns[5] - - results_alg1 = all_results[alg1].tolist() - results_alg2 = all_results[alg2].tolist() - results_bandit = all_results[bandit_only].tolist() - results_supervised = all_results[supervised_only].tolist() - dataset_sizes = all_results[sizes].tolist() - ''' - #print alg1 - #print results_alg1 - - # compare combined w/ bandit - #plot_comparison(results_alg1, results_bandit, dataset_sizes, alg1 + ' vs ' + 'bandit only', results_dir + alg1 + ' vs ' + 'bandit only' + '.png') - #plot_comparison(results_alg1, results_supervised, dataset_sizes, alg1 + ' vs ' + 'supervised only', results_dir + alg1 + ' vs ' + 'supervised only' + '.png') - #plot_comparison(results_alg2, results_bandit, dataset_sizes, alg2 + ' vs ' + 'bandit only', results_dir + alg2 + ' vs ' + 'bandit only' + '.png') - #plot_comparison(results_alg2, results_supervised, dataset_sizes, alg2 + ' vs ' + 'supervised only', results_dir + alg2 + ' vs ' + 'supervised only' + '.png') - #plot_comparison(results_alg1, results_alg2, dataset_sizes, alg1 + ' vs ' + alg2, results_dir+alg1 + ' vs ' + alg2 + '.png') diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 5eeecb6acbb..0f2a19edd83 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -89,14 +89,15 @@ def gen_vw_options(mod): if 'optimal_approx' in mod.param: vw_options['passes'] = 5 vw_options['oaa'] = mod.result['num_classes'] - vw_options['cache_file'] = mod.param['data'] + '.cache' + vw_options['cache_file'] = mod.data_full_path + '.cache' elif 'majority_approx' in mod.param: - pass + # basically we would like to skip vw running as fast as possible + vw_options['cbify'] = mod.result['num_classes'] + vw_options['warm_start'] = 0 + vw_options['bandit'] = 0 else: vw_options['corrupt_type_bandit'] = mod.corrupt_type_bandit vw_options['corrupt_prob_bandit'] = mod.corrupt_prob_bandit - vw_options['validation_method'] = mod.validation_method - vw_options['weighting_scheme'] = mod.weighting_scheme vw_options['bandit'] = mod.bandit if mod.adf_on is True: @@ -122,6 +123,8 @@ def gen_vw_options(mod): vw_options['cbify'] = mod.result['num_classes'] vw_options['warm_start'] = mod.result['warm_start'] vw_options['overwrite_label'] = mod.result['majority_class'] + vw_options['validation_method'] = mod.result['validation_method'] + vw_options['weighting_scheme'] = mod.result['weighting_scheme'] #if mod.cover_on: # alg_option += ' --cover 5 --psi 0.01 --nounif ' @@ -148,7 +151,7 @@ def disperse(l, ch): return s def param_to_str(param): - param_list = [str(k)+'='+str(v) for k,v in param.iteritems() ] + param_list = [ str(k)+'='+str(v) for k,v in param.iteritems() ] return disperse(param_list, '_') def 
param_to_result(param, result): @@ -165,17 +168,24 @@ def gen_comparison_graph(mod): mod.compute_optimal = False param_to_result(mod.param, mod.result) - mod.data_full_path = mod.ds_path + mod.param['data'] + mod.data_full_path = mod.ds_path + str(mod.param['fold']) + '/' + mod.param['data'] + mod.result['fold'] = mod.param['fold'] mod.result['total_size'] = get_num_lines(mod.data_full_path) mod.result['num_classes'] = get_num_classes(mod.data_full_path) mod.result['majority_size'], mod.result['majority_class'] = get_majority_class(mod.data_full_path) mod.result['progress'] = int(math.ceil(float(mod.result['total_size']) / float(mod.num_checkpoints))) - mod.vw_output_filename = mod.results_path + param_to_str(mod.param) + '.txt' + mod.vw_output_dir = mod.results_path + remove_suffix(mod.param['data']) + '/' + mod.vw_output_filename = mod.vw_output_dir + param_to_str(mod.param) + '.txt' if mod.compute_optimal is False: mod.result['warm_start'] = mod.param['warm_start_multiplier'] * mod.result['progress'] mod.bandit = mod.result['total_size'] - mod.result['warm_start'] + mod.result['validation_method'] = mod.validation_method + mod.result['weighting_scheme'] = mod.weighting_scheme + mod.result['corrupt_type_bandit'] = mod.corrupt_type_bandit + mod.result['corrupt_prob_bandit'] = mod.corrupt_prob_bandit + mod.result['fold'] = mod.param['fold'] #plot_errors(mod) execute_vw(mod) @@ -246,11 +256,14 @@ def params_per_task(mod): params_warm_start_multiplier = dictify('warm_start_multiplier', mod.warm_start_multipliers) params_learning_rate = dictify('learning_rate', mod.learning_rates) + # could potentially induce a bug if the maj and best does not have this parameter + params_fold = dictify('fold', mod.folds) + # Algorithm parameters params_cb_type = dictify('cb_type', mod.choices_cb_type) # Common parameters - params_common = param_cartesian_multi([params_corrupt_type_sup, params_corrupt_prob_sup, params_warm_start_multiplier, params_learning_rate, params_cb_type]) + params_common = param_cartesian_multi([params_corrupt_type_sup, params_corrupt_prob_sup, params_warm_start_multiplier, params_learning_rate, params_cb_type, params_fold]) params_common = filter(lambda param: param['corrupt_type_supervised'] == 1 or abs(param['corrupt_prob_supervised']) > 1e-4, params_common) # Baseline parameters construction @@ -275,13 +288,19 @@ def params_per_task(mod): # Optimal baselines parameter construction if mod.optimal_on: - params_optimal = [{ 'optimal_approx': True }, { 'majority_approx': True }] + params_optimal = [{ 'optimal_approx': True }] else: params_optimal = [] + if mod.majority_on: + params_majority = [{ 'majority_approx': True }] + else: + params_majority = [] + + # Common factor in all 3 groups: dataset params_dataset = dictify('data', mod.dss) - params_all = param_cartesian( params_dataset, params_baseline + params_algs + params_optimal ) + params_all = param_cartesian( params_dataset, params_baseline + params_algs + params_optimal + params_majority ) params_all = sorted(params_all) print len(params_all) for row in params_all: @@ -307,7 +326,7 @@ def get_num_classes(ds): return n_actions def get_majority_class(dataset_name): - maj_class_str = subprocess.check_output(('zcat '+ dataset_name +' | cut -d \' \' -f 1 | sort | uniq -c | sort -r | head -1 | xargs '), shell=True) + maj_class_str = subprocess.check_output(('zcat '+ dataset_name +' | cut -d \' \' -f 1 | sort | uniq -c | sort -r -n | head -1 | xargs '), shell=True) maj_size, maj_class = maj_class_str.split() return int(maj_size), 
int(maj_class) @@ -349,6 +368,7 @@ def write_summary_header(mod): def main_loop(mod): mod.summary_file_name = mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum' mod.result_template_list = [ + 'fold', 0, 'data', 'ds', 'num_classes', 0, 'total_size' , 0, @@ -383,6 +403,15 @@ def main_loop(mod): for mod.param in mod.config_task: gen_comparison_graph(mod) +def create_dir(dir): + if not os.path.exists(dir): + os.makedirs(dir) + import stat + os.chmod(dir, os.stat(dir).st_mode | stat.S_IWOTH) + +def remove_suffix(filename): + return os.path.basename(filename).split('.')[0] + if __name__ == '__main__': parser = argparse.ArgumentParser(description='vw job') parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') @@ -394,18 +423,27 @@ def main_loop(mod): args = parser.parse_args() if args.task_id == 0: - if not os.path.exists(args.results_dir): - os.makedirs(args.results_dir) - import stat - os.chmod(args.results_dir, os.stat(args.results_dir).st_mode | stat.S_IWOTH) + # To avoid race condition of writing to the same file at the same time + create_dir(args.results_dir) + + # This is specifically designed for teamscratch, as accessing a folder + # with a huge number of files can be super slow. Hence, we create a subfolder + # for each dataset to alleviate this. + dss = ds_files(args.ds_dir + '1/') + for ds in dss: + ds_no_suffix = remove_suffix(ds) + create_dir(args.results_dir + ds_no_suffix + '/') else: + # may still have the potential of race condition on those subfolders (if + # we have a lot of datasets to run and the datasets are small) while not os.path.exists(args.results_dir): time.sleep(1) mod = model() - mod.baselines_on = False + mod.baselines_on = True mod.algs_on = True mod.optimal_on = False + mod.majority_on = False mod.num_tasks = args.num_tasks mod.task_id = args.task_id @@ -416,43 +454,46 @@ def main_loop(mod): mod.num_checkpoints = 200 - mod.adf_on = True - # use fractions instead of absolute numbers #mod.warm_start_multipliers = [pow(2,i) for i in range(4)] - mod.warm_start_multipliers = [pow(2,i) for i in range(1)] + mod.warm_start_multipliers = [pow(2,i) for i in range(4)] mod.choices_cb_type = ['mtr'] #mod.choices_choices_lambda = [2,4,8] - mod.choices_choices_lambda = [2] + mod.choices_choices_lambda = [2, 4, 8] #mod.choices_corrupt_type_supervised = [1,2,3] #mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] - mod.choices_corrupt_type_supervised = [1,2] - mod.choices_corrupt_prob_supervised = [0.0,0.5] + mod.choices_corrupt_type_supervised = [1,2,3] + mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] + + if args.num_learning_rates == 1: + mod.learning_rates = [0.5] + elif args.num_learning_rates == 3: + mod.learning_rates = [0.1, 0.3, 1.0] + else: + mod.learning_rates = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0] + + mod.adf_on = True mod.corrupt_type_bandit = 1 mod.corrupt_prob_bandit = 0.0 mod.validation_method = 1 - mod.epsilon = 0.05 - - mod.choices_lambda = 2 mod.weighting_scheme = 1 + mod.epsilon = 0.05 mod.epsilon_on = True mod.critical_size_ratios = [184 * pow(2, -i) for i in range(7) ] - if args.num_learning_rates == 1: - mod.learning_rates = [0.5] - elif args.num_learning_rates == 3: - mod.learning_rates = [0.1, 0.3, 1.0] - else: - mod.learning_rates = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0] + #mod.folds = range(1,11) + mod.folds = range(1,6) print 'reading dataset files..' 
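# For reference, result_template_list in main_loop above interleaves column
# names with their default values; the header list and template dict are
# recovered by striding over the pairs, e.g.
#     tpl = ['data', 'ds', 'avg_error', 0.0]
#     n = len(tpl) / 2
#     header = [tpl[2*i] for i in range(n)]                       # names
#     template = dict((tpl[2*i], tpl[2*i+1]) for i in range(n))   # defaults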
- mod.dss = ds_files(mod.ds_path) + #TODO: this line specifically for multiple folds + #Need a systematic way to detect subfolder names + mod.dss = ds_files(mod.ds_path + '1/') print len(mod.dss) #mod.dss = ["ds_223_63.vw.gz"] #mod.dss = mod.dss[:5] From 4ab1d8cd487124ace0b089480afd68c22dc64a35 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Fri, 11 May 2018 11:44:01 -0400 Subject: [PATCH 073/127] . --- scripts/alg_comparison.py | 2 ++ scripts/plot_warm_start.py | 21 ++++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index ed6504fbbea..da5c4de9a19 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -87,6 +87,8 @@ def alg_str(alg_name): if (alg_name[2] == False and alg_name[3] == False): return 'combined_choices_lambda='+str(alg_name[1]) + return 'unknown algorithm' + def problem_str(name_problem): return 'supervised_corrupt_type='+str(name_problem[0]) \ +'_supervised_corrupt_prob='+str(name_problem[1]) \ diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 0f2a19edd83..e7607ed34d1 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -152,7 +152,7 @@ def disperse(l, ch): def param_to_str(param): param_list = [ str(k)+'='+str(v) for k,v in param.iteritems() ] - return disperse(param_list, '_') + return disperse(param_list, ',') def param_to_result(param, result): for k, v in param.iteritems(): @@ -297,6 +297,11 @@ def params_per_task(mod): else: params_majority = [] + #print len(params_baseline) + #print len(params_algs) + #print len(params_common) + #raw_input('..') + # Common factor in all 3 groups: dataset params_dataset = dictify('data', mod.dss) @@ -419,9 +424,12 @@ def remove_suffix(filename): parser.add_argument('--results_dir', default='../../../figs/') parser.add_argument('--ds_dir', default='../../../vwshuffled/') parser.add_argument('--num_learning_rates', type=int, default=1) + parser.add_argument('--num_datasets', type=int, default=-1) args = parser.parse_args() + flag_dir = args.results_dir + 'flag/' + if args.task_id == 0: # To avoid race condition of writing to the same file at the same time create_dir(args.results_dir) @@ -433,10 +441,12 @@ def remove_suffix(filename): for ds in dss: ds_no_suffix = remove_suffix(ds) create_dir(args.results_dir + ds_no_suffix + '/') + + create_dir(flag_dir) else: # may still have the potential of race condition on those subfolders (if # we have a lot of datasets to run and the datasets are small) - while not os.path.exists(args.results_dir): + while not os.path.exists(flag_dir): time.sleep(1) mod = model() @@ -492,9 +502,14 @@ def remove_suffix(filename): print 'reading dataset files..' 
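# The flag/ directory introduced above acts as a crude startup barrier:
# task 0 creates every per-dataset output directory first and creates flag/
# last, while every other task polls os.path.exists(flag_dir) before
# proceeding, so no task writes into a directory that is still being set up.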
#TODO: this line specifically for multiple folds - #Need a systematic way to detect subfolder names + #Need a systematic way to detect subfolder names mod.dss = ds_files(mod.ds_path + '1/') print len(mod.dss) + + if args.num_datasets == -1 or args.num_datasets > len(mod.dss): + pass + else: + mod.dss = mod.dss[:args.num_datasets] #mod.dss = ["ds_223_63.vw.gz"] #mod.dss = mod.dss[:5] From 5d7dc3197506e47fb70c307be6d4902729167e48 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 21 May 2018 11:53:09 -0400 Subject: [PATCH 074/127] updated scripts --- scripts/alg_comparison.py | 166 ++++++++++++++++++++++++++++++++----- scripts/plot_warm_start.py | 28 ++++--- 2 files changed, 161 insertions(+), 33 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index da5c4de9a19..5e0dc1136f8 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -10,6 +10,9 @@ from math import sqrt import argparse import numpy as np +import seaborn as sns +from matplotlib.colors import ListedColormap +from matplotlib.font_manager import FontProperties class model: @@ -72,28 +75,97 @@ def plot_comparison(errors_1, errors_2, sizes): plt.scatter(results_insigni_1, results_insigni_2, s=2, c='k') + len_errors = len(errors_1) + wins_1 = [z_scores[i] < 0 and significance[i] for i in range(len_errors) ] + wins_2 = [z_scores[i] > 0 and significance[i] for i in range(len_errors) ] + num_wins_1 = wins_1.count(True) + num_wins_2 = wins_2.count(True) -def alg_str(alg_name): + return num_wins_1, num_wins_2 + +def alg_info(alg_name, result_lst): if (alg_name[0] == 0): - return 'majority_class' + return result_lst[0] if (alg_name[0] == 2): - return 'supervised_underutil_as_bandit' + return result_lst[1] if (alg_name[2] == True and alg_name[3] == True): - return 'no_update' + return result_lst[2] if (alg_name[2] == True and alg_name[3] == False): - return 'bandit_only' + return result_lst[3] if (alg_name[2] == False and alg_name[3] == True): - return 'supervised_only' - if (alg_name[2] == False and alg_name[3] == False): - return 'combined_choices_lambda='+str(alg_name[1]) + return result_lst[4] + if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 2): + return result_lst[5] + if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 4): + return result_lst[6] + if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 8): + return result_lst[7] + if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 16): + return result_lst[8] + + return result_lst[9] + +def alg_str(alg_name): + return alg_info(alg_name, ['Most-Freq', 'Sim-Bandit', 'Class-1', 'Bandit-Only', 'Sup-Only', 'MinimaxBandits', 'AwesomeBandits with $|\Lambda|$=4', 'AwesomeBandits with $|\Lambda|$=8', 'AwesomeBandits with $|\Lambda|$=16', 'unknown']) + +def alg_str_compatible(alg_name): + return alg_info(alg_name, ['Most-Freq', 'Sim-Bandit', 'Class-1', 'Bandit-Only', 'Sup-Only', 'Choices_lambda=2', 'Choices_lambda=4', 'Choices_lambda=8', 'Choices_lambda=16', 'unknown']) + +def alg_color_style(alg_name): + palette = sns.color_palette('colorblind') + colors = palette.as_hex() + #colors = [colors[5], colors[4], 'black', colors[2], colors[1], colors[3], 'black', colors[0], 'black', 'black'] + colors = [colors[5], colors[3], 'black', colors[0], colors[1], colors[2], colors[2], colors[2], colors[2], 'black' ] + + styles = ['solid', 'solid', 'solid', 'solid', 'dashed', 'dotted', 'dashdot', 'solid', 'dashed', 'solid'] + + return alg_info(alg_name, zip(colors, styles)) + #['black', 
'magenta', 'lime', 'green', 'blue', 'darkorange','darksalmon', 'red', 'cyan'] - return 'unknown algorithm' +def alg_index(alg_name): + return alg_info(alg_name, [7.0, 6.0, 8.0, 5.0, 4.0, 2.0, 1.0, 1.2, 1.5, 9.0]) + + +def order_legends(indices): + ax = plt.gca() + handles, labels = ax.get_legend_handles_labels() + # sort both labels and handles by labels + labels, handles, indices = zip(*sorted(zip(labels, handles, indices), key=lambda t: t[2])) + ax.legend(handles, labels) + +def save_legend(mod, indices): + ax = plt.gca() + handles, labels = ax.get_legend_handles_labels() + labels, handles, indices = zip(*sorted(zip(labels, handles, indices), key=lambda t: t[2])) + #figlegend = pylab.figure(figsize=(26,1)) + #figlegend.legend(handles, labels, 'center', fontsize=26, ncol=8) + figlegend = pylab.figure(figsize=(17,1.5)) + figlegend.legend(handles, labels, 'center', fontsize=26, ncol=3) + figlegend.tight_layout(pad=0) + figlegend.savefig(mod.problemdir+'legend.pdf') def problem_str(name_problem): return 'supervised_corrupt_type='+str(name_problem[0]) \ +'_supervised_corrupt_prob='+str(name_problem[1]) \ +'_bandit_supervised_size_ratio='+str(name_problem[2]) +def noise_type_str(noise_type): + if noise_type == 1: + return 'UAR' + elif noise_type == 2: + return 'CYC' + elif noise_type == 3: + return 'MAJ' + +def problem_text(name_problem): + s='' + s += 'Ratio = ' + str(name_problem[2]) + ', ' + if abs(name_problem[1]) < 1e-6: + s += 'noiseless' + else: + s += noise_type_str(name_problem[0]) + ', ' + s += 'p = ' + str(name_problem[1]) + return s def plot_cdf(alg_name, errs): @@ -102,25 +174,52 @@ def plot_cdf(alg_name, errs): print errs print len(errs) - plt.step(np.sort(errs), np.linspace(0, 1, len(errs), endpoint=False), label=alg_str(alg_name)) + col, sty = alg_color_style(alg_name) + plt.step(np.sort(errs), np.linspace(0, 1, len(errs), endpoint=False), label=alg_str(alg_name), color=col, linestyle=sty, linewidth=2.0) + + # #raw_input("Press Enter to continue...") def plot_all_cdfs(alg_results, mod): #plot all cdfs: print 'printing cdfs..' 
- i = 0 + + indices = [] + + pylab.figure(figsize=(8,6)) + for alg_name, errs in alg_results.iteritems(): + indices.append(alg_index(alg_name)) plot_cdf(alg_name, errs) - plt.legend() if mod.normalize_type == 1: - plt.xlim(-0.2,1) + plt.xlim(0,1) elif mod.normalize_type == 2: plt.xlim(-1,1) + elif mod.normalize_type == 3: + plt.xlim(0, 1) + plt.ylim(0,1) - plt.savefig(mod.problemdir+'/cdf.png') + #params={'legend.fontsize':26, + #'axes.labelsize': 24, 'axes.titlesize':26, 'xtick.labelsize':20, + #'ytick.labelsize':20 } + #plt.rcParams.update(params) + #plt.xlabel('Normalized error',fontsize=34) + #plt.ylabel('Cumulative frequency', fontsize=34) + #plt.title(problem_text(mod.name_problem), fontsize=36) + plt.xticks(fontsize=30) + plt.yticks(fontsize=30) + plt.tight_layout(pad=0) + + ax = plt.gca() + order_legends(indices) + ax.legend_.set_zorder(-1) + plt.savefig(mod.problemdir+'cdf.pdf') + ax.legend_.remove() + plt.savefig(mod.problemdir+'cdf_nolegend.pdf') + save_legend(mod, indices) plt.clf() @@ -136,10 +235,11 @@ def plot_all_pair_comp(alg_results, sizes, mod): print len(errs_1), len(errs_2), len(sizes) #raw_input('Press any key to continue..') - plot_comparison(errs_1, errs_2, sizes) + num_wins_1, num_wins_2 = plot_comparison(errs_1, errs_2, sizes) - plt.title(alg_str(alg_names[i])+' vs '+alg_str(alg_names[j])) - plt.savefig(mod.problemdir+'/'+alg_str(alg_names[i])+'_vs_'+alg_str(alg_names[j])+'.png') + plt.title( 'total number of comparisons = ' + str(len(errs_1)) + '\n'+ + alg_str(alg_names[i]) + ' wins ' + str(num_wins_1) + ' times, \n' + alg_str(alg_names[j]) + ' wins ' + str(num_wins_2) + ' times') + plt.savefig(mod.problemdir+alg_str_compatible(alg_names[i])+'_vs_'+alg_str_compatible(alg_names[j])+'.pdf') plt.clf() def init_results(result_table): @@ -159,6 +259,8 @@ def normalize_score(unnormalized_result, mod): elif mod.normalize_type == 2: l = unnormalized_result[(1, 1, True, False)] return { k : ((v - l) / (l + 1e-4)) for k, v in unnormalized_result.iteritems() } + elif mod.normalize_type == 3: + return unnormalized_result def get_best_error(best_error_table, name_dataset): name = name_dataset[0] @@ -205,6 +307,9 @@ def update_result_dict(results_dict, new_result): def plot_all(mod, all_results): + + #all_results = all_results[all_results['corrupt_prob_supervised']!=0.0] + grouped_by_problem = all_results.groupby(['corrupt_type_supervised', 'corrupt_prob_supervised','bandit_supervised_size_ratio']) @@ -226,8 +331,12 @@ def plot_all(mod, all_results): grouped_by_algorithm = group_dataset.groupby(['warm_start_type', 'choices_lambda', 'no_supervised', 'no_bandit']) mod.name_dataset = name_dataset + #The 'learning_rate' would be the only free degree here now. Taking the #min aggregation will give us the 7 algorithms we are evaluating. 
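# A sketch of the fold-averaging mentioned in the comment below (hypothetical,
# not implemented here): take the mean over folds first, then the
# per-algorithm min over learning rates --
#     by_lr = df.groupby(alg_keys + ['learning_rate'], as_index=False).mean()
#     best = by_lr.groupby(alg_keys, as_index=False)['avg_error'].min()
# where alg_keys stands for the ['warm_start_type', 'choices_lambda',
# 'no_supervised', 'no_bandit'] key used by grouped_by_algorithm.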
+ + #In the future this should be changed now if we run multiple folds: we + #should average among folds before choosing the min result_table = grouped_by_algorithm.min() result_table = result_table.reset_index() @@ -322,7 +431,7 @@ def load_maj_error(mod): parser.add_argument('--filter', default='1') parser.add_argument('--plot_subdir', default='expt1/') parser.add_argument('--from_hdf', action='store_true') - parser.add_argument('--normalize_type', type=int) + parser.add_argument('--normalize_type', type=int, default=1) args = parser.parse_args() mod = model() @@ -330,10 +439,10 @@ def load_maj_error(mod): mod.results_dir = args.results_dir mod.filter = args.filter mod.plot_subdir = args.plot_subdir - mod.normalize_type = args.normalize_type + mod.normalize_type = args.normalize_type #1: normalized score; 2: bandit only centered score; 3: raw score mod.pair_comp_on = False mod.cdf_on = True - mod.maj_error_dir = '../../../figs_maj_errors/0of1.sum' + mod.maj_error_dir = '../../../figs_all/expt_0509/figs_maj_errors/0of1.sum' mod.fulldir = mod.results_dir + mod.plot_subdir if not os.path.exists(mod.fulldir): @@ -354,10 +463,18 @@ def load_maj_error(mod): all_results = mod.all_results mod.best_error_table = all_results[all_results['choices_lambda'] == 0] + + #print mod.best_error_table[mod.best_error_table['dataset'] == 'ds_160_5.vw.gz'] + #raw_input(' ') + all_results = all_results[all_results['choices_lambda'] != 0] #ignore the no update row: all_results = all_results[(all_results['no_supervised'] == False) | (all_results['no_bandit'] == False)] + #ignore the choice_lambda = 4 row + all_results = all_results[(all_results['choices_lambda'] != 4)] + + #filter choices_lambdas = 2,4,8? #if (alg_name[2] == False and alg_name[3] == False and alg_name[1] != 8): @@ -376,6 +493,15 @@ def load_maj_error(mod): all_results = all_results[all_results['num_classes'] >= 3] elif mod.filter == '4': all_results = all_results[all_results['num_classes'] <= 2] + elif mod.filter == '5': + all_results = all_results[all_results['total_size'] >= 10000] + all_results = all_results[all_results['num_classes'] >= 3] + elif mod.filter == '6': + all_results = all_results[all_results['warm_start_size'] >= 100] + all_results = all_results[all_results['learning_rate'] == 0.3] + elif mod.filter == '7': + all_results = all_results[all_results['warm_start_size'] >= 100] + all_results = all_results[all_results['num_classes'] >= 3] plot_all(mod, all_results) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index e7607ed34d1..0a33376114e 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -264,7 +264,7 @@ def params_per_task(mod): # Common parameters params_common = param_cartesian_multi([params_corrupt_type_sup, params_corrupt_prob_sup, params_warm_start_multiplier, params_learning_rate, params_cb_type, params_fold]) - params_common = filter(lambda param: param['corrupt_type_supervised'] == 1 or abs(param['corrupt_prob_supervised']) > 1e-4, params_common) + params_common = filter(lambda param: param['corrupt_type_supervised'] == 3 or abs(param['corrupt_prob_supervised']) > 1e-4, params_common) # Baseline parameters construction if mod.baselines_on: @@ -358,7 +358,7 @@ def vw_output_extract(mod, pattern): if not errs: avge = 0 else: - print errs + #print errs avge = float(errs[0][0]) vw_output.close() @@ -425,7 +425,7 @@ def remove_suffix(filename): parser.add_argument('--ds_dir', default='../../../vwshuffled/') parser.add_argument('--num_learning_rates', type=int, default=1) 
parser.add_argument('--num_datasets', type=int, default=-1) - + parser.add_argument('--num_folds', type=int, default=1) args = parser.parse_args() flag_dir = args.results_dir + 'flag/' @@ -438,6 +438,7 @@ def remove_suffix(filename): # with a huge number of files can be super slow. Hence, we create a subfolder # for each dataset to alleviate this. dss = ds_files(args.ds_dir + '1/') + dss = dss[:args.num_datasets] for ds in dss: ds_no_suffix = remove_suffix(ds) create_dir(args.results_dir + ds_no_suffix + '/') @@ -466,23 +467,24 @@ def remove_suffix(filename): # use fractions instead of absolute numbers #mod.warm_start_multipliers = [pow(2,i) for i in range(4)] - mod.warm_start_multipliers = [pow(2,i) for i in range(4)] + mod.warm_start_multipliers = [pow(2,i) for i in range(1)] mod.choices_cb_type = ['mtr'] #mod.choices_choices_lambda = [2,4,8] - mod.choices_choices_lambda = [2, 4, 8] + mod.choices_choices_lambda = [2,8,16] #mod.choices_corrupt_type_supervised = [1,2,3] #mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] - mod.choices_corrupt_type_supervised = [1,2,3] - mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] + mod.choices_corrupt_type_supervised = [3] + mod.choices_corrupt_prob_supervised = [0,0.25,0.5] + + mod.learning_rates_template = [0.1, 0.03, 0.3, 0.01, 1.0, 0.003, 3.0, 0.001, 10.0] - if args.num_learning_rates == 1: - mod.learning_rates = [0.5] - elif args.num_learning_rates == 3: - mod.learning_rates = [0.1, 0.3, 1.0] + if args.num_learning_rates <= 0 or args.num_learning_rates >= 10: + mod.learning_rates = mod.learning_rates_template else: - mod.learning_rates = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0] + mod.learning_rates = mod.learning_rates_template[:args.num_learning_rates] + mod.adf_on = True @@ -498,7 +500,7 @@ def remove_suffix(filename): mod.critical_size_ratios = [184 * pow(2, -i) for i in range(7) ] #mod.folds = range(1,11) - mod.folds = range(1,6) + mod.folds = range(1, args.num_folds+1) print 'reading dataset files..' 
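# With multiple folds, each dataset is read from a per-fold subdirectory,
# e.g. (path illustrative) ../../../vwshuffled/3/ds_223_63.vw.gz for fold 3;
# the listing above only scans the fold-1 subdirectory and assumes every
# other fold contains the same file names.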
#TODO: this line specifically for multiple folds From a4fb02fcd14928c82f067ca254f9ec046eb3abf4 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 21 May 2018 17:52:04 -0400 Subject: [PATCH 075/127] cleaned up the run vw script; need more tests on more choices of param settings --- scripts/plot_warm_start.py | 360 +++++++++++++++++++------------------ 1 file changed, 187 insertions(+), 173 deletions(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 0a33376114e..c2f2faec726 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -14,8 +14,42 @@ class model: def __init__(self): - self.no_bandit = False - self.no_supervised = False + # Setting up argument-independent learning parameters in the constructor + self.baselines_on = True + self.algs_on = True + self.optimal_on = True + self.majority_on = True + + self.num_checkpoints = 200 + + # use fractions instead of absolute numbers + #mod.warm_start_multipliers = [pow(2,i) for i in range(4)] + self.warm_start_multipliers = [pow(2,i) for i in range(1)] + + self.choices_cb_type = ['mtr'] + #mod.choices_choices_lambda = [2,4,8] + self.choices_choices_lambda = [2,8,16] + + #mod.choices_corrupt_type_supervised = [1,2,3] + #mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] + self.choices_corrupt_type_supervised = [3] + self.choices_corrupt_prob_supervised = [0,0.25,0.5] + + self.learning_rates_template = [0.1, 0.03, 0.3, 0.01, 1.0, 0.003, 3.0, 0.001, 10.0] + + self.adf_on = True + + self.corrupt_type_bandit = 1 + self.corrupt_prob_bandit = 0.0 + + self.validation_method = 1 + self.weighting_scheme = 1 + + #self.epsilon = 0.05 + #self.epsilon_on = True + + self.critical_size_ratios = [184 * pow(2, -i) for i in range(7) ] + def collect_stats(mod): avg_error_value = avg_error(mod) @@ -31,14 +65,14 @@ def collect_stats(mod): 'ideal_variance': 0.0 } - if mod.compute_optimal is True: + if 'majority_approx' in mod.param or 'optimal_approx' in mod.param: vw_result = vw_result_template.copy() if 'optimal_approx' in mod.param: # this condition is for computing the optimal error vw_result['avg_error'] = avg_error_value else: # this condition is for computing the majority error - err = 1 - float(mod.result['majority_size']) / mod.result['total_size'] + err = 1 - float(mod.param['majority_size']) / mod.param['total_size'] vw_result['avg_error'] = float('%0.5f' % err) vw_run_results.append(vw_result) return vw_run_results @@ -61,8 +95,8 @@ def collect_stats(mod): bandit_effective = int(float(weight_str)) for ratio in mod.critical_size_ratios: - if bandit_effective >= (1 - 1e-7) * mod.result['warm_start'] * ratio and \ - bandit_effective <= (1 + 1e-7) * mod.result['warm_start'] * ratio: + if bandit_effective >= (1 - 1e-7) * mod.param['warm_start'] * ratio and \ + bandit_effective <= (1 + 1e-7) * mod.param['warm_start'] * ratio: vw_result = vw_result_template.copy() vw_result['bandit_size'] = bandit_effective vw_result['bandit_supervised_size_ratio'] = ratio @@ -74,67 +108,55 @@ def collect_stats(mod): return vw_run_results -def gen_vw_options_list(vw_options): +def gen_vw_options_list(mod): + mod.vw_options = format_setting(mod.vw_template, mod.param) vw_options_list = [] - for k, v in vw_options.iteritems(): + for k, v in mod.vw_options.iteritems(): vw_options_list.append('--'+str(k)) vw_options_list.append(str(v)) return vw_options_list def gen_vw_options(mod): - vw_options = {} - vw_options['data'] = mod.data_full_path - vw_options['progress'] = mod.result['progress'] - if 'optimal_approx' in mod.param: - 
vw_options['passes'] = 5 - vw_options['oaa'] = mod.result['num_classes'] - vw_options['cache_file'] = mod.data_full_path + '.cache' + # Fully supervised on full dataset + mod.vw_template = {'data':'', 'progress':2.0, 'passes':0, 'oaa':0, 'cache_file':''} + mod.param['passes'] = 5 + mod.param['oaa'] = mod.param['num_classes'] + mod.param['cache_file'] = mod.param['data'] + '.cache' elif 'majority_approx' in mod.param: - # basically we would like to skip vw running as fast as possible - vw_options['cbify'] = mod.result['num_classes'] - vw_options['warm_start'] = 0 - vw_options['bandit'] = 0 + # Compute majority error; basically we would like to skip vw running as fast as possible + mod.vw_template = {'data':'', 'progress':2.0, 'cbify':0, 'warm_start':0, 'bandit':0} + mod.param['cbify'] = mod.param['num_classes'] + mod.param['warm_start'] = 0 + mod.param['bandit'] = 0 else: - vw_options['corrupt_type_bandit'] = mod.corrupt_type_bandit - vw_options['corrupt_prob_bandit'] = mod.corrupt_prob_bandit - vw_options['bandit'] = mod.bandit - - if mod.adf_on is True: - vw_options['cb_explore_adf'] = ' ' + # General CB + mod.vw_template = {'data':'', 'corrupt_type_bandit':0, 'corrupt_prob_bandit':0.0, 'bandit':0, 'cb_type':'mtr', + 'choices_lambda':0, 'corrupt_type_supervised':0, 'corrupt_prob_supervised':0.0, 'lambda_scheme':1, 'learning_rate':0.5, 'warm_start_type':1, 'cbify':0, 'warm_start':0, 'overwrite_label':1, 'validation_method':1, 'weighting_scheme':1} + + mod.param['warm_start'] = mod.param['warm_start_multiplier'] * mod.param['progress'] + mod.param['bandit'] = mod.param['total_size'] - mod.param['warm_start'] + mod.param['cbify'] = mod.param['num_classes'] + mod.param['overwrite_label'] = mod.param['majority_class'] + + if mod.param['adf_on'] is True: + mod.param['cb_explore_adf'] = ' ' + mod.vw_template['cb_explore_adf'] = ' ' else: - vw_options['cb_explore'] = mod.num_classes - - if mod.epsilon_on is True: - vw_options['epsilon'] = mod.epsilon - - vw_options['cb_type'] = mod.param['cb_type'] - vw_options['choices_lambda'] = mod.param['choices_lambda'] - vw_options['corrupt_type_supervised'] = mod.param['corrupt_type_supervised'] - vw_options['corrupt_prob_supervised'] = mod.param['corrupt_prob_supervised'] - vw_options['lambda_scheme'] = mod.param['lambda_scheme'] - if mod.param['no_supervised'] is True: - vw_options['no_supervised'] = ' ' - if mod.param['no_bandit'] is True: - vw_options['no_bandit'] = ' ' - vw_options['learning_rate'] = mod.param['learning_rate'] - vw_options['warm_start_type'] = mod.param['warm_start_type'] - - vw_options['cbify'] = mod.result['num_classes'] - vw_options['warm_start'] = mod.result['warm_start'] - vw_options['overwrite_label'] = mod.result['majority_class'] - vw_options['validation_method'] = mod.result['validation_method'] - vw_options['weighting_scheme'] = mod.result['weighting_scheme'] - - #if mod.cover_on: - # alg_option += ' --cover 5 --psi 0.01 --nounif ' - #mod.cb_type = 'dr' - return vw_options + mod.param['cb_explore'] = mod.param['num_classes'] + mod.vw_template['cb_explore'] = 0 + + if mod.param['no_warm_start_update'] is True: + mod.param['no_supervised'] = ' ' + mod.vw_template['no_supervised'] = ' ' + if mod.param['no_interaction_update'] is True: + mod.param['no_bandit'] = ' ' + mod.vw_template['no_bandit'] = ' ' def execute_vw(mod): - vw_options = gen_vw_options(mod) - vw_options_list = gen_vw_options_list(vw_options) - cmd = disperse([mod.vw_path]+vw_options_list, ' ') + gen_vw_options(mod) + vw_options_list = gen_vw_options_list(mod) + 
cmd = intersperse([mod.vw_path]+vw_options_list, ' ') print cmd f = open(mod.vw_output_filename, 'w') @@ -143,7 +165,7 @@ def execute_vw(mod): process.wait() f.close() -def disperse(l, ch): +def intersperse(l, ch): s = '' for item in l: s += str(item) @@ -152,56 +174,68 @@ def disperse(l, ch): def param_to_str(param): param_list = [ str(k)+'='+str(v) for k,v in param.iteritems() ] - return disperse(param_list, ',') - -def param_to_result(param, result): - for k, v in param.iteritems(): - if k in result: - result[k] = v + return intersperse(param_list, ',') + +def replace_if_in(dic, k, k_new): + if k in dic: + dic[k_new] = dic[k] + del dic[k] + +def replace_keys(dic, simplified_keymap): + dic_new = dic.copy() + for k, k_new in simplified_keymap.iteritems(): + replace_if_in(dic_new, k, k_new) + return dic_new + +def param_to_str_simplified(mod): + #print 'before replace' + #print param + vw_run_param_set = ['lambda_scheme','learning_rate','validation_method', + 'fold','no_warm_start_update','no_interaction_update', + 'corrupt_prob_bandit', 'corrupt_prob_supervised', + 'corrupt_type_bandit', 'corrupt_type_supervised', + 'warm_start_type','warm_start_multiplier','choices_lambda','weighting_scheme', + 'cb_type','optimal_approx','majority_approx','dataset', 'adf_on'] + + mod.template_red = dict([(k,mod.result_template[k]) for k in vw_run_param_set]) + mod.simplified_keymap_red = dict([(k,mod.simplified_keymap[k]) for k in vw_run_param_set]) + # step 1: use the above as a template to filter out irrelevant parameters + # in the vw output file title + param_formatted = format_setting(mod.template_red, mod.param) + # step 2: replace the key names with the simplified names + param_simplified = replace_keys(param_formatted, mod.simplified_keymap_red) + #print 'after replace' + #print param + return param_to_str(param_simplified) def gen_comparison_graph(mod): - mod.result = mod.result_template.copy() + mod.param['data'] = mod.ds_path + str(mod.param['fold']) + '/' + mod.param['dataset'] - if 'majority_approx' in mod.param or 'optimal_approx' in mod.param: - mod.compute_optimal = True - else: - mod.compute_optimal = False - - param_to_result(mod.param, mod.result) - mod.data_full_path = mod.ds_path + str(mod.param['fold']) + '/' + mod.param['data'] - - mod.result['fold'] = mod.param['fold'] - mod.result['total_size'] = get_num_lines(mod.data_full_path) - mod.result['num_classes'] = get_num_classes(mod.data_full_path) - mod.result['majority_size'], mod.result['majority_class'] = get_majority_class(mod.data_full_path) - mod.result['progress'] = int(math.ceil(float(mod.result['total_size']) / float(mod.num_checkpoints))) + mod.param['total_size'] = get_num_lines(mod.param['data']) + mod.param['num_classes'] = get_num_classes(mod.param['data']) + mod.param['majority_size'], mod.param['majority_class'] = get_majority_class(mod.param['data']) + mod.param['progress'] = int(math.ceil(float(mod.param['total_size']) / float(mod.num_checkpoints))) mod.vw_output_dir = mod.results_path + remove_suffix(mod.param['data']) + '/' - mod.vw_output_filename = mod.vw_output_dir + param_to_str(mod.param) + '.txt' - - if mod.compute_optimal is False: - mod.result['warm_start'] = mod.param['warm_start_multiplier'] * mod.result['progress'] - mod.bandit = mod.result['total_size'] - mod.result['warm_start'] - mod.result['validation_method'] = mod.validation_method - mod.result['weighting_scheme'] = mod.weighting_scheme - mod.result['corrupt_type_bandit'] = mod.corrupt_type_bandit - mod.result['corrupt_prob_bandit'] = 
mod.corrupt_prob_bandit - mod.result['fold'] = mod.param['fold'] + mod.vw_output_filename = mod.vw_output_dir + param_to_str_simplified(mod) + '.txt' #plot_errors(mod) execute_vw(mod) vw_run_results = collect_stats(mod) for vw_result in vw_run_results: - result_combined = merge_two_dicts(mod.result, vw_result) - result_formatted = format_result(mod.result_template, result_combined) + result_combined = merge_two_dicts(mod.param, vw_result) + result_formatted = format_setting(mod.result_template, result_combined) record_result(mod, result_formatted) print('') -def format_result(result_template, result): - result_formatted = result_template.copy() - for k, v in result.iteritems(): - result_formatted[k] = v - return result_formatted +# The following function is a "template filling" function +# Given a template, we use the setting dict to fill it as much as possible +def format_setting(template, setting): + formatted = template.copy() + for k, v in setting.iteritems(): + if k in template.keys(): + formatted[k] = v + return formatted def record_result(mod, result): result_row = [] @@ -209,7 +243,7 @@ def record_result(mod, result): result_row.append(result[k]) summary_file = open(mod.summary_file_name, 'a') - summary_file.write( disperse(result_row, '\t') + '\n') + summary_file.write( intersperse(result_row, '\t') + '\n') summary_file.close() def ds_files(ds_path): @@ -269,10 +303,10 @@ def params_per_task(mod): # Baseline parameters construction if mod.baselines_on: params_baseline_basic = [ - [{'choices_lambda': 1, 'warm_start_type': 1, 'lambda_scheme': 3}], [{'no_supervised': True}, {'no_supervised': False}], [{'no_bandit': True}, {'no_bandit': False}] + [{'choices_lambda': 1, 'warm_start_type': 1, 'lambda_scheme': 3}], [{'no_warm_start_update': True}, {'no_warm_start_update': False}], [{'no_interaction_update': True}, {'no_interaction_update': False}] ] params_baseline = param_cartesian_multi([params_common] + params_baseline_basic) - params_baseline = filter(lambda param: param['no_supervised'] == True or param['no_bandit'] == True, params_baseline) + params_baseline = filter(lambda param: param['no_warm_start_update'] == True or param['no_interaction_update'] == True, params_baseline) else: params_baseline = [] @@ -280,34 +314,45 @@ def params_per_task(mod): # Algorithm parameters construction if mod.algs_on: params_choices_lambd = dictify('choices_lambda', mod.choices_choices_lambda) - params_algs_1 = param_cartesian(params_choices_lambd, [{'no_supervised': False, 'no_bandit': False, 'warm_start_type': 1, 'lambda_scheme': 3}] ) - params_algs_2 = [{'no_supervised': False, 'no_bandit': False, 'warm_start_type': 2, 'lambda_scheme': 1, 'choices_lambda':1}] + params_algs_1 = param_cartesian(params_choices_lambd, [{'no_warm_start_update': False, 'no_interaction_update': False, 'warm_start_type': 1, 'lambda_scheme': 3}] ) + params_algs_2 = [{'no_warm_start_update': False, 'no_interaction_update': False, 'warm_start_type': 2, 'lambda_scheme': 1, 'choices_lambda':1}] params_algs = param_cartesian( params_common, params_algs_1 + params_algs_2 ) else: params_algs = [] + + params_constant = [{'validation_method':mod.validation_method, + 'weighting_scheme':mod.weighting_scheme, + 'corrupt_type_bandit':mod.corrupt_type_bandit, + 'corrupt_prob_bandit':mod.corrupt_prob_bandit, + 'adf_on':True}] + + params_baseline_and_algs = param_cartesian_multi([params_constant, params_baseline + params_algs]) + + # Optimal baselines parameter construction if mod.optimal_on: - params_optimal = [{ 'optimal_approx': True 
}] + params_optimal = [{ 'optimal_approx': True, 'fold': 1 }] else: params_optimal = [] if mod.majority_on: - params_majority = [{ 'majority_approx': True }] + params_majority = [{ 'majority_approx': True, 'fold': 1 }] else: params_majority = [] + #print len(params_baseline) #print len(params_algs) #print len(params_common) #raw_input('..') - # Common factor in all 3 groups: dataset - params_dataset = dictify('data', mod.dss) - params_all = param_cartesian( params_dataset, params_baseline + params_algs + params_optimal + params_majority ) + params_dataset = dictify('dataset', mod.dss) + params_all = param_cartesian_multi( [params_dataset, params_baseline_and_algs + params_optimal + params_majority] ) + params_all = sorted(params_all) - print len(params_all) + print 'The total number of VW commands to run is: ', len(params_all) for row in params_all: print row return get_params_task(params_all) @@ -366,43 +411,50 @@ def vw_output_extract(mod, pattern): def write_summary_header(mod): summary_file = open(mod.summary_file_name, 'w') - summary_header = disperse(mod.result_header_list, '\t') + summary_header = intersperse(mod.result_header_list, '\t') summary_file.write(summary_header+'\n') summary_file.close() def main_loop(mod): mod.summary_file_name = mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum' + + # The reason for using a list is that, we would like to keep the order of the + #columns in this way. Maybe use ordered dictionary in the future? mod.result_template_list = [ - 'fold', 0, - 'data', 'ds', - 'num_classes', 0, - 'total_size' , 0, - 'majority_size', 0, - 'corrupt_type_supervised', 0, - 'corrupt_prob_supervised', 0.0, - 'corrupt_type_bandit', 0, - 'corrupt_prob_bandit', 0.0, - 'warm_start', 0, - 'bandit_size', 0, - 'bandit_supervised_size_ratio', 0, - 'cb_type', 'mtr', - 'validation_method', 0, - 'weighting_scheme', 0, - 'lambda_scheme', 0, - 'choices_lambda', 0, - 'no_supervised', False, - 'no_bandit', False, - 'warm_start_type', 0, - 'learning_rate', 0.0, - 'optimal_approx', False, - 'majority_approx', False, - 'avg_error', 0.0, - 'actual_variance', 0.0, - 'ideal_variance', 0.0 ] - - num_cols = len(mod.result_template_list)/2 - mod.result_header_list = [ mod.result_template_list[2*i] for i in range(num_cols) ] - mod.result_template = dict([ (mod.result_template_list[2*i], mod.result_template_list[2*i+1]) for i in range(num_cols) ]) + ('fold', 'fd', 0), + ('data', 'dt', ''), + ('dataset', 'ds', ''), + ('num_classes','nc', 0), + ('total_size', 'ts', 0), + ('majority_size','ms', 0), + ('corrupt_type_supervised', 'cts', 0), + ('corrupt_prob_supervised', 'cps', 0.0), + ('corrupt_type_bandit', 'ctb', 0), + ('corrupt_prob_bandit', 'cpb', 0.0), + ('adf_on', 'ao', True), + ('warm_start_multiplier','wsm',1), + ('warm_start', 'ws', 0), + ('warm_start_type', 'wst', 0), + ('bandit_size', 'bs', 0), + ('bandit_supervised_size_ratio', 'bssr', 0), + ('cb_type', 'cbt', 'mtr'), + ('validation_method', 'vm', 0), + ('weighting_scheme', 'wts', 0), + ('lambda_scheme','ls', 0), + ('choices_lambda', 'cl', 0), + ('no_warm_start_update', 'nwsu', False), + ('no_interaction_update', 'niu', False), + ('learning_rate', 'lr', 0.0), + ('optimal_approx', 'oa', False), + ('majority_approx', 'ma', False), + ('avg_error', 'ae', 0.0), + ('actual_variance', 'av', 0.0), + ('ideal_variance', 'iv', 0.0)] + + num_cols = len(mod.result_template_list) + mod.result_header_list = [ mod.result_template_list[i][0] for i in range(num_cols) ] + mod.result_template = dict([ (mod.result_template_list[i][0], 
mod.result_template_list[i][2]) for i in range(num_cols) ]) + mod.simplified_keymap = dict([ (mod.result_template_list[i][0], mod.result_template_list[i][1]) for i in range(num_cols) ]) write_summary_header(mod) for mod.param in mod.config_task: @@ -451,54 +503,17 @@ def remove_suffix(filename): time.sleep(1) mod = model() - mod.baselines_on = True - mod.algs_on = True - mod.optimal_on = False - mod.majority_on = False mod.num_tasks = args.num_tasks mod.task_id = args.task_id - mod.vw_path = '../vowpalwabbit/vw' mod.ds_path = args.ds_dir mod.results_path = args.results_dir - mod.num_checkpoints = 200 - - # use fractions instead of absolute numbers - #mod.warm_start_multipliers = [pow(2,i) for i in range(4)] - mod.warm_start_multipliers = [pow(2,i) for i in range(1)] - - mod.choices_cb_type = ['mtr'] - #mod.choices_choices_lambda = [2,4,8] - mod.choices_choices_lambda = [2,8,16] - - #mod.choices_corrupt_type_supervised = [1,2,3] - #mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] - mod.choices_corrupt_type_supervised = [3] - mod.choices_corrupt_prob_supervised = [0,0.25,0.5] - - mod.learning_rates_template = [0.1, 0.03, 0.3, 0.01, 1.0, 0.003, 3.0, 0.001, 10.0] - if args.num_learning_rates <= 0 or args.num_learning_rates >= 10: mod.learning_rates = mod.learning_rates_template else: mod.learning_rates = mod.learning_rates_template[:args.num_learning_rates] - - - mod.adf_on = True - - mod.corrupt_type_bandit = 1 - mod.corrupt_prob_bandit = 0.0 - - mod.validation_method = 1 - mod.weighting_scheme = 1 - - mod.epsilon = 0.05 - mod.epsilon_on = True - - mod.critical_size_ratios = [184 * pow(2, -i) for i in range(7) ] - #mod.folds = range(1,11) mod.folds = range(1, args.num_folds+1) @@ -523,7 +538,6 @@ def remove_suffix(filename): print len(mod.config_task) #print mod.ds_task - # we only need to vary the warm start fraction, and there is no need to vary the bandit fraction, # as each run of vw automatically accumulates the bandit dataset main_loop(mod) From f8d14ab016c82a1565c48b7881b66367f8e62cdd Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 25 May 2018 15:47:43 -0400 Subject: [PATCH 076/127] fixed memory lost problems; still reachable problems still not resolved --- vowpalwabbit/cbify.cc | 63 ++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index bab211f3da2..d17d14f511b 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -269,7 +269,7 @@ void finish(cbify& data) for (size_t i = 0; i < data.warm_start_period; ++i) { - //VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, data.supervised_validation[i]); + VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, data.supervised_validation[i]); } free(data.supervised_validation); @@ -278,11 +278,11 @@ void finish(cbify& data) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - //VW::dealloc_example(CB::cb_label.delete_label, data.adf_data.ecs[a]); - data.adf_data.ecs[a].pred.a_s.delete_v(); + VW::dealloc_example(CB::cb_label.delete_label, data.adf_data.ecs[a]); + //data.adf_data.ecs[a].pred.a_s.delete_v(); } - //VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.empty_example); - data.adf_data.empty_example->pred.a_s.delete_v(); + VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.empty_example); + //data.adf_data.empty_example->pred.a_s.delete_v(); free(data.adf_data.ecs); free(data.adf_data.empty_example); @@ -292,21 +292,20 @@ void finish(cbify& data) 
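The subject of this patch refers to valgrind's report categories: replacing the ad-hoc pred.a_s.delete_v() calls with VW::dealloc_example(...), as the surrounding hunks do, also frees the label contents and feature space that were previously reported as "definitely lost", while allocations released only at shutdown still show up as "still reachable". A minimal self-contained toy of the ownership pattern being repaired; the struct and field names here are illustrative stand-ins, not VW's actual types:

#include <cstdlib>

// Each example owns two interior allocations; freeing only the outer array
// (or only one field) is what valgrind flags as "definitely lost".
struct toy_label   { float* costs; };
struct toy_example { toy_label l; float* pred; };

int main()
{
  const int n = 4;
  toy_example* ecs = (toy_example*)calloc(n, sizeof(toy_example));
  for (int a = 0; a < n; ++a)
  {
    ecs[a].l.costs = (float*)calloc(2, sizeof(float));
    ecs[a].pred    = (float*)calloc(2, sizeof(float));
  }
  for (int a = 0; a < n; ++a)
  {
    free(ecs[a].l.costs);  // analogue of the delete_label step in dealloc_example
    free(ecs[a].pred);     // analogue of pred.a_s.delete_v()
  }
  free(ecs);               // only now is the outer buffer safe to release
  return 0;
}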
data.csl_empty->costs.delete_v(); + free(data.csls); free(data.csl_empty); - free(data.cbl_empty); - free(data.old_weights); - free(data.cbls); + free(data.cbls); + free(data.cbl_empty); + free(data.old_weights); } else { data.csls->costs.delete_v(); + free(data.csls); } - free(data.csls); - - } void copy_example_to_adf(cbify& data, example& ec) @@ -387,8 +386,12 @@ uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) //} //best_action_dir = ecs[0].pred.a_s[0].action+1; //assert(best_action == best_action_dir); + uint32_t pred_action = ecs[0].pred.a_s[0].action+1; + + //Need to clear the prediction, otherwise there will be a memory leak + ecs[0].pred.a_s.delete_v(); - return ecs[0].pred.a_s[0].action+1; + return pred_action; } @@ -412,7 +415,7 @@ void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) else //validation using supervised data (their labels are already set to cost-sensitive labels) { //only update cumulative costs every warm_start_period iterations - if (data.bandit_iter % data.warm_start_period == 0) + if (abs(log2(data.bandit_iter) - floor(log2(data.bandit_iter))) < 1e-4) { for (uint32_t i = 0; i < data.choices_lambda; i++) data.cumulative_costs[i] = 0; @@ -467,7 +470,7 @@ void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_l else { //only update cumulative costs every warm_start_period iterations - if (data.bandit_iter % data.warm_start_period == 0) + if ( abs(log2(data.bandit_iter) - floor(log2(data.bandit_iter))) < 1e-4 ) { for (uint32_t i = 0; i < data.choices_lambda; i++) data.cumulative_costs[i] = 0; @@ -881,12 +884,6 @@ void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; - copy_example_to_adf(data, ec); - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - data.cbls[a].costs = data.adf_data.ecs[a].l.cb.costs; - data.cbl_empty->costs = data.adf_data.empty_example->l.cb.costs; - //best_action = predict_sublearner(data, base, argmin); uint32_t best_action = predict_cs_adf(data, base); @@ -916,11 +913,11 @@ void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, b //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; - copy_example_to_adf(data, ec); + //copy_example_to_adf(data, ec); - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - data.cbls[a].costs = data.adf_data.ecs[a].l.cb.costs; - data.cbl_empty->costs = data.adf_data.empty_example->l.cb.costs; + //for (size_t a = 0; a < data.adf_data.num_actions; ++a) + // data.cbls[a].costs = data.adf_data.ecs[a].l.cb.costs; + //data.cbl_empty->costs = data.adf_data.empty_example->l.cb.costs; //size_t pred_pi = predict_cs_adf(data, base, ec); uint32_t idx = predict_bandit_adf(data, base); @@ -937,12 +934,12 @@ void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, b auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; lab.costs.push_back(cl); - if (is_update) learn_bandit_adf(data, base, ec_type); accumulate_variance_adf(data, base); + lab.costs.delete_v(); ec.pred.multiclass = cl.action; } @@ -952,6 +949,15 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.warm_start_iter == 0 && data.bandit_iter == 0) setup_lambdas(data, ec); + copy_example_to_adf(data, ec); + + // As we will be processing the examples with cs or cb labels, + // we need to store the default cb label so that the next time we call copy_example_to_adf + // we can free it successfully (that is 
the whole purpose of data.cbls) + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.cbls[a].costs = data.adf_data.ecs[a].l.cb.costs; + data.cbl_empty->costs = data.adf_data.empty_example->l.cb.costs; + if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly { if (data.warm_start_type == SUPERVISED_WS) @@ -964,12 +970,12 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) else if (data.bandit_iter < data.bandit_period) // call the bandit learner { predict_or_learn_bandit_adf(data, base, ec, data.ind_bandit, BANDIT); - data.bandit_iter++; if (data.bandit_iter == data.bandit_period) { cout<<"Ideal average variance = "<pred.a_s.delete_v(); free(data.adf_data.ecs); free(data.adf_data.empty_example); + //TODO: Use CB::cb_label.delete_label / CS here for (size_t a = 0; a < data.adf_data.num_actions; ++a) data.csls[a].costs.delete_v(); @@ -355,7 +356,6 @@ uint32_t find_min(v_array arr) argmin = i; } } - //cout<<"argmin = "<cost_sensitive->predict(*empty, argmin); - - //float best_score; - //for (size_t a = 0; a < data.adf_data.num_actions; ++a) - //{ - // if ( (a == 0) || (ecs[a].partial_prediction < best_score) ) - // { - // best_action = a + 1; - // best_score = ecs[a].partial_prediction; - // } - //} - //best_action_dir = ecs[0].pred.a_s[0].action+1; - //assert(best_action == best_action_dir); uint32_t pred_action = ecs[0].pred.a_s[0].action+1; //Need to clear the prediction, otherwise there will be a memory leak @@ -396,17 +384,51 @@ uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) } +void convert_mc_to_cs(cbify& data, example& ec) +{ + //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) + COST_SENSITIVE::label& csl = *data.csls; + size_t label = ec.l.multi.label; -void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) + for (uint32_t j = 0; j < data.num_actions; j++) + { + csl.costs[j].class_index = j+1; + csl.costs[j].x = loss(data, label, j+1); + } + ec.l.cs = csl; +} + +size_t predict_sublearner_noadf(cbify& data, example& ec, uint32_t i) { + //For vw's internal reason, we need to first have a cs label before + //using csoaa to predict + MULTICLASS::label_t ld = ec.l.multi; + convert_mc_to_cs(data, ec); + data.all->cost_sensitive->predict(ec, i); + ec.l.multi = ld; + + return ec.pred.multiclass; +} + +size_t predict_cs(cbify& data, example& ec) +{ + uint32_t argmin = find_min(data.cumulative_costs); + //cout<cost_sensitive->predict(ec, i); - if (ec.pred.multiclass == cl.action) + uint32_t action = predict_sublearner_noadf(data, ec, i); + + if (action == cl.action) data.cumulative_costs[i] += cl.cost / cl.probability; //cout<cost_sensitive->predict(ec_valid, i); - - //cout<cost_sensitive->predict(ec, argmin); - - //cout<(), "probability of label corruption in the bandit part") ("corrupt_type_supervised", po::value(), "type of label corruption in the supervised part (1 is uar, 2 is circular)") ("corrupt_type_bandit", po::value(), "probability of label corruption in the bandit part (1 is uar, 2 is circular)") - ("validation_method", po::value(), "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised and amortizing)") + ("validation_method", po::value(), "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised)") ("weighting_scheme", po::value(), "weighting scheme (1 is per instance weighting, 2 is per dataset weighting (where we use a diminishing weighting 
scheme) )") ("lambda_scheme", po::value(), "Lambda set scheme (1 is expanding based on center 0.5, 2 is expanding based on center=minimax lambda, 3 is expanding based on center=minimax lambda along with forcing 0,1 in Lambda )") ("overwrite_label", po::value(), "the label type 3 corruptions (overwriting) turn to") From 7b6e2ba4ece9b8f7b05f9c051b236990fd08cb95 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 28 May 2018 16:58:03 -0400 Subject: [PATCH 078/127] begin changing the cb learning w/o adf part --- vowpalwabbit/cbify.cc | 190 +++++++++++++++++++++--------------------- 1 file changed, 94 insertions(+), 96 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 3da7f8c9212..8e9180dd955 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -383,7 +383,6 @@ uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) } - void convert_mc_to_cs(cbify& data, example& ec) { //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) @@ -410,59 +409,10 @@ size_t predict_sublearner_noadf(cbify& data, example& ec, uint32_t i) return ec.pred.multiclass; } -size_t predict_cs(cbify& data, example& ec) -{ - uint32_t argmin = find_min(data.cumulative_costs); - //cout<(); ec.pred.a_s = data.a_s; uint32_t argmin = find_min(data.cumulative_costs); base.predict(ec, argmin); - data.pred = ec.pred; + //data.pred = ec.pred; uint32_t action = data.mwt_explorer->Choose_Action(*data.generic_explorer, StringUtils::to_string(data.example_counter++), ec); + ec.l.cb.costs.delete_v(); return action; @@ -567,45 +575,34 @@ void predict_or_learn_bandit(cbify& data, base_learner& base, example& ec, size_ { MULTICLASS::label_t ld = ec.l.multi; uint32_t action = predict_bandit(data, base, ec); + data.mc_pred = action; - //CB::cb_class cl; - generate_corrupted_cb(data, ec, cl, ld, action, data.corrupted_label); - //convert_mc_to_cb(data, ec, action); + convert_mc_to_cb(data, ec, action); - //Create a new cb label - data.cb_label.costs.push_back(cl); - ec.l.cb = data.cb_label; //make sure the prediction here is a cb prediction - ec.pred = data.pred; + //ec.pred = data.pred; - bool is_update; - if (ec_type == SUPERVISED) - is_update = data.ind_supervised; - else - is_update = data.ind_bandit; - - if (is_update) + if (ind_update(data, ec_type)) learn_bandit(data, base, ec, ec_type); - data.a_s.erase(); + //data.a_s.erase(); data.a_s = ec.pred.a_s; ec.l.multi = ld; ec.pred.multiclass = action; - data.mc_pred = ec.pred.multiclass; } void add_to_sup_validation(cbify& data, example& ec) { - // NOTE WELL: for convenience in supervised validation, we intentionally use a cost-sensitive label as opposed to - // a multiclass label. This is because the csoaa learner needs a cost-sensitive label to predict (for vw's internal reasons). 
- MULTICLASS::label_t ld = ec.l.multi; ec.l.multi.label = data.corrupted_label; - example& ec_copy = data.supervised_validation[data.warm_start_iter]; - VW::copy_example_data(false, &ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); + example* ec_copy = calloc_or_throw(1); + VW::copy_example_data(false, ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); ec.l.multi = ld; - + // I believe we cannot directly do push_back(ec), as the label won't be deeply copied and that space will be + // reallocated when the example fall out of the predict_or_learn scope + data.supervised_validation.push_back(*ec_copy); + free(ec_copy); } void accumulate_costs_ips(cbify& data, example& ec) @@ -1079,7 +1076,8 @@ base_learner* cbify_setup(vw& all) if (data.validation_method == SUPERVISED_VALI) { - data.supervised_validation = calloc_or_throw(data.warm_start_period); + data.supervised_validation = v_init(); + //calloc_or_throw(data.warm_start_period); } @@ -1107,14 +1105,16 @@ base_learner* cbify_setup(vw& all) } else { - data.csls = calloc_or_throw(1); - auto& csl = data.csls[0]; + //data.csls = calloc_or_throw(1); + //auto& csl = data.csls[0]; - csl.costs = v_init(); + data.cs_label.costs = v_init(); //Note: these two lines are important, otherwise the cost sensitive vector seems to be unbounded. for (size_t a = 0; a < num_actions; ++a) - csl.costs.push_back({0, a+1, 0, 0}); + data.cs_label.costs.push_back({0, a+1, 0, 0}); + + data.cb_label.costs.push_back({0, 1, 0, 0}); } From 0f3b946bb5e3db95b1c7b2b404cd27edaca429ac Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 28 May 2018 20:01:14 -0400 Subject: [PATCH 080/127] before cleaning up adf --- vowpalwabbit/cbify.cc | 286 +++++++++++++++++++++--------------------- 1 file changed, 145 insertions(+), 141 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 0a40cc32918..ac1e4101805 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -267,6 +267,48 @@ bool ind_update(cbify& data, size_t ec_type) return data.ind_bandit; } +float compute_weight_multiplier(cbify& data, size_t i, size_t ec_type) +{ + float weight_multiplier; + + if (ec_type == SUPERVISED) + { + if (data.lambdas[i] >= 0.5) + weight_multiplier = (1 - data.lambdas[i]) / data.lambdas[i]; + else + weight_multiplier = 1; + } + else + { + if (data.lambdas[i] >= 0.5) + weight_multiplier = 1; + else + weight_multiplier = data.lambdas[i] / (1-data.lambdas[i]); + + if (data.weighting_scheme == DATASET_WT) + weight_multiplier = weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); + } + return weight_multiplier; +} + +uint32_t find_min(v_array arr) +{ + float min_val = FLT_MAX; + uint32_t argmin = 0; + + for (uint32_t i = 0; i < arr.size(); i++) + { + //cout< arr) -{ - float min_val = FLT_MAX; - uint32_t argmin = 0; - - for (uint32_t i = 0; i < arr.size(); i++) - { - //cout<cost_sensitive->predict(ecs[a], argmin); - } - base.predict(*empty, i); - //data.all->cost_sensitive->predict(*empty, argmin); - - uint32_t pred_action = ecs[0].pred.a_s[0].action+1; - - //Need to clear the prediction, otherwise there will be a memory leak - ecs[0].pred.a_s.delete_v(); - - return pred_action; - -} - void convert_mc_to_cs(cbify& data, example& ec) { //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) @@ -419,76 +420,6 @@ size_t predict_sublearner_noadf(cbify& data, example& ec, uint32_t i) return ec.pred.multiclass; } -void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, 
base_learner& base) -{ - - if (data.validation_method == BANDIT_VALI) - { - uint32_t best_action; - - //IPS for approximating the cumulative costs for all lambdas - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - best_action = predict_sublearner(data, base, i); - - if (best_action == cl.action) - data.cumulative_costs[i] += cl.cost / cl.probability; - - //cout<= 0.5) - weight_multiplier = (1 - data.lambdas[i]) / data.lambdas[i]; - else - weight_multiplier = 1; - } - else - { - if (data.lambdas[i] >= 0.5) - weight_multiplier = 1; - else - weight_multiplier = data.lambdas[i] / (1-data.lambdas[i]); - - if (data.weighting_scheme == DATASET_WT) - weight_multiplier = weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); - } - return weight_multiplier; -} size_t predict_cs(cbify& data, example& ec) { @@ -710,6 +641,29 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) } } + +uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) +{ + example* ecs = data.adf_data.ecs; + example* empty = data.adf_data.empty_example; + + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + base.predict(ecs[a], i); + //data.all->cost_sensitive->predict(ecs[a], argmin); + } + base.predict(*empty, i); + //data.all->cost_sensitive->predict(*empty, argmin); + + uint32_t pred_action = ecs[0].pred.a_s[0].action+1; + + //Need to clear the prediction, otherwise there will be a memory leak + ecs[0].pred.a_s.delete_v(); + + return pred_action; +} + size_t predict_cs_adf(cbify& data, base_learner& base) { uint32_t argmin = find_min(data.cumulative_costs); @@ -719,6 +673,83 @@ size_t predict_cs_adf(cbify& data, base_learner& base) return best_action; } + +void add_to_sup_validation_adf(cbify& data, example& ec) +{ + //cout<(1); + VW::copy_example_data(false, ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); + data.supervised_validation.push_back(*ec_copy); + free(ec_copy); +} + + +void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_learner& base) +{ + + if (data.validation_method == BANDIT_VALI) + { + uint32_t best_action; + + //IPS for approximating the cumulative costs for all lambdas + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + best_action = predict_sublearner(data, base, i); + + if (best_action == cl.action) + data.cumulative_costs[i] += cl.cost / cl.probability; + + //cout<l.cs = *csl_empty; @@ -835,34 +866,6 @@ void learn_bandit_adf(cbify& data, base_learner& base, size_t ec_type) ecs[a].weight = data.old_weights[a]; } -void accumulate_variance_adf(cbify& data, base_learner& base) -{ - auto& out_ec = data.adf_data.ecs[0]; - - data.a_s.erase(); - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - data.a_s.push_back({out_ec.pred.a_s[a].action, out_ec.pred.a_s[a].score}); - - size_t pred_best_approx = predict_cs_adf(data, base); - float temp_variance; - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - if (pred_best_approx == data.a_s[a].action + 1) - temp_variance = 1.0 / data.a_s[a].score; - - data.cumulative_variance += temp_variance; - - //cout<<"variance at bandit round "<< data.bandit_iter << " = " << temp_variance << endl; - //cout< void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { @@ -1005,7 +1009,7 @@ void init_adf_data(cbify& data, const size_t num_actions) { data.csls[a].costs = v_init(); data.csls[a].costs.push_back({0, a+1, 0, 0}); - cout< Date: Mon, 28 May 2018 22:13:57 -0400 Subject: [PATCH 081/127] mwt explorer kept outputting action 0 
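One plausible way the explorer can keep returning action 0 -- sketched below with a toy sampler rather than the actual MWT explorer internals -- is a degenerate probability vector: if the scores are cleared before sampling, every cumulative-threshold test fails and the sampler falls through to index 0, which the caller then misreads as a real action. This is the failure mode that the if (!cl.action) THROW("No action with non-zero probability found!") guard in a later patch defends against.

#include <cstdio>
#include <vector>

// Toy pdf sampler: walk the cumulative distribution and return the first
// index whose cumulative mass reaches r. With an all-zero score vector the
// threshold is never reached and control falls through to index 0.
int sample(const std::vector<float>& scores, float r)
{
  float cum = 0.f;
  for (int i = 0; i < (int)scores.size(); ++i)
  {
    cum += scores[i];
    if (r <= cum) return i;
  }
  return 0;  // degenerate pdf: indistinguishable from legitimately drawing 0
}

int main()
{
  std::vector<float> degenerate(4, 0.f);  // no probability mass anywhere
  int idx = sample(degenerate, 0.5f);
  if (degenerate[idx] == 0.f)
    printf("no action with non-zero probability found\n");
  return 0;
}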
--- vowpalwabbit/cbify.cc | 195 +++++++++++++++++++++++------------------- 1 file changed, 109 insertions(+), 86 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index ac1e4101805..67a0113a3b8 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -116,6 +116,14 @@ struct cbify }; +template +void deep_copy_array(v_array& dst, v_array& src) +{ + dst.erase(); + for (size_t i = 0; i < src.size(); ++i) + dst.push_back(src[i]); +} + float minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period, size_t bandit_period, size_t dim) { /* @@ -352,6 +360,8 @@ void finish(cbify& data) free(data.cbl_empty); free(data.old_weights); + + CB::cb_label.delete_label(&data.cb_label); } else { @@ -477,6 +487,7 @@ uint32_t predict_bandit(cbify& data, base_learner& base, example& ec) { // we need the cb cost array to be an empty array to make cb prediction ec.l.cb.costs = v_init(); + // TODO: not sure why we need the following sentence ec.pred.a_s = data.a_s; uint32_t argmin = find_min(data.cumulative_costs); @@ -641,9 +652,10 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) } } - -uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) +uint32_t predict_sublearner_adf(cbify& data, base_learner& base, example& ec, uint32_t i) { + //copy_example_to_adf(data, ec); + example* ecs = data.adf_data.ecs; example* empty = data.adf_data.empty_example; @@ -664,13 +676,10 @@ uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) return pred_action; } -size_t predict_cs_adf(cbify& data, base_learner& base) +size_t predict_cs_adf(cbify& data, base_learner& base, example& ec) { uint32_t argmin = find_min(data.cumulative_costs); - - size_t best_action = predict_sublearner(data, base, argmin); - - return best_action; + return predict_sublearner_adf(data, base, ec, argmin); } @@ -684,21 +693,18 @@ void add_to_sup_validation_adf(cbify& data, example& ec) } -void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_learner& base) +void accumulate_costs_ips_adf(cbify& data, base_learner& base, example& ec) { - + CB::cb_class& cl = data.cb_label.costs[0]; if (data.validation_method == BANDIT_VALI) { - uint32_t best_action; - //IPS for approximating the cumulative costs for all lambdas for (uint32_t i = 0; i < data.choices_lambda; i++) { - best_action = predict_sublearner(data, base, i); + uint32_t action = predict_sublearner_adf(data, base, ec, i); - if (best_action == cl.action) + if (action == cl.action) data.cumulative_costs[i] += cl.cost / cl.probability; - //cout<Choose_Action( - *data.generic_explorer, - StringUtils::to_string(data.example_counter++), out_ec) - 1; - - return idx; - -} - void multiclass_to_cs_adf(cbify& data, COST_SENSITIVE::label* csls, size_t corrupted_label) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) @@ -830,6 +810,61 @@ void learn_cs_adf(cbify& data, size_t ec_type) ecs[a].weight = data.old_weights[a]; } +void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, size_t ec_type) +{ + //Store the multiclass input label + MULTICLASS::label_t ld = ec.l.multi; + + uint32_t best_action = predict_cs_adf(data, base, ec); + data.mc_pred = best_action; + + //data.all->cost_sensitive->predict(ec,argmin); + + //generate cost-sensitive label + // ecs[a].weight *= 1; + // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; + size_t corrupted_label = corrupt_action(ld.label, data, ec_type); + generate_corrupted_cs_adf(data, ld, corrupted_label); + + if 
(ind_update(data, ec_type)) + learn_cs_adf(data, ec_type); + + ec.pred.multiclass = best_action; + ec.l.multi = ld; + + //a hack here - allocated memories not deleted + //to be corrected + //if (data.validation_method == SUPERVISED_VALI) + // add_to_sup_validation_adf(data, ec); +} + +size_t predict_bandit_adf(cbify& data, base_learner& base, example& ec) +{ + //copy_example_to_adf(data, ec); + + example* ecs = data.adf_data.ecs; + example* empty_example = data.adf_data.empty_example; + + uint32_t argmin = find_min(data.cumulative_costs); + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + base.predict(ecs[a], argmin); + } + base.predict(*empty_example, argmin); + + // get output scores + auto& out_ec = data.adf_data.ecs[0]; + uint32_t idx = data.mwt_explorer->Choose_Action( + *data.generic_explorer, + StringUtils::to_string(data.example_counter++), out_ec) - 1; + + deep_copy_array(data.a_s, out_ec.pred.a_s); + + return idx; + +} + void generate_corrupted_cb_adf(cbify& data, CB::cb_class& cl, MULTICLASS::label_t& ld, size_t idx, size_t corrupted_label) { auto& out_ec = data.adf_data.ecs[0]; @@ -866,36 +901,7 @@ void learn_bandit_adf(cbify& data, base_learner& base, size_t ec_type) ecs[a].weight = data.old_weights[a]; } -void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) -{ - //Store the multiclass input label - MULTICLASS::label_t ld = ec.l.multi; - - //best_action = predict_sublearner(data, base, argmin); - uint32_t best_action = predict_cs_adf(data, base); - - //data.all->cost_sensitive->predict(ec,argmin); - - //generate cost-sensitive label - // ecs[a].weight *= 1; - // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; - size_t corrupted_label = corrupt_action(ld.label, data, ec_type); - generate_corrupted_cs_adf(data, ld, corrupted_label); - - if (is_update) - learn_cs_adf(data, ec_type); - - ec.pred.multiclass = best_action; - ec.l.multi = ld; - - //a hack here - allocated memories not deleted - //to be corrected - if (data.validation_method == SUPERVISED_VALI) - add_to_sup_validation_adf(data, ec); -} - - -void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) +void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, size_t ec_type) { //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; @@ -907,24 +913,25 @@ void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, b //data.cbl_empty->costs = data.adf_data.empty_example->l.cb.costs; //size_t pred_pi = predict_cs_adf(data, base, ec); - uint32_t idx = predict_bandit_adf(data, base); + uint32_t idx = predict_bandit_adf(data, base, ec); + data.mc_pred = idx; - CB::cb_class cl; + CB::cb_class& cl = data.cb_label.costs[0]; size_t corrupted_label = corrupt_action(ld.label, data, ec_type); generate_corrupted_cb_adf(data, cl, ld, idx, corrupted_label); // accumulate the cumulative costs of lambdas - accumulate_costs_ips_adf(data, ec, cl, base); + //accumulate_costs_ips_adf(data, ec, cl, base); // add cb label to chosen action auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; lab.costs.push_back(cl); - if (is_update) + if (ind_update(data, ec_type)) learn_bandit_adf(data, base, ec_type); - accumulate_variance_adf(data, base); + //accumulate_variance_adf(data, base, ec); lab.costs.delete_v(); ec.pred.multiclass = cl.action; @@ -948,22 +955,33 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.warm_start_iter < 
data.warm_start_period) // Call the cost-sensitive learner directly { + data.corrupted_label = corrupt_action(ec.l.multi.label, data, SUPERVISED); + if (data.warm_start_type == SUPERVISED_WS) - predict_or_learn_cs_adf(data, base, ec, data.ind_supervised, SUPERVISED); + predict_or_learn_cs_adf(data, base, ec, SUPERVISED); else - predict_or_learn_bandit_adf(data, base, ec, data.ind_supervised, SUPERVISED); + predict_or_learn_bandit_adf(data, base, ec, SUPERVISED); + + if (data.validation_method == SUPERVISED_VALI) + add_to_sup_validation_adf(data, ec); + ec.weight = 0; + ec.pred.multiclass = data.mc_pred; data.warm_start_iter++; } else if (data.bandit_iter < data.bandit_period) // call the bandit learner { - predict_or_learn_bandit_adf(data, base, ec, data.ind_bandit, BANDIT); + data.corrupted_label = corrupt_action(ec.l.multi.label, data, BANDIT); + predict_or_learn_bandit_adf(data, base, ec, BANDIT); + data.bandit_iter++; if (data.bandit_iter == data.bandit_period) { cout<<"Ideal average variance = "<= 1 && abs( log2(data.bandit_iter+1) - floor(log2(data.bandit_iter+1)) ) < 1e-4 ) { + uint32_t total_epoch_num = ceil(log2(data.bandit_period)); + uint32_t epoch_num = log2(data.bandit_iter+1) - 1; + uint32_t sup_train_size = data.warm_start_period / 2; + uint32_t sup_vali_size = data.warm_start_period - sup_train_size; + float batch_vali_size = ((float) sup_vali_size) / total_epoch_num; + uint32_t lb, ub; + for (uint32_t i = 0; i < data.choices_lambda; i++) data.cumulative_costs[i] = 0; + if (data.validation_method == SUPERVISED_VALI_SPLIT) + { + lb = sup_train_size + ceil(batch_vali_size * epoch_num); + ub = sup_train_size + ceil(batch_vali_size * (epoch_num + 1)); + } + else + { + lb = sup_train_size; + ub = sup_train_size + sup_vali_size; + } + + //cout<<"updating validation error on supervised data: " << data.bandit_iter / data.warm_start_period << endl; for (uint32_t i = 0; i < data.choices_lambda; i++) { - for (uint32_t j = 0; j < data.warm_start_period; j++) + for (uint32_t j = lb; j < ub; j++) { example& ec_valid = data.supervised_validation[j]; uint32_t pred_label = predict_sublearner_adf(data, base, ec_valid, i); @@ -726,12 +744,6 @@ void accumulate_costs_ips_adf(cbify& data, example& ec, base_learner& base) void accumulate_variance_adf(cbify& data, base_learner& base, example& ec) { - auto& out_ec = data.adf_data.ecs[0]; - - data.a_s.erase(); - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - data.a_s.push_back({out_ec.pred.a_s[a].action, out_ec.pred.a_s[a].score}); - size_t pred_best_approx = predict_cs_adf(data, base, ec); float temp_variance; @@ -825,22 +837,14 @@ void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool //a hack here - allocated memories not deleted //to be corrected - if (data.validation_method == SUPERVISED_VALI) + if (data.validation_method != BANDIT_VALI) add_to_sup_validation_adf(data, ec); } -size_t predict_bandit_adf(cbify& data, base_learner& base) +size_t predict_bandit_adf(cbify& data, base_learner& base, example& ec) { - example* ecs = data.adf_data.ecs; - example* empty_example = data.adf_data.empty_example; - uint32_t argmin = find_min(data.cumulative_costs); - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - base.predict(ecs[a], argmin); - } - base.predict(*empty_example, argmin); + predict_sublearner_adf(data, base, ec, argmin); // get output scores auto& out_ec = data.adf_data.ecs[0]; @@ -848,6 +852,10 @@ size_t predict_bandit_adf(cbify& data, base_learner& base) *data.generic_explorer, 
StringUtils::to_string(data.example_counter++), out_ec) - 1; + data.a_s.erase(); + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.a_s.push_back({out_ec.pred.a_s[a].action, out_ec.pred.a_s[a].score}); + return idx; } @@ -900,7 +908,7 @@ void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, b //data.cbl_empty->costs = data.adf_data.empty_example->l.cb.costs; //size_t pred_pi = predict_cs_adf(data, base, ec); - uint32_t idx = predict_bandit_adf(data, base); + uint32_t idx = predict_bandit_adf(data, base, ec); CB::cb_class cl; @@ -966,7 +974,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) ec.pred.multiclass = 0; ec.weight = 0; } - + //data.adf_data.ecs[0].pred.a_s.erase(); for (size_t a = 0; a < data.adf_data.num_actions; ++a) data.adf_data.ecs[a].l.cb.costs = data.cbls[a].costs; data.adf_data.empty_example->l.cb.costs = data.cbl_empty->costs; @@ -1076,7 +1084,7 @@ base_learner* cbify_setup(vw& all) //cout<<"does epsilon exist?"<rank_all, "rank_all", "Return actions sorted by score order") (ld->no_predict, "no_predict", "Do not do a prediction when training") - .keep("cb_type", type_string, (string)"ips", "contextual bandit method to use in {ips,dm,dr, mtr}").missing()) + .keep("cb_type", type_string, (string)"ips", "contextual bandit method to use in {ips,dm,dr, mtr}") + ("cbify", ld->gen_cs.num_actions, 1U, "number of actions") + .missing()) return nullptr; ld->all = arg.all; - cb_to_cs_adf& c = ld.gen_cs; - c.num_actions = (uint32_t)(all.vm["cbify"].as()); + //cb_to_cs_adf& c = ld.gen_cs; + //c.num_actions = (uint32_t)(all.vm["cbify"].as()); // number of weight vectors needed size_t problem_multiplier = 1;//default for IPS diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 6aada60f6a4..5bfd6759c84 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -4,7 +4,7 @@ #include "rand48.h" #include "bs.h" #include "vw.h" -#include "hash.h" +#include "../explore/hash.h" #include "explore.h" #include @@ -59,20 +59,21 @@ struct cbify float loss0; float loss1; + //warm start parameters uint32_t ws_period; uint32_t inter_period; uint32_t choices_lambda; bool upd_ws; bool upd_inter; - uint32_t cor_type_ws; + int cor_type_ws; float cor_prob_ws; - uint32_t cor_type_inter; + int cor_type_inter; float cor_prob_inter; - uint32_t vali_method; - uint32_t wt_scheme; - uint32_t lambda_scheme; + int vali_method; + int wt_scheme; + int lambda_scheme; uint32_t overwrite_label; - uint32_t ws_type; + int ws_type; //auxiliary variables uint32_t num_actions; @@ -81,6 +82,15 @@ struct cbify action_scores a_s_adf; vector cumulative_costs; CB::cb_class cl_adf; + uint32_t ws_train_size; + uint32_t ws_vali_size; + vector ws_vali; + float cumu_var; + uint32_t ws_iter; + uint32_t inter_iter; + MULTICLASS::label_t mc_label; + COST_SENSITIVE::label* csls; + COST_SENSITIVE::label* csl_empty; }; @@ -94,12 +104,35 @@ float loss(cbify& data, uint32_t label, uint32_t final_prediction) template inline void delete_it(T* p) { if (p != nullptr) delete p; } +template +uint32_t find_min(vector arr) +{ + T min_val = FLT_MAX; + uint32_t argmin = 0; + + for (uint32_t i = 0; i < arr.size(); i++) + { + //cout<random_state); - if (randf < corrupt_prob) + if (randf < cor_prob) { - if (corrupt_type == UAR) - return generate_uar_action(data); - else if (corrupt_type == OVERWRITE) - return data.overwrite_label; + if (cor_type == UAR) + cor_action = generate_uar_action(data); + else if (cor_type == OVERWRITE) + cor_action = data.overwrite_label; else - 
return (action % data.num_actions) + 1; + cor_action = (action % data.num_actions) + 1; } else - return action; + cor_action = action; + return cor_action; } -bool ind_update(cbify& data, size_t ec_type) +bool ind_update(cbify& data, int ec_type) { if (ec_type == WARM_START) return data.upd_ws; else - return data.upd_bandit; + return data.upd_inter; } -float compute_weight_multiplier(cbify& data, size_t i, size_t ec_type) +float compute_weight_multiplier(cbify& data, size_t i, int ec_type) { float weight_multiplier; - float ws_train_size = data.warm_start_train_size; - float intr_train_size = data.bandit_period; + float ws_train_size = data.ws_train_size; + float inter_train_size = data.inter_period; - if (data.vali_method != BANDIT_VALI) + if (data.vali_method != INTER_VALI) { - if (ec_type == SUPERVISED && data.warm_start_iter >= ws_train_size) + if (ec_type == WARM_START && data.ws_iter >= ws_train_size) return 0.0; } - float total_size = ws_train_size + intr_train_size; - if (data.weighting_scheme == INSTANCE_WT) + float total_train_size = ws_train_size + inter_train_size; + if (data.wt_scheme == INSTANCE_WT) { - if (ec_type == SUPERVISED) - weight_multiplier = (1-data.lambdas[i]) * total_size / ws_train_size; + if (ec_type == WARM_START) + weight_multiplier = (1-data.lambdas[i]) * total_train_size / ws_train_size; else - weight_multiplier = data.lambdas[i] * total_size / intr_train_size; + weight_multiplier = data.lambdas[i] * total_train_size / inter_train_size; } else { - float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * intr_train_size; + float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * inter_train_size; - if (ec_type == SUPERVISED) - weight_multiplier = (1-data.lambdas[i]) * total_size / total_weight; + if (ec_type == WARM_START) + weight_multiplier = (1-data.lambdas[i]) * total_train_size / total_weight; else - weight_multiplier = data.lambdas[i] * total_size / total_weight; + weight_multiplier = data.lambdas[i] * total_train_size / total_weight; } return weight_multiplier; } -template -uint32_t find_min(vector arr) -{ - T min_val = FLT_MAX; - uint32_t argmin = 0; - - for (uint32_t i = 0; i < arr.size(); i++) - { - //cout< void predict_or_learn(cbify& data, single_learner& base, example& ec) @@ -328,7 +345,7 @@ void predict_or_learn(cbify& data, single_learner& base, example& ec) ec.pred.multiclass = chosen_action + 1; } -uint32_t predict_sublearner_adf(cbify& data, base_learner& base, example& ec, uint32_t i) +uint32_t predict_sublearner_adf(cbify& data, single_learner& base, example& ec, uint32_t i) { copy_example_to_adf(data, ec); @@ -342,7 +359,7 @@ uint32_t predict_sublearner_adf(cbify& data, base_learner& base, example& ec, ui return ecs[0].pred.a_s[0].action+1; } -void accumu_costs_iv_adf(cbify& data, base_learner& base, example& ec) +void accumu_costs_iv_adf(cbify& data, single_learner& base, example& ec) { CB::cb_class& cl = data.cl_adf; //IPS for approximating the cumulative costs for all lambdas @@ -359,6 +376,7 @@ void accumu_costs_iv_adf(cbify& data, base_learner& base, example& ec) void accumu_costs_wsv_adf(cbify& data, single_learner& base) { + uint32_t ws_vali_size = data.ws_vali_size; //only update cumulative costs every warm_start_period iterations if ( data.inter_iter >= 1 && abs( log2(data.inter_iter+1) - floor(log2(data.inter_iter+1)) ) < 1e-4 ) { @@ -367,11 +385,10 @@ void accumu_costs_wsv_adf(cbify& data, single_learner& base) uint32_t num_epochs = ceil(log2(data.inter_period)); uint32_t epoch = 
log2(data.inter_iter+1) - 1; - //uint32_t ws_vali_size = data.warm_start_period - data.warm_start_train_size; float batch_vali_size = ((float) ws_vali_size) / num_epochs; uint32_t lb, ub; - if (data.vali_method == SUPERVISED_VALI_SPLIT) + if (data.vali_method == WS_VALI_SPLIT) { lb = ceil(batch_vali_size * epoch); ub = ceil(batch_vali_size * (epoch + 1)); @@ -396,11 +413,78 @@ void accumu_costs_wsv_adf(cbify& data, single_learner& base) } } +void add_to_vali(cbify& data, example& ec) +{ + //if this does not work, we can try declare ws_vali as an array + example ec_copy; + VW::copy_example_data(false, &ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); + data.ws_vali.push_back(ec_copy); +} + +uint32_t predict_cs_adf(cbify& data, single_learner& base, example& ec) +{ + uint32_t argmin = find_min(data.cumulative_costs); + return predict_sublearner_adf(data, base, ec, argmin); +} + +void learn_cs_adf(cbify& data, single_learner& base, example& ec, int ec_type) +{ + //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) + auto& csls = data.csls; + auto& csl_empty = data.csl_empty; + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + csls[a].costs[0].class_index = a+1; + csls[a].costs[0].x = loss(data, ec.l.multi.label, a+1); + } + + copy_example_to_adf(data, ec); + example* ecs = data.adf_data.ecs; + example* empty_example = data.adf_data.empty_example; + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + ecs[a].l.cs = csls[a]; + //cout<l.cs = *csl_empty; + + vector old_weights; + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + old_weights[a] = ecs[a].weight; + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + float weight_multiplier = compute_weight_multiplier(data, i, ec_type); + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + ecs[a].weight = old_weights[a] * weight_multiplier; + data.all->cost_sensitive->learn(ecs[a],i); + } + data.all->cost_sensitive->learn(*empty_example,i); + } + //Seems like we don't need to set the weights back as this example will be + //discarded anyway + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + ecs[a].weight = old_weights[a]; +} + +void predict_or_learn_cs_adf(cbify& data, single_learner& base, example& ec, int ec_type) +{ + uint32_t action = predict_cs_adf(data, base, ec); + + if (ind_update(data, ec_type)) + learn_cs_adf(data, base, ec, ec_type); + + ec.pred.multiclass = action; +} + + uint32_t predict_bandit_adf(cbify& data, single_learner& base, example& ec) { - copy_example_to_adf(data, ec); uint32_t argmin = find_min(data.cumulative_costs); + copy_example_to_adf(data, ec); for (size_t a = 0; a < data.adf_data.num_actions; ++a) { base.predict(data.adf_data.ecs[a], argmin); @@ -413,28 +497,33 @@ uint32_t predict_bandit_adf(cbify& data, single_learner& base, example& ec) if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(out_ec.pred.a_s), end_scores(out_ec.pred.a_s), chosen_action)) THROW("Failed to sample from pdf"); - copy_array(data.a_s_adf, out_ec.pred.a_s); + auto& a_s = data.a_s_adf; + copy_array(a_s, out_ec.pred.a_s); - CB::cb_class cl; + auto& cl = data.cl_adf; cl.action = a_s[chosen_action].action + 1; cl.probability = a_s[chosen_action].score; if(!cl.action) THROW("No action with non-zero probability found!"); - cl.cost = loss(data, ld.label, cl.action); + cl.cost = loss(data, ec.l.multi.label, cl.action); ec.pred.multiclass = cl.action; return chosen_action; } -uint32_t learn_bandit_adf(cbify& data, single_learner& 
base, example& ec, uint32_t ec_type, uint32_t chosen_action, action_scores& a_s) +void learn_bandit_adf(cbify& data, single_learner& base, example& ec, int ec_type, uint32_t chosen_action, action_scores& a_s) { //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; + copy_example_to_adf(data, ec); + example* ecs = data.adf_data.ecs; + example* empty_example = data.adf_data.empty_example; // add cb label to chosen action + auto& cl = data.cl_adf; auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; lab.costs.push_back(cl); @@ -447,10 +536,10 @@ uint32_t learn_bandit_adf(cbify& data, single_learner& base, example& ec, uint32 float weight_multiplier = compute_weight_multiplier(data, i, ec_type); for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - ecs[a].weight = data.old_weights[a] * weight_multiplier; - base.learn(data.adf_data.ecs[a]); + ecs[a].weight = old_weights[a] * weight_multiplier; + base.learn(ecs[a]); } - base.learn(*data.adf_data.empty_example); + base.learn(*empty_example); } for (size_t a = 0; a < data.adf_data.num_actions; ++a) @@ -459,60 +548,71 @@ uint32_t learn_bandit_adf(cbify& data, single_learner& base, example& ec, uint32 //ec.pred.multiclass = cl.action; } -void predict_or_learn_bandit_adf(cbify& data, single_learner& base, example& ec, uint32_t ec_type) +void predict_or_learn_bandit_adf(cbify& data, single_learner& base, example& ec, int ec_type) { uint32_t action = predict_bandit_adf(data, base, ec); - if (ec_type == INTER && data.vali_method == INTER_VALI) + if (ec_type == INTERACTION && data.vali_method == INTER_VALI) accumu_costs_iv_adf(data, base, ec); if (ind_update(data, ec_type)) learn_bandit_adf(data, base, ec, ec_type, action, data.a_s_adf); - if (ec_type == INTER && (data.vali_method == WS_VALI_SPLIT || data.vali_method == WS_VALI_NOSPLIT)) + if (ec_type == INTERACTION && (data.vali_method == WS_VALI_SPLIT || data.vali_method == WS_VALI_NOSPLIT)) accumu_costs_wsv_adf(data, base); ec.pred.multiclass = action; } +void accumu_var_adf(cbify& data, single_learner& base, example& ec) +{ + size_t pred_best_approx = predict_cs_adf(data, base, ec); + float temp_var; + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + if (pred_best_approx == data.a_s_adf[a].action + 1) + temp_var = 1.0 / data.a_s_adf[a].score; + + data.cumu_var += temp_var; + + //cout<<"variance at bandit round "<< data.bandit_iter << " = " << temp_variance << endl; + //cout< void predict_or_learn_adf(cbify& data, single_learner& base, example& ec) { if (data.ws_iter < data.ws_period) { - data.mc_label = ec.l.multiclass - ec.l.multiclass = corrupt_label(WARM_START) + data.mc_label = ec.l.multi; + ec.l.multi.label = corrupt_action(data, data.mc_label.label, WARM_START); if (data.ws_iter < data.ws_train_size) { if (data.ws_type == SUPERVISED_WS) - predict_or_learn_supervised_adf(WARM_START) + predict_or_learn_cs_adf(data, base, ec, WARM_START); else if (data.ws_type == BANDIT_WS) - predict_or_learn_bandit_adf(WARM_START) + predict_or_learn_bandit_adf(data, base, ec, WARM_START); } else - { - add_to_vali(ec) - } + add_to_vali(data, ec); - ec.l.multiclass = data.mc_label + ec.l.multi = data.mc_label; ec.weight = 0; data.ws_iter++; } else if (data.inter_iter < data.inter_period) { - data.mc_label = ec.l.multiclass - ec.l.multiclass = corrupt_label(INTERACTION) - predict_or_learn_bandit_adf(INTERACTION); - accumulate_variance(); - ec.l.multiclass = data.mc_label + data.mc_label = ec.l.multi; + ec.l.multi.label = corrupt_action(data, data.mc_label.label, INTERACTION); + 
predict_or_learn_bandit_adf(data, base, ec, INTERACTION); + accumu_var_adf(data, base, ec); + ec.l.multi = data.mc_label; data.inter_iter++; } else { ec.weight = 0; } - - } @@ -530,6 +630,15 @@ void init_adf_data(cbify& data, const size_t num_actions) } CB::cb_label.default_label(&adf_data.empty_example->l.cb); adf_data.empty_example->in_use = true; + + data.csls = calloc_or_throw(num_actions); + data.csl_empty = calloc_or_throw(1); + for (uint32_t a=0; a < num_actions; ++a) + { + COST_SENSITIVE::cs_label.default_label(&data.csls[a]); + data.csls[a].costs.push_back({0, a+1, 0, 0}); + } + COST_SENSITIVE::cs_label.default_label(data.csl_empty); } base_learner* cbify_setup(arguments& arg) @@ -542,11 +651,11 @@ base_learner* cbify_setup(arguments& arg) ("loss0", data->loss0, 0.f, "loss for correct label") ("loss1", data->loss1, 1.f, "loss for incorrect label") ("epsilon", data->epsilon, 0.05f, "greedy probability") - ("warm_start", data->ws_period, 0, "number of training examples for warm start") - ("interaction", data->inter_period, 0, "number of training examples for bandit processing") - ("choices_lambda", data->choices_lambda, 1, "numbers of lambdas importance weights to aggregate") - ("warm_start_update", data->ind_ws, true, "indicator of warm start updates") - ("interaction_update", data->ind_inter, true, "indicator of interaction updates") + ("warm_start", data->ws_period, 0U, "number of training examples for warm start") + ("interaction", data->inter_period, 0U, "number of training examples for bandit processing") + ("choices_lambda", data->choices_lambda, 1U, "numbers of lambdas importance weights to aggregate") + ("warm_start_update", data->upd_ws, true, "indicator of warm start updates") + ("interaction_update", data->upd_inter, true, "indicator of interaction updates") ("corrupt_type_warm_start", data->cor_type_ws, UAR, "type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") ("corrupt_prob_warm_start", data->cor_prob_ws, 0.f, "probability of label corruption in the warm start phase") ("corrupt_type_bandit", data->cor_type_inter, UAR, "type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") @@ -554,7 +663,7 @@ base_learner* cbify_setup(arguments& arg) ("validation_method", data->vali_method, INTER_VALI, "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised)") ("weighting_scheme", data->wt_scheme, INSTANCE_WT, "weighting scheme (1 is per instance weighting, 2 is per dataset weighting (where we use a diminishing weighting scheme) )") ("lambda_scheme", data->lambda_scheme, ABS_CENTRAL, "Lambda set scheme (1 is expanding based on center=0.5, 2 is expanding based on center=0.5 and enforcing 0,1 in Lambda, 3 is expanding based on center=minimax lambda, 4 is expanding based on center=minimax lambda and enforcing 0,1 in Lambda )") - ("overwrite_label", data->overwrite_label, 1, "the label type 3 corruptions (overwriting) turn to") + ("overwrite_label", data->overwrite_label, 1U, "the label type 3 corruptions (overwriting) turn to") ("warm_start_type", data->ws_type, SUPERVISED_WS, "the way of utilizing warm start data (1 is using supervised updates, 2 is using contextual bandit updates)").missing()) return nullptr; @@ -568,6 +677,17 @@ base_learner* cbify_setup(arguments& arg) if (data->use_adf) init_adf_data(*data.get(), num_actions); + if (data->vali_method == WS_VALI_SPLIT || data->vali_method == 
WS_VALI_NOSPLIT) + { + data->ws_train_size = ceil(data->ws_period / 2.0); + data->ws_vali_size = data->ws_period - data->ws_train_size; + } + else + { + data->ws_train_size = data->ws_period; + data->ws_vali_size = 0; + } + if (count(arg.args.begin(), arg.args.end(),"--cb_explore") == 0 && !data->use_adf) { arg.args.push_back("--cb_explore"); diff --git a/vowpalwabbit/global_data.h b/vowpalwabbit/global_data.h index d9e2d328cab..8043a3fcfcd 100644 --- a/vowpalwabbit/global_data.h +++ b/vowpalwabbit/global_data.h @@ -26,7 +26,7 @@ namespace po = boost::program_options; #include "learner.h" #include "v_hashmap.h" #include -#include "hash.h" +#include "../explore/hash.h" #include "crossplat_compat.h" #include "error_reporting.h" #include "parser_helper.h" diff --git a/vowpalwabbit/io_buf.h b/vowpalwabbit/io_buf.h index 94d4902da7c..f8553daa2ec 100644 --- a/vowpalwabbit/io_buf.h +++ b/vowpalwabbit/io_buf.h @@ -16,7 +16,7 @@ license as described in the file LICENSE. #include #include #include -#include "hash.h" +#include "../explore/hash.h" #include "vw_exception.h" #include "vw_validate.h" diff --git a/vowpalwabbit/parse_example.cc b/vowpalwabbit/parse_example.cc index d9d64cf71dd..bfb06839119 100644 --- a/vowpalwabbit/parse_example.cc +++ b/vowpalwabbit/parse_example.cc @@ -7,7 +7,7 @@ license as described in the file LICENSE. #include #include #include "parse_example.h" -#include "hash.h" +#include "../explore/hash.h" #include "unique_sort.h" #include "global_data.h" #include "constant.h" diff --git a/vowpalwabbit/parse_primitives.cc b/vowpalwabbit/parse_primitives.cc index a9eaebc781b..c0d1d541f77 100644 --- a/vowpalwabbit/parse_primitives.cc +++ b/vowpalwabbit/parse_primitives.cc @@ -13,7 +13,7 @@ license as described in the file LICENSE. #include #include "parse_primitives.h" -#include "hash.h" +#include "../explore/hash.h" #include "vw_exception.h" bool substring_equal(substring&a, substring&b) diff --git a/vowpalwabbit/vw.h b/vowpalwabbit/vw.h index 7744f9b5768..e764e72b23e 100644 --- a/vowpalwabbit/vw.h +++ b/vowpalwabbit/vw.h @@ -14,7 +14,7 @@ license as described in the file LICENSE. 
#include "global_data.h" #include "example.h" -#include "hash.h" +#include "../explore/hash.h" #include "simple_label.h" #include "parser.h" #include "parse_example.h" From 4c3eed34638a53df33353e0513de45c7c75a0478 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 9 Jul 2018 01:17:16 -0400 Subject: [PATCH 093/127] fixed offset bugs in cb_explore and multiline_predict_or_learn --- scripts/data_gen.py | 6 +- vowpalwabbit/cb_adf.cc | 8 +- vowpalwabbit/cb_explore_adf.cc | 5 + vowpalwabbit/cbify.cc | 209 ++++++++++++++++++++------------- vowpalwabbit/learner.h | 28 ++++- 5 files changed, 158 insertions(+), 98 deletions(-) diff --git a/scripts/data_gen.py b/scripts/data_gen.py index 41bdee73c8f..aa30cb061c2 100644 --- a/scripts/data_gen.py +++ b/scripts/data_gen.py @@ -1,9 +1,9 @@ import random import numpy as np -classes = 10 -m = 100 -kwperclass = 20 +classes = 2 +m = 10 +kwperclass = 2 def gen_keyword(): keyword = np.zeros((classes, m)) diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index 5baacab31e1..bdfc9157d1c 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc @@ -350,15 +350,11 @@ base_learner* cb_adf_setup(arguments& arg) .critical("cb_adf", "Do Contextual Bandit learning with multiline action dependent features.") .keep(ld->rank_all, "rank_all", "Return actions sorted by score order") (ld->no_predict, "no_predict", "Do not do a prediction when training") - .keep("cb_type", type_string, (string)"ips", "contextual bandit method to use in {ips,dm,dr,mtr}") - ("cbify", ld->gen_cs.num_actions, 1U, "number of actions") - .missing()) + .keep("cb_type", type_string, (string)"ips", "contextual bandit method to use in {ips,dm,dr, mtr}").missing()) return nullptr; ld->all = arg.all; - - //cb_to_cs_adf& c = ld.gen_cs; - //c.num_actions = (uint32_t)(all.vm["cbify"].as()); + ld->gen_cs.num_actions = arg.vm["cbify"].as(); // number of weight vectors needed size_t problem_multiplier = 1;//default for IPS diff --git a/vowpalwabbit/cb_explore_adf.cc b/vowpalwabbit/cb_explore_adf.cc index b4ee60fb796..a93ad9043f3 100644 --- a/vowpalwabbit/cb_explore_adf.cc +++ b/vowpalwabbit/cb_explore_adf.cc @@ -263,6 +263,9 @@ void predict_or_learn_first(cb_explore_adf& data, multi_learner& base, multi_ex& template void predict_or_learn_greedy(cb_explore_adf& data, multi_learner& base, multi_ex& examples) { + //cout<<"data offset = "<(base, examples, data.offset); + //cout<<"example feature offset after = "<ft_offset<pred.a_s; uint32_t num_actions = (uint32_t)preds.size(); diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 05b6333e999..860b5220d5d 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -16,6 +16,7 @@ using namespace std; #define WARM_START 1 #define INTERACTION 2 +#define SKIP 3 #define SUPERVISED_WS 1 #define BANDIT_WS 2 @@ -88,6 +89,7 @@ struct cbify uint32_t ws_iter; uint32_t inter_iter; MULTICLASS::label_t mc_label; + COST_SENSITIVE::label cs_label; COST_SENSITIVE::label* csls; COST_SENSITIVE::label* csl_empty; @@ -139,10 +141,10 @@ void finish(cbify& data) data.a_s.delete_v(); if (data.use_adf) { - cout<<"The average variance estimate is:"<& lambdas = data.lambdas; + for (uint32_t i = 0; i= ws_train_size) - return 0.0; - } + //if (data.vali_method != INTER_VALI) + //{ + // if (ec_type == WARM_START && data.ws_iter >= ws_train_size) + // return 0.0; + //} float total_train_size = ws_train_size + inter_train_size; if (data.wt_scheme == INSTANCE_WT) { + float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * 
inter_train_size; + if (ec_type == WARM_START) - weight_multiplier = (1-data.lambdas[i]) * total_train_size / ws_train_size; + weight_multiplier = (1-data.lambdas[i]) * total_train_size / total_weight; else - weight_multiplier = data.lambdas[i] * total_train_size / inter_train_size; + weight_multiplier = data.lambdas[i] * total_train_size / total_weight; } else { - float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * inter_train_size; - if (ec_type == WARM_START) - weight_multiplier = (1-data.lambdas[i]) * total_train_size / total_weight; + weight_multiplier = (1-data.lambdas[i]) * total_train_size / ws_train_size; else - weight_multiplier = data.lambdas[i] * total_train_size / total_weight; + weight_multiplier = data.lambdas[i] * total_train_size / inter_train_size; } return weight_multiplier; } @@ -368,14 +372,17 @@ void predict_or_learn(cbify& data, single_learner& base, example& ec) ec.pred.multiclass = cl.action; } -uint32_t predict_sublearner_adf(cbify& data, single_learner& base, example& ec, uint32_t i) +uint32_t predict_sublearner_adf(cbify& data, multi_learner& base, example& ec, uint32_t i) { + //cout<<"predict using sublearner "<< i <ft_offset; + //multiline_learn_or_predict(base, data.adf_data.ecs, offset, i); + base.predict(data.adf_data.ecs, i); + return data.adf_data.ecs[0]->pred.a_s[0].action+1; } -void accumu_costs_iv_adf(cbify& data, single_learner& base, example& ec) +void accumu_costs_iv_adf(cbify& data, multi_learner& base, example& ec) { CB::cb_class& cl = data.cl_adf; //IPS for approximating the cumulative costs for all lambdas @@ -391,7 +398,7 @@ void accumu_costs_iv_adf(cbify& data, single_learner& base, example& ec) } template -void accumu_costs_wsv_adf(cbify& data, single_learner& base) +void accumu_costs_wsv_adf(cbify& data, multi_learner& base) { uint32_t ws_vali_size = data.ws_vali_size; //only update cumulative costs every warm_start_period iterations @@ -449,13 +456,14 @@ void add_to_vali(cbify& data, example& ec) data.ws_vali.push_back(ec_copy); } -uint32_t predict_cs_adf(cbify& data, single_learner& base, example& ec) +uint32_t predict_sup_adf(cbify& data, multi_learner& base, example& ec) { uint32_t argmin = find_min(data.cumulative_costs); return predict_sublearner_adf(data, base, ec, argmin); } -void learn_cs_adf(cbify& data, single_learner& base, example& ec, int ec_type) +template +void learn_sup_adf(cbify& data, multi_learner& base, example& ec, int ec_type) { copy_example_to_adf(data, ec); //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) @@ -470,42 +478,47 @@ void learn_cs_adf(cbify& data, single_learner& base, example& ec, int ec_type) } for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - data.adf_data.ecs[a].l.cs = csls[a]; + data.adf_data.ecs[a]->l.cs = csls[a]; //cout< old_weights; for (size_t a = 0; a < data.adf_data.num_actions; ++a) - old_weights[a] = ecs[a].weight; + old_weights.push_back(data.adf_data.ecs[a]->weight); for (uint32_t i = 0; i < data.choices_lambda; i++) { float weight_multiplier = compute_weight_multiplier(data, i, ec_type); + //cout<<"weight multiplier in sup = "<weight = old_weights[a] * weight_multiplier; multi_learner* cs_learner = as_multiline(data.all->cost_sensitive); cs_learner->learn(data.adf_data.ecs, i); + + //cout<<"cost-sensitive increment = "<increment<weight = old_weights[a]; } -void predict_or_learn_cs_adf(cbify& data, single_learner& base, example& ec, int ec_type) +template +void predict_or_learn_sup_adf(cbify& data, multi_learner& 
base, example& ec, int ec_type) { - uint32_t action = predict_cs_adf(data, base, ec); + uint32_t action = predict_sup_adf(data, base, ec); if (ind_update(data, ec_type)) - learn_cs_adf(data, base, ec, ec_type); + learn_sup_adf(data, base, ec, ec_type); ec.pred.multiclass = action; } - -uint32_t predict_bandit_adf(cbify& data, single_learner& base, example& ec) +uint32_t predict_bandit_adf(cbify& data, multi_learner& base, example& ec) { uint32_t argmin = find_min(data.cumulative_costs); + //cout<(a_s, out_ec.pred.a_s); + return chosen_action; } - -void learn_bandit_adf(cbify& data, single_learner& base, example& ec, int ec_type, uint32_t chosen_action, action_scores& a_s) +void learn_bandit_adf(cbify& data, multi_learner& base, example& ec, int ec_type) { //Store the multiclass input label //MULTICLASS::label_t ld = ec.l.multi; @@ -527,46 +542,68 @@ void learn_bandit_adf(cbify& data, single_learner& base, example& ec, int ec_typ // add cb label to chosen action auto& cl = data.cl_adf; - auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; + auto& lab = data.adf_data.ecs[cl.action - 1]->l.cb; lab.costs.push_back(cl); vector old_weights; for (size_t a = 0; a < data.adf_data.num_actions; ++a) - old_weights[a] = data.adf_data.ecs[a].weight; + old_weights.push_back(data.adf_data.ecs[a]->weight); for (uint32_t i = 0; i < data.choices_lambda; i++) { float weight_multiplier = compute_weight_multiplier(data, i, ec_type); + + //cout<<"learn in sublearner "<< i <<" with weight multiplier "<weight = old_weights[a] * weight_multiplier; + base.learn(data.adf_data.ecs, i); + + //cout<<"cb-explore increment = "<ft_offset; + //multiline_learn_or_predict(base, data.adf_data.ecs, offset, i); } for (size_t a = 0; a < data.adf_data.num_actions; ++a) - data.adf_data.ecs[a].weight = old_weights[a]; - - //ec.pred.multiclass = cl.action; + data.adf_data.ecs[a]->weight = old_weights[a]; } -void predict_or_learn_bandit_adf(cbify& data, single_learner& base, example& ec, int ec_type) +template +void predict_or_learn_bandit_adf(cbify& data, multi_learner& base, example& ec, int ec_type) { - uint32_t action = predict_bandit_adf(data, base, ec); + uint32_t chosen_action = predict_bandit_adf(data, base, ec); + + auto& cl = data.cl_adf; + auto& a_s = data.a_s_adf; + cl.action = a_s[chosen_action].action + 1; + cl.probability = a_s[chosen_action].score; + + //cout<(data, base); - ec.pred.multiclass = action; + ec.pred.multiclass = cl.action; } -void accumu_var_adf(cbify& data, single_learner& base, example& ec) +void accumu_var_adf(cbify& data, multi_learner& base, example& ec) { - size_t pred_best_approx = predict_cs_adf(data, base, ec); + size_t pred_best_approx = predict_sup_adf(data, base, ec); float temp_var; for (size_t a = 0; a < data.adf_data.num_actions; ++a) @@ -580,52 +617,51 @@ void accumu_var_adf(cbify& data, single_learner& base, example& ec) } template -void predict_or_learn_adf(cbify& data, single_learner& base, example& ec) +void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) { - if (data.ws_iter < data.ws_period) + if (use_cs) { - if (!use_cs) - { - data.mc_label = ec.l.multi; + data.cs_label = ec.l.cs; + } + else + { + data.mc_label = ec.l.multi; + if (data.ws_iter < data.ws_period) ec.l.multi.label = corrupt_action(data, data.mc_label.label, WARM_START); - } + else if (data.inter_iter < data.inter_period) + ec.l.multi.label = corrupt_action(data, data.mc_label.label, INTERACTION); + } + + if (data.ws_iter < data.ws_period) + { if (data.ws_iter < data.ws_train_size) { if (data.ws_type 
== SUPERVISED_WS) - predict_or_learn_cs_adf(data, base, ec, WARM_START); + predict_or_learn_sup_adf(data, base, ec, WARM_START); else if (data.ws_type == BANDIT_WS) - predict_or_learn_bandit_adf(data, base, ec, WARM_START); + predict_or_learn_bandit_adf(data, base, ec, WARM_START); } else - add_to_vali(data, ec); - - if (!use_cs) - { - ec.l.multi = data.mc_label; - } + add_to_vali(data, ec); ec.weight = 0; data.ws_iter++; } else if (data.inter_iter < data.inter_period) { - if (!use_cs) - { - data.mc_label = ec.l.multi; - ec.l.multi.label = corrupt_action(data, data.mc_label.label, INTERACTION); - } - predict_or_learn_bandit_adf(data, base, ec, INTERACTION); + predict_or_learn_bandit_adf(data, base, ec, INTERACTION); accumu_var_adf(data, base, ec); - - if (!use_cs) - { - ec.l.multi = data.mc_label; - } data.inter_iter++; } else { ec.weight = 0; } + + if (use_cs) + ec.l.cs = data.cs_label; + else + ec.l.multi = data.mc_label; + } @@ -650,6 +686,20 @@ void init_adf_data(cbify& data, const size_t num_actions) data.csls[a].costs.push_back({0, a+1, 0, 0}); } COST_SENSITIVE::cs_label.default_label(data.csl_empty); + + if (data.vali_method == WS_VALI_SPLIT || data.vali_method == WS_VALI_NOSPLIT) + { + data.ws_train_size = ceil(data.ws_period / 2.0); + data.ws_vali_size = data.ws_period - data.ws_train_size; + } + else + { + data.ws_train_size = data.ws_period; + data.ws_vali_size = 0; + } + setup_lambdas(data); + for (uint32_t i = 0; i < data.choices_lambda; i++) + data.cumulative_costs.push_back(0.f); } base_learner* cbify_setup(arguments& arg) @@ -663,7 +713,6 @@ base_learner* cbify_setup(arguments& arg) (use_cs, "cbify_cs", "consume cost-sensitive classification examples instead of multiclass") ("loss0", data->loss0, 0.f, "loss for correct label") ("loss1", data->loss1, 1.f, "loss for incorrect label") - ("epsilon", data->epsilon, 0.05f, "greedy probability") ("warm_start", data->ws_period, 0U, "number of training examples for warm start") ("interaction", data->inter_period, 0U, "number of training examples for bandit processing") ("choices_lambda", data->choices_lambda, 1U, "numbers of lambdas importance weights to aggregate") @@ -690,17 +739,6 @@ base_learner* cbify_setup(arguments& arg) if (data->use_adf) init_adf_data(*data.get(), num_actions); - if (data->vali_method == WS_VALI_SPLIT || data->vali_method == WS_VALI_NOSPLIT) - { - data->ws_train_size = ceil(data->ws_period / 2.0); - data->ws_vali_size = data->ws_period - data->ws_train_size; - } - else - { - data->ws_train_size = data->ws_period; - data->ws_vali_size = 0; - } - if (count(arg.args.begin(), arg.args.end(),"--cb_explore") == 0 && !data->use_adf) { arg.args.push_back("--cb_explore"); @@ -728,10 +766,15 @@ base_learner* cbify_setup(arguments& arg) if (data->use_adf) { multi_learner* base = as_multiline(setup_base(arg)); + // Not sure why we can only put this line here to pass the value of epsilon + data->epsilon = arg.vm["epsilon"].as(); + if (use_cs) l = &init_cost_sensitive_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); else l = &init_multiclass_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); + + //cout<<"cbify increment = "<increment<(increment * i); +{ + //std::cout<<"in increment_offset singleex: increment = "<ft_offset = "<ft_offset<<" i = "<ft_offset += static_cast(increment * i); + } } inline void decrement_offset(example& ex, const size_t increment, const size_t i) -{ assert(ex.ft_offset >= increment * i); +{ + 
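+  // Offset bookkeeping note: each sublearner id `i` owns a disjoint stride of
+  // the weight vector, so increment_offset above shifts ft_offset forward by
+  // increment * i before sublearner i runs, and this function undoes exactly
+  // that shift (hence the assert). Illustrative numbers, not from the source:
+  // with increment = 4 and i = 3, ft_offset grows by 12 on entry to the
+  // sublearner and shrinks by 12 again here.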
//std::cout<<"in decrement_offset singleex: increment = "<ft_offset = "<ft_offset<<" i = "<ft_offset >= increment * i); ec->ft_offset -= static_cast(increment * i); } } @@ -440,9 +451,11 @@ template struct learner template void multiline_learn_or_predict(multi_learner& base, multi_ex& examples, const uint64_t offset, const uint32_t id = 0) - { std::vector saved_offsets(examples.size()); + { std::vector saved_offsets; for (auto ec : examples) - { saved_offsets.push_back(ec->ft_offset); + { + //std::cout<<"saved offsets before = "<ft_offset<ft_offset); ec->ft_offset = offset; } @@ -452,6 +465,9 @@ template struct learner base.predict(examples, id); for (size_t i = 0; i < examples.size(); i++) + { examples[i]->ft_offset = saved_offsets[i]; + //std::cout<<"saved offsets after = "< Date: Tue, 10 Jul 2018 16:11:46 -0400 Subject: [PATCH 094/127] fixed error on split/nosplit swapping --- scripts/alg_comparison.py | 28 +++++++++++++++------------- vowpalwabbit/cbify.cc | 4 ++-- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index 24dc9c87b8a..b2988ef182e 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -29,7 +29,7 @@ def sum_files(result_path): def parse_sum_file(sum_filename): f = open(sum_filename, 'r') #f.seek(0, 0) - table = pd.read_table(f, sep='\s+',lineterminator='\n') + table = pd.read_table(f, sep='\s+',lineterminator='\n',error_bad_lines=False) return table @@ -120,14 +120,14 @@ def alg_str(alg_name): 'Class-1', 'Bandit-Only', 'Sup-Only', - 'MinimaxBandits, one validation', - 'AwesomeBandits with $|\Lambda|$=4, one validation', - 'AwesomeBandits with $|\Lambda|$=8, one validation', - 'AwesomeBandits with $|\Lambda|$=16, one validation', - 'MinimaxBandits, multiple validation', - 'AwesomeBandits with $|\Lambda|$=4, multiple validation', - 'AwesomeBandits with $|\Lambda|$=8, multiple validation', - 'AwesomeBandits with $|\Lambda|$=16, multiple validation', + 'MinimaxBandits, split validation', + 'AwesomeBandits with $|\Lambda|$=4, split validation', + 'AwesomeBandits with $|\Lambda|$=8, split validation', + 'AwesomeBandits with $|\Lambda|$=16, split validation', + 'MinimaxBandits, no-split validation', + 'AwesomeBandits with $|\Lambda|$=4, no-split validation', + 'AwesomeBandits with $|\Lambda|$=8, no-split validation', + 'AwesomeBandits with $|\Lambda|$=16, no-split validation', 'unknown']) def alg_str_compatible(alg_name): @@ -382,7 +382,9 @@ def get_unnormalized_results(result_table): return new_size, new_unnormalized_results def update_result_dict(results_dict, new_result): + print results_dict for k, v in new_result.iteritems(): + print k results_dict[k].append(v) @@ -524,7 +526,7 @@ def load_from_sum(mod): mod.pair_comp_on = False mod.cdf_on = True mod.maj_error_dir = '../../../figs_all/expt_0509/figs_maj_errors/0of1.sum' - mod.best_error_dir = '../../../figs_all/expt_0606/best_errors/0of1.sum' + mod.best_error_dir = '../../../figs_all/expt_0606/0of1.sum' mod.fulldir = mod.results_dir + mod.plot_subdir if not os.path.exists(mod.fulldir): @@ -571,7 +573,7 @@ def load_from_sum(mod): elif mod.filter == '2': #print all_results['warm_start_size'] >= 100 #raw_input(' ') - all_results = all_results[all_results['warm_start_size'] >= 100] + all_results = all_results[all_results['warm_start'] >= 200] elif mod.filter == '3': all_results = all_results[all_results['num_classes'] >= 3] elif mod.filter == '4': @@ -580,10 +582,10 @@ def load_from_sum(mod): all_results = 
all_results[all_results['total_size'] >= 10000] all_results = all_results[all_results['num_classes'] >= 3] elif mod.filter == '6': - all_results = all_results[all_results['warm_start_size'] >= 100] + all_results = all_results[all_results['warm_start'] >= 100] all_results = all_results[all_results['learning_rate'] == 0.3] elif mod.filter == '7': - all_results = all_results[all_results['warm_start_size'] >= 100] + all_results = all_results[all_results['warm_start'] >= 100] all_results = all_results[all_results['num_classes'] >= 3] plot_all(mod, all_results) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 860b5220d5d..564a8455e6f 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -720,8 +720,8 @@ base_learner* cbify_setup(arguments& arg) ("interaction_update", data->upd_inter, true, "indicator of interaction updates") ("corrupt_type_warm_start", data->cor_type_ws, UAR, "type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") ("corrupt_prob_warm_start", data->cor_prob_ws, 0.f, "probability of label corruption in the warm start phase") - ("corrupt_type_bandit", data->cor_type_inter, UAR, "type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") - ("corrupt_prob_bandit", data->cor_prob_inter, 0.f, "probability of label corruption in the interaction phase") + ("corrupt_type_interaction", data->cor_type_inter, UAR, "type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") + ("corrupt_prob_interaction", data->cor_prob_inter, 0.f, "probability of label corruption in the interaction phase") ("validation_method", data->vali_method, INTER_VALI, "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised)") ("weighting_scheme", data->wt_scheme, INSTANCE_WT, "weighting scheme (1 is per instance weighting, 2 is per dataset weighting (where we use a diminishing weighting scheme) )") ("lambda_scheme", data->lambda_scheme, ABS_CENTRAL, "Lambda set scheme (1 is expanding based on center=0.5, 2 is expanding based on center=0.5 and enforcing 0,1 in Lambda, 3 is expanding based on center=minimax lambda, 4 is expanding based on center=minimax lambda and enforcing 0,1 in Lambda )") From e12a8dab4da310017bf35312dd6ac15151c4dfdd Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Fri, 13 Jul 2018 21:55:35 -0400 Subject: [PATCH 095/127] fixed all memory leaks in warm start ground truth --- vowpalwabbit/cbify.cc | 70 ++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 21 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 564a8455e6f..40559b2197f 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -91,7 +91,8 @@ struct cbify MULTICLASS::label_t mc_label; COST_SENSITIVE::label cs_label; COST_SENSITIVE::label* csls; - COST_SENSITIVE::label* csl_empty; + CB::label* cbls; + //COST_SENSITIVE::label* csl_empty; }; @@ -141,10 +142,17 @@ void finish(cbify& data) data.a_s.delete_v(); if (data.use_adf) { - cout<<"The average variance estimate is: "<(); } + + data.lambdas.~vector(); + data.cumulative_costs.~vector(); + + data.a_s_adf.delete_v(); } void copy_example_to_adf(cbify& data, example& ec) @@ -297,6 +310,12 @@ float compute_weight_multiplier(cbify& data, size_t i, int ec_type) float weight_multiplier; float ws_train_size = data.ws_train_size; float inter_train_size = 
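   /* Worked example of the weighting computed below (illustrative numbers,
      not from the source): with lambda = 0.5, ws_train_size = 100 and
      inter_train_size = 400, total_train_size = 500 and
      total_weight = 0.5*100 + 0.5*400 = 250, so under INSTANCE_WT a warm
      start example gets multiplier 0.5*500/250 = 1 and an interaction example
      also gets 1; in general their per-example ratio is lambda/(1-lambda),
      matching the --weighting_scheme help text. */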
data.inter_period; + float total_train_size = ws_train_size + inter_train_size; + float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * inter_train_size; + + //cout<<"weight multiplier:"<ft_offset; //multiline_learn_or_predict(base, data.adf_data.ecs, offset, i); base.predict(data.adf_data.ecs, i); + //cout<<"greedy label = " << data.adf_data.ecs[0]->pred.a_s[0].action+1 << endl; return data.adf_data.ecs[0]->pred.a_s[0].action+1; } @@ -468,6 +485,7 @@ void learn_sup_adf(cbify& data, multi_learner& base, example& ec, int ec_type) copy_example_to_adf(data, ec); //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) auto& csls = data.csls; + auto& cbls = data.cbls; for (size_t a = 0; a < data.adf_data.num_actions; ++a) { csls[a].costs[0].class_index = a+1; @@ -478,6 +496,7 @@ void learn_sup_adf(cbify& data, multi_learner& base, example& ec, int ec_type) } for (size_t a = 0; a < data.adf_data.num_actions; ++a) { + cbls[a] = data.adf_data.ecs[a]->l.cb; data.adf_data.ecs[a]->l.cs = csls[a]; //cout<weight = old_weights[a]; + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.adf_data.ecs[a]->l.cb = cbls[a]; } template @@ -528,6 +550,10 @@ uint32_t predict_bandit_adf(cbify& data, multi_learner& base, example& ec) if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(out_ec.pred.a_s), end_scores(out_ec.pred.a_s), chosen_action)) THROW("Failed to sample from pdf"); + //cout<<"predict using sublearner "<< argmin < @@ -650,6 +676,7 @@ void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) { predict_or_learn_bandit_adf(data, base, ec, INTERACTION); accumu_var_adf(data, base, ec); + data.a_s_adf.clear(); data.inter_iter++; } else @@ -679,13 +706,14 @@ void init_adf_data(cbify& data, const size_t num_actions) } data.csls = calloc_or_throw(num_actions); - data.csl_empty = calloc_or_throw(1); + //data.csl_empty = calloc_or_throw(1); for (uint32_t a=0; a < num_actions; ++a) { COST_SENSITIVE::cs_label.default_label(&data.csls[a]); data.csls[a].costs.push_back({0, a+1, 0, 0}); } - COST_SENSITIVE::cs_label.default_label(data.csl_empty); + //COST_SENSITIVE::cs_label.default_label(data.csl_empty); + data.cbls = calloc_or_throw(num_actions); if (data.vali_method == WS_VALI_SPLIT || data.vali_method == WS_VALI_NOSPLIT) { @@ -713,20 +741,20 @@ base_learner* cbify_setup(arguments& arg) (use_cs, "cbify_cs", "consume cost-sensitive classification examples instead of multiclass") ("loss0", data->loss0, 0.f, "loss for correct label") ("loss1", data->loss1, 1.f, "loss for incorrect label") - ("warm_start", data->ws_period, 0U, "number of training examples for warm start") - ("interaction", data->inter_period, 0U, "number of training examples for bandit processing") - ("choices_lambda", data->choices_lambda, 1U, "numbers of lambdas importance weights to aggregate") + ("warm_start", data->ws_period, 0U, "number of training examples for warm start phase") + ("interaction", data->inter_period, UINT32_MAX, "number of examples for the interactive contextual bandit learning phase") ("warm_start_update", data->upd_ws, true, "indicator of warm start updates") ("interaction_update", data->upd_inter, true, "indicator of interaction updates") ("corrupt_type_warm_start", data->cor_type_ws, UAR, "type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") ("corrupt_prob_warm_start", data->cor_prob_ws, 0.f, "probability of label corruption in the warm 
start phase") ("corrupt_type_interaction", data->cor_type_inter, UAR, "type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") ("corrupt_prob_interaction", data->cor_prob_inter, 0.f, "probability of label corruption in the interaction phase") - ("validation_method", data->vali_method, INTER_VALI, "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised)") - ("weighting_scheme", data->wt_scheme, INSTANCE_WT, "weighting scheme (1 is per instance weighting, 2 is per dataset weighting (where we use a diminishing weighting scheme) )") - ("lambda_scheme", data->lambda_scheme, ABS_CENTRAL, "Lambda set scheme (1 is expanding based on center=0.5, 2 is expanding based on center=0.5 and enforcing 0,1 in Lambda, 3 is expanding based on center=minimax lambda, 4 is expanding based on center=minimax lambda and enforcing 0,1 in Lambda )") - ("overwrite_label", data->overwrite_label, 1U, "the label type 3 corruptions (overwriting) turn to") - ("warm_start_type", data->ws_type, SUPERVISED_WS, "the way of utilizing warm start data (1 is using supervised updates, 2 is using contextual bandit updates)").missing()) + ("choices_lambda", data->choices_lambda, 1U, "the number of candidate lambdas to aggregate (lambda is the importance weight parameter between the two sources) ") + ("lambda_scheme", data->lambda_scheme, ABS_CENTRAL, "The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min lambda=0, max lambda=1, 3: center lambda=epsilon/(#actions+epsilon), 4: center lambda=epsilon/(#actions+epsilon), min lambda=0, max lambda=1); the rest of candidate lambda values are generated using a doubling scheme") + ("weighting_scheme", data->wt_scheme, INSTANCE_WT, "weighting scheme (1: per instance weighting, where for every lambda, each contextual bandit example have weight lambda/(1-lambda) times that of each warm start example, 2: per dataset weighting, where for every lambda, the contextual bandit dataset has total weight lambda/(1-lambda) times that of the warm start dataset)") + ("validation_method", data->vali_method, INTER_VALI, "lambda selection criterion (1: using contextual bandit examples with progressive validation, 2: using warm start examples)") + ("overwrite_label", data->overwrite_label, 1U, "the label used by type 3 corruptions (overwriting)") + ("warm_start_type", data->ws_type, SUPERVISED_WS, "update method of utilizing warm start examples (1: using supervised updates, 2: using contextual bandit updates)").missing()) return nullptr; data->use_adf = count(arg.args.begin(), arg.args.end(),"--cb_explore_adf") > 0; From 4e639bcfaa97fbd235cad253dd3b65b8e93de530 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Sat, 14 Jul 2018 01:45:39 -0400 Subject: [PATCH 096/127] fixed memory leaks in supervised ground truth --- vowpalwabbit/cb_explore_adf.cc | 7 +++++- vowpalwabbit/cbify.cc | 40 +++++++++++++++++++++++----------- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/vowpalwabbit/cb_explore_adf.cc b/vowpalwabbit/cb_explore_adf.cc index a93ad9043f3..2196092600e 100644 --- a/vowpalwabbit/cb_explore_adf.cc +++ b/vowpalwabbit/cb_explore_adf.cc @@ -795,7 +795,12 @@ base_learner* cb_explore_adf_setup(arguments& arg) data->explore_type = REGCB; else { - if (!arg.vm.count("epsilon")) data->epsilon = 0.05f; + if (!arg.vm.count("epsilon")) + { + data->epsilon = 0.05f; + //a hacky way of passing the implicit epsilon value to cbify + 
arg.vm.insert(std::make_pair("epsilon", boost::program_options::variable_value(data->epsilon, false))); + } data->explore_type = EPS_GREEDY; } diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 40559b2197f..b7e7eb5f323 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -84,7 +84,7 @@ struct cbify CB::cb_class cl_adf; uint32_t ws_train_size; uint32_t ws_vali_size; - vector ws_vali; + vector ws_vali; float cumu_var; uint32_t ws_iter; uint32_t inter_iter; @@ -92,6 +92,7 @@ struct cbify COST_SENSITIVE::label cs_label; COST_SENSITIVE::label* csls; CB::label* cbls; + bool use_cs; //COST_SENSITIVE::label* csl_empty; }; @@ -161,12 +162,21 @@ void finish(cbify& data) free(data.adf_data.ecs[a]); } data.adf_data.ecs.~vector(); - } - data.lambdas.~vector(); - data.cumulative_costs.~vector(); + data.lambdas.~vector(); + data.cumulative_costs.~vector(); - data.a_s_adf.delete_v(); + data.a_s_adf.delete_v(); + for (size_t i = 0; i < data.ws_vali.size(); ++i) + { + if (data.use_cs) + VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, *data.ws_vali[i]); + else + VW::dealloc_example(MULTICLASS::mc_label.delete_label, *data.ws_vali[i]); + free(data.ws_vali[i]); + } + data.ws_vali.~vector(); + } } void copy_example_to_adf(cbify& data, example& ec) @@ -439,18 +449,20 @@ void accumu_costs_wsv_adf(cbify& data, multi_learner& base) lb = 0; ub = ws_vali_size; } + //cout<<"validation at iteration "<l.cs.costs, pred_label); else - data.cumulative_costs[i] += loss(data, ec_vali.l.multi.label, pred_label); + data.cumulative_costs[i] += loss(data, ec_vali->l.multi.label, pred_label); //cout< void add_to_vali(cbify& data, example& ec) { //if this does not work, we can try declare ws_vali as an array - example ec_copy; + example* ec_copy = VW::alloc_examples(sizeof(polylabel), 1); if (use_cs) - VW::copy_example_data(false, &ec_copy, &ec, 0, COST_SENSITIVE::cs_label.copy_label); + VW::copy_example_data(false, ec_copy, &ec, 0, COST_SENSITIVE::cs_label.copy_label); else - VW::copy_example_data(false, &ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); + VW::copy_example_data(false, ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); data.ws_vali.push_back(ec_copy); } @@ -752,7 +764,7 @@ base_learner* cbify_setup(arguments& arg) ("choices_lambda", data->choices_lambda, 1U, "the number of candidate lambdas to aggregate (lambda is the importance weight parameter between the two sources) ") ("lambda_scheme", data->lambda_scheme, ABS_CENTRAL, "The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min lambda=0, max lambda=1, 3: center lambda=epsilon/(#actions+epsilon), 4: center lambda=epsilon/(#actions+epsilon), min lambda=0, max lambda=1); the rest of candidate lambda values are generated using a doubling scheme") ("weighting_scheme", data->wt_scheme, INSTANCE_WT, "weighting scheme (1: per instance weighting, where for every lambda, each contextual bandit example have weight lambda/(1-lambda) times that of each warm start example, 2: per dataset weighting, where for every lambda, the contextual bandit dataset has total weight lambda/(1-lambda) times that of the warm start dataset)") - ("validation_method", data->vali_method, INTER_VALI, "lambda selection criterion (1: using contextual bandit examples with progressive validation, 2: using warm start examples)") + ("validation_method", data->vali_method, INTER_VALI, "lambda selection criterion (1: using contextual bandit examples with progressive validation, 2: using warm start examples, with fresh 
validation examples at each epoch, 3: using warm start examples, with a single validation set throughout)") ("overwrite_label", data->overwrite_label, 1U, "the label used by type 3 corruptions (overwriting)") ("warm_start_type", data->ws_type, SUPERVISED_WS, "update method of utilizing warm start examples (1: using supervised updates, 2: using contextual bandit updates)").missing()) return nullptr; @@ -763,6 +775,7 @@ base_learner* cbify_setup(arguments& arg) data->all = arg.all; data->num_actions = num_actions; + data->use_cs = use_cs; if (data->use_adf) init_adf_data(*data.get(), num_actions); @@ -795,6 +808,7 @@ base_learner* cbify_setup(arguments& arg) { multi_learner* base = as_multiline(setup_base(arg)); // Not sure why we can only put this line here to pass the value of epsilon + cout<<"count: "<epsilon = arg.vm["epsilon"].as(); if (use_cs) From 6540308107dc1f68270b8dcb1cec30efb8deeb82 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 16 Jul 2018 11:26:10 -0400 Subject: [PATCH 097/127] added cbify warm start test cases --- test/RunTests | 36 +++++++++++++ test/train-sets/ref/cbify_ws.stderr | 19 +++++++ test/train-sets/ref/cbify_ws_cyc.stderr | 19 +++++++ .../ref/cbify_ws_lambda_zeroone.stderr | 19 +++++++ test/train-sets/ref/cbify_ws_maj.stderr | 19 +++++++ .../train-sets/ref/cbify_ws_no_int_upd.stderr | 19 +++++++ test/train-sets/ref/cbify_ws_no_ws_upd.stderr | 19 +++++++ test/train-sets/ref/cbify_ws_simbandit.stderr | 19 +++++++ test/train-sets/ref/cbify_ws_uar.stderr | 19 +++++++ test/train-sets/ref/cbify_ws_wsgt.stderr | 19 +++++++ vowpalwabbit/cb_adf.cc | 2 +- vowpalwabbit/cbify.cc | 53 +++++++++---------- 12 files changed, 234 insertions(+), 28 deletions(-) create mode 100644 test/train-sets/ref/cbify_ws.stderr create mode 100644 test/train-sets/ref/cbify_ws_cyc.stderr create mode 100644 test/train-sets/ref/cbify_ws_lambda_zeroone.stderr create mode 100644 test/train-sets/ref/cbify_ws_maj.stderr create mode 100644 test/train-sets/ref/cbify_ws_no_int_upd.stderr create mode 100644 test/train-sets/ref/cbify_ws_no_ws_upd.stderr create mode 100644 test/train-sets/ref/cbify_ws_simbandit.stderr create mode 100644 test/train-sets/ref/cbify_ws_uar.stderr create mode 100644 test/train-sets/ref/cbify_ws_wsgt.stderr diff --git a/test/RunTests b/test/RunTests index d7e87b85858..5f51bb68642 100755 --- a/test/RunTests +++ b/test/RunTests @@ -1632,3 +1632,39 @@ echo "1 | feature:1" | {VW} -a --initial_weight 0.1 --initial_t 0.3 # Test 174 cbify adf, regcbopt {VW} --cbify 10 --cb_explore_adf --cb_type mtr --regcbopt --mellowness 0.01 -d train-sets/multiclass train-sets/ref/cbify_regcbopt.stderr + +# Test 175 cbify warm start +{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 -d train-sets/multiclass + /train-sets/ref/cbify_ws.stderr + +# Test 176 cbify warm start with lambda set containing 0/1 +{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --lambda_scheme 2 -d train-sets/multiclass + /train-sets/ref/cbify_ws_lambda_zeroone.stderr + +# Test 177 cbify warm start with warm start update turned off +{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_update false -d train-sets/multiclass + /train-sets/ref/cbify_ws_no_ws_upd.stderr + +# Test 178 cbify warm start with interaction update turned off +{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 
--choices_lambda 8 --interaction_update false -d train-sets/multiclass + /train-sets/ref/cbify_ws_no_int_upd.stderr + +# Test 179 cbify warm start with bandit warm start type (Sim-Bandit) +{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_type 2 -d train-sets/multiclass + /train-sets/ref/cbify_ws_simbandit.stderr + +# Test 180 cbify warm start with UAR supervised corruption +{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 1 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass + /train-sets/ref/cbify_ws_uar.stderr + +# Test 181 cbify warm start with CYC supervised corruption +{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 2 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass + /train-sets/ref/cbify_ws_cyc.stderr + +# Test 182 cbify warm start with MAJ supervised corruption +{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 3 --corrupt_prob_warm_start 0.5 --overwrite_label 1 -d train-sets/multiclass + /train-sets/ref/cbify_ws_maj.stderr + +# Test 183 cbify warm start with warm start distribution being the ground truth +{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_interaction 2 --corrupt_prob_interaction 0.5 --weighting_scheme 2 --validation_method 2 -d train-sets/multiclass + /train-sets/ref/cbify_ws_wsgt.stderr diff --git a/test/train-sets/ref/cbify_ws.stderr b/test/train-sets/ref/cbify_ws.stderr new file mode 100644 index 00000000000..6d05ba5a0db --- /dev/null +++ b/test/train-sets/ref/cbify_ws.stderr @@ -0,0 +1,19 @@ +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = train-sets/multiclass +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 4 1.0 4 3 2 +1.000000 1.000000 5 2.0 5 3 2 +1.000000 1.000000 7 4.0 7 3 2 + +finished run +number of examples = 10 +weighted example sum = 7.000000 +weighted label sum = 0.000000 +average loss = 1.000000 +total feature number = 20 diff --git a/test/train-sets/ref/cbify_ws_cyc.stderr b/test/train-sets/ref/cbify_ws_cyc.stderr new file mode 100644 index 00000000000..6d05ba5a0db --- /dev/null +++ b/test/train-sets/ref/cbify_ws_cyc.stderr @@ -0,0 +1,19 @@ +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = train-sets/multiclass +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 4 1.0 4 3 2 +1.000000 1.000000 5 2.0 5 3 2 +1.000000 1.000000 7 4.0 7 3 2 + +finished run +number of examples = 10 +weighted example sum = 7.000000 +weighted label sum = 0.000000 +average loss = 1.000000 +total feature number = 20 diff --git a/test/train-sets/ref/cbify_ws_lambda_zeroone.stderr b/test/train-sets/ref/cbify_ws_lambda_zeroone.stderr new file mode 100644 index 00000000000..344c43a5335 --- /dev/null +++ b/test/train-sets/ref/cbify_ws_lambda_zeroone.stderr @@ -0,0 +1,19 @@ +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = train-sets/multiclass +num sources = 1 +average since example example current current 
current +loss last counter weight label predict features +1.000000 1.000000 4 1.0 4 3 2 +1.000000 1.000000 5 2.0 5 9 2 +0.750000 0.500000 7 4.0 7 7 2 + +finished run +number of examples = 10 +weighted example sum = 7.000000 +weighted label sum = 0.000000 +average loss = 0.857143 +total feature number = 20 diff --git a/test/train-sets/ref/cbify_ws_maj.stderr b/test/train-sets/ref/cbify_ws_maj.stderr new file mode 100644 index 00000000000..6d05ba5a0db --- /dev/null +++ b/test/train-sets/ref/cbify_ws_maj.stderr @@ -0,0 +1,19 @@ +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = train-sets/multiclass +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 4 1.0 4 3 2 +1.000000 1.000000 5 2.0 5 3 2 +1.000000 1.000000 7 4.0 7 3 2 + +finished run +number of examples = 10 +weighted example sum = 7.000000 +weighted label sum = 0.000000 +average loss = 1.000000 +total feature number = 20 diff --git a/test/train-sets/ref/cbify_ws_no_int_upd.stderr b/test/train-sets/ref/cbify_ws_no_int_upd.stderr new file mode 100644 index 00000000000..6d05ba5a0db --- /dev/null +++ b/test/train-sets/ref/cbify_ws_no_int_upd.stderr @@ -0,0 +1,19 @@ +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = train-sets/multiclass +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 4 1.0 4 3 2 +1.000000 1.000000 5 2.0 5 3 2 +1.000000 1.000000 7 4.0 7 3 2 + +finished run +number of examples = 10 +weighted example sum = 7.000000 +weighted label sum = 0.000000 +average loss = 1.000000 +total feature number = 20 diff --git a/test/train-sets/ref/cbify_ws_no_ws_upd.stderr b/test/train-sets/ref/cbify_ws_no_ws_upd.stderr new file mode 100644 index 00000000000..4b334d4e73b --- /dev/null +++ b/test/train-sets/ref/cbify_ws_no_ws_upd.stderr @@ -0,0 +1,19 @@ +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = train-sets/multiclass +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 4 1.0 4 5 2 +1.000000 1.000000 5 2.0 5 9 2 +0.750000 0.500000 7 4.0 7 7 2 + +finished run +number of examples = 10 +weighted example sum = 7.000000 +weighted label sum = 0.000000 +average loss = 0.714286 +total feature number = 20 diff --git a/test/train-sets/ref/cbify_ws_simbandit.stderr b/test/train-sets/ref/cbify_ws_simbandit.stderr new file mode 100644 index 00000000000..6d935a38a61 --- /dev/null +++ b/test/train-sets/ref/cbify_ws_simbandit.stderr @@ -0,0 +1,19 @@ +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = train-sets/multiclass +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 4 1.0 4 7 2 +1.000000 1.000000 5 2.0 5 1 2 +0.750000 0.500000 7 4.0 7 10 2 + +finished run +number of examples = 10 +weighted example sum = 7.000000 +weighted label sum = 0.000000 +average loss = 0.857143 +total feature number = 20 diff --git a/test/train-sets/ref/cbify_ws_uar.stderr b/test/train-sets/ref/cbify_ws_uar.stderr new file mode 100644 index 00000000000..6d05ba5a0db --- /dev/null +++ b/test/train-sets/ref/cbify_ws_uar.stderr @@ -0,0 +1,19 @@ +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 
+power_t = 0.5 +using no cache +Reading datafile = train-sets/multiclass +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 4 1.0 4 3 2 +1.000000 1.000000 5 2.0 5 3 2 +1.000000 1.000000 7 4.0 7 3 2 + +finished run +number of examples = 10 +weighted example sum = 7.000000 +weighted label sum = 0.000000 +average loss = 1.000000 +total feature number = 20 diff --git a/test/train-sets/ref/cbify_ws_wsgt.stderr b/test/train-sets/ref/cbify_ws_wsgt.stderr new file mode 100644 index 00000000000..d05436ac3a2 --- /dev/null +++ b/test/train-sets/ref/cbify_ws_wsgt.stderr @@ -0,0 +1,19 @@ +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = train-sets/multiclass +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 4 1.0 4 2 2 +1.000000 1.000000 5 2.0 5 2 2 +1.000000 1.000000 7 4.0 7 2 2 + +finished run +number of examples = 10 +weighted example sum = 7.000000 +weighted label sum = 0.000000 +average loss = 1.000000 +total feature number = 20 diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index bdfc9157d1c..53a8bb5a4db 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc @@ -114,7 +114,7 @@ void learn_MTR(cb_adf& mydata, multi_learner& base, multi_ex& examples) uint32_t nf = (uint32_t)examples[mydata.gen_cs.mtr_example]->num_features; float old_weight = examples[mydata.gen_cs.mtr_example]->weight; - //adjust the importance weight to scale by a factor of 1/K (the last term) + //adjust the importance weight to scale by a factor of 1/num_actions (the last term) examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / mydata.gen_cs.num_actions); GEN_CS::call_cs_ldf(base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset); examples[mydata.gen_cs.mtr_example]->num_features = nf; diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index b7e7eb5f323..0de288757c8 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -93,7 +93,6 @@ struct cbify COST_SENSITIVE::label* csls; CB::label* cbls; bool use_cs; - //COST_SENSITIVE::label* csl_empty; }; @@ -219,7 +218,7 @@ float minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period void setup_lambdas(cbify& data) { - // The lambdas are in fact arranged in ascending order (the 'middle' lambda is 0.5) + // The lambdas are arranged in ascending order vector& lambdas = data.lambdas; for (uint32_t i = 0; i 0; i--) - lambdas[i-1] = lambdas[i] / 2; + lambdas[i-1] = lambdas[i] / 2.0; for (uint32_t i = mid+1; i < data.choices_lambda; i++) - lambdas[i] = 1 - (1-lambdas[i-1]) / 2; + lambdas[i] = 1 - (1-lambdas[i-1]) / 2.0; if (data.lambda_scheme == MINIMAX_CENTRAL_ZEROONE || data.lambda_scheme == ABS_CENTRAL_ZEROONE) { @@ -323,16 +322,9 @@ float compute_weight_multiplier(cbify& data, size_t i, int ec_type) float total_train_size = ws_train_size + inter_train_size; float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * inter_train_size; - //cout<<"weight multiplier:"<= ws_train_size) - // return 0.0; - //} - if (data.wt_scheme == INSTANCE_WT) { if (ec_type == WARM_START) @@ -347,6 +339,9 @@ float compute_weight_multiplier(cbify& data, size_t i, int ec_type) else weight_multiplier = 
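      /* Per-dataset weighting sketch (illustrative, not from the source): in
         this branch the multipliers divide by each source's own size, so summed
         over examples the warm start set carries total weight
         (1-lambda)*total_train_size and the interaction set
         lambda*total_train_size, i.e. a lambda/(1-lambda) ratio between the two
         datasets as wholes, per the --weighting_scheme help text. */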
data.lambdas[i] * total_train_size / inter_train_size; } + + //cout<<"weight multiplier: "< void add_to_vali(cbify& data, example& ec) { - //if this does not work, we can try declare ws_vali as an array + //TODO: set the first parameter properly example* ec_copy = VW::alloc_examples(sizeof(polylabel), 1); if (use_cs) @@ -495,7 +490,7 @@ template void learn_sup_adf(cbify& data, multi_learner& base, example& ec, int ec_type) { copy_example_to_adf(data, ec); - //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) + //generate cost-sensitive label (for CSOAA's temporary use) auto& csls = data.csls; auto& cbls = data.cbls; for (size_t a = 0; a < data.adf_data.num_actions; ++a) @@ -552,7 +547,6 @@ void predict_or_learn_sup_adf(cbify& data, multi_learner& base, example& ec, int uint32_t predict_bandit_adf(cbify& data, multi_learner& base, example& ec) { uint32_t argmin = find_min(data.cumulative_costs); - //cout< void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) { + // Corrupt labels (only corrupting multiclass labels as of now) + if (use_cs) - { data.cs_label = ec.l.cs; - } else { data.mc_label = ec.l.multi; - if (data.ws_iter < data.ws_period) + /*if (data.ws_iter < data.ws_period) ec.l.multi.label = corrupt_action(data, data.mc_label.label, WARM_START); else if (data.inter_iter < data.inter_period) ec.l.multi.label = corrupt_action(data, data.mc_label.label, INTERACTION); + */ } + // Warm start phase if (data.ws_iter < data.ws_period) { if (data.ws_iter < data.ws_train_size) @@ -684,6 +678,7 @@ void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) ec.weight = 0; data.ws_iter++; } + // Interaction phase else if (data.inter_iter < data.inter_period) { predict_or_learn_bandit_adf(data, base, ec, INTERACTION); @@ -691,11 +686,11 @@ void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) data.a_s_adf.clear(); data.inter_iter++; } + // Skipping the rest of the examples else - { ec.weight = 0; - } + // Store the original labels back if (use_cs) ec.l.cs = data.cs_label; else @@ -703,7 +698,6 @@ void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) } - void init_adf_data(cbify& data, const size_t num_actions) { auto& adf_data = data.adf_data; @@ -717,14 +711,13 @@ void init_adf_data(cbify& data, const size_t num_actions) CB::cb_label.default_label(&lab); } + // The rest of the initialization is for warm start CB data.csls = calloc_or_throw(num_actions); - //data.csl_empty = calloc_or_throw(1); for (uint32_t a=0; a < num_actions; ++a) { COST_SENSITIVE::cs_label.default_label(&data.csls[a]); data.csls[a].costs.push_back({0, a+1, 0, 0}); } - //COST_SENSITIVE::cs_label.default_label(data.csl_empty); data.cbls = calloc_or_throw(num_actions); if (data.vali_method == WS_VALI_SPLIT || data.vali_method == WS_VALI_NOSPLIT) @@ -737,9 +730,13 @@ void init_adf_data(cbify& data, const size_t num_actions) data.ws_train_size = data.ws_period; data.ws_vali_size = 0; } + data.ws_iter = 0; + data.inter_iter = 0; + setup_lambdas(data); for (uint32_t i = 0; i < data.choices_lambda; i++) data.cumulative_costs.push_back(0.f); + data.cumu_var = 0.f; } base_learner* cbify_setup(arguments& arg) @@ -807,9 +804,11 @@ base_learner* cbify_setup(arguments& arg) if (data->use_adf) { multi_learner* base = as_multiline(setup_base(arg)); - // Not sure why we can only put this line here to pass the value of epsilon - cout<<"count: "<epsilon = arg.vm["epsilon"].as(); + // Note: the current version of warm start CB can only 
support epsilon greedy exploration
+    // algorithm - we need to wait for the default epsilon value to be passed from cb_explore
+    // if there is one
+    //cout<<"count: "<<arg.vm.count("epsilon")<<endl;
+    data->epsilon = arg.vm.count("epsilon") > 0 ? arg.vm["epsilon"].as<float>() : 0.0f;

From be93a25e41827b131327ed7f48bb309a7afa92e6 Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Mon, 16 Jul 2018 11:55:25 -0400
Subject: [PATCH 098/127] removed unnecessary include path prefix

---
 vowpalwabbit/global_data.h       | 2 +-
 vowpalwabbit/io_buf.h            | 2 +-
 vowpalwabbit/parse_example.cc    | 2 +-
 vowpalwabbit/parse_primitives.cc | 2 +-
 vowpalwabbit/vw.h                | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/vowpalwabbit/global_data.h b/vowpalwabbit/global_data.h
index a34491b48a0..be80d8d6fc1 100644
--- a/vowpalwabbit/global_data.h
+++ b/vowpalwabbit/global_data.h
@@ -26,7 +26,7 @@ namespace po = boost::program_options;
 #include "learner.h"
 #include "v_hashmap.h"
 #include 
-#include "../explore/hash.h"
+#include "hash.h"
 #include "crossplat_compat.h"
 #include "error_reporting.h"
 #include "parser_helper.h"
diff --git a/vowpalwabbit/io_buf.h b/vowpalwabbit/io_buf.h
index f8553daa2ec..94d4902da7c 100644
--- a/vowpalwabbit/io_buf.h
+++ b/vowpalwabbit/io_buf.h
@@ -16,7 +16,7 @@ license as described in the file LICENSE.
 #include 
 #include 
 #include 
-#include "../explore/hash.h"
+#include "hash.h"
 #include "vw_exception.h"
 #include "vw_validate.h"
diff --git a/vowpalwabbit/parse_example.cc b/vowpalwabbit/parse_example.cc
index bfb06839119..d9d64cf71dd 100644
--- a/vowpalwabbit/parse_example.cc
+++ b/vowpalwabbit/parse_example.cc
@@ -7,7 +7,7 @@ license as described in the file LICENSE.
 #include 
 #include 
 #include "parse_example.h"
-#include "../explore/hash.h"
+#include "hash.h"
 #include "unique_sort.h"
 #include "global_data.h"
 #include "constant.h"
diff --git a/vowpalwabbit/parse_primitives.cc b/vowpalwabbit/parse_primitives.cc
index 9e728ef9df3..3dbad443151 100644
--- a/vowpalwabbit/parse_primitives.cc
+++ b/vowpalwabbit/parse_primitives.cc
@@ -13,7 +13,7 @@ license as described in the file LICENSE.
 #include 
 #include "parse_primitives.h"
-#include "../explore/hash.h"
+#include "hash.h"
 #include "vw_exception.h"
 
 bool substring_equal(const substring& a, const substring& b)
diff --git a/vowpalwabbit/vw.h b/vowpalwabbit/vw.h
index e764e72b23e..7744f9b5768 100644
--- a/vowpalwabbit/vw.h
+++ b/vowpalwabbit/vw.h
@@ -14,7 +14,7 @@ license as described in the file LICENSE. 
#include "global_data.h" #include "example.h" -#include "../explore/hash.h" +#include "hash.h" #include "simple_label.h" #include "parser.h" #include "parse_example.h" From 8a51d165a826ecf0ea22a9e244d4f72a4a24e8c5 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 23 Jul 2018 01:53:47 -0400 Subject: [PATCH 099/127] cleaning up script --- scripts/run_vw_commands.py | 283 ++++++++++++++++++++++++++----------- 1 file changed, 204 insertions(+), 79 deletions(-) diff --git a/scripts/run_vw_commands.py b/scripts/run_vw_commands.py index 5b6d2553a08..e4ac5d1bf7a 100644 --- a/scripts/run_vw_commands.py +++ b/scripts/run_vw_commands.py @@ -115,23 +115,51 @@ def gen_vw_options_list(mod): def gen_vw_options(mod): if 'optimal_approx' in mod.param: # Fully supervised on full dataset - mod.vw_template = {'data':'', 'progress':2.0, 'passes':0, 'oaa':0, 'cache_file':''} + mod.vw_template = + {'data':'', + 'progress':2.0, + 'passes':0, + 'oaa':0, + 'cache_file':''} mod.param['passes'] = 5 mod.param['oaa'] = mod.param['num_classes'] mod.param['cache_file'] = mod.param['data'] + '.cache' elif 'majority_approx' in mod.param: # Compute majority error; basically we would like to skip vw running as fast as possible - mod.vw_template = {'data':'', 'progress':2.0, 'cbify':0, 'warm_start':0, 'bandit':0} + mod.vw_template = + {'data':'', + 'progress':2.0, + 'cbify':0, + 'warm_start':0, + 'bandit':0} mod.param['cbify'] = mod.param['num_classes'] mod.param['warm_start'] = 0 - mod.param['bandit'] = 0 + mod.param['interaction'] = 0 else: # General CB - mod.vw_template = {'data':'', 'progress':2.0, 'corrupt_type_bandit':0, 'corrupt_prob_bandit':0.0, 'bandit':0, 'cb_type':'mtr', - 'choices_lambda':0, 'corrupt_type_supervised':0, 'corrupt_prob_supervised':0.0, 'lambda_scheme':1, 'learning_rate':0.5, 'warm_start_type':1, 'cbify':0, 'warm_start':0, 'overwrite_label':1, 'validation_method':1, 'weighting_scheme':1} + mod.vw_template = + {'data':'', + 'progress':2.0, + 'cb_type':'mtr', + 'cbify':0, + 'warm_start':0, + 'interaction':0, + 'choices_lambda':0, + 'corrupt_type_interaction':0, + 'corrupt_prob_interaction':0.0, + 'corrupt_type_supervised':0, + 'corrupt_prob_supervised':0.0, + 'warm_start_update': True, + 'interaction_update': True, + 'lambda_scheme':1, + 'learning_rate':0.5, + 'warm_start_type':1, + 'overwrite_label':1, + 'validation_method':1, + 'weighting_scheme':1} mod.param['warm_start'] = mod.param['warm_start_multiplier'] * mod.param['progress'] - mod.param['bandit'] = mod.param['total_size'] - mod.param['warm_start'] + mod.param['interaction'] = mod.param['total_size'] - mod.param['warm_start'] mod.param['cbify'] = mod.param['num_classes'] mod.param['overwrite_label'] = mod.param['majority_class'] @@ -142,12 +170,6 @@ def gen_vw_options(mod): mod.param['cb_explore'] = mod.param['num_classes'] mod.vw_template['cb_explore'] = 0 - if mod.param['no_warm_start_update'] is True: - mod.param['no_supervised'] = ' ' - mod.vw_template['no_supervised'] = ' ' - if mod.param['no_interaction_update'] is True: - mod.param['no_bandit'] = ' ' - mod.vw_template['no_bandit'] = ' ' def execute_vw(mod): gen_vw_options(mod) @@ -186,12 +208,26 @@ def replace_keys(dic, simplified_keymap): def param_to_str_simplified(mod): #print 'before replace' #print param - vw_run_param_set = ['lambda_scheme','learning_rate','validation_method', - 'fold','no_warm_start_update','no_interaction_update', - 'corrupt_prob_bandit', 'corrupt_prob_supervised', - 'corrupt_type_bandit', 'corrupt_type_supervised', - 
'warm_start_type','warm_start_multiplier','choices_lambda','weighting_scheme', - 'cb_type','optimal_approx','majority_approx','dataset', 'adf_on'] + vw_run_param_set = + ['lambda_scheme', + 'learning_rate', + 'validation_method', + 'fold', + 'no_warm_start_update', + 'no_interaction_update', + 'corrupt_prob_interaction', + 'corrupt_prob_warm_start', + 'corrupt_type_interaction', + 'corrupt_type_warm_start', + 'warm_start_type', + 'warm_start_multiplier', + 'choices_lambda', + 'weighting_scheme', + 'cb_type', + 'optimal_approx', + 'majority_approx', + 'dataset', + 'adf_on'] mod.template_red = dict([(k,mod.result_template[k]) for k in vw_run_param_set]) mod.simplified_keymap_red = dict([(k,mod.simplified_keymap[k]) for k in vw_run_param_set]) @@ -291,86 +327,173 @@ def dictify(param_name, param_choices): def params_per_task(mod): # Problem parameters - params_corrupt_type_sup = dictify('corrupt_type_supervised', mod.choices_corrupt_type_supervised) - params_corrupt_prob_sup = dictify('corrupt_prob_supervised', mod.choices_corrupt_prob_supervised) - params_corrupt_type_band = dictify('corrupt_type_bandit', mod.choices_corrupt_type_bandit) - params_corrupt_prob_band = dictify('corrupt_prob_bandit', mod.choices_corrupt_prob_bandit) - params_warm_start_multiplier = dictify('warm_start_multiplier', mod.warm_start_multipliers) - params_learning_rate = dictify('learning_rate', mod.learning_rates) - + prm_cor_type_ws = dictify('corrupt_type_warm_start', mod.choices_cor_type_ws) + prm_cor_prob_ws = dictify('corrupt_prob_warm_start', mod.choices_cor_prob_ws) + prm_cor_type_inter = dictify('corrupt_type_interaction', mod.choices_cor_type_inter) + prm_cor_prob_inter = dictify('corrupt_prob_interaction', mod.choices_cor_prob_inter) + prm_ws_multiplier = dictify('warm_start_multiplier', mod.ws_multipliers) + prm_lrs = dictify('learning_rate', mod.learning_rates) # could potentially induce a bug if the maj and best does not have this parameter - params_fold = dictify('fold', mod.folds) - + prm_fold = dictify('fold', mod.folds) # Algorithm parameters - params_cb_type = dictify('cb_type', mod.choices_cb_type) + prm_cb_type = dictify('cb_type', mod.choices_cb_type) + prm_dataset = dictify('dataset', mod.dss) + prm_choices_lbd = dictify('choices_lambda', mod.choices_choices_lambda) + prm_adf_on = dictify('adf_on', [True]) # Common parameters - params_common = param_cartesian_multi([params_corrupt_type_sup, params_corrupt_prob_sup, - params_corrupt_type_band, params_corrupt_prob_band, - params_warm_start_multiplier, params_learning_rate, params_cb_type, params_fold]) - params_common = filter(lambda param: param['corrupt_type_bandit'] == 3 or abs(param['corrupt_prob_bandit']) > 1e-4, params_common) + prm_com = param_cartesian_multi( + [prm_cor_type_ws, + prm_cor_prob_ws, + prm_cor_type_inter, + prm_cor_prob_inter, + prm_ws_multiplier, + prm_lrs, + prm_cb_type, + prm_fold, + prm_adf_on]) + + prm_com_inter_gt = filter(lambda p: + ((p['corrupt_type_interaction'] == 1 #noiseless for interaction data + and abs(param['corrupt_prob_interaction']) < 1e-4) + and + (p['corrupt_type_warm_start'] == 1 #filter out repetitive warm start data + or abs(param['corrupt_prob_warm_start']) > 1e-4)), + prm_com) + + + prm_com_ws_gt = filter(lambda p: + ((p['corrupt_type_warm_start'] == 1 #noiseless for warm start data + and abs(param['corrupt_prob_warm_start']) < 1e-4) + and + (p['corrupt_type_interaction'] == 1 #filter out repetitive interaction data + or abs(param['corrupt_prob_interaction']) > 1e-4)), + prm_com) + + prm_com = 
prm_com_inter_gt + prm_com_ws_gt # Baseline parameters construction if mod.baselines_on: - params_baseline_basic = [ - [{'choices_lambda': 1, 'warm_start_type': 1, 'lambda_scheme': 3}], [{'no_warm_start_update': True, 'no_interaction_update': False}, {'no_warm_start_update': False, 'no_interaction_update': True}] + prm_baseline_basic = + [ + [ + #Sup-Only + {'warm_start_type': 1, + 'warm_start_update': True, + 'interaction_update': False}, + #Band-Only + {'warm_start_type': 1, + 'warm_start_update': False, + 'interaction_update': True}, + #Sim-Bandit + {'warm_start_type': 2, + 'warm_start_update': True, + 'interaction_update': True} + #Sim-Bandit with no warm-start update + {'warm_start_type': 2, + 'warm_start_update': True, + 'interaction_update': False} + ] + ] + + prm_baseline_const = + [ + [ + {'weighting_scheme':1, + 'adf_on':True, + 'lambda_scheme':3, + 'choices_lambda':1} + ] ] - params_baseline = param_cartesian_multi([params_common] + params_baseline_basic) - #params_baseline = filter(lambda param: param['no_warm_start_update'] == True or param['no_interaction_update'] == True, params_baseline) + prm_baseline = param_cartesian_multi([prm_common] + prm_baseline_const + prm_baseline_basic) else: - params_baseline = [] + prm_baseline = [] # Algorithm parameters construction if mod.algs_on: - params_choices_lambd = dictify('choices_lambda', mod.choices_choices_lambda) - params_algs_1 = param_cartesian_multi([params_choices_lambd, [{'no_warm_start_update': False, 'no_interaction_update': False, 'warm_start_type': 1, 'lambda_scheme': 3}], [{'validation_method':2}, {'validation_method':3}]] ) - params_algs_2 = [{'no_warm_start_update': False, 'no_interaction_update': False, 'warm_start_type': 2, 'lambda_scheme': 1, 'choices_lambda':1}] - params_algs = param_cartesian( params_common, params_algs_1 + params_algs_2 ) + # Algorithms for supervised validation + prm_ws_gt = + [ + [ + {'warm_start_update': True, + 'interaction_update': True, + 'warm_start_type': 1, + 'lambda_scheme': 2, + 'weighting_scheme': 2} + ], + [ + {'validation_method':2}, + {'validation_method':3} + ] + ] + + prm_inter_gt = + [ + [ + {'warm_start_update': True, + 'interaction_update': True, + 'warm_start_type': 1, + 'lambda_scheme': 4, + 'weighting_scheme': 1} + ], + ] + + prm_algs_ws_gt = param_cartesian_multi([prm_com_ws_gt] + [prm_choices_lbd] + prm_ws_gt) + prm_algs_inter_gt = param_cartesian_multi([prm_com_inter_gt] + [prm_choices_lbd] + prm_inter_gt) + prm_algs = prm_algs_ws_gt + prm_algs_inter_gt else: params_algs = [] - params_constant_baseline = [{'weighting_scheme':1, - 'adf_on':True}] - params_constant_algs = [{'weighting_scheme':mod.weighting_scheme, - 'adf_on':True}] - - params_baseline_and_algs = param_cartesian_multi([params_constant_baseline, params_baseline]) + param_cartesian_multi([params_constant_algs, params_algs]) - - #for p in params_common: - # print p - - #for p in params_baseline: - # print p - - print len(params_common) - print len(params_baseline) - print len(params_algs) - print len(params_baseline_and_algs) - # Optimal baselines parameter construction if mod.optimal_on: - params_optimal = [{ 'optimal_approx': True, 'fold': 1, 'corrupt_type_supervised':1, 'corrupt_prob_supervised':0.0, 'corrupt_type_bandit':1, 'corrupt_prob_bandit':0.0} ] + params_optimal = + [ + {'optimal_approx': True, + 'fold': 1, + 'corrupt_type_warm_start':1, + 'corrupt_prob_warm_start':0.0, + 'corrupt_type_interaction':1, + 'corrupt_prob_interaction':0.0} + ] else: params_optimal = [] if mod.majority_on: - 
params_majority = [{ 'majority_approx': True, 'fold': 1, - 'corrupt_type_supervised':1, 'corrupt_prob_supervised':0.0, 'corrupt_type_bandit':1, 'corrupt_prob_bandit':0.0} ] + params_majority = + [ + {'majority_approx': True, + 'fold': 1, + 'corrupt_type_warm_start':1, + 'corrupt_prob_warm_start':0.0, + 'corrupt_type_interaction':1, + 'corrupt_prob_interaction':0.0} + ] else: params_majority = [] - #print len(params_baseline) - #print len(params_algs) - #print len(params_common) + #for p in params_common: + # print p + #for p in params_baseline: + # print p + print len(params_common) + print len(params_baseline) + print len(params_algs) #raw_input('..') # Common factor in all 3 groups: dataset - params_dataset = dictify('dataset', mod.dss) - params_all = param_cartesian_multi( [params_dataset, params_baseline_and_algs + params_optimal + params_majority] ) - - params_all = sorted(params_all, key=lambda d: (d['dataset'], d['corrupt_type_supervised'], d['corrupt_prob_supervised'], d['corrupt_type_bandit'], d['corrupt_prob_bandit'])) + params_all = param_cartesian_multi( + [params_dataset, + params_baseline_and_algs + params_optimal + params_majority]) + + params_all = sorted(params_all, + key=lambda d: (d['dataset'], + d['corrupt_type_warm_start'], + d['corrupt_prob_warm_start'], + d['corrupt_type_interaction'], + d['corrupt_prob_interaction']) + ) print 'The total number of VW commands to run is: ', len(params_all) #for row in params_all: # print row @@ -446,29 +569,31 @@ def main_loop(mod): ('num_classes','nc', 0), ('total_size', 'ts', 0), ('majority_size','ms', 0), - ('corrupt_type_supervised', 'cts', 0), - ('corrupt_prob_supervised', 'cps', 0.0), - ('corrupt_type_bandit', 'ctb', 0), - ('corrupt_prob_bandit', 'cpb', 0.0), + ('corrupt_type_warm_start', 'ctws', 0), + ('corrupt_prob_warm_start', 'cpws', 0.0), + ('corrupt_type_interaction', 'cti', 0), + ('corrupt_prob_interaction', 'cpi', 0.0), ('adf_on', 'ao', True), ('warm_start_multiplier','wsm',1), ('warm_start', 'ws', 0), ('warm_start_type', 'wst', 0), - ('bandit_size', 'bs', 0), - ('bandit_supervised_size_ratio', 'bssr', 0), + ('interaction', 'bs', 0), + ('inter_ws_size_ratio', 'iwsr', 0), ('cb_type', 'cbt', 'mtr'), ('validation_method', 'vm', 0), ('weighting_scheme', 'wts', 0), - ('lambda_scheme','ls', 0), + ('lambda_scheme', 'ls', 0), ('choices_lambda', 'cl', 0), - ('no_warm_start_update', 'nwsu', False), - ('no_interaction_update', 'niu', False), + ('warm_start_update', 'wsu', True), + ('interaction_update', 'iu', True), ('learning_rate', 'lr', 0.0), ('optimal_approx', 'oa', False), ('majority_approx', 'ma', False), ('avg_error', 'ae', 0.0), ('actual_variance', 'av', 0.0), - ('ideal_variance', 'iv', 0.0)] + ('ideal_variance', 'iv', 0.0), + ('last_lambda', 'll', 0.0), + ] num_cols = len(mod.result_template_list) mod.result_header_list = [ mod.result_template_list[i][0] for i in range(num_cols) ] From f95d1541b1a17576fd4e97998ff2a11f65b747cf Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 23 Jul 2018 15:52:55 -0400 Subject: [PATCH 100/127] finished updating the running vw script --- scripts/run_vw_commands.py | 242 ++++++++++++++++++------------------- 1 file changed, 118 insertions(+), 124 deletions(-) diff --git a/scripts/run_vw_commands.py b/scripts/run_vw_commands.py index e4ac5d1bf7a..2b5a81e4be4 100644 --- a/scripts/run_vw_commands.py +++ b/scripts/run_vw_commands.py @@ -6,6 +6,7 @@ import time import glob import re +from collections import OrderedDict class model: @@ -13,37 +14,33 @@ def __init__(self): # Setting up 
argument-independent learning parameters in the constructor self.baselines_on = True self.algs_on = True - self.optimal_on = False - self.majority_on = False + self.optimal_on = True + self.majority_on = True self.num_checkpoints = 200 # use fractions instead of absolute numbers - #mod.warm_start_multipliers = [pow(2,i) for i in range(4)] - self.warm_start_multipliers = [pow(2,i) for i in range(4)] + self.ws_multipliers = [pow(2,i) for i in range(4)] self.choices_cb_type = ['mtr'] #mod.choices_choices_lambda = [2,4,8] self.choices_choices_lambda = [2,8,16] - #mod.choices_corrupt_type_supervised = [1,2,3] - #mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] - self.choices_corrupt_type_supervised = [1] - self.choices_corrupt_prob_supervised = [0.0] + #mod.choices_cor_type_ws = [1,2,3] + #mod.choices_cor_prob_ws = [0.0,0.5,1.0] + self.choices_cor_type_ws = [1] + self.choices_cor_prob_ws = [0.0] - self.learning_rates_template = [0.1, 0.03, 0.3, 0.01, 1.0, 0.003, 3.0, 0.001, 10.0] - - self.adf_on = True - - self.choices_corrupt_type_bandit = [1,2,3] - self.choices_corrupt_prob_bandit = [0.0,0.5,1.0] + self.choices_cor_type_inter = [1,2,3] + self.choices_cor_prob_inter = [0.0,0.5,1.0] self.validation_method = 1 self.weighting_scheme = 2 #self.epsilon = 0.05 #self.epsilon_on = True - + self.lr_template = [0.1, 0.03, 0.3, 0.01, 1.0, 0.003, 3.0, 0.001, 10.0] + self.adf_on = True self.critical_size_ratios = [184 * pow(2, -i) for i in range(7) ] @@ -54,8 +51,8 @@ def collect_stats(mod): vw_run_results = [] vw_result_template = { - 'bandit_size': 0, - 'bandit_supervised_size_ratio': 0, + 'interaction': 0, + 'inter_ws_size_ratio': 0, 'avg_error': 0.0, 'actual_variance': 0.0, 'ideal_variance': 0.0 @@ -88,19 +85,24 @@ def collect_stats(mod): curr_pred_str, curr_feat_str = s avg_loss = float(avg_loss_str) - bandit_effective = int(float(weight_str)) + inter_effective = int(float(weight_str)) for ratio in mod.critical_size_ratios: - if bandit_effective >= (1 - 1e-7) * mod.param['warm_start'] * ratio and \ - bandit_effective <= (1 + 1e-7) * mod.param['warm_start'] * ratio: + if inter_effective >= (1 - 1e-7) * mod.param['warm_start'] * ratio and \ + inter_effective <= (1 + 1e-7) * mod.param['warm_start'] * ratio: vw_result = vw_result_template.copy() - vw_result['bandit_size'] = bandit_effective - vw_result['bandit_supervised_size_ratio'] = ratio + vw_result['interaction'] = inter_effective + vw_result['inter_ws_size_ratio'] = ratio vw_result['avg_error'] = avg_loss vw_result['actual_variance'] = actual_var_value vw_result['ideal_variance'] = ideal_var_value vw_run_results.append(vw_result) f.close() + + #if len(vw_run_results) >= 1: + # print mod.param['warm_start'] + # print vw_run_results + #raw_input('..') return vw_run_results @@ -115,48 +117,45 @@ def gen_vw_options_list(mod): def gen_vw_options(mod): if 'optimal_approx' in mod.param: # Fully supervised on full dataset - mod.vw_template = - {'data':'', - 'progress':2.0, - 'passes':0, - 'oaa':0, - 'cache_file':''} + mod.vw_template = OrderedDict([('data',''), + ('progress',2.0), + ('passes',0), + ('oaa',0), + ('cache_file','')]) mod.param['passes'] = 5 mod.param['oaa'] = mod.param['num_classes'] mod.param['cache_file'] = mod.param['data'] + '.cache' elif 'majority_approx' in mod.param: # Compute majority error; basically we would like to skip vw running as fast as possible - mod.vw_template = - {'data':'', - 'progress':2.0, - 'cbify':0, - 'warm_start':0, - 'bandit':0} + mod.vw_template = OrderedDict([('data',''), + ('progress',2.0), + ('cbify',0), + 
('warm_start',0), + ('interaction',0)]) mod.param['cbify'] = mod.param['num_classes'] mod.param['warm_start'] = 0 mod.param['interaction'] = 0 else: # General CB - mod.vw_template = - {'data':'', - 'progress':2.0, - 'cb_type':'mtr', - 'cbify':0, - 'warm_start':0, - 'interaction':0, - 'choices_lambda':0, - 'corrupt_type_interaction':0, - 'corrupt_prob_interaction':0.0, - 'corrupt_type_supervised':0, - 'corrupt_prob_supervised':0.0, - 'warm_start_update': True, - 'interaction_update': True, - 'lambda_scheme':1, - 'learning_rate':0.5, - 'warm_start_type':1, - 'overwrite_label':1, - 'validation_method':1, - 'weighting_scheme':1} + mod.vw_template = OrderedDict([('data',''), + ('cbify',0), + ('cb_type','mtr'), + ('warm_start',0), + ('interaction',0), + ('corrupt_type_interaction',0), + ('corrupt_prob_interaction',0.0), + ('corrupt_type_warm_start',0), + ('corrupt_prob_warm_start',0.0), + ('warm_start_update',True), + ('interaction_update',True), + ('choices_lambda',0), + ('lambda_scheme',1), + ('warm_start_type',1), + ('overwrite_label',1), + ('validation_method',1), + ('weighting_scheme',1), + ('learning_rate',0.5), + ('progress',2.0),]) mod.param['warm_start'] = mod.param['warm_start_multiplier'] * mod.param['progress'] mod.param['interaction'] = mod.param['total_size'] - mod.param['warm_start'] @@ -194,55 +193,49 @@ def param_to_str(param): param_list = [ str(k)+'='+str(v) for k,v in param.iteritems() ] return intersperse(param_list, ',') -def replace_if_in(dic, k, k_new): - if k in dic: - dic[k_new] = dic[k] - del dic[k] - def replace_keys(dic, simplified_keymap): - dic_new = dic.copy() - for k, k_new in simplified_keymap.iteritems(): - replace_if_in(dic_new, k, k_new) + dic_new = OrderedDict() + for k, v in dic.iteritems(): + dic_new[simplified_keymap[k]] = v return dic_new def param_to_str_simplified(mod): #print 'before replace' #print param - vw_run_param_set = - ['lambda_scheme', - 'learning_rate', - 'validation_method', + vw_run_param_set = \ + ['dataset', 'fold', - 'no_warm_start_update', - 'no_interaction_update', + 'lambda_scheme', + 'validation_method', + 'warm_start_multiplier', 'corrupt_prob_interaction', 'corrupt_prob_warm_start', 'corrupt_type_interaction', 'corrupt_type_warm_start', + 'warm_start_update', + 'interaction_update', 'warm_start_type', - 'warm_start_multiplier', 'choices_lambda', 'weighting_scheme', 'cb_type', 'optimal_approx', 'majority_approx', - 'dataset', + 'learning_rate', 'adf_on'] - mod.template_red = dict([(k,mod.result_template[k]) for k in vw_run_param_set]) - mod.simplified_keymap_red = dict([(k,mod.simplified_keymap[k]) for k in vw_run_param_set]) + mod.template_red = OrderedDict([(k,mod.result_template[k]) for k in vw_run_param_set]) + #mod.simplified_keymap_red = dict([(k,mod.simplified_keymap[k]) for k in vw_run_param_set]) # step 1: use the above as a template to filter out irrelevant parameters # in the vw output file title param_formatted = format_setting(mod.template_red, mod.param) # step 2: replace the key names with the simplified names - param_simplified = replace_keys(param_formatted, mod.simplified_keymap_red) + param_simplified = replace_keys(param_formatted, mod.simplified_keymap) #print 'after replace' #print param return param_to_str(param_simplified) -def gen_comparison_graph(mod): +def run_single_expt(mod): mod.param['data'] = mod.ds_path + str(mod.param['fold']) + '/' + mod.param['dataset'] - mod.param['total_size'] = get_num_lines(mod.param['data']) mod.param['num_classes'] = get_num_classes(mod.param['data']) 
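[Editorial note: the OrderedDict templates above are ultimately flattened into one VW invocation per experiment. A minimal sketch of that step, assuming a hypothetical template_to_cmd helper; the script itself builds the string via gen_vw_options_list inside execute_vw, and flags such as --warm_start and --interaction belong to this experimental branch rather than mainline VW:]

from collections import OrderedDict

def template_to_cmd(vw_bin, template):
    # Every (key, value) pair becomes '--key value'; booleans become bare flags.
    args = []
    for k, v in template.iteritems():
        if isinstance(v, bool):
            if v:
                args.append('--' + k)
        else:
            args.append('--' + k + ' ' + str(v))
    return vw_bin + ' ' + ' '.join(args)

print template_to_cmd('vw', OrderedDict(
    [('data', 'ds.vw.gz'), ('cbify', 10), ('warm_start', 50), ('interaction', 4950)]))
# vw --data ds.vw.gz --cbify 10 --warm_start 50 --interaction 4950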
mod.param['majority_size'], mod.param['majority_class'] = get_majority_class(mod.param['data']) @@ -264,7 +257,6 @@ def gen_comparison_graph(mod): result_formatted = format_setting(mod.result_template, result_combined) record_result(mod, result_formatted) - print('') # The following function is a "template filling" function # Given a template, we use the setting dict to fill it as much as possible @@ -276,10 +268,9 @@ def format_setting(template, setting): return formatted def record_result(mod, result): - result_row = [] - for k in mod.result_header_list: - result_row.append(result[k]) - + result_row = result.values() + #for k in mod.result_header_list: + # result_row.append(result[k]) #print result['validation_method'] #print result_row @@ -353,28 +344,27 @@ def params_per_task(mod): prm_fold, prm_adf_on]) - prm_com_inter_gt = filter(lambda p: - ((p['corrupt_type_interaction'] == 1 #noiseless for interaction data - and abs(param['corrupt_prob_interaction']) < 1e-4) + fltr_inter_gt = lambda p: ((p['corrupt_type_interaction'] == 1 #noiseless for interaction data + and abs(p['corrupt_prob_interaction']) < 1e-4) and (p['corrupt_type_warm_start'] == 1 #filter out repetitive warm start data - or abs(param['corrupt_prob_warm_start']) > 1e-4)), - prm_com) + or abs(p['corrupt_prob_warm_start']) > 1e-4)) + prm_com_inter_gt = filter(fltr_inter_gt, prm_com) - prm_com_ws_gt = filter(lambda p: - ((p['corrupt_type_warm_start'] == 1 #noiseless for warm start data - and abs(param['corrupt_prob_warm_start']) < 1e-4) - and - (p['corrupt_type_interaction'] == 1 #filter out repetitive interaction data - or abs(param['corrupt_prob_interaction']) > 1e-4)), - prm_com) + fltr_ws_gt = lambda p: ((p['corrupt_type_warm_start'] == 1 #noiseless for warm start data + and abs(p['corrupt_prob_warm_start']) < 1e-4) + and + (p['corrupt_type_interaction'] == 1 #filter out repetitive interaction data + or abs(p['corrupt_prob_interaction']) > 1e-4)) + + prm_com_ws_gt = filter(fltr_ws_gt, prm_com) - prm_com = prm_com_inter_gt + prm_com_ws_gt + prm_com = filter(lambda p: (fltr_ws_gt(p) or fltr_inter_gt(p)), prm_com) # Baseline parameters construction if mod.baselines_on: - prm_baseline_basic = + prm_baseline_basic = \ [ [ #Sup-Only @@ -388,7 +378,7 @@ def params_per_task(mod): #Sim-Bandit {'warm_start_type': 2, 'warm_start_update': True, - 'interaction_update': True} + 'interaction_update': True}, #Sim-Bandit with no warm-start update {'warm_start_type': 2, 'warm_start_update': True, @@ -396,7 +386,7 @@ def params_per_task(mod): ] ] - prm_baseline_const = + prm_baseline_const = \ [ [ {'weighting_scheme':1, @@ -405,7 +395,7 @@ def params_per_task(mod): 'choices_lambda':1} ] ] - prm_baseline = param_cartesian_multi([prm_common] + prm_baseline_const + prm_baseline_basic) + prm_baseline = param_cartesian_multi([prm_com] + prm_baseline_const + prm_baseline_basic) else: prm_baseline = [] @@ -413,7 +403,7 @@ def params_per_task(mod): # Algorithm parameters construction if mod.algs_on: # Algorithms for supervised validation - prm_ws_gt = + prm_ws_gt = \ [ [ {'warm_start_update': True, @@ -428,7 +418,7 @@ def params_per_task(mod): ] ] - prm_inter_gt = + prm_inter_gt = \ [ [ {'warm_start_update': True, @@ -443,11 +433,11 @@ def params_per_task(mod): prm_algs_inter_gt = param_cartesian_multi([prm_com_inter_gt] + [prm_choices_lbd] + prm_inter_gt) prm_algs = prm_algs_ws_gt + prm_algs_inter_gt else: - params_algs = [] + prm_algs = [] # Optimal baselines parameter construction if mod.optimal_on: - params_optimal = + prm_optimal = \ [ 
{'optimal_approx': True, 'fold': 1, @@ -457,10 +447,10 @@ def params_per_task(mod): 'corrupt_prob_interaction':0.0} ] else: - params_optimal = [] + prm_optimal = [] if mod.majority_on: - params_majority = + prm_majority = \ [ {'majority_approx': True, 'fold': 1, @@ -470,34 +460,37 @@ def params_per_task(mod): 'corrupt_prob_interaction':0.0} ] else: - params_majority = [] + prm_majority = [] #for p in params_common: # print p #for p in params_baseline: # print p - print len(params_common) - print len(params_baseline) - print len(params_algs) + #print len(prm_com_ws_gt), len(prm_algs_ws_gt) + #print len(prm_com_inter_gt), len(prm_algs_inter_gt) + #print len(prm_com) + #print len(prm_baseline) + #print len(prm_algs) #raw_input('..') # Common factor in all 3 groups: dataset - params_all = param_cartesian_multi( - [params_dataset, - params_baseline_and_algs + params_optimal + params_majority]) + prm_all = param_cartesian_multi( + [prm_dataset, + + prm_baseline + prm_algs + prm_optimal + prm_majority]) - params_all = sorted(params_all, + prm_all = sorted(prm_all, key=lambda d: (d['dataset'], d['corrupt_type_warm_start'], d['corrupt_prob_warm_start'], d['corrupt_type_interaction'], d['corrupt_prob_interaction']) ) - print 'The total number of VW commands to run is: ', len(params_all) - #for row in params_all: + print 'The total number of VW commands to run is: ', len(prm_all) + #for row in prm_all: # print row - return get_params_task(params_all) + return get_params_task(prm_all) def get_params_task(params_all): @@ -553,7 +546,7 @@ def vw_output_extract(mod, pattern): def write_summary_header(mod): summary_file = open(mod.summary_file_name, 'w') - summary_header = intersperse(mod.result_header_list, '\t') + summary_header = intersperse(mod.result_template.keys(), '\t') summary_file.write(summary_header+'\n') summary_file.close() @@ -596,15 +589,15 @@ def main_loop(mod): ] num_cols = len(mod.result_template_list) - mod.result_header_list = [ mod.result_template_list[i][0] for i in range(num_cols) ] - mod.result_template = dict([ (mod.result_template_list[i][0], mod.result_template_list[i][2]) for i in range(num_cols) ]) - mod.simplified_keymap = dict([ (mod.result_template_list[i][0], mod.result_template_list[i][1]) for i in range(num_cols) ]) + #mod.result_header_list = [ mod.result_template_list[i][0] for i in range(num_cols) ] + mod.result_template = OrderedDict([ (mod.result_template_list[i][0], mod.result_template_list[i][2]) for i in range(num_cols) ]) + mod.simplified_keymap = OrderedDict([ (mod.result_template_list[i][0], mod.result_template_list[i][1]) for i in range(num_cols) ]) write_summary_header(mod) for mod.param in mod.config_task: #if (mod.param['no_interaction_update'] is True): # raw_input(' ') - gen_comparison_graph(mod) + run_single_expt(mod) def create_dir(dir): if not os.path.exists(dir): @@ -649,9 +642,10 @@ def remove_suffix(filename): #print mod.dss if args.task_id == 0: - #process = subprocess.Popen('make -C .. clean; make -C ..', shell=True, stdout=f, stderr=f) - #subprocess.check_call(cmd, shell=True) - #process.wait() + # Compile vw in one of the subfolders + process = subprocess.Popen('make -C .. 
clean; make -C ..', shell=True, stdout=f, stderr=f) + subprocess.check_call(cmd, shell=True) + process.wait() # To avoid race condition of writing to the same file at the same time create_dir(args.results_dir) @@ -671,9 +665,9 @@ def remove_suffix(filename): time.sleep(1) if args.num_learning_rates <= 0 or args.num_learning_rates >= 10: - mod.learning_rates = mod.learning_rates_template + mod.learning_rates = mod.lr_template else: - mod.learning_rates = mod.learning_rates_template[:args.num_learning_rates] + mod.learning_rates = mod.lr_template[:args.num_learning_rates] #mod.folds = range(1,11) mod.folds = range(1, args.num_folds+1) From 558f1a225014d9a817eb5bfffe7fb608d7b46aaa Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 10:34:07 -0400 Subject: [PATCH 101/127] . --- scripts/alg_comparison.py | 70 +++++++++++++++++----- scripts/run_vw_commands.py | 120 ++++++++++++++++++++++++------------- vowpalwabbit/cbify.cc | 3 +- 3 files changed, 136 insertions(+), 57 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index b2988ef182e..674a4e86c89 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -13,6 +13,7 @@ import seaborn as sns from matplotlib.colors import ListedColormap from matplotlib.font_manager import FontProperties +from collections import Counter class model: @@ -250,9 +251,9 @@ def problem_text(name_problem): def plot_cdf(alg_name, errs): - print alg_name - print errs - print len(errs) + #print alg_name + #print errs + #print len(errs) col, sty = alg_color_style(alg_name) @@ -302,6 +303,21 @@ def plot_all_cdfs(alg_results, mod): save_legend(mod, indices) plt.clf() +def plot_all_lrs(lrs, mod): + alg_names = lrs.keys() + + for i in range(len(alg_names)): + pylab.figure(figsize=(8,6)) + lrs_alg = lrs[alg_names[i]] + counts = Counter(lrs_alg) + names = list(counts.keys()) + names_sorted = sorted(names) + values = [counts[n] for n in names_sorted] + plt.barh(range(len(names_sorted)),values) + plt.yticks(range(len(names_sorted)),names_sorted) + plt.savefig(mod.problemdir+alg_str_compatible(alg_names[i])+'_lr.pdf') + plt.clf() + def plot_all_pair_comp(alg_results, sizes, mod): alg_names = alg_results.keys() @@ -367,6 +383,7 @@ def get_maj_error(maj_error_table, name_dataset): def get_unnormalized_results(result_table): new_unnormalized_results = {} + new_lr = {} new_size = 0 i = 0 @@ -377,9 +394,10 @@ def get_unnormalized_results(result_table): if row['bandit_size'] == new_size: alg_name = (row['warm_start_type'], row['choices_lambda'], row['no_warm_start_update'], row['no_interaction_update'], row['validation_method']) new_unnormalized_results[alg_name] = row['avg_error'] + new_lr[alg_name] = row['learning_rate'] i += 1 - return new_size, new_unnormalized_results + return new_size, new_unnormalized_results, new_lr def update_result_dict(results_dict, new_result): print results_dict @@ -393,10 +411,10 @@ def plot_all(mod, all_results): #all_results = all_results[all_results['corrupt_prob_supervised']!=0.0] grouped_by_problem = all_results.groupby(['corrupt_type_supervised', - 'corrupt_prob_supervised', - 'corrupt_type_bandit', - 'corrupt_prob_bandit', - 'bandit_supervised_size_ratio']) + 'corrupt_prob_supervised', + 'corrupt_type_bandit', + 'corrupt_prob_bandit', + 'bandit_supervised_size_ratio']) #then group by dataset and warm_start size (corresponding to each point in cdf) for name_problem, group_problem in grouped_by_problem: @@ -405,7 +423,8 @@ def plot_all(mod, all_results): sizes = None mod.name_problem = 
name_problem - grouped_by_dataset = group_problem.groupby(['dataset','warm_start']) + grouped_by_dataset = group_problem.groupby(['dataset', + 'warm_start']) #then select unique combinations of (no_supervised, no_bandit, choices_lambda) #e.g. (True, True, 1), (True, False, 1), (False, True, 1), (False, False, 2) #(False, False, 8), and compute a normalized score @@ -413,8 +432,13 @@ def plot_all(mod, all_results): for name_dataset, group_dataset in grouped_by_dataset: result_table = group_dataset - grouped_by_algorithm = group_dataset.groupby(['warm_start_type', 'choices_lambda', 'no_warm_start_update', 'no_interaction_update', - 'validation_method']) + group_dataset = group_dataset.reset_index(drop=True) + + grouped_by_algorithm = group_dataset.groupby(['warm_start_type', + 'choices_lambda', + 'no_warm_start_update', + 'no_interaction_update', + 'validation_method']) mod.name_dataset = name_dataset @@ -423,11 +447,18 @@ def plot_all(mod, all_results): #In the future this should be changed if we run multiple folds: we #should average among folds before choosing the min - result_table = grouped_by_algorithm.min() - result_table = result_table.reset_index() + #result_table = grouped_by_algorithm.min() + #result_table = result_table.reset_index() - #print result_table + #print grouped_by_algorithm + #grouped_by_algorithm.describe() + idx = grouped_by_algorithm.apply(lambda df:df["avg_error"].idxmin()) + result_table = group_dataset.ix[idx, :] + #print idx + #print result_table + #print group_dataset + #raw_input('..') #group_dataset.groupby(['choices_lambda','no_supervised', 'no_bandit']) #print alg_results @@ -436,8 +467,9 @@ def plot_all(mod, all_results): #in general (including the first time) - record the error rates of all algorithms #print result_table - new_size, new_unnormalized_result = get_unnormalized_results(result_table) + new_size, new_unnormalized_result, new_lr = get_unnormalized_results(result_table) new_unnormalized_result[(0, 0, False, False, 1)] = get_maj_error(mod.maj_error_table, mod.name_dataset) + new_lr[(0, 0, False, False, 1)] = 0.0 new_normalized_result = normalize_score(new_unnormalized_result, mod) #first time - generate names of algorithms considered @@ -445,9 +477,11 @@ def plot_all(mod, all_results): sizes = [] unnormalized_results = dict([(k,[]) for k in new_unnormalized_result.keys()]) normalized_results = dict([(k,[]) for k in new_unnormalized_result.keys()]) + lrs = dict([(k,[]) for k in new_unnormalized_result.keys()]) update_result_dict(unnormalized_results, new_unnormalized_result) update_result_dict(normalized_results, new_normalized_result) + update_result_dict(lrs, new_lr) sizes.append(new_size) #print 'sizes:' @@ -468,6 +502,8 @@ def plot_all(mod, all_results): if mod.cdf_on is True: plot_all_cdfs(normalized_results, mod) + plot_all_lrs(lrs, mod) + def save_to_hdf(mod): print 'saving to hdf..' store = pd.HDFStore('store.h5') @@ -549,6 +585,9 @@ def load_from_sum(mod): #print mod.best_error_table[mod.best_error_table['dataset'] == 'ds_160_5.vw.gz'] #raw_input(' ') + #print all_results + #raw_input('..') + all_results = all_results[all_results['choices_lambda'] != 0] #ignore the no update row: @@ -558,6 +597,7 @@ def load_from_sum(mod): + #filter choices_lambdas = 2,4,8? 
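[Editorial note: the idxmin-based selection introduced a few hunks above is worth spelling out. The earlier grouped min() takes column-wise minima and can mix values from different rows; idxmin keeps the whole winning row, so the learning rate stays paired with the error it produced. A small self-contained illustration with made-up numbers:]

import pandas as pd

df = pd.DataFrame({'alg':       ['a', 'a', 'b', 'b'],
                   'lr':        [0.1, 0.3, 0.1, 0.3],
                   'avg_error': [0.20, 0.15, 0.40, 0.55]})

# One row per algorithm: the row achieving the smallest avg_error,
# with its learning rate kept intact.
idx = df.groupby('alg').apply(lambda g: g['avg_error'].idxmin())
best = df.loc[idx, :]   # the script uses the older .ix indexer
# best: (a, lr=0.3, 0.15) and (b, lr=0.1, 0.40)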
#if (alg_name[2] == False and alg_name[3] == False and alg_name[1] != 8): # pass diff --git a/scripts/run_vw_commands.py b/scripts/run_vw_commands.py index 2b5a81e4be4..41c974196a7 100644 --- a/scripts/run_vw_commands.py +++ b/scripts/run_vw_commands.py @@ -13,36 +13,51 @@ class model: def __init__(self): # Setting up argument-independent learning parameters in the constructor self.baselines_on = True - self.algs_on = True - self.optimal_on = True - self.majority_on = True + self.algs_on = False + self.optimal_on = False + self.majority_on = False + + self.ws_gt_on = True + self.inter_gt_on = False self.num_checkpoints = 200 # use fractions instead of absolute numbers self.ws_multipliers = [pow(2,i) for i in range(4)] + #self.ws_multipliers = [pow(2,i) for i in range(2)] self.choices_cb_type = ['mtr'] #mod.choices_choices_lambda = [2,4,8] self.choices_choices_lambda = [2,8,16] - #mod.choices_cor_type_ws = [1,2,3] - #mod.choices_cor_prob_ws = [0.0,0.5,1.0] + #self.choices_cor_type_ws = [1,2,3] + #self.choices_cor_prob_ws = [0.0,0.5,1.0] self.choices_cor_type_ws = [1] self.choices_cor_prob_ws = [0.0] - self.choices_cor_type_inter = [1,2,3] - self.choices_cor_prob_inter = [0.0,0.5,1.0] + self.choices_cor_type_inter = [1] + self.choices_cor_prob_inter = [0.0, 0.125, 0.25, 0.5] - self.validation_method = 1 - self.weighting_scheme = 2 + self.choices_loss_enc = [(-1, 0)] + #self.choices_cor_type_inter = [1,2] + #self.choices_cor_prob_inter = [0.0,0.5] - #self.epsilon = 0.05 + self.choices_epsilon = [0.05, 0.1] #self.epsilon_on = True - self.lr_template = [0.1, 0.03, 0.3, 0.01, 1.0, 0.003, 3.0, 0.001, 10.0] - self.adf_on = True + #self.lr_template = [0.1, 0.03, 0.3, 0.01, 1.0, 0.003, 3.0, 0.001, 10.0, 0.0003, 30.0, 0.0001, 100.0] + self.choices_adf = [True] self.critical_size_ratios = [184 * pow(2, -i) for i in range(7) ] +def gen_lr(n): + m = math.floor(n / 4.0) + if n % 4 == 0: + return 0.1 * pow(10, m) + if n % 4 == 1: + return 0.03 * pow(10, -m) + if n % 4 == 2: + return 0.3 * pow(10, m) + if n % 4 == 3: + return 0.01 * pow(10, -m) def collect_stats(mod): avg_error_value = avg_error(mod) @@ -155,7 +170,10 @@ def gen_vw_options(mod): ('validation_method',1), ('weighting_scheme',1), ('learning_rate',0.5), - ('progress',2.0),]) + ('epsilon', 0.05), + ('loss0', 0), + ('loss1', 0), + ('progress',2.0)]) mod.param['warm_start'] = mod.param['warm_start_multiplier'] * mod.param['progress'] mod.param['interaction'] = mod.param['total_size'] - mod.param['warm_start'] @@ -221,7 +239,10 @@ def param_to_str_simplified(mod): 'optimal_approx', 'majority_approx', 'learning_rate', - 'adf_on'] + 'adf_on', + 'epsilon', + 'loss0', + 'loss1'] mod.template_red = OrderedDict([(k,mod.result_template[k]) for k in vw_run_param_set]) #mod.simplified_keymap_red = dict([(k,mod.simplified_keymap[k]) for k in vw_run_param_set]) @@ -311,11 +332,16 @@ def dictify(param_name, param_choices): result = [] for param in param_choices: dic = {} - dic[param_name] = param + if isinstance(param_name, tuple): + for i in range(len(param_name)): + dic[param_name[i]] = param[i] + else: + dic[param_name] = param result.append(dic) - print param_name, len(result) + print param_name, result return result + def params_per_task(mod): # Problem parameters prm_cor_type_ws = dictify('corrupt_type_warm_start', mod.choices_cor_type_ws) @@ -330,7 +356,9 @@ def params_per_task(mod): prm_cb_type = dictify('cb_type', mod.choices_cb_type) prm_dataset = dictify('dataset', mod.dss) prm_choices_lbd = dictify('choices_lambda', mod.choices_choices_lambda) - 
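[Editorial note: a quick check of the tuple-aware dictify added in this commit. A tuple of names fans a tuple-valued choice out into several keys of the same dict, which is how the (loss0, loss1) encoding pair travels as a single setting. A trimmed sketch (the script's version also prints the result):]

def dictify(param_name, param_choices):
    # Each choice becomes a one-setting dict; tuple names expand into
    # several keys of the same dict.
    result = []
    for param in param_choices:
        dic = {}
        if isinstance(param_name, tuple):
            for i in range(len(param_name)):
                dic[param_name[i]] = param[i]
        else:
            dic[param_name] = param
        result.append(dic)
    return result

print dictify(('loss0', 'loss1'), [(-1, 0), (0, 1)])
# [{'loss0': -1, 'loss1': 0}, {'loss0': 0, 'loss1': 1}]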
prm_adf_on = dictify('adf_on', [True]) + prm_choices_eps = dictify('epsilon', mod.choices_epsilon) + prm_adf_on = dictify('adf_on', mod.choices_adf) + prm_loss_enc = dictify(('loss0', 'loss1'), mod.choices_loss_enc) # Common parameters prm_com = param_cartesian_multi( @@ -342,21 +370,29 @@ def params_per_task(mod): prm_lrs, prm_cb_type, prm_fold, - prm_adf_on]) - - fltr_inter_gt = lambda p: ((p['corrupt_type_interaction'] == 1 #noiseless for interaction data - and abs(p['corrupt_prob_interaction']) < 1e-4) - and - (p['corrupt_type_warm_start'] == 1 #filter out repetitive warm start data - or abs(p['corrupt_prob_warm_start']) > 1e-4)) + prm_adf_on, + prm_choices_eps, + prm_loss_enc]) + + if mod.inter_gt_on: + fltr_inter_gt = lambda p: ((p['corrupt_type_interaction'] == 1 #noiseless for interaction data + and abs(p['corrupt_prob_interaction']) < 1e-4) + and + (p['corrupt_type_warm_start'] == 1 #filter out repetitive warm start data + or abs(p['corrupt_prob_warm_start']) > 1e-4)) + else: + fltr_inter_gt = lambda p: False prm_com_inter_gt = filter(fltr_inter_gt, prm_com) - fltr_ws_gt = lambda p: ((p['corrupt_type_warm_start'] == 1 #noiseless for warm start data - and abs(p['corrupt_prob_warm_start']) < 1e-4) - and - (p['corrupt_type_interaction'] == 1 #filter out repetitive interaction data - or abs(p['corrupt_prob_interaction']) > 1e-4)) + if mod.ws_gt_on: + fltr_ws_gt = lambda p: ((p['corrupt_type_warm_start'] == 1 #noiseless for warm start data + and abs(p['corrupt_prob_warm_start']) < 1e-4) + and + (p['corrupt_type_interaction'] == 1 #filter out repetitive interaction data + or abs(p['corrupt_prob_interaction']) > 1e-4)) + else: + fltr_ws_gt = lambda p: False prm_com_ws_gt = filter(fltr_ws_gt, prm_com) @@ -378,8 +414,9 @@ def params_per_task(mod): #Sim-Bandit {'warm_start_type': 2, 'warm_start_update': True, - 'interaction_update': True}, - #Sim-Bandit with no warm-start update + 'interaction_update': True, + 'lambda_scheme': 1}, + #Sim-Bandit with only warm-start update {'warm_start_type': 2, 'warm_start_update': True, 'interaction_update': False} @@ -486,10 +523,10 @@ def params_per_task(mod): d['corrupt_prob_warm_start'], d['corrupt_type_interaction'], d['corrupt_prob_interaction']) - ) + ) print 'The total number of VW commands to run is: ', len(prm_all) - #for row in prm_all: - # print row + for row in prm_all: + print row return get_params_task(prm_all) @@ -586,6 +623,9 @@ def main_loop(mod): ('actual_variance', 'av', 0.0), ('ideal_variance', 'iv', 0.0), ('last_lambda', 'll', 0.0), + ('epsilon', 'eps', 0.0), + ('loss0', 'l0', 0.0), + ('loss1', 'l1', 0.0), ] num_cols = len(mod.result_template_list) @@ -643,9 +683,9 @@ def remove_suffix(filename): if args.task_id == 0: # Compile vw in one of the subfolders - process = subprocess.Popen('make -C .. clean; make -C ..', shell=True, stdout=f, stderr=f) - subprocess.check_call(cmd, shell=True) - process.wait() + #process = subprocess.Popen('make -C .. 
clean; make -C ..', shell=True, stdout=f, stderr=f) + #subprocess.check_call(cmd, shell=True) + #process.wait() # To avoid race condition of writing to the same file at the same time create_dir(args.results_dir) @@ -664,10 +704,10 @@ def remove_suffix(filename): while not os.path.exists(flag_dir): time.sleep(1) - if args.num_learning_rates <= 0 or args.num_learning_rates >= 10: - mod.learning_rates = mod.lr_template + if args.num_learning_rates <= 0: + mod.learning_rates = [gen_lr(0)] else: - mod.learning_rates = mod.lr_template[:args.num_learning_rates] + mod.learning_rates = [gen_lr(i) for i in range(args.num_learning_rates)] #mod.folds = range(1,11) mod.folds = range(1, args.num_folds+1) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 3bcab3abac6..1947a0734e6 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -656,11 +656,10 @@ void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) else { data.mc_label = ec.l.multi; - /*if (data.ws_iter < data.ws_period) + if (data.ws_iter < data.ws_period) ec.l.multi.label = corrupt_action(data, data.mc_label.label, WARM_START); else if (data.inter_iter < data.inter_period) ec.l.multi.label = corrupt_action(data, data.mc_label.label, INTERACTION); - */ } // Warm start phase From 648f0d979f29b97fc4723e81423f7928013e11c6 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 10:37:28 -0400 Subject: [PATCH 102/127] removed running scripts --- scripts/alg_comparison.py | 635 -------------------------------- scripts/data_gen.py | 88 ----- scripts/run_vw_commands.py | 727 ------------------------------------- scripts/run_vw_job.py | 205 ----------- scripts/shuffle.sh | 10 - 5 files changed, 1665 deletions(-) delete mode 100644 scripts/alg_comparison.py delete mode 100644 scripts/data_gen.py delete mode 100644 scripts/run_vw_commands.py delete mode 100644 scripts/run_vw_job.py delete mode 100644 scripts/shuffle.sh diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py deleted file mode 100644 index 674a4e86c89..00000000000 --- a/scripts/alg_comparison.py +++ /dev/null @@ -1,635 +0,0 @@ -import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt -import pylab -import os -import glob -import pandas as pd -import scipy.stats as stats -from itertools import compress -from math import sqrt -import argparse -import numpy as np -import seaborn as sns -from matplotlib.colors import ListedColormap -from matplotlib.font_manager import FontProperties -from collections import Counter - - -class model: - def __init__(self): - pass - -def sum_files(result_path): - prevdir = os.getcwd() - os.chdir(result_path) - dss = sorted(glob.glob('*.sum')) - os.chdir(prevdir) - return dss - -def parse_sum_file(sum_filename): - f = open(sum_filename, 'r') - #f.seek(0, 0) - table = pd.read_table(f, sep='\s+',lineterminator='\n',error_bad_lines=False) - - return table - -def get_z_scores(errors_1, errors_2, sizes): - z_scores = [] - for i in range(len(errors_1)): - #print i - z_scores.append( z_score(errors_1[i], errors_2[i], sizes[i]) ) - return z_scores - -def z_score(err_1, err_2, size): - if (abs(err_1) < 1e-6 or abs(err_1) > 1-1e-6) and (abs(err_2) < 1e-6 or abs(err_2) > 1-1e-6): - return 0 - - #print err_1, err_2, size, sqrt( (err_1*(1 - err_1) + err_2*(1-err_2)) / size ) - - z = (err_1 - err_2) / sqrt( (err_1*(1 - err_1) + err_2*(1-err_2)) / size ) - return z - #print z - -def is_significant(z): - if (stats.norm.cdf(z) < 0.05) or (stats.norm.cdf(z) > 0.95): - return True - else: - return False 
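[Editorial note: the significance test deleted just above is a standard two-proportion z-test, flagged when 5% of probability mass sits in either tail. Stripped of the degenerate-error guard, it amounts to:]

from math import sqrt
import scipy.stats as stats

def z_score(err_1, err_2, size):
    # Pooled z statistic for two error rates measured on 'size' examples each.
    return (err_1 - err_2) / sqrt((err_1 * (1 - err_1) + err_2 * (1 - err_2)) / size)

def is_significant(z):
    # Two-sided test: 5% in each tail.
    return stats.norm.cdf(z) < 0.05 or stats.norm.cdf(z) > 0.95

print is_significant(z_score(0.20, 0.22, 500))  # False: gap too small
print is_significant(z_score(0.20, 0.30, 500))  # True: clear gap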
- -def plot_comparison(errors_1, errors_2, sizes): - #print title - plt.plot([0,1],[0,1]) - z_scores = get_z_scores(errors_1, errors_2, sizes) - sorted_z_scores = sorted(enumerate(z_scores), key=lambda x:x[1]) - #for s in sorted_z_scores: - # print s, is_significant(s[1]) - - significance = map(is_significant, z_scores) - results_signi_1 = list(compress(errors_1, significance)) - results_signi_2 = list(compress(errors_2, significance)) - plt.scatter(results_signi_1, results_signi_2, s=18, c='r') - - insignificance = [not b for b in significance] - results_insigni_1 = list(compress(errors_1, insignificance)) - results_insigni_2 = list(compress(errors_2, insignificance)) - - plt.scatter(results_insigni_1, results_insigni_2, s=2, c='k') - - len_errors = len(errors_1) - wins_1 = [z_scores[i] < 0 and significance[i] for i in range(len_errors) ] - wins_2 = [z_scores[i] > 0 and significance[i] for i in range(len_errors) ] - num_wins_1 = wins_1.count(True) - num_wins_2 = wins_2.count(True) - - return num_wins_1, num_wins_2 - -def alg_info(alg_name, result_lst): - if (alg_name[0] == 0): - return result_lst[0] - if (alg_name[0] == 2): - return result_lst[1] - if (alg_name[2] == True and alg_name[3] == True): - return result_lst[2] - if (alg_name[2] == True and alg_name[3] == False): - return result_lst[3] - if (alg_name[2] == False and alg_name[3] == True): - return result_lst[4] - if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 2 and alg_name[4] == 2): - return result_lst[5] - if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 4 and alg_name[4] == 2): - return result_lst[6] - if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 8 and alg_name[4] == 2): - return result_lst[7] - if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 16 and alg_name[4] == 2): - return result_lst[8] - if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 2 and alg_name[4] == 3): - return result_lst[9] - if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 4 and alg_name[4] == 3): - return result_lst[10] - if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 8 and alg_name[4] == 3): - return result_lst[11] - if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 16 and alg_name[4] == 3): - return result_lst[12] - - return result_lst[13] - -def alg_str(alg_name): - return alg_info(alg_name, - ['Most-Freq', - 'Sim-Bandit', - 'Class-1', - 'Bandit-Only', - 'Sup-Only', - 'MinimaxBandits, split validation', - 'AwesomeBandits with $|\Lambda|$=4, split validation', - 'AwesomeBandits with $|\Lambda|$=8, split validation', - 'AwesomeBandits with $|\Lambda|$=16, split validation', - 'MinimaxBandits, no-split validation', - 'AwesomeBandits with $|\Lambda|$=4, no-split validation', - 'AwesomeBandits with $|\Lambda|$=8, no-split validation', - 'AwesomeBandits with $|\Lambda|$=16, no-split validation', - 'unknown']) - -def alg_str_compatible(alg_name): - return alg_info(alg_name, - ['Most-Freq', - 'Sim-Bandit', - 'Class-1', - 'Bandit-Only', - 'Sup-Only', - 'Choices_lambda=2, validation_method=2', - 'Choices_lambda=4, validation_method=2', - 'Choices_lambda=8, validation_method=2', - 'Choices_lambda=16, validation_method=2', - 'Choices_lambda=2, validation_method=3', - 'Choices_lambda=4, validation_method=3', - 'Choices_lambda=8, validation_method=3', - 'Choices_lambda=16, validation_method=3', - 'unknown']) - -def alg_color_style(alg_name): - palette = sns.color_palette('colorblind') - colors = 
palette.as_hex() - #colors = [colors[5], colors[4], 'black', colors[2], colors[1], colors[3], 'black', colors[0], 'black', 'black'] - colors = [ - colors[5], - colors[3], - 'black', - colors[0], - colors[1], - colors[2], - colors[2], - colors[2], - colors[2], - colors[4], - colors[4], - colors[4], - colors[4], - 'black' ] - - styles = [ - 'solid', - 'solid', - 'solid', - 'solid', - 'dashed', - 'dotted', - 'dashdot', - 'solid', - 'dashed', - 'dotted', - 'dashdot', - 'solid', - 'dashed', - 'solid'] - - return alg_info(alg_name, zip(colors, styles)) - #['black', 'magenta', 'lime', 'green', 'blue', 'darkorange','darksalmon', 'red', 'cyan'] - -def alg_index(alg_name): - return alg_info(alg_name, - [7.0, - 6.0, - 8.0, - 5.0, - 4.0, - 2.0, - 1.0, - 1.2, - 1.5, - 3.0, - 2.0, - 2.2, - 2.5, - 9.0]) - - -def order_legends(indices): - ax = plt.gca() - handles, labels = ax.get_legend_handles_labels() - # sort both labels and handles by labels - labels, handles, indices = zip(*sorted(zip(labels, handles, indices), key=lambda t: t[2])) - ax.legend(handles, labels) - -def save_legend(mod, indices): - ax = plt.gca() - handles, labels = ax.get_legend_handles_labels() - labels, handles, indices = zip(*sorted(zip(labels, handles, indices), key=lambda t: t[2])) - #figlegend = pylab.figure(figsize=(26,1)) - #figlegend.legend(handles, labels, 'center', fontsize=26, ncol=8) - figlegend = pylab.figure(figsize=(17,1.5)) - figlegend.legend(handles, labels, 'center', fontsize=26, ncol=3) - figlegend.tight_layout(pad=0) - figlegend.savefig(mod.problemdir+'legend.pdf') - -def problem_str(name_problem): - return 'sct='+str(name_problem[0]) \ - +'_scp='+str(name_problem[1]) \ - +'_bct='+str(name_problem[2]) \ - +'_bcp='+str(name_problem[3]) \ - +'_ratio='+str(name_problem[4]) - -def noise_type_str(noise_type): - if noise_type == 1: - return 'UAR' - elif noise_type == 2: - return 'CYC' - elif noise_type == 3: - return 'MAJ' - -def problem_text(name_problem): - s='' - s += 'Ratio = ' + str(name_problem[2]) + ', ' - if abs(name_problem[1]) < 1e-6: - s += 'noiseless' - else: - s += noise_type_str(name_problem[0]) + ', ' - s += 'p = ' + str(name_problem[1]) - return s - - -def plot_cdf(alg_name, errs): - - #print alg_name - #print errs - #print len(errs) - - col, sty = alg_color_style(alg_name) - - plt.step(np.sort(errs), np.linspace(0, 1, len(errs), endpoint=False), label=alg_str(alg_name), color=col, linestyle=sty, linewidth=2.0) - - # - - #raw_input("Press Enter to continue...") - -def plot_all_cdfs(alg_results, mod): - #plot all cdfs: - print 'printing cdfs..' 
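[Editorial note: stripped of color, style, and legend handling, the per-algorithm CDF that plot_cdf (deleted just above) draws is simply a sorted-error step plot. A minimal sketch:]

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

def plot_cdf(errs, label):
    # Empirical CDF over datasets: x = normalized error,
    # y = fraction of datasets with error at or below x.
    xs = np.sort(errs)
    ys = np.linspace(0, 1, len(errs), endpoint=False)
    plt.step(xs, ys, label=label, linewidth=2.0)

plot_cdf([0.1, 0.4, 0.2, 0.9], 'Sup-Only')
plt.xlim(0, 1); plt.ylim(0, 1); plt.legend()
plt.savefig('cdf.pdf')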
- - indices = [] - - pylab.figure(figsize=(8,6)) - - for alg_name, errs in alg_results.iteritems(): - indices.append(alg_index(alg_name)) - plot_cdf(alg_name, errs) - - if mod.normalize_type == 1: - plt.xlim(0,1) - elif mod.normalize_type == 2: - plt.xlim(-1,1) - elif mod.normalize_type == 3: - plt.xlim(0, 1) - - plt.ylim(0,1) - #params={'legend.fontsize':26, - #'axes.labelsize': 24, 'axes.titlesize':26, 'xtick.labelsize':20, - #'ytick.labelsize':20 } - #plt.rcParams.update(params) - #plt.xlabel('Normalized error',fontsize=34) - #plt.ylabel('Cumulative frequency', fontsize=34) - #plt.title(problem_text(mod.name_problem), fontsize=36) - plt.xticks(fontsize=30) - plt.yticks(fontsize=30) - plt.tight_layout(pad=0) - - ax = plt.gca() - order_legends(indices) - ax.legend_.set_zorder(-1) - plt.savefig(mod.problemdir+'cdf.pdf') - ax.legend_.remove() - plt.savefig(mod.problemdir+'cdf_nolegend.pdf') - save_legend(mod, indices) - plt.clf() - -def plot_all_lrs(lrs, mod): - alg_names = lrs.keys() - - for i in range(len(alg_names)): - pylab.figure(figsize=(8,6)) - lrs_alg = lrs[alg_names[i]] - counts = Counter(lrs_alg) - names = list(counts.keys()) - names_sorted = sorted(names) - values = [counts[n] for n in names_sorted] - plt.barh(range(len(names_sorted)),values) - plt.yticks(range(len(names_sorted)),names_sorted) - plt.savefig(mod.problemdir+alg_str_compatible(alg_names[i])+'_lr.pdf') - plt.clf() - - -def plot_all_pair_comp(alg_results, sizes, mod): - alg_names = alg_results.keys() - - for i in range(len(alg_names)): - for j in range(len(alg_names)): - if i < j: - errs_1 = alg_results[alg_names[i]] - errs_2 = alg_results[alg_names[j]] - - print len(errs_1), len(errs_2), len(sizes) - #raw_input('Press any key to continue..') - - num_wins_1, num_wins_2 = plot_comparison(errs_1, errs_2, sizes) - - plt.title( 'total number of comparisons = ' + str(len(errs_1)) + '\n'+ - alg_str(alg_names[i]) + ' wins ' + str(num_wins_1) + ' times, \n' + alg_str(alg_names[j]) + ' wins ' + str(num_wins_2) + ' times') - plt.savefig(mod.problemdir+alg_str_compatible(alg_names[i])+'_vs_'+alg_str_compatible(alg_names[j])+'.pdf') - plt.clf() - -#def init_results(result_table): -# alg_results = {} -# for idx, row in result_table.iterrows(): -# alg_name = (row['warm_start_type'], row['choices_lambda'], row['no_warm_start_update'], row['no_interaction_update']) -# alg_results[alg_name] = [] -# alg_results[(0, 0, False, False)] = [] -# return alg_results - -def normalize_score(unnormalized_result, mod): - if mod.normalize_type == 1: - l = get_best_error(mod.best_error_table, mod.name_dataset) - u = max(unnormalized_result.values()) - return { k : ((v - l) / (u - l + 1e-4)) for k, v in unnormalized_result.iteritems() } - elif mod.normalize_type == 2: - l = unnormalized_result[(1, 1, True, False)] - return { k : ((v - l) / (l + 1e-4)) for k, v in unnormalized_result.iteritems() } - elif mod.normalize_type == 3: - return unnormalized_result - -def get_best_error(best_error_table, name_dataset): - name = name_dataset[0] - print name - print best_error_table - best_error_oneline = best_error_table[best_error_table['dataset'] == name] - best_error = best_error_oneline.loc[best_error_oneline.index[0], 'avg_error'] - #raw_input("...") - #print best_error_oneline - #raw_input("...") - #print best_error - #raw_input("...") - return best_error - -def get_maj_error(maj_error_table, name_dataset): - name = name_dataset[0] - maj_error_oneline = maj_error_table[maj_error_table['data'] == name] - maj_error = 
maj_error_oneline.loc[maj_error_oneline.index[0], 'avg_error'] - return maj_error - -#normalized_results[alg_name].append(normalized_errs[i]) -#errs = [] -#for idx, row in result_table.iterrows(): -# errs.append(row['avg_error']) - -def get_unnormalized_results(result_table): - new_unnormalized_results = {} - new_lr = {} - new_size = 0 - - i = 0 - for idx, row in result_table.iterrows(): - if i == 0: - new_size = row['bandit_size'] - - if row['bandit_size'] == new_size: - alg_name = (row['warm_start_type'], row['choices_lambda'], row['no_warm_start_update'], row['no_interaction_update'], row['validation_method']) - new_unnormalized_results[alg_name] = row['avg_error'] - new_lr[alg_name] = row['learning_rate'] - i += 1 - - return new_size, new_unnormalized_results, new_lr - -def update_result_dict(results_dict, new_result): - print results_dict - for k, v in new_result.iteritems(): - print k - results_dict[k].append(v) - - -def plot_all(mod, all_results): - - #all_results = all_results[all_results['corrupt_prob_supervised']!=0.0] - - grouped_by_problem = all_results.groupby(['corrupt_type_supervised', - 'corrupt_prob_supervised', - 'corrupt_type_bandit', - 'corrupt_prob_bandit', - 'bandit_supervised_size_ratio']) - - #then group by dataset and warm_start size (corresponding to each point in cdf) - for name_problem, group_problem in grouped_by_problem: - normalized_results = None - unnormalized_results = None - sizes = None - mod.name_problem = name_problem - - grouped_by_dataset = group_problem.groupby(['dataset', - 'warm_start']) - #then select unique combinations of (no_supervised, no_bandit, choices_lambda) - #e.g. (True, True, 1), (True, False, 1), (False, True, 1), (False, False, 2) - #(False, False, 8), and compute a normalized score - - for name_dataset, group_dataset in grouped_by_dataset: - result_table = group_dataset - - group_dataset = group_dataset.reset_index(drop=True) - - grouped_by_algorithm = group_dataset.groupby(['warm_start_type', - 'choices_lambda', - 'no_warm_start_update', - 'no_interaction_update', - 'validation_method']) - - mod.name_dataset = name_dataset - - #The 'learning_rate' would be the only free degree here now. Taking the - #min aggregation will give us the algorithms we are evaluating. 
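[Editorial note: the type-1 normalization removed a little earlier rescales each dataset's errors so the best tuned fully-supervised error maps near 0 and the worst algorithm considered maps near 1. In isolation, with the same 1e-4 zero-denominator guard as the script:]

def normalize_type1(errs, best_err):
    # errs: algorithm name -> raw error on one dataset;
    # best_err: error of the tuned fully-supervised baseline.
    u = max(errs.values())
    return dict((k, (v - best_err) / (u - best_err + 1e-4))
                for k, v in errs.iteritems())

print normalize_type1({'sup_only': 0.10, 'bandit_only': 0.30}, 0.08)
# {'sup_only': ~0.09, 'bandit_only': ~1.0}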
- - #In the future this should be changed if we run multiple folds: we - #should average among folds before choosing the min - #result_table = grouped_by_algorithm.min() - #result_table = result_table.reset_index() - - #print grouped_by_algorithm - #grouped_by_algorithm.describe() - - idx = grouped_by_algorithm.apply(lambda df:df["avg_error"].idxmin()) - result_table = group_dataset.ix[idx, :] - #print idx - #print result_table - #print group_dataset - #raw_input('..') - - #group_dataset.groupby(['choices_lambda','no_supervised', 'no_bandit']) - #print alg_results - #dummy = input('') - - #in general (including the first time) - record the error rates of all algorithms - #print result_table - - new_size, new_unnormalized_result, new_lr = get_unnormalized_results(result_table) - new_unnormalized_result[(0, 0, False, False, 1)] = get_maj_error(mod.maj_error_table, mod.name_dataset) - new_lr[(0, 0, False, False, 1)] = 0.0 - new_normalized_result = normalize_score(new_unnormalized_result, mod) - - #first time - generate names of algorithms considered - if normalized_results is None: - sizes = [] - unnormalized_results = dict([(k,[]) for k in new_unnormalized_result.keys()]) - normalized_results = dict([(k,[]) for k in new_unnormalized_result.keys()]) - lrs = dict([(k,[]) for k in new_unnormalized_result.keys()]) - - update_result_dict(unnormalized_results, new_unnormalized_result) - update_result_dict(normalized_results, new_normalized_result) - update_result_dict(lrs, new_lr) - sizes.append(new_size) - - #print 'sizes:' - #print len(sizes) - #for k, v in unnormalized_results.iteritems(): - # print len(v) - - mod.problemdir = mod.fulldir+problem_str(mod.name_problem)+'/' - if not os.path.exists(mod.problemdir): - os.makedirs(mod.problemdir) - - print 'best_errors', mod.best_error_table - print 'unnormalized_results', unnormalized_results - print 'normalized_results', normalized_results - - if mod.pair_comp_on is True: - plot_all_pair_comp(unnormalized_results, sizes, mod) - if mod.cdf_on is True: - plot_all_cdfs(normalized_results, mod) - - plot_all_lrs(lrs, mod) - -def save_to_hdf(mod): - print 'saving to hdf..' - store = pd.HDFStore('store.h5') - store['result_table'] = mod.all_results - store.close() - -def load_from_hdf(mod): - print 'reading from hdf..' - store = pd.HDFStore('store.h5') - mod.all_results = store['result_table'] - store.close() - -def load_from_sum(mod): - print 'reading directory..' - dss = sum_files(mod.results_dir) - print len(dss) - - #print dss[168] - - all_results = None - - print 'reading sum tables..' 
- for i in range(len(dss)): - print 'result file name: ', dss[i] - result = parse_sum_file(mod.results_dir + dss[i]) - - if (i == 0): - all_results = result - else: - all_results = all_results.append(result) - - print all_results - mod.all_results = all_results - - -# This is a hack - need to do this systematically in the future -#def load_maj_error(mod): -# return parse_sum_file(mod.maj_error_dir) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='result summary') - parser.add_argument('--results_dir', default='../../../figs/') - parser.add_argument('--filter', default='1') - parser.add_argument('--plot_subdir', default='expt1/') - parser.add_argument('--from_hdf', action='store_true') - parser.add_argument('--normalize_type', type=int, default=1) - args = parser.parse_args() - - mod = model() - - mod.results_dir = args.results_dir - mod.filter = args.filter - mod.plot_subdir = args.plot_subdir - mod.normalize_type = args.normalize_type #1: normalized score; 2: bandit only centered score; 3: raw score - mod.pair_comp_on = False - mod.cdf_on = True - mod.maj_error_dir = '../../../figs_all/expt_0509/figs_maj_errors/0of1.sum' - mod.best_error_dir = '../../../figs_all/expt_0606/0of1.sum' - - mod.fulldir = mod.results_dir + mod.plot_subdir - if not os.path.exists(mod.fulldir): - os.makedirs(mod.fulldir) - - #print args.from_hdf - #raw_input(' ') - if args.from_hdf is True: - load_from_hdf(mod) - else: - load_from_sum(mod) - save_to_hdf(mod) - - #first group by corruption mode, then corruption prob - #then group by warm start - bandit ratio - #these constitutes all the problem settings we are looking at (corresponding - #to each cdf graph) - all_results = mod.all_results - - #print mod.best_error_table[mod.best_error_table['dataset'] == 'ds_160_5.vw.gz'] - #raw_input(' ') - - #print all_results - #raw_input('..') - - all_results = all_results[all_results['choices_lambda'] != 0] - - #ignore the no update row: - all_results = all_results[(all_results['no_warm_start_update'] == False) | (all_results['no_interaction_update'] == False)] - #ignore the choice_lambda = 4 row - all_results = all_results[(all_results['choices_lambda'] != 4)] - - - - - #filter choices_lambdas = 2,4,8? 
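# A toy check of the row filters above, assuming standard pandas semantics
# (only the column names match the real summary table; values are made up):
import pandas as pd
toy = pd.DataFrame({'no_warm_start_update':  [False, True,  True],
                    'no_interaction_update': [False, False, True],
                    'choices_lambda':        [2,     4,     8]})
# the first filter drops only rows where BOTH updates are disabled (row 3);
# the second then drops the remaining choices_lambda == 4 row (row 2)
kept = toy[(toy['no_warm_start_update'] == False) |
           (toy['no_interaction_update'] == False)]
kept = kept[kept['choices_lambda'] != 4]
# -> only row 1 survives: both updates enabled, choices_lambda == 2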
- #if (alg_name[2] == False and alg_name[3] == False and alg_name[1] != 8): - # pass - #else: - - mod.maj_error_table = parse_sum_file(mod.maj_error_dir) - mod.maj_error_table = mod.maj_error_table[mod.maj_error_table['majority_approx']] - mod.best_error_table = parse_sum_file(mod.best_error_dir) - mod.best_error_table = mod.best_error_table[mod.best_error_table['optimal_approx']] - - if mod.filter == '1': - pass - elif mod.filter == '2': - #print all_results['warm_start_size'] >= 100 - #raw_input(' ') - all_results = all_results[all_results['warm_start'] >= 200] - elif mod.filter == '3': - all_results = all_results[all_results['num_classes'] >= 3] - elif mod.filter == '4': - all_results = all_results[all_results['num_classes'] <= 2] - elif mod.filter == '5': - all_results = all_results[all_results['total_size'] >= 10000] - all_results = all_results[all_results['num_classes'] >= 3] - elif mod.filter == '6': - all_results = all_results[all_results['warm_start'] >= 100] - all_results = all_results[all_results['learning_rate'] == 0.3] - elif mod.filter == '7': - all_results = all_results[all_results['warm_start'] >= 100] - all_results = all_results[all_results['num_classes'] >= 3] - - plot_all(mod, all_results) - - #if i >= 331 and i <= 340: - # print 'result:', result - # print 'all_results:', all_results diff --git a/scripts/data_gen.py b/scripts/data_gen.py deleted file mode 100644 index aa30cb061c2..00000000000 --- a/scripts/data_gen.py +++ /dev/null @@ -1,88 +0,0 @@ -import random -import numpy as np - -classes = 2 -m = 10 -kwperclass = 2 - -def gen_keyword(): - keyword = np.zeros((classes, m)) - - for i in range(classes): - shuffled = range(m) - random.shuffle(shuffled) - - for j in range(kwperclass): - keyword[i,shuffled[j]] = 1 - - return keyword - - -def classify(classifier, example): - result = classifier.dot(example) - return np.argmax(result) - -def gen_datasets(filename, keyword, num_samples, fprob): - - f = open(filename+".vw", "w") - g = open(filename+"_m.vw", "w") - - for i in range(num_samples): - c = random.randint(0, classes-1) - - #generate a pair of datasets (one is cost-sensitive, the other is multiclass) - for l in range(classes): - f.write(str(l+1)+':') - cost = 1 - if l == c: - cost = 0 - f.write(str(cost)+' ') - - g.write(str(c+1)) - - f.write(' | ') - g.write(' | ') - - vec = np.zeros(m) - - for j in range(m): - flip = np.random.choice([False,True],p=[1-fprob, fprob]) - if flip: - vec[j] = 2 * (1-keyword[c][j]) - 1 - else: - vec[j] = 2 * keyword[c][j] - 1 - - for j in range(m): - f.write('w'+str(j)+':') - f.write(str(vec[j])+' ') - g.write('w'+str(j)+':') - g.write(str(vec[j])+' ') - - #print 'Is the prediction equal to the class label? 
', classify(keyword, vec) == c - f.write('\n') - g.write('\n') - - f.close() - g.close() - - - -if __name__ == '__main__': - - keyword = gen_keyword() - # Remember to generate a pair of datasets at the same time - # so that the class-dependent feature is retained - - - num_samples = 10000 - fprob = 0.1 - filename = "source1"+'_'+str(fprob) - - gen_datasets(filename, keyword, num_samples, fprob) - - - num_samples = 10000 - fprob = 0.1 - filename = "source2"+'_'+str(fprob) - - gen_datasets(filename, keyword, num_samples, fprob) diff --git a/scripts/run_vw_commands.py b/scripts/run_vw_commands.py deleted file mode 100644 index 41c974196a7..00000000000 --- a/scripts/run_vw_commands.py +++ /dev/null @@ -1,727 +0,0 @@ -import subprocess -from itertools import product -import os -import math -import argparse -import time -import glob -import re -from collections import OrderedDict - - -class model: - def __init__(self): - # Setting up argument-independent learning parameters in the constructor - self.baselines_on = True - self.algs_on = False - self.optimal_on = False - self.majority_on = False - - self.ws_gt_on = True - self.inter_gt_on = False - - self.num_checkpoints = 200 - - # use fractions instead of absolute numbers - self.ws_multipliers = [pow(2,i) for i in range(4)] - #self.ws_multipliers = [pow(2,i) for i in range(2)] - - self.choices_cb_type = ['mtr'] - #mod.choices_choices_lambda = [2,4,8] - self.choices_choices_lambda = [2,8,16] - - #self.choices_cor_type_ws = [1,2,3] - #self.choices_cor_prob_ws = [0.0,0.5,1.0] - self.choices_cor_type_ws = [1] - self.choices_cor_prob_ws = [0.0] - - self.choices_cor_type_inter = [1] - self.choices_cor_prob_inter = [0.0, 0.125, 0.25, 0.5] - - self.choices_loss_enc = [(-1, 0)] - #self.choices_cor_type_inter = [1,2] - #self.choices_cor_prob_inter = [0.0,0.5] - - self.choices_epsilon = [0.05, 0.1] - #self.epsilon_on = True - #self.lr_template = [0.1, 0.03, 0.3, 0.01, 1.0, 0.003, 3.0, 0.001, 10.0, 0.0003, 30.0, 0.0001, 100.0] - self.choices_adf = [True] - self.critical_size_ratios = [184 * pow(2, -i) for i in range(7) ] - -def gen_lr(n): - m = math.floor(n / 4.0) - if n % 4 == 0: - return 0.1 * pow(10, m) - if n % 4 == 1: - return 0.03 * pow(10, -m) - if n % 4 == 2: - return 0.3 * pow(10, m) - if n % 4 == 3: - return 0.01 * pow(10, -m) - -def collect_stats(mod): - avg_error_value = avg_error(mod) - actual_var_value = actual_var(mod) - ideal_var_value = ideal_var(mod) - - vw_run_results = [] - vw_result_template = { - 'interaction': 0, - 'inter_ws_size_ratio': 0, - 'avg_error': 0.0, - 'actual_variance': 0.0, - 'ideal_variance': 0.0 - } - - if 'majority_approx' in mod.param or 'optimal_approx' in mod.param: - vw_result = vw_result_template.copy() - if 'optimal_approx' in mod.param: - # this condition is for computing the optimal error - vw_result['avg_error'] = avg_error_value - else: - # this condition is for computing the majority error - err = 1 - float(mod.param['majority_size']) / mod.param['total_size'] - vw_result['avg_error'] = float('%0.5f' % err) - vw_run_results.append(vw_result) - return vw_run_results - - f = open(mod.vw_output_filename, 'r') - - i = 0 - for line in f: - vw_progress_pattern = '\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+[a-zA-Z0-9]+\s+[a-zA-Z0-9]+\s+\d+.*' - matchobj = re.match(vw_progress_pattern, line) - - if matchobj: - s = line.split() - if len(s) >= 8: - s = s[:7] - avg_loss_str, last_loss_str, counter_str, weight_str, curr_label_str, \ - curr_pred_str, curr_feat_str = s - - avg_loss = float(avg_loss_str) - inter_effective = 
int(float(weight_str)) - - for ratio in mod.critical_size_ratios: - if inter_effective >= (1 - 1e-7) * mod.param['warm_start'] * ratio and \ - inter_effective <= (1 + 1e-7) * mod.param['warm_start'] * ratio: - vw_result = vw_result_template.copy() - vw_result['interaction'] = inter_effective - vw_result['inter_ws_size_ratio'] = ratio - vw_result['avg_error'] = avg_loss - vw_result['actual_variance'] = actual_var_value - vw_result['ideal_variance'] = ideal_var_value - vw_run_results.append(vw_result) - f.close() - - #if len(vw_run_results) >= 1: - # print mod.param['warm_start'] - # print vw_run_results - #raw_input('..') - return vw_run_results - - -def gen_vw_options_list(mod): - mod.vw_options = format_setting(mod.vw_template, mod.param) - vw_options_list = [] - for k, v in mod.vw_options.iteritems(): - vw_options_list.append('--'+str(k)) - vw_options_list.append(str(v)) - return vw_options_list - -def gen_vw_options(mod): - if 'optimal_approx' in mod.param: - # Fully supervised on full dataset - mod.vw_template = OrderedDict([('data',''), - ('progress',2.0), - ('passes',0), - ('oaa',0), - ('cache_file','')]) - mod.param['passes'] = 5 - mod.param['oaa'] = mod.param['num_classes'] - mod.param['cache_file'] = mod.param['data'] + '.cache' - elif 'majority_approx' in mod.param: - # Compute majority error; basically we would like to skip vw running as fast as possible - mod.vw_template = OrderedDict([('data',''), - ('progress',2.0), - ('cbify',0), - ('warm_start',0), - ('interaction',0)]) - mod.param['cbify'] = mod.param['num_classes'] - mod.param['warm_start'] = 0 - mod.param['interaction'] = 0 - else: - # General CB - mod.vw_template = OrderedDict([('data',''), - ('cbify',0), - ('cb_type','mtr'), - ('warm_start',0), - ('interaction',0), - ('corrupt_type_interaction',0), - ('corrupt_prob_interaction',0.0), - ('corrupt_type_warm_start',0), - ('corrupt_prob_warm_start',0.0), - ('warm_start_update',True), - ('interaction_update',True), - ('choices_lambda',0), - ('lambda_scheme',1), - ('warm_start_type',1), - ('overwrite_label',1), - ('validation_method',1), - ('weighting_scheme',1), - ('learning_rate',0.5), - ('epsilon', 0.05), - ('loss0', 0), - ('loss1', 0), - ('progress',2.0)]) - - mod.param['warm_start'] = mod.param['warm_start_multiplier'] * mod.param['progress'] - mod.param['interaction'] = mod.param['total_size'] - mod.param['warm_start'] - mod.param['cbify'] = mod.param['num_classes'] - mod.param['overwrite_label'] = mod.param['majority_class'] - - if mod.param['adf_on'] is True: - mod.param['cb_explore_adf'] = ' ' - mod.vw_template['cb_explore_adf'] = ' ' - else: - mod.param['cb_explore'] = mod.param['num_classes'] - mod.vw_template['cb_explore'] = 0 - - -def execute_vw(mod): - gen_vw_options(mod) - vw_options_list = gen_vw_options_list(mod) - cmd = intersperse([mod.vw_path]+vw_options_list, ' ') - print cmd - - f = open(mod.vw_output_filename, 'w') - process = subprocess.Popen(cmd, shell=True, stdout=f, stderr=f) - #subprocess.check_call(cmd, shell=True) - process.wait() - f.close() - -def intersperse(l, ch): - s = '' - for item in l: - s += str(item) - s += ch - return s - -def param_to_str(param): - param_list = [ str(k)+'='+str(v) for k,v in param.iteritems() ] - return intersperse(param_list, ',') - -def replace_keys(dic, simplified_keymap): - dic_new = OrderedDict() - for k, v in dic.iteritems(): - dic_new[simplified_keymap[k]] = v - return dic_new - -def param_to_str_simplified(mod): - #print 'before replace' - #print param - vw_run_param_set = \ - ['dataset', - 'fold', - 
'lambda_scheme', - 'validation_method', - 'warm_start_multiplier', - 'corrupt_prob_interaction', - 'corrupt_prob_warm_start', - 'corrupt_type_interaction', - 'corrupt_type_warm_start', - 'warm_start_update', - 'interaction_update', - 'warm_start_type', - 'choices_lambda', - 'weighting_scheme', - 'cb_type', - 'optimal_approx', - 'majority_approx', - 'learning_rate', - 'adf_on', - 'epsilon', - 'loss0', - 'loss1'] - - mod.template_red = OrderedDict([(k,mod.result_template[k]) for k in vw_run_param_set]) - #mod.simplified_keymap_red = dict([(k,mod.simplified_keymap[k]) for k in vw_run_param_set]) - # step 1: use the above as a template to filter out irrelevant parameters - # in the vw output file title - param_formatted = format_setting(mod.template_red, mod.param) - # step 2: replace the key names with the simplified names - param_simplified = replace_keys(param_formatted, mod.simplified_keymap) - #print 'after replace' - #print param - return param_to_str(param_simplified) - -def run_single_expt(mod): - mod.param['data'] = mod.ds_path + str(mod.param['fold']) + '/' + mod.param['dataset'] - mod.param['total_size'] = get_num_lines(mod.param['data']) - mod.param['num_classes'] = get_num_classes(mod.param['data']) - mod.param['majority_size'], mod.param['majority_class'] = get_majority_class(mod.param['data']) - mod.param['progress'] = int(math.ceil(float(mod.param['total_size']) / float(mod.num_checkpoints))) - mod.vw_output_dir = mod.results_path + remove_suffix(mod.param['data']) + '/' - mod.vw_output_filename = mod.vw_output_dir + param_to_str_simplified(mod) + '.txt' - - #plot_errors(mod) - #print mod.param['validation_method'] - - execute_vw(mod) - vw_run_results = collect_stats(mod) - for vw_result in vw_run_results: - result_combined = merge_two_dicts(mod.param, vw_result) - - #print mod.result_template['no_interaction_update'] - #print result_combined['no_interaction_update'] - - result_formatted = format_setting(mod.result_template, result_combined) - record_result(mod, result_formatted) - - -# The following function is a "template filling" function -# Given a template, we use the setting dict to fill it as much as possible -def format_setting(template, setting): - formatted = template.copy() - for k, v in setting.iteritems(): - if k in template.keys(): - formatted[k] = v - return formatted - -def record_result(mod, result): - result_row = result.values() - #for k in mod.result_header_list: - # result_row.append(result[k]) - #print result['validation_method'] - #print result_row - - summary_file = open(mod.summary_file_name, 'a') - summary_file.write( intersperse(result_row, '\t') + '\n') - summary_file.close() - -def ds_files(ds_path): - prevdir = os.getcwd() - os.chdir(ds_path) - dss = sorted(glob.glob('*.vw.gz')) - #dss = [ds_path+ds for ds in dss] - os.chdir(prevdir) - return dss - -def merge_two_dicts(x, y): - #print 'x = ', x - #print 'y = ', y - z = x.copy() # start with x's keys and values - z.update(y) # modifies z with y's keys and values & returns None - return z - -def param_cartesian(param_set_1, param_set_2): - prod = [] - for param_1 in param_set_1: - for param_2 in param_set_2: - prod.append(merge_two_dicts(param_1, param_2)) - return prod - -def param_cartesian_multi(param_sets): - #print param_sets - prod = [{}] - for param_set in param_sets: - prod = param_cartesian(prod, param_set) - return prod - -def dictify(param_name, param_choices): - result = [] - for param in param_choices: - dic = {} - if isinstance(param_name, tuple): - for i in range(len(param_name)): - 
dic[param_name[i]] = param[i] - else: - dic[param_name] = param - result.append(dic) - print param_name, result - return result - - -def params_per_task(mod): - # Problem parameters - prm_cor_type_ws = dictify('corrupt_type_warm_start', mod.choices_cor_type_ws) - prm_cor_prob_ws = dictify('corrupt_prob_warm_start', mod.choices_cor_prob_ws) - prm_cor_type_inter = dictify('corrupt_type_interaction', mod.choices_cor_type_inter) - prm_cor_prob_inter = dictify('corrupt_prob_interaction', mod.choices_cor_prob_inter) - prm_ws_multiplier = dictify('warm_start_multiplier', mod.ws_multipliers) - prm_lrs = dictify('learning_rate', mod.learning_rates) - # could potentially induce a bug if the maj and best does not have this parameter - prm_fold = dictify('fold', mod.folds) - # Algorithm parameters - prm_cb_type = dictify('cb_type', mod.choices_cb_type) - prm_dataset = dictify('dataset', mod.dss) - prm_choices_lbd = dictify('choices_lambda', mod.choices_choices_lambda) - prm_choices_eps = dictify('epsilon', mod.choices_epsilon) - prm_adf_on = dictify('adf_on', mod.choices_adf) - prm_loss_enc = dictify(('loss0', 'loss1'), mod.choices_loss_enc) - - # Common parameters - prm_com = param_cartesian_multi( - [prm_cor_type_ws, - prm_cor_prob_ws, - prm_cor_type_inter, - prm_cor_prob_inter, - prm_ws_multiplier, - prm_lrs, - prm_cb_type, - prm_fold, - prm_adf_on, - prm_choices_eps, - prm_loss_enc]) - - if mod.inter_gt_on: - fltr_inter_gt = lambda p: ((p['corrupt_type_interaction'] == 1 #noiseless for interaction data - and abs(p['corrupt_prob_interaction']) < 1e-4) - and - (p['corrupt_type_warm_start'] == 1 #filter out repetitive warm start data - or abs(p['corrupt_prob_warm_start']) > 1e-4)) - else: - fltr_inter_gt = lambda p: False - - prm_com_inter_gt = filter(fltr_inter_gt, prm_com) - - if mod.ws_gt_on: - fltr_ws_gt = lambda p: ((p['corrupt_type_warm_start'] == 1 #noiseless for warm start data - and abs(p['corrupt_prob_warm_start']) < 1e-4) - and - (p['corrupt_type_interaction'] == 1 #filter out repetitive interaction data - or abs(p['corrupt_prob_interaction']) > 1e-4)) - else: - fltr_ws_gt = lambda p: False - - prm_com_ws_gt = filter(fltr_ws_gt, prm_com) - - prm_com = filter(lambda p: (fltr_ws_gt(p) or fltr_inter_gt(p)), prm_com) - - # Baseline parameters construction - if mod.baselines_on: - prm_baseline_basic = \ - [ - [ - #Sup-Only - {'warm_start_type': 1, - 'warm_start_update': True, - 'interaction_update': False}, - #Band-Only - {'warm_start_type': 1, - 'warm_start_update': False, - 'interaction_update': True}, - #Sim-Bandit - {'warm_start_type': 2, - 'warm_start_update': True, - 'interaction_update': True, - 'lambda_scheme': 1}, - #Sim-Bandit with only warm-start update - {'warm_start_type': 2, - 'warm_start_update': True, - 'interaction_update': False} - ] - ] - - prm_baseline_const = \ - [ - [ - {'weighting_scheme':1, - 'adf_on':True, - 'lambda_scheme':3, - 'choices_lambda':1} - ] - ] - prm_baseline = param_cartesian_multi([prm_com] + prm_baseline_const + prm_baseline_basic) - else: - prm_baseline = [] - - - # Algorithm parameters construction - if mod.algs_on: - # Algorithms for supervised validation - prm_ws_gt = \ - [ - [ - {'warm_start_update': True, - 'interaction_update': True, - 'warm_start_type': 1, - 'lambda_scheme': 2, - 'weighting_scheme': 2} - ], - [ - {'validation_method':2}, - {'validation_method':3} - ] - ] - - prm_inter_gt = \ - [ - [ - {'warm_start_update': True, - 'interaction_update': True, - 'warm_start_type': 1, - 'lambda_scheme': 4, - 'weighting_scheme': 1} - ], - ] - - 
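# A minimal worked example of the grid helpers defined above (dictify and
# param_cartesian_multi); 'ips' is a toy second cb_type added for illustration:
prm_a = dictify('cb_type', ['mtr', 'ips'])      # [{'cb_type': 'mtr'}, {'cb_type': 'ips'}]
prm_b = dictify(('loss0', 'loss1'), [(-1, 0)])  # tuple keys spread: [{'loss0': -1, 'loss1': 0}]
grid = param_cartesian_multi([prm_a, prm_b])
# grid == [{'cb_type': 'mtr', 'loss0': -1, 'loss1': 0},
#          {'cb_type': 'ips', 'loss0': -1, 'loss1': 0}]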
prm_algs_ws_gt = param_cartesian_multi([prm_com_ws_gt] + [prm_choices_lbd] + prm_ws_gt) - prm_algs_inter_gt = param_cartesian_multi([prm_com_inter_gt] + [prm_choices_lbd] + prm_inter_gt) - prm_algs = prm_algs_ws_gt + prm_algs_inter_gt - else: - prm_algs = [] - - # Optimal baselines parameter construction - if mod.optimal_on: - prm_optimal = \ - [ - {'optimal_approx': True, - 'fold': 1, - 'corrupt_type_warm_start':1, - 'corrupt_prob_warm_start':0.0, - 'corrupt_type_interaction':1, - 'corrupt_prob_interaction':0.0} - ] - else: - prm_optimal = [] - - if mod.majority_on: - prm_majority = \ - [ - {'majority_approx': True, - 'fold': 1, - 'corrupt_type_warm_start':1, - 'corrupt_prob_warm_start':0.0, - 'corrupt_type_interaction':1, - 'corrupt_prob_interaction':0.0} - ] - else: - prm_majority = [] - - - #for p in params_common: - # print p - #for p in params_baseline: - # print p - #print len(prm_com_ws_gt), len(prm_algs_ws_gt) - #print len(prm_com_inter_gt), len(prm_algs_inter_gt) - #print len(prm_com) - #print len(prm_baseline) - #print len(prm_algs) - #raw_input('..') - - # Common factor in all 3 groups: dataset - prm_all = param_cartesian_multi( - [prm_dataset, - - prm_baseline + prm_algs + prm_optimal + prm_majority]) - - prm_all = sorted(prm_all, - key=lambda d: (d['dataset'], - d['corrupt_type_warm_start'], - d['corrupt_prob_warm_start'], - d['corrupt_type_interaction'], - d['corrupt_prob_interaction']) - ) - print 'The total number of VW commands to run is: ', len(prm_all) - for row in prm_all: - print row - return get_params_task(prm_all) - - -def get_params_task(params_all): - params_task = [] - for i in range(len(params_all)): - if (i % mod.num_tasks == mod.task_id): - params_task.append(params_all[i]) - return params_task - -def get_num_lines(dataset_name): - num_lines = subprocess.check_output(('zcat ' + dataset_name + ' | wc -l'), shell=True) - return int(num_lines) - -def get_num_classes(ds): - # could be a bug for including the prefix.. 
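# e.g. for 'ds_160_5.vw.gz' the basename splits to ['ds', '160', '5'], so
# did == 160 and n_actions == 5; the [1:] slice skips the 'ds' prefix.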
- did, n_actions = os.path.basename(ds).split('.')[0].split('_')[1:] - did, n_actions = int(did), int(n_actions) - return n_actions - -def get_majority_class(dataset_name): - maj_class_str = subprocess.check_output(('zcat '+ dataset_name +' | cut -d \' \' -f 1 | sort | uniq -c | sort -r -n | head -1 | xargs '), shell=True) - maj_size, maj_class = maj_class_str.split() - return int(maj_size), int(maj_class) - -def avg_error(mod): - return vw_output_extract(mod, 'average loss') - -def actual_var(mod): - return vw_output_extract(mod, 'Measured average variance') - -def ideal_var(mod): - return vw_output_extract(mod, 'Ideal average variance') - -def vw_output_extract(mod, pattern): - #print mod.vw_output_filename - vw_output = open(mod.vw_output_filename, 'r') - vw_output_text = vw_output.read() - #print vw_output_text - #rgx_pattern = '^'+pattern+' = (.*)(|\sh)\n.*$' - #print rgx_pattern - rgx_pattern = '.*'+pattern+' = ([\d]*.[\d]*)( h|)\n.*' - rgx = re.compile(rgx_pattern, flags=re.M) - - errs = rgx.findall(vw_output_text) - if not errs: - avge = 0 - else: - #print errs - avge = float(errs[0][0]) - - vw_output.close() - return avge - -def write_summary_header(mod): - summary_file = open(mod.summary_file_name, 'w') - summary_header = intersperse(mod.result_template.keys(), '\t') - summary_file.write(summary_header+'\n') - summary_file.close() - -def main_loop(mod): - mod.summary_file_name = mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum' - - # The reason for using a list is that, we would like to keep the order of the - #columns in this way. Maybe use ordered dictionary in the future? - mod.result_template_list = [ - ('fold', 'fd', 0), - ('data', 'dt', ''), - ('dataset', 'ds', ''), - ('num_classes','nc', 0), - ('total_size', 'ts', 0), - ('majority_size','ms', 0), - ('corrupt_type_warm_start', 'ctws', 0), - ('corrupt_prob_warm_start', 'cpws', 0.0), - ('corrupt_type_interaction', 'cti', 0), - ('corrupt_prob_interaction', 'cpi', 0.0), - ('adf_on', 'ao', True), - ('warm_start_multiplier','wsm',1), - ('warm_start', 'ws', 0), - ('warm_start_type', 'wst', 0), - ('interaction', 'bs', 0), - ('inter_ws_size_ratio', 'iwsr', 0), - ('cb_type', 'cbt', 'mtr'), - ('validation_method', 'vm', 0), - ('weighting_scheme', 'wts', 0), - ('lambda_scheme', 'ls', 0), - ('choices_lambda', 'cl', 0), - ('warm_start_update', 'wsu', True), - ('interaction_update', 'iu', True), - ('learning_rate', 'lr', 0.0), - ('optimal_approx', 'oa', False), - ('majority_approx', 'ma', False), - ('avg_error', 'ae', 0.0), - ('actual_variance', 'av', 0.0), - ('ideal_variance', 'iv', 0.0), - ('last_lambda', 'll', 0.0), - ('epsilon', 'eps', 0.0), - ('loss0', 'l0', 0.0), - ('loss1', 'l1', 0.0), - ] - - num_cols = len(mod.result_template_list) - #mod.result_header_list = [ mod.result_template_list[i][0] for i in range(num_cols) ] - mod.result_template = OrderedDict([ (mod.result_template_list[i][0], mod.result_template_list[i][2]) for i in range(num_cols) ]) - mod.simplified_keymap = OrderedDict([ (mod.result_template_list[i][0], mod.result_template_list[i][1]) for i in range(num_cols) ]) - - write_summary_header(mod) - for mod.param in mod.config_task: - #if (mod.param['no_interaction_update'] is True): - # raw_input(' ') - run_single_expt(mod) - -def create_dir(dir): - if not os.path.exists(dir): - os.makedirs(dir) - import stat - os.chmod(dir, os.stat(dir).st_mode | stat.S_IWOTH) - -def remove_suffix(filename): - return os.path.basename(filename).split('.')[0] - -if __name__ == '__main__': - parser = 
argparse.ArgumentParser(description='vw job') - parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') - parser.add_argument('num_tasks', type=int) - parser.add_argument('--results_dir', default='../../../figs/') - parser.add_argument('--ds_dir', default='../../../vwshuffled/') - parser.add_argument('--num_learning_rates', type=int, default=1) - parser.add_argument('--num_datasets', type=int, default=-1) - parser.add_argument('--num_folds', type=int, default=1) - - args = parser.parse_args() - flag_dir = args.results_dir + 'flag/' - - mod = model() - mod.num_tasks = args.num_tasks - mod.task_id = args.task_id - mod.vw_path = '../vowpalwabbit/vw' - mod.ds_path = args.ds_dir - mod.results_path = args.results_dir - print 'reading dataset files..' - #TODO: this line specifically for multiple folds - #Need a systematic way to detect subfolder names - mod.dss = ds_files(mod.ds_path + '1/') - - print len(mod.dss) - - if args.num_datasets == -1 or args.num_datasets > len(mod.dss): - pass - else: - mod.dss = mod.dss[:args.num_datasets] - - #print mod.dss - - if args.task_id == 0: - # Compile vw in one of the subfolders - #process = subprocess.Popen('make -C .. clean; make -C ..', shell=True, stdout=f, stderr=f) - #subprocess.check_call(cmd, shell=True) - #process.wait() - - # To avoid race condition of writing to the same file at the same time - create_dir(args.results_dir) - - # This is specifically designed for teamscratch, as accessing a folder - # with a huge number of result files can be super slow. Hence, we create a - # subfolder for each dataset to alleviate this. - for ds in mod.dss: - ds_no_suffix = remove_suffix(ds) - create_dir(args.results_dir + ds_no_suffix + '/') - - create_dir(flag_dir) - else: - # may still have the potential of race condition on those subfolders (if - # we have a lot of datasets to run and the datasets are small) - while not os.path.exists(flag_dir): - time.sleep(1) - - if args.num_learning_rates <= 0: - mod.learning_rates = [gen_lr(0)] - else: - mod.learning_rates = [gen_lr(i) for i in range(args.num_learning_rates)] - #mod.folds = range(1,11) - mod.folds = range(1, args.num_folds+1) - - #mod.dss = ["ds_223_63.vw.gz"] - #mod.dss = mod.dss[:5] - - print 'generating tasks..' 
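# For reference, the first nine rungs of the gen_lr ladder defined above
# (n = 0..8) expand outward from 0.1/0.3 by factors of 10:
#   0.1, 0.03, 0.3, 0.01, 1.0, 0.003, 3.0, 0.001, 10.0
# so --num_learning_rates 9 sweeps learning rates spanning [0.001, 10.0].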
- # here, we are generating the task specific parameter settings - # by first generate all parameter setting and pick every num_tasks of them - mod.config_task = params_per_task(mod) - print 'task ' + str(mod.task_id) + ' of ' + str(mod.num_tasks) + ':' - print len(mod.config_task) - - #print mod.ds_task - # we only need to vary the warm start fraction, and there is no need to vary the bandit fraction, - # as each run of vw automatically accumulates the bandit dataset - main_loop(mod) diff --git a/scripts/run_vw_job.py b/scripts/run_vw_job.py deleted file mode 100644 index d2551819f4e..00000000000 --- a/scripts/run_vw_job.py +++ /dev/null @@ -1,205 +0,0 @@ -import argparse -import os -import re -import subprocess -import sys -import time - -USE_ADF = True -USE_CS = False - -VW = '/scratch/clear/abietti/.local/bin/vw' -if USE_CS: - VW_DS_DIR = '/scratch/clear/abietti/cb_eval/vwshuffled_cs/' - DIR_PATTERN = '/scratch/clear/abietti/cb_eval/res_cs/cbresults_{}/' -else: - VW_DS_DIR = '/scratch/clear/abietti/cb_eval/vwshuffled/' - DIR_PATTERN = '/scratch/clear/abietti/cb_eval/res/cbresults_{}/' -# VW_DS_DIR = '/bscratch/b-albiet/vwshuffled/' -# DIR_PATTERN = '/bscratch/b-albiet/cbresults_{}/' - -rgx = re.compile('^average loss = (.*)$', flags=re.M) - - -def expand_cover(policies): - algs = [] - for psi in [0, 0.01, 0.1, 1.0]: - algs.append(('cover', policies, 'psi', psi)) - algs.append(('cover', policies, 'psi', psi, 'nounif', None)) - # algs.append(('cover', policies, 'psi', psi, 'nounifagree', None, 'agree_mellowness', 0.1)) - # algs.append(('cover', policies, 'psi', psi, 'nounifagree', None, 'agree_mellowness', 0.01)) - return algs - -params_old = { - 'alg': [ - ('supervised',), - ('epsilon', 0), - ('epsilon', 0.02), - ('epsilon', 0.05), - ('epsilon', 0.1), - ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1.0), - ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-2), - ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-4), - ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-6), - # agree - ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1.0), - ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-2), - ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-4), - ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-6), - ('bag', 2), - ('bag', 4), - ('bag', 8), - ('bag', 16), - ('bag', 2, 'greedify', None), - ('bag', 4, 'greedify', None), - ('bag', 8, 'greedify', None), - ('bag', 16, 'greedify', None), - ] + expand_cover(1) + expand_cover(4) + expand_cover(8) + expand_cover(16), - 'learning_rate': [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0], - 'cb_type': ['dr', 'ips', 'mtr'], - } - -params = { - 'alg': [ - ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1.0), - ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-2), - ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-4), - ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-6), - ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1.0), - ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-2), - ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-4), - ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-6), - ], - 'learning_rate': [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0], - 'cb_type': ['dr', 'ips', 'mtr'], - } - -extra_flags = None -# extra_flags = ['--loss0', '9', '--loss1', '10', '--baseline'] - -def param_grid(): - grid = [{}] - for k in params: - new_grid = [] - for g in grid: - for 
param in params[k]: - gg = g.copy() - gg[k] = param - new_grid.append(gg) - grid = new_grid - - return sorted(grid) - - -def ds_files(): - import glob - return sorted(glob.glob(os.path.join(VW_DS_DIR, '*.vw.gz'))) - - -def get_task_name(ds, params): - did, n_actions = os.path.basename(ds).split('.')[0].split('_')[1:] - did, n_actions = int(did), int(n_actions) - - task_name = 'ds:{}|na:{}'.format(did, n_actions) - if len(params) > 1: - task_name += '|' + '|'.join('{}:{}'.format(k, v) for k, v in sorted(params.items()) if k != 'alg') - task_name += '|' + ':'.join([str(p) for p in params['alg'] if p is not None]) - return task_name - - -def process(ds, params, results_dir): - print 'processing', ds, params - did, n_actions = os.path.basename(ds).split('.')[0].split('_')[1:] - did, n_actions = int(did), int(n_actions) - - cmd = [VW, ds, '-b', '24'] - for k, v in params.iteritems(): - if k == 'alg': - if v[0] == 'supervised': - cmd += ['--csoaa' if USE_CS else '--oaa', str(n_actions)] - else: - cmd += ['--cbify', str(n_actions)] - if USE_CS: - cmd += ['--cbify_cs'] - if extra_flags: - cmd += extra_flags - if USE_ADF: - cmd += ['--cb_explore_adf'] - assert len(v) % 2 == 0, 'params should be in pairs of (option, value)' - for i in range(len(v) / 2): - cmd += ['--{}'.format(v[2 * i])] - if v[2 * i + 1] is not None: - cmd += [str(v[2 * i + 1])] - else: - if params['alg'][0] == 'supervised' and k == 'cb_type': - pass - else: - cmd += ['--{}'.format(k), str(v)] - - print 'running', cmd - t = time.time() - output = subprocess.check_output(cmd, stderr=subprocess.STDOUT) - sys.stderr.write('\n\n{}, {}, time: {}, output:\n'.format(ds, params, time.time() - t)) - sys.stderr.write(output) - pv_loss = float(rgx.findall(output)[0]) - print 'elapsed time:', time.time() - t, 'pv loss:', pv_loss - - return pv_loss - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='vw job') - parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') - parser.add_argument('num_tasks', type=int) - parser.add_argument('--task_offset', type=int, default=0, - help='offset for task_id in output filenames') - parser.add_argument('--results_dir', default=DIR_PATTERN.format('agree01')) - parser.add_argument('--name', default=None) - parser.add_argument('--test', action='store_true') - parser.add_argument('--flags', default=None, help='extra flags for cb algorithms') - args = parser.parse_args() - - if args.name is not None: - args.results_dir = DIR_PATTERN.format(args.name) - - if args.flags is not None: - extra_flags = args.flags.split() - grid = param_grid() - dss = ds_files() - tot_jobs = len(grid) * len(dss) - - if args.task_id == 0: - if not os.path.exists(args.results_dir): - os.makedirs(args.results_dir) - import stat - os.chmod(args.results_dir, os.stat(args.results_dir).st_mode | stat.S_IWOTH) - else: - while not os.path.exists(args.results_dir): - time.sleep(1) - if not args.test: - fname = os.path.join(args.results_dir, 'loss{}.txt'.format(args.task_offset + args.task_id)) - done_tasks = set() - if os.path.exists(fname): - done_tasks = set([line.split()[0] for line in open(fname).readlines()]) - loss_file = open(fname, 'a') - idx = args.task_id - while idx < tot_jobs: - ds = dss[idx / len(grid)] - params = grid[idx % len(grid)] - if args.test: - print ds, params - else: - task_name = get_task_name(ds, params) - if task_name not in done_tasks: - try: - pv_loss = process(ds, params, args.results_dir) - loss_file.write('{} {}\n'.format(task_name, pv_loss)) - 
loss_file.flush() - os.fsync(loss_file.fileno()) - except subprocess.CalledProcessError: - sys.stderr.write('\nERROR: TASK FAILED {} {}\n\n'.format(ds, params)) - print 'ERROR: TASK FAILED', ds, params - idx += args.num_tasks - - if not args.test: - loss_file.close() diff --git a/scripts/shuffle.sh b/scripts/shuffle.sh deleted file mode 100644 index 69aacfc3ee5..00000000000 --- a/scripts/shuffle.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -suffix=".gz" - -for filename in ./*.vw.gz; do - vw_name=$(echo "$filename" | sed -e "s/$suffix$//") - echo $vw_name - zcat $filename | shuf > ../vwshuffled/$vw_name - gzip ../vwshuffled/$vw_name -done From 5561a123a4dfd5a9e8a2735464b7f6a0b2a761ff Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 11:10:49 -0400 Subject: [PATCH 103/127] removed spurious changes --- Makefile | 2 +- vowpalwabbit/cb_adf.cc | 2 +- vowpalwabbit/cb_explore.cc | 14 ------------ vowpalwabbit/cb_explore_adf.cc | 12 +--------- vowpalwabbit/cost_sensitive.cc | 1 - vowpalwabbit/csoaa.cc | 8 +------ vowpalwabbit/example.h | 1 - vowpalwabbit/gd.cc | 1 - vowpalwabbit/gen_cs_example.cc | 42 +++++++++++++++------------------- vowpalwabbit/gen_cs_example.h | 6 ++--- vowpalwabbit/learner.h | 26 ++++----------------- 11 files changed, 30 insertions(+), 85 deletions(-) diff --git a/Makefile b/Makefile index 5f0d7c3c69a..fe59f2f34fb 100644 --- a/Makefile +++ b/Makefile @@ -86,7 +86,7 @@ FLAGS = -std=c++11 $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) $(OPTIM_FLAGS) -D_ #CXX = g++ # for valgrind / gdb debugging -FLAGS = -std=c++11 $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) $(JSON_INCLUDE) -g -O0 -fPIC +#FLAGS = -std=c++11 $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) $(JSON_INCLUDE) -g -O0 -fPIC # for valgrind profiling: run 'valgrind --tool=callgrind PROGRAM' then 'callgrind_annotate --tree=both --inclusive=yes' #FLAGS = -std=c++11 $(CFLAGS) $(LDFLAGS) -Wall $(ARCH) -ffast-math -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) $(JSON_INCLUDE) -g -fomit-frame-pointer -ffast-math -fno-strict-aliasing -fPIC diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index 53a8bb5a4db..72b8c0699b5 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc @@ -349,7 +349,7 @@ base_learner* cb_adf_setup(arguments& arg) if (arg.new_options("Contextual Bandit with Action Dependent Features") .critical("cb_adf", "Do Contextual Bandit learning with multiline action dependent features.") .keep(ld->rank_all, "rank_all", "Return actions sorted by score order") - (ld->no_predict, "no_predict", "Do not do a prediction when training") + (ld->no_predict, "no_predict", "Do not do a prediction when training") .keep("cb_type", type_string, (string)"ips", "contextual bandit method to use in {ips,dm,dr, mtr}").missing()) return nullptr; diff --git a/vowpalwabbit/cb_explore.cc b/vowpalwabbit/cb_explore.cc index cf04c811ecb..5cb58f303c4 100644 --- a/vowpalwabbit/cb_explore.cc +++ b/vowpalwabbit/cb_explore.cc @@ -22,8 +22,6 @@ struct cb_explore cb_to_cs cbcs; v_array preds; v_array cover_probs; - v_array cost_lambda; - v_array lambdas; CB::label cb_label; COST_SENSITIVE::label cs_label; @@ -36,8 +34,6 @@ struct cb_explore size_t bag_size; size_t cover_size; float psi; - size_t lambda_size; - float n_2; size_t counter; @@ -192,21 +188,11 @@ void predict_or_learn_cover(cb_explore& data, single_learner& base, example& ec) data.cs_label.costs.clear(); float norm = min_prob * num_actions; ec.l.cb = data.cb_label; - 
data.cbcs.known_cost = get_observed_cost(data.cb_label); - //cout<<"cbcs's cb type is "<(data.cbcs, ec, data.cb_label, data.cs_label); for(uint32_t i = 0; i < num_actions; i++) probabilities[i] = 0; - //for (size_t i = 0; i < data.cbcs.num_actions; i++) - // cout<<"action "< void predict_or_learn_greedy(cb_explore_adf& data, multi_learner& base, multi_ex& examples) { - //cout<<"data offset = "<(base, examples, data.offset); else multiline_learn_or_predict(base, examples, data.offset); - //cout<<"example feature offset after = "<ft_offset<pred.a_s; uint32_t num_actions = (uint32_t)preds.size(); @@ -789,12 +784,7 @@ base_learner* cb_explore_adf_setup(arguments& arg) data->explore_type = REGCB; else { - if (!arg.vm.count("epsilon")) - { - data->epsilon = 0.05f; - //a hacky way of passing the implicit epsilon value to cbify - arg.vm.insert(std::make_pair("epsilon", boost::program_options::variable_value(data->epsilon, false))); - } + if (!arg.vm.count("epsilon")) data->epsilon = 0.05f; data->explore_type = EPS_GREEDY; } diff --git a/vowpalwabbit/cost_sensitive.cc b/vowpalwabbit/cost_sensitive.cc index 03065517303..80e8e5c4438 100644 --- a/vowpalwabbit/cost_sensitive.cc +++ b/vowpalwabbit/cost_sensitive.cc @@ -312,7 +312,6 @@ void finish_example(vw& all, example& ec) bool example_is_test(example& ec) { v_array costs = ec.l.cs.costs; - //cout << "is_test " << costs.size() << endl; if (costs.size() == 0) return true; for (size_t j=0; jweight; - //cout << "weight = " << ec->weight << endl; if (!data.treat_as_classifier) // treat like regression simple_label.label = costs[0].x; else // treat like classification { - //cout << "here" << endl; if (costs[0].x <= min_cost) { simple_label.label = -1.; diff --git a/vowpalwabbit/example.h b/vowpalwabbit/example.h index 1a174cc2902..b9dd6388021 100644 --- a/vowpalwabbit/example.h +++ b/vowpalwabbit/example.h @@ -107,7 +107,6 @@ void free_flatten_example(flat_example* fec); inline int example_is_newline(example& ec) { // if only index is constant namespace or no index - // std::cout << "call e_i_n " << ec.indices.size() << " " << ec.tag.size() << std::endl; if (ec.tag.size() > 0) return false; return ((ec.indices.size() == 0) || ((ec.indices.size() == 1) && diff --git a/vowpalwabbit/gd.cc b/vowpalwabbit/gd.cc index 6bfba8e21e7..f77ae3be9e5 100644 --- a/vowpalwabbit/gd.cc +++ b/vowpalwabbit/gd.cc @@ -651,7 +651,6 @@ void learn(gd& g, base_learner& base, example& ec) assert(ec.l.simple.label != FLT_MAX); assert(ec.weight > 0.); g.predict(g,base,ec); - //cout << "iw = " << ec.weight << endl; update(g,base,ec); } diff --git a/vowpalwabbit/gen_cs_example.cc b/vowpalwabbit/gen_cs_example.cc index f5ade2627cc..8fe85b46696 100644 --- a/vowpalwabbit/gen_cs_example.cc +++ b/vowpalwabbit/gen_cs_example.cc @@ -49,7 +49,7 @@ void gen_cs_example_ips(multi_ex& examples, COST_SENSITIVE::label& cs_labels) for (uint32_t i = 0; i < examples.size(); i++) { CB::label ld = examples[i]->l.cb; - //std::cout << "example weight = " << examples[i]->weight << std::endl; + COST_SENSITIVE::wclass wc = {0.,i,0.,0.}; if (shared && i > 0) wc.class_index = (uint32_t)i-1; @@ -112,35 +112,31 @@ void gen_cs_test_example(multi_ex& examples, COST_SENSITIVE::label& cs_labels) //single line version void gen_cs_example_ips(cb_to_cs& c, CB::label& ld, COST_SENSITIVE::label& cs_ld) { - //std::cout<<"-------"<action<action) - { - wc.x = c.known_cost->cost / safe_probability(c.known_cost->probability); //use importance weighted cost for observed action, 0 otherwise - //ips can be thought as the doubly 
robust method with a fixed regressor that predicts 0 costs for everything - //update the loss of this regressor - c.nb_ex_regressors++; - c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost)*(c.known_cost->cost) - c.avg_loss_regressors); - c.last_pred_reg = 0; - c.last_correct_cost = c.known_cost->cost; - } - - cs_ld.costs.push_back(wc); - } + COST_SENSITIVE::wclass wc = {0.,i,0.,0.}; + if (c.known_cost != nullptr && i == c.known_cost->action) + { + wc.x = c.known_cost->cost / safe_probability(c.known_cost->probability); //use importance weighted cost for observed action, 0 otherwise + //ips can be thought as the doubly robust method with a fixed regressor that predicts 0 costs for everything + //update the loss of this regressor + c.nb_ex_regressors++; + c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost)*(c.known_cost->cost) - c.avg_loss_regressors); + c.last_pred_reg = 0; + c.last_correct_cost = c.known_cost->cost; + } + + cs_ld.costs.push_back(wc); } + } else //this is an example where we can only perform a subset of the actions { - //std::cout<<"---not typical----"<(c.scorer, c.known_cost, ec, action, c.num_actions); - //std::cout<<"wc.x = "<ft_offset = "<ft_offset<<" i = "<ft_offset += static_cast(increment * i); - } } inline void decrement_offset(example& ex, const size_t increment, const size_t i) -{ - //std::cout<<"in decrement_offset singleex: increment = "<ft_offset = "<ft_offset<<" i = "<ft_offset >= increment * i); + { assert(ec->ft_offset >= increment * i); ec->ft_offset -= static_cast(increment * i); } } @@ -453,9 +442,7 @@ template struct learner void multiline_learn_or_predict(multi_learner& base, multi_ex& examples, const uint64_t offset, const uint32_t id = 0) { std::vector saved_offsets; for (auto ec : examples) - { - //std::cout<<"saved offsets before = "<ft_offset<ft_offset); + { saved_offsets.push_back(ec->ft_offset); ec->ft_offset = offset; } @@ -465,9 +452,6 @@ template struct learner base.predict(examples, id); for (size_t i = 0; i < examples.size(); i++) - { examples[i]->ft_offset = saved_offsets[i]; - //std::cout<<"saved offsets after = "< Date: Mon, 6 Aug 2018 11:20:22 -0400 Subject: [PATCH 104/127] removed spurious changes --- vowpalwabbit/gen_cs_example.cc | 37 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/vowpalwabbit/gen_cs_example.cc b/vowpalwabbit/gen_cs_example.cc index 8fe85b46696..97eb548269c 100644 --- a/vowpalwabbit/gen_cs_example.cc +++ b/vowpalwabbit/gen_cs_example.cc @@ -115,26 +115,27 @@ void gen_cs_example_ips(cb_to_cs& c, CB::label& ld, COST_SENSITIVE::label& cs_ld //this implements the inverse propensity score method, where cost are importance weighted by the probability of the chosen action //generate cost-sensitive example cs_ld.costs.clear(); - if (ld.costs.size() == 1 || ld.costs.size() == 0) //this is a typical example where we can perform all actions - { - //in this case generate cost-sensitive example with all actions - for (uint32_t i = 1; i <= c.num_actions; i++) + if (ld.costs.size() == 0 || (ld.costs.size() == 1 && ld.costs[0].cost != FLT_MAX)) + //this is a typical example where we can perform all actions { - COST_SENSITIVE::wclass wc = {0.,i,0.,0.}; - if (c.known_cost != nullptr && i == c.known_cost->action) - { - wc.x = c.known_cost->cost / safe_probability(c.known_cost->probability); //use importance weighted cost for observed action, 0 otherwise - //ips can be thought as the doubly robust method with a fixed regressor that predicts 
0 costs for everything - //update the loss of this regressor - c.nb_ex_regressors++; - c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost)*(c.known_cost->cost) - c.avg_loss_regressors); - c.last_pred_reg = 0; - c.last_correct_cost = c.known_cost->cost; - } - - cs_ld.costs.push_back(wc); + //in this case generate cost-sensitive example with all actions + for (uint32_t i = 1; i <= c.num_actions; i++) + { + COST_SENSITIVE::wclass wc = {0.,i,0.,0.}; + if (c.known_cost != nullptr && i == c.known_cost->action) + { + wc.x = c.known_cost->cost / safe_probability(c.known_cost->probability); //use importance weighted cost for observed action, 0 otherwise + //ips can be thought as the doubly robust method with a fixed regressor that predicts 0 costs for everything + //update the loss of this regressor + c.nb_ex_regressors++; + c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost)*(c.known_cost->cost) - c.avg_loss_regressors); + c.last_pred_reg = 0; + c.last_correct_cost = c.known_cost->cost; + } + + cs_ld.costs.push_back(wc); + } } - } else //this is an example where we can only perform a subset of the actions { //in this case generate cost-sensitive example with only allowed actions From d9573e124cbac7aa5c7a2e27274b59ccb17fc4b2 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 11:45:56 -0400 Subject: [PATCH 105/127] undoing the weight scaling by 1/k in mtr --- vowpalwabbit/cb_adf.cc | 8 +++----- vowpalwabbit/gen_cs_example.h | 4 ---- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index 72b8c0699b5..a5291eb24a2 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc @@ -113,9 +113,7 @@ void learn_MTR(cb_adf& mydata, multi_learner& base, multi_ex& examples) gen_cs_example_mtr(mydata.gen_cs, examples, mydata.cs_labels); uint32_t nf = (uint32_t)examples[mydata.gen_cs.mtr_example]->num_features; float old_weight = examples[mydata.gen_cs.mtr_example]->weight; - - //adjust the importance weight to scale by a factor of 1/num_actions (the last term) - examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / mydata.gen_cs.num_actions); + examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum); GEN_CS::call_cs_ldf(base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset); examples[mydata.gen_cs.mtr_example]->num_features = nf; examples[mydata.gen_cs.mtr_example]->weight = old_weight; @@ -354,7 +352,6 @@ base_learner* cb_adf_setup(arguments& arg) return nullptr; ld->all = arg.all; - ld->gen_cs.num_actions = arg.vm["cbify"].as(); // number of weight vectors needed size_t problem_multiplier = 1;//default for IPS @@ -386,9 +383,10 @@ base_learner* cb_adf_setup(arguments& arg) || ld->rank_all || arg.vm.count("csoaa_rank") == 0) { if (count(arg.args.begin(), arg.args.end(), "--csoaa_ldf") == 0) + { arg.args.push_back("--csoaa_ldf"); - if (count(arg.args.begin(), arg.args.end(), "multiline") == 0) arg.args.push_back("multiline"); + } if (count(arg.args.begin(), arg.args.end(), "--csoaa_rank") == 0) arg.args.push_back("--csoaa_rank"); } diff --git a/vowpalwabbit/gen_cs_example.h b/vowpalwabbit/gen_cs_example.h index ac555a8c0f2..f60bf9821cf 100644 --- 
a/vowpalwabbit/gen_cs_example.h +++ b/vowpalwabbit/gen_cs_example.h @@ -40,10 +40,6 @@ struct cb_to_cs_adf COST_SENSITIVE::label pred_scores; CB::cb_class known_cost; LEARNER::single_learner* scorer; - - //for scaling the weights in MTR - uint32_t num_actions; - }; CB::cb_class* get_observed_cost(CB::label& ld); From 01bf93e744050d80790627af137813526f8e1ab5 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 12:01:23 -0400 Subject: [PATCH 106/127] updated tests --- test/RunTests | 18 +++++++++--------- test/train-sets/ref/cbify_ws_cyc.stderr | 8 ++++---- test/train-sets/ref/cbify_ws_maj.stderr | 6 +++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/test/RunTests b/test/RunTests index 5f51bb68642..fa4708f092c 100755 --- a/test/RunTests +++ b/test/RunTests @@ -1635,36 +1635,36 @@ echo "1 | feature:1" | {VW} -a --initial_weight 0.1 --initial_t 0.3 # Test 175 cbify warm start {VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 -d train-sets/multiclass - /train-sets/ref/cbify_ws.stderr + train-sets/ref/cbify_ws.stderr # Test 176 cbify warm start with lambda set containing 0/1 {VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --lambda_scheme 2 -d train-sets/multiclass - /train-sets/ref/cbify_ws_lambda_zeroone.stderr + train-sets/ref/cbify_ws_lambda_zeroone.stderr # Test 177 cbify warm start with warm start update turned off {VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_update false -d train-sets/multiclass - /train-sets/ref/cbify_ws_no_ws_upd.stderr + train-sets/ref/cbify_ws_no_ws_upd.stderr # Test 178 cbify warm start with interaction update turned off {VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --interaction_update false -d train-sets/multiclass - /train-sets/ref/cbify_ws_no_int_upd.stderr + train-sets/ref/cbify_ws_no_int_upd.stderr # Test 179 cbify warm start with bandit warm start type (Sim-Bandit) {VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_type 2 -d train-sets/multiclass - /train-sets/ref/cbify_ws_simbandit.stderr + train-sets/ref/cbify_ws_simbandit.stderr # Test 180 cbify warm start with UAR supervised corruption {VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 1 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass - /train-sets/ref/cbify_ws_uar.stderr + train-sets/ref/cbify_ws_uar.stderr # Test 181 cbify warm start with CYC supervised corruption {VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 2 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass - /train-sets/ref/cbify_ws_cyc.stderr + train-sets/ref/cbify_ws_cyc.stderr # Test 182 cbify warm start with MAJ supervised corruption {VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 3 --corrupt_prob_warm_start 0.5 --overwrite_label 1 -d train-sets/multiclass - /train-sets/ref/cbify_ws_maj.stderr + train-sets/ref/cbify_ws_maj.stderr # Test 183 cbify warm start with warm start distribution being the ground truth {VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 
7 --choices_lambda 8 --corrupt_type_interaction 2 --corrupt_prob_interaction 0.5 --weighting_scheme 2 --validation_method 2 -d train-sets/multiclass - /train-sets/ref/cbify_ws_wsgt.stderr + train-sets/ref/cbify_ws_wsgt.stderr diff --git a/test/train-sets/ref/cbify_ws_cyc.stderr b/test/train-sets/ref/cbify_ws_cyc.stderr index 6d05ba5a0db..a1affe4ec96 100644 --- a/test/train-sets/ref/cbify_ws_cyc.stderr +++ b/test/train-sets/ref/cbify_ws_cyc.stderr @@ -7,13 +7,13 @@ Reading datafile = train-sets/multiclass num sources = 1 average since example example current current current loss last counter weight label predict features -1.000000 1.000000 4 1.0 4 3 2 -1.000000 1.000000 5 2.0 5 3 2 -1.000000 1.000000 7 4.0 7 3 2 +0.000000 0.000000 4 1.0 4 4 2 +0.500000 1.000000 5 2.0 5 4 2 +0.750000 1.000000 7 4.0 7 3 2 finished run number of examples = 10 weighted example sum = 7.000000 weighted label sum = 0.000000 -average loss = 1.000000 +average loss = 0.857143 total feature number = 20 diff --git a/test/train-sets/ref/cbify_ws_maj.stderr b/test/train-sets/ref/cbify_ws_maj.stderr index 6d05ba5a0db..2a12135dfa0 100644 --- a/test/train-sets/ref/cbify_ws_maj.stderr +++ b/test/train-sets/ref/cbify_ws_maj.stderr @@ -7,9 +7,9 @@ Reading datafile = train-sets/multiclass num sources = 1 average since example example current current current loss last counter weight label predict features -1.000000 1.000000 4 1.0 4 3 2 -1.000000 1.000000 5 2.0 5 3 2 -1.000000 1.000000 7 4.0 7 3 2 +1.000000 1.000000 4 1.0 4 1 2 +1.000000 1.000000 5 2.0 5 1 2 +1.000000 1.000000 7 4.0 7 1 2 finished run number of examples = 10 From 83da642f7203bae563f7de4eb5a3d6aa02141022 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 14:25:24 -0400 Subject: [PATCH 107/127] added warm_cb as a separate file --- test/RunTests | 20 +- vowpalwabbit/Makefile.am | 2 +- vowpalwabbit/cbify.cc | 609 ++------------------------- vowpalwabbit/parse_args.cc | 2 + vowpalwabbit/warm_cb.cc | 831 +++++++++++++++++++++++++++++++++++++ vowpalwabbit/warm_cb.h | 1 + 6 files changed, 872 insertions(+), 593 deletions(-) create mode 100644 vowpalwabbit/warm_cb.cc create mode 100644 vowpalwabbit/warm_cb.h diff --git a/test/RunTests b/test/RunTests index fa4708f092c..833b295b9ab 100755 --- a/test/RunTests +++ b/test/RunTests @@ -1633,38 +1633,38 @@ echo "1 | feature:1" | {VW} -a --initial_weight 0.1 --initial_t 0.3 {VW} --cbify 10 --cb_explore_adf --cb_type mtr --regcbopt --mellowness 0.01 -d train-sets/multiclass train-sets/ref/cbify_regcbopt.stderr -# Test 175 cbify warm start -{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 -d train-sets/multiclass +# Test 175 warm_cb warm start +{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 -d train-sets/multiclass train-sets/ref/cbify_ws.stderr # Test 176 cbify warm start with lambda set containing 0/1 -{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --lambda_scheme 2 -d train-sets/multiclass +{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --lambda_scheme 2 -d train-sets/multiclass train-sets/ref/cbify_ws_lambda_zeroone.stderr # Test 177 cbify warm start with warm start update turned off -{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_update false -d train-sets/multiclass +{VW} 
--warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_update false -d train-sets/multiclass train-sets/ref/cbify_ws_no_ws_upd.stderr # Test 178 cbify warm start with interaction update turned off -{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --interaction_update false -d train-sets/multiclass +{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --interaction_update false -d train-sets/multiclass train-sets/ref/cbify_ws_no_int_upd.stderr # Test 179 cbify warm start with bandit warm start type (Sim-Bandit) -{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_type 2 -d train-sets/multiclass +{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_type 2 -d train-sets/multiclass train-sets/ref/cbify_ws_simbandit.stderr # Test 180 cbify warm start with UAR supervised corruption -{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 1 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass +{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 1 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass train-sets/ref/cbify_ws_uar.stderr # Test 181 cbify warm start with CYC supervised corruption -{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 2 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass +{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 2 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass train-sets/ref/cbify_ws_cyc.stderr # Test 182 cbify warm start with MAJ supervised corruption -{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 3 --corrupt_prob_warm_start 0.5 --overwrite_label 1 -d train-sets/multiclass +{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 3 --corrupt_prob_warm_start 0.5 --overwrite_label 1 -d train-sets/multiclass train-sets/ref/cbify_ws_maj.stderr # Test 183 cbify warm start with warm start distribution being the ground truth -{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_interaction 2 --corrupt_prob_interaction 0.5 --weighting_scheme 2 --validation_method 2 -d train-sets/multiclass +{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_interaction 2 --corrupt_prob_interaction 0.5 --weighting_scheme 2 --validation_method 2 -d train-sets/multiclass train-sets/ref/cbify_ws_wsgt.stderr diff --git a/vowpalwabbit/Makefile.am b/vowpalwabbit/Makefile.am index 127d68887fe..cfab1395555 100644 --- a/vowpalwabbit/Makefile.am +++ b/vowpalwabbit/Makefile.am @@ -4,7 +4,7 @@ liballreduce_la_SOURCES = allreduce_sockets.cc allreduce_threads.cc vw_exception bin_PROGRAMS = vw active_interactor -libvw_la_SOURCES = parser_helper.cc global_data.cc io_buf.cc parse_regressor.cc 
parse_primitives.cc unique_sort.cc cache.cc rand48.cc no_label.cc simple_label.cc multiclass.cc oaa.cc multilabel_oaa.cc boosting.cc ect.cc marginal.cc autolink.cc binary.cc lrq.cc cost_sensitive.cc multilabel.cc label_dictionary.cc csoaa.cc cb.cc cb_adf.cc cb_algs.cc search.cc search_meta.cc search_sequencetask.cc search_dep_parser.cc search_hooktask.cc search_multiclasstask.cc search_entityrelationtask.cc search_graph.cc parse_example.cc scorer.cc network.cc parse_args.cc accumulate.cc gd.cc learner.cc mwt.cc lda_core.cc gd_mf.cc mf.cc bfgs.cc noop.cc print.cc example.cc parser.cc loss_functions.cc sender.cc nn.cc confidence.cc bs.cc cbify.cc explore_eval.cc topk.cc stagewise_poly.cc log_multi.cc recall_tree.cc active.cc active_cover.cc cs_active.cc kernel_svm.cc best_constant.cc ftrl.cc svrg.cc lrqfa.cc interact.cc comp_io.cc interactions.cc vw_exception.cc vw_validate.cc audit_regressor.cc gen_cs_example.cc cb_explore.cc action_score.cc cb_explore_adf.cc OjaNewton.cc parse_example_json.cc baseline.cc classweight.cc +libvw_la_SOURCES = parser_helper.cc global_data.cc io_buf.cc parse_regressor.cc parse_primitives.cc unique_sort.cc cache.cc rand48.cc no_label.cc simple_label.cc multiclass.cc oaa.cc multilabel_oaa.cc boosting.cc ect.cc marginal.cc autolink.cc binary.cc lrq.cc cost_sensitive.cc multilabel.cc label_dictionary.cc csoaa.cc cb.cc cb_adf.cc cb_algs.cc search.cc search_meta.cc search_sequencetask.cc search_dep_parser.cc search_hooktask.cc search_multiclasstask.cc search_entityrelationtask.cc search_graph.cc parse_example.cc scorer.cc network.cc parse_args.cc accumulate.cc gd.cc learner.cc mwt.cc lda_core.cc gd_mf.cc mf.cc bfgs.cc noop.cc print.cc example.cc parser.cc loss_functions.cc sender.cc nn.cc confidence.cc bs.cc cbify.cc explore_eval.cc topk.cc stagewise_poly.cc log_multi.cc recall_tree.cc active.cc active_cover.cc cs_active.cc kernel_svm.cc best_constant.cc ftrl.cc svrg.cc lrqfa.cc interact.cc comp_io.cc interactions.cc vw_exception.cc vw_validate.cc audit_regressor.cc gen_cs_example.cc cb_explore.cc action_score.cc cb_explore_adf.cc OjaNewton.cc parse_example_json.cc baseline.cc classweight.cc warm_cb.cc libvw_c_wrapper_la_SOURCES = vwdll.cpp diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 1947a0734e6..d3b2752d260 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -4,7 +4,7 @@ #include "rand48.h" #include "bs.h" #include "vw.h" -#include "../explore/hash.h" +#include "hash.h" #include "explore.h" #include @@ -14,30 +14,6 @@ using namespace exploration; using namespace ACTION_SCORE; using namespace std; -#define WARM_START 1 -#define INTERACTION 2 -#define SKIP 3 - -#define SUPERVISED_WS 1 -#define BANDIT_WS 2 - -#define UAR 1 -#define CIRCULAR 2 -#define OVERWRITE 3 - -#define INTER_VALI 1 -#define WS_VALI_SPLIT 2 -#define WS_VALI_NOSPLIT 3 - -#define INSTANCE_WT 1 -#define DATASET_WT 2 - -#define ABS_CENTRAL 1 -#define ABS_CENTRAL_ZEROONE 2 -#define MINIMAX_CENTRAL 3 -#define MINIMAX_CENTRAL_ZEROONE 4 - - struct cbify; struct cbify_adf_data @@ -58,42 +34,6 @@ struct cbify cbify_adf_data adf_data; float loss0; float loss1; - - //warm start parameters - uint32_t ws_period; - uint32_t inter_period; - uint32_t choices_lambda; - bool upd_ws; - bool upd_inter; - int cor_type_ws; - float cor_prob_ws; - int cor_type_inter; - float cor_prob_inter; - int vali_method; - int wt_scheme; - int lambda_scheme; - uint32_t overwrite_label; - int ws_type; - - //auxiliary variables - uint32_t num_actions; - float epsilon; - vector lambdas; - action_scores 
a_s_adf; - vector cumulative_costs; - CB::cb_class cl_adf; - uint32_t ws_train_size; - uint32_t ws_vali_size; - vector ws_vali; - float cumu_var; - uint32_t ws_iter; - uint32_t inter_iter; - MULTICLASS::label_t mc_label; - COST_SENSITIVE::label cs_label; - COST_SENSITIVE::label* csls; - CB::label* cbls; - bool use_cs; - }; float loss(cbify& data, uint32_t label, uint32_t final_prediction) @@ -118,42 +58,12 @@ float loss_cs(cbify& data, v_array& costs, uint32_t fina template inline void delete_it(T* p) { if (p != nullptr) delete p; } -template -uint32_t find_min(vector arr) -{ - T min_val = FLT_MAX; - uint32_t argmin = 0; - - for (uint32_t i = 0; i < arr.size(); i++) - { - //cout<pred.a_s.delete_v(); @@ -161,20 +71,6 @@ void finish(cbify& data) free_it(data.adf_data.ecs[a]); } data.adf_data.ecs.~vector(); - - data.lambdas.~vector(); - data.cumulative_costs.~vector(); - - data.a_s_adf.delete_v(); - for (size_t i = 0; i < data.ws_vali.size(); ++i) - { - if (data.use_cs) - VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, *data.ws_vali[i]); - else - VW::dealloc_example(MULTICLASS::mc_label.delete_label, *data.ws_vali[i]); - free(data.ws_vali[i]); - } - data.ws_vali.~vector(); } } @@ -211,141 +107,6 @@ void copy_example_to_adf(cbify& data, example& ec) } } -float minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period, size_t interaction_period) -{ - return epsilon / (num_actions + epsilon); -} - -void setup_lambdas(cbify& data) -{ - // The lambdas are arranged in ascending order - vector& lambdas = data.lambdas; - for (uint32_t i = 0; i 0; i--) - lambdas[i-1] = lambdas[i] / 2.0; - - for (uint32_t i = mid+1; i < data.choices_lambda; i++) - lambdas[i] = 1 - (1-lambdas[i-1]) / 2.0; - - if (data.lambda_scheme == MINIMAX_CENTRAL_ZEROONE || data.lambda_scheme == ABS_CENTRAL_ZEROONE) - { - lambdas[0] = 0.0; - lambdas[data.choices_lambda-1] = 1.0; - } -} - -uint32_t generate_uar_action(cbify& data) -{ - float randf = merand48(data.all->random_state); - - for (uint32_t i = 1; i <= data.num_actions; i++) - { - if (randf <= float(i) / data.num_actions) - return i; - } - return data.num_actions; -} - -uint32_t corrupt_action(cbify& data, uint32_t action, int ec_type) -{ - float cor_prob; - uint32_t cor_type; - uint32_t cor_action; - - if (ec_type == WARM_START) - { - cor_prob = data.cor_prob_ws; - cor_type = data.cor_type_ws; - } - else - { - cor_prob = data.cor_prob_inter; - cor_type = data.cor_type_inter; - } - - float randf = merand48(data.all->random_state); - if (randf < cor_prob) - { - if (cor_type == UAR) - cor_action = generate_uar_action(data); - else if (cor_type == OVERWRITE) - cor_action = data.overwrite_label; - else - cor_action = (action % data.num_actions) + 1; - } - else - cor_action = action; - return cor_action; -} - -bool ind_update(cbify& data, int ec_type) -{ - if (ec_type == WARM_START) - return data.upd_ws; - else - return data.upd_inter; -} - -float compute_weight_multiplier(cbify& data, size_t i, int ec_type) -{ - float weight_multiplier; - float ws_train_size = data.ws_train_size; - float inter_train_size = data.inter_period; - float total_train_size = ws_train_size + inter_train_size; - float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * inter_train_size; - - //cout< void predict_or_learn(cbify& data, single_learner& base, example& ec) { @@ -393,308 +154,44 @@ void predict_or_learn(cbify& data, single_learner& base, example& ec) ec.pred.multiclass = cl.action; } -uint32_t predict_sublearner_adf(cbify& data, multi_learner& 
base, example& ec, uint32_t i) -{ - //cout<<"predict using sublearner "<< i <ft_offset; - //multiline_learn_or_predict(base, data.adf_data.ecs, offset, i); - base.predict(data.adf_data.ecs, i); - //cout<<"greedy label = " << data.adf_data.ecs[0]->pred.a_s[0].action+1 << endl; - return data.adf_data.ecs[0]->pred.a_s[0].action+1; -} - -void accumu_costs_iv_adf(cbify& data, multi_learner& base, example& ec) -{ - CB::cb_class& cl = data.cl_adf; - //IPS for approximating the cumulative costs for all lambdas - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - uint32_t action = predict_sublearner_adf(data, base, ec, i); - - if (action == cl.action) - data.cumulative_costs[i] += cl.cost / cl.probability; - //cout< -void accumu_costs_wsv_adf(cbify& data, multi_learner& base) -{ - uint32_t ws_vali_size = data.ws_vali_size; - //only update cumulative costs every warm_start_period iterations - if ( data.inter_iter >= 1 && abs( log2(data.inter_iter+1) - floor(log2(data.inter_iter+1)) ) < 1e-4 ) - { - for (uint32_t i = 0; i < data.choices_lambda; i++) - data.cumulative_costs[i] = 0; - - uint32_t num_epochs = ceil(log2(data.inter_period)); - uint32_t epoch = log2(data.inter_iter+1) - 1; - float batch_vali_size = ((float) ws_vali_size) / num_epochs; - uint32_t lb, ub; - - if (data.vali_method == WS_VALI_SPLIT) - { - lb = ceil(batch_vali_size * epoch); - ub = ceil(batch_vali_size * (epoch + 1)); - } - else - { - lb = 0; - ub = ws_vali_size; - } - //cout<<"validation at iteration "<l.cs.costs, pred_label); - else - data.cumulative_costs[i] += loss(data, ec_vali->l.multi.label, pred_label); - - //cout< -void add_to_vali(cbify& data, example& ec) -{ - //TODO: set the first parameter properly - example* ec_copy = VW::alloc_examples(sizeof(polylabel), 1); - - if (use_cs) - VW::copy_example_data(false, ec_copy, &ec, 0, COST_SENSITIVE::cs_label.copy_label); - else - VW::copy_example_data(false, ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); - - data.ws_vali.push_back(ec_copy); -} - -uint32_t predict_sup_adf(cbify& data, multi_learner& base, example& ec) -{ - uint32_t argmin = find_min(data.cumulative_costs); - return predict_sublearner_adf(data, base, ec, argmin); -} - -template -void learn_sup_adf(cbify& data, multi_learner& base, example& ec, int ec_type) -{ - copy_example_to_adf(data, ec); - //generate cost-sensitive label (for CSOAA's temporary use) - auto& csls = data.csls; - auto& cbls = data.cbls; - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - csls[a].costs[0].class_index = a+1; - if (use_cs) - csls[a].costs[0].x = loss_cs(data, ec.l.cs.costs, a+1); - else - csls[a].costs[0].x = loss(data, ec.l.multi.label, a+1); - } - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - cbls[a] = data.adf_data.ecs[a]->l.cb; - data.adf_data.ecs[a]->l.cs = csls[a]; - //cout< old_weights; - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - old_weights.push_back(data.adf_data.ecs[a]->weight); - - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - float weight_multiplier = compute_weight_multiplier(data, i, ec_type); - //cout<<"weight multiplier in sup = "<weight = old_weights[a] * weight_multiplier; - multi_learner* cs_learner = as_multiline(data.all->cost_sensitive); - cs_learner->learn(data.adf_data.ecs, i); - - //cout<<"cost-sensitive increment = "<increment<weight = old_weights[a]; - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - data.adf_data.ecs[a]->l.cb = cbls[a]; -} - -template -void predict_or_learn_sup_adf(cbify& data, multi_learner& base, example& ec, int 
ec_type) -{ - uint32_t action = predict_sup_adf(data, base, ec); - - if (ind_update(data, ec_type)) - learn_sup_adf(data, base, ec, ec_type); - - ec.pred.multiclass = action; -} - -uint32_t predict_bandit_adf(cbify& data, multi_learner& base, example& ec) +template +void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) { - uint32_t argmin = find_min(data.cumulative_costs); + //Store the multiclass or cost-sensitive input label + MULTICLASS::label_t ld; + COST_SENSITIVE::label csl; + if (use_cs) + csl = ec.l.cs; + else + ld = ec.l.multi; copy_example_to_adf(data, ec); - base.predict(data.adf_data.ecs, argmin); + base.predict(data.adf_data.ecs); + + auto& out_ec = *data.adf_data.ecs[0]; - auto& out_ec = *data.adf_data.ecs[0]; uint32_t chosen_action; if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(out_ec.pred.a_s), end_scores(out_ec.pred.a_s), chosen_action)) THROW("Failed to sample from pdf"); - //cout<<"predict using sublearner "<< argmin <l.cb; lab.costs.push_back(cl); - vector old_weights; - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - old_weights.push_back(data.adf_data.ecs[a]->weight); - - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - float weight_multiplier = compute_weight_multiplier(data, i, ec_type); - - //cout<<"learn in sublearner "<< i <<" with weight multiplier "<weight = old_weights[a] * weight_multiplier; - base.learn(data.adf_data.ecs, i); - - //cout<<"cb-explore increment = "<ft_offset; - //multiline_learn_or_predict(base, data.adf_data.ecs, offset, i); - } - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - data.adf_data.ecs[a]->weight = old_weights[a]; -} - -template -void predict_or_learn_bandit_adf(cbify& data, multi_learner& base, example& ec, int ec_type) -{ - uint32_t chosen_action = predict_bandit_adf(data, base, ec); - - auto& cl = data.cl_adf; - auto& a_s = data.a_s_adf; - cl.action = a_s[chosen_action].action + 1; - cl.probability = a_s[chosen_action].score; - - //cout<(data, base); - - ec.pred.multiclass = cl.action; -} - -void accumu_var_adf(cbify& data, multi_learner& base, example& ec) -{ - size_t pred_best_approx = predict_sup_adf(data, base, ec); - float temp_var; - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - if (pred_best_approx == data.a_s_adf[a].action + 1) - temp_var = 1.0 / data.a_s_adf[a].score; - - data.cumu_var += temp_var; - - //cout<<"variance at bandit round "<< data.inter_iter << " = " << temp_var << endl; - //cout< -void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) -{ - // Corrupt labels (only corrupting multiclass labels as of now) - - if (use_cs) - data.cs_label = ec.l.cs; - else - { - data.mc_label = ec.l.multi; - if (data.ws_iter < data.ws_period) - ec.l.multi.label = corrupt_action(data, data.mc_label.label, WARM_START); - else if (data.inter_iter < data.inter_period) - ec.l.multi.label = corrupt_action(data, data.mc_label.label, INTERACTION); - } - - // Warm start phase - if (data.ws_iter < data.ws_period) - { - if (data.ws_iter < data.ws_train_size) - { - if (data.ws_type == SUPERVISED_WS) - predict_or_learn_sup_adf(data, base, ec, WARM_START); - else if (data.ws_type == BANDIT_WS) - predict_or_learn_bandit_adf(data, base, ec, WARM_START); - } - else - add_to_vali(data, ec); - ec.weight = 0; - data.ws_iter++; - } - // Interaction phase - else if (data.inter_iter < data.inter_period) - { - predict_or_learn_bandit_adf(data, base, ec, INTERACTION); - accumu_var_adf(data, base, ec); - data.a_s_adf.clear(); - 
data.inter_iter++; - } - // Skipping the rest of the examples - else - ec.weight = 0; - - // Store the original labels back - if (use_cs) - ec.l.cs = data.cs_label; - else - ec.l.multi = data.mc_label; - + base.learn(data.adf_data.ecs); + ec.pred.multiclass = cl.action; } void init_adf_data(cbify& data, const size_t num_actions) @@ -709,33 +206,6 @@ void init_adf_data(cbify& data, const size_t num_actions) auto& lab = adf_data.ecs[a]->l.cb; CB::cb_label.default_label(&lab); } - - // The rest of the initialization is for warm start CB - data.csls = calloc_or_throw(num_actions); - for (uint32_t a=0; a < num_actions; ++a) - { - COST_SENSITIVE::cs_label.default_label(&data.csls[a]); - data.csls[a].costs.push_back({0, a+1, 0, 0}); - } - data.cbls = calloc_or_throw(num_actions); - - if (data.vali_method == WS_VALI_SPLIT || data.vali_method == WS_VALI_NOSPLIT) - { - data.ws_train_size = ceil(data.ws_period / 2.0); - data.ws_vali_size = data.ws_period - data.ws_train_size; - } - else - { - data.ws_train_size = data.ws_period; - data.ws_vali_size = 0; - } - data.ws_iter = 0; - data.inter_iter = 0; - - setup_lambdas(data); - for (uint32_t i = 0; i < data.choices_lambda; i++) - data.cumulative_costs.push_back(0.f); - data.cumu_var = 0.f; } base_learner* cbify_setup(arguments& arg) @@ -748,21 +218,7 @@ base_learner* cbify_setup(arguments& arg) .critical("cbify", num_actions, "Convert multiclass on classes into a contextual bandit problem") (use_cs, "cbify_cs", "consume cost-sensitive classification examples instead of multiclass") ("loss0", data->loss0, 0.f, "loss for correct label") - ("loss1", data->loss1, 1.f, "loss for incorrect label") - ("warm_start", data->ws_period, 0U, "number of training examples for warm start phase") - ("interaction", data->inter_period, UINT32_MAX, "number of examples for the interactive contextual bandit learning phase") - ("warm_start_update", data->upd_ws, true, "indicator of warm start updates") - ("interaction_update", data->upd_inter, true, "indicator of interaction updates") - ("corrupt_type_warm_start", data->cor_type_ws, UAR, "type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") - ("corrupt_prob_warm_start", data->cor_prob_ws, 0.f, "probability of label corruption in the warm start phase") - ("corrupt_type_interaction", data->cor_type_inter, UAR, "type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") - ("corrupt_prob_interaction", data->cor_prob_inter, 0.f, "probability of label corruption in the interaction phase") - ("choices_lambda", data->choices_lambda, 1U, "the number of candidate lambdas to aggregate (lambda is the importance weight parameter between the two sources) ") - ("lambda_scheme", data->lambda_scheme, ABS_CENTRAL, "The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min lambda=0, max lambda=1, 3: center lambda=epsilon/(#actions+epsilon), 4: center lambda=epsilon/(#actions+epsilon), min lambda=0, max lambda=1); the rest of candidate lambda values are generated using a doubling scheme") - ("weighting_scheme", data->wt_scheme, INSTANCE_WT, "weighting scheme (1: per instance weighting, where for every lambda, each contextual bandit example have weight lambda/(1-lambda) times that of each warm start example, 2: per dataset weighting, where for every lambda, the contextual bandit dataset has total weight lambda/(1-lambda) times that of the warm start dataset)") - 
("validation_method", data->vali_method, INTER_VALI, "lambda selection criterion (1: using contextual bandit examples with progressive validation, 2: using warm start examples, with fresh validation examples at each epoch, 3: using warm start examples, with a single validation set throughout)") - ("overwrite_label", data->overwrite_label, 1U, "the label used by type 3 corruptions (overwriting)") - ("warm_start_type", data->ws_type, SUPERVISED_WS, "update method of utilizing warm start examples (1: using supervised updates, 2: using contextual bandit updates)").missing()) + ("loss1", data->loss1, 1.f, "loss for incorrect label").missing()) return nullptr; data->use_adf = count(arg.args.begin(), arg.args.end(),"--cb_explore_adf") > 0; @@ -770,9 +226,6 @@ base_learner* cbify_setup(arguments& arg) data->a_s = v_init(); data->all = arg.all; - data->num_actions = num_actions; - data->use_cs = use_cs; - if (data->use_adf) init_adf_data(*data.get(), num_actions); @@ -803,18 +256,10 @@ base_learner* cbify_setup(arguments& arg) if (data->use_adf) { multi_learner* base = as_multiline(setup_base(arg)); - // Note: the current version of warm start CB can only support epsilon greedy exploration - // algorithm - we need to wait for the default epsilon value to be passed from cb_explore - // is there is one - //cout<<"count: "<epsilon = arg.vm.count("epsilon") > 0 ? arg.vm["epsilon"].as() : 0.0f; - if (use_cs) - l = &init_cost_sensitive_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); + l = &init_cost_sensitive_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, 1); else - l = &init_multiclass_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); - - //cout<<"cbify increment = "<increment<, predict_or_learn_adf, arg.all->p, 1); } else { diff --git a/vowpalwabbit/parse_args.cc b/vowpalwabbit/parse_args.cc index 5c27df2b621..fb97222de41 100644 --- a/vowpalwabbit/parse_args.cc +++ b/vowpalwabbit/parse_args.cc @@ -73,6 +73,7 @@ license as described in the file LICENSE. 
#include "explore_eval.h" #include "baseline.h" #include "classweight.h" +#include "warm_cb.h" // #include "cntk.h" using namespace std; @@ -1140,6 +1141,7 @@ void parse_reductions(arguments& arg) all.reduction_stack.push_back(mwt_setup); all.reduction_stack.push_back(cb_explore_setup); all.reduction_stack.push_back(cb_explore_adf_setup); + all.reduction_stack.push_back(warm_cb_setup); all.reduction_stack.push_back(cbify_setup); all.reduction_stack.push_back(explore_eval_setup); all.reduction_stack.push_back(ExpReplay::expreplay_setup<'c', COST_SENSITIVE::cs_label>); diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc new file mode 100644 index 00000000000..389359539e1 --- /dev/null +++ b/vowpalwabbit/warm_cb.cc @@ -0,0 +1,831 @@ +#include +#include "reductions.h" +#include "cb_algs.h" +#include "rand48.h" +#include "bs.h" +#include "vw.h" +#include "../explore/hash.h" +#include "explore.h" + +#include + +using namespace LEARNER; +using namespace exploration; +using namespace ACTION_SCORE; +using namespace std; + +#define WARM_START 1 +#define INTERACTION 2 +#define SKIP 3 + +#define SUPERVISED_WS 1 +#define BANDIT_WS 2 + +#define UAR 1 +#define CIRCULAR 2 +#define OVERWRITE 3 + +#define INTER_VALI 1 +#define WS_VALI_SPLIT 2 +#define WS_VALI_NOSPLIT 3 + +#define INSTANCE_WT 1 +#define DATASET_WT 2 + +#define ABS_CENTRAL 1 +#define ABS_CENTRAL_ZEROONE 2 +#define MINIMAX_CENTRAL 3 +#define MINIMAX_CENTRAL_ZEROONE 4 + + +struct warm_cb; + +struct warm_cb_adf_data +{ + multi_ex ecs; + size_t num_actions; +}; + +struct warm_cb +{ + CB::label cb_label; + uint64_t app_seed; + action_scores a_s; + // used as the seed + size_t example_counter; + vw* all; + bool use_adf; // if true, reduce to cb_explore_adf instead of cb_explore + warm_cb_adf_data adf_data; + float loss0; + float loss1; + + //warm start parameters + uint32_t ws_period; + uint32_t inter_period; + uint32_t choices_lambda; + bool upd_ws; + bool upd_inter; + int cor_type_ws; + float cor_prob_ws; + int cor_type_inter; + float cor_prob_inter; + int vali_method; + int wt_scheme; + int lambda_scheme; + uint32_t overwrite_label; + int ws_type; + + //auxiliary variables + uint32_t num_actions; + float epsilon; + vector lambdas; + action_scores a_s_adf; + vector cumulative_costs; + CB::cb_class cl_adf; + uint32_t ws_train_size; + uint32_t ws_vali_size; + vector ws_vali; + float cumu_var; + uint32_t ws_iter; + uint32_t inter_iter; + MULTICLASS::label_t mc_label; + COST_SENSITIVE::label cs_label; + COST_SENSITIVE::label* csls; + CB::label* cbls; + bool use_cs; + +}; + +float loss(warm_cb& data, uint32_t label, uint32_t final_prediction) +{ + if (label != final_prediction) + return data.loss1; + else + return data.loss0; +} + +float loss_cs(warm_cb& data, v_array& costs, uint32_t final_prediction) +{ + float cost = 0.; + for (auto wc : costs) + { if (wc.class_index == final_prediction) + { cost = wc.x; + break; + } + } + return data.loss0 + (data.loss1 - data.loss0) * cost; +} + +template inline void delete_it(T* p) { if (p != nullptr) delete p; } + +template +uint32_t find_min(vector arr) +{ + T min_val = FLT_MAX; + uint32_t argmin = 0; + + for (uint32_t i = 0; i < arr.size(); i++) + { + //cout<pred.a_s.delete_v(); + VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.ecs[a]); + free_it(data.adf_data.ecs[a]); + } + data.adf_data.ecs.~vector(); + + data.lambdas.~vector(); + data.cumulative_costs.~vector(); + + data.a_s_adf.delete_v(); + for (size_t i = 0; i < data.ws_vali.size(); ++i) + { + if (data.use_cs) + 
VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, *data.ws_vali[i]); + else + VW::dealloc_example(MULTICLASS::mc_label.delete_label, *data.ws_vali[i]); + free(data.ws_vali[i]); + } + data.ws_vali.~vector(); + } +} + +void copy_example_to_adf(warm_cb& data, example& ec) +{ + auto& adf_data = data.adf_data; + const uint64_t ss = data.all->weights.stride_shift(); + const uint64_t mask = data.all->weights.mask(); + + for (size_t a = 0; a < adf_data.num_actions; ++a) + { + auto& eca = *adf_data.ecs[a]; + // clear label + auto& lab = eca.l.cb; + CB::cb_label.default_label(&lab); + + // copy data + VW::copy_example_data(false, &eca, &ec); + + // offset indicies for given action + for (features& fs : eca) + { + for (feature_index& idx : fs.indicies) + { + idx = ((((idx >> ss) * 28904713) + 4832917 * (uint64_t)a) << ss) & mask; + } + } + + // avoid empty example by adding a tag (hacky) + if (CB_ALGS::example_is_newline_not_header(eca) && CB::cb_label.test_label(&eca.l)) + { + eca.tag.push_back('n'); + } + } +} + +float minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period, size_t interaction_period) +{ + return epsilon / (num_actions + epsilon); +} + +void setup_lambdas(warm_cb& data) +{ + // The lambdas are arranged in ascending order + vector& lambdas = data.lambdas; + for (uint32_t i = 0; i 0; i--) + lambdas[i-1] = lambdas[i] / 2.0; + + for (uint32_t i = mid+1; i < data.choices_lambda; i++) + lambdas[i] = 1 - (1-lambdas[i-1]) / 2.0; + + if (data.lambda_scheme == MINIMAX_CENTRAL_ZEROONE || data.lambda_scheme == ABS_CENTRAL_ZEROONE) + { + lambdas[0] = 0.0; + lambdas[data.choices_lambda-1] = 1.0; + } +} + +uint32_t generate_uar_action(warm_cb& data) +{ + float randf = merand48(data.all->random_state); + + for (uint32_t i = 1; i <= data.num_actions; i++) + { + if (randf <= float(i) / data.num_actions) + return i; + } + return data.num_actions; +} + +uint32_t corrupt_action(warm_cb& data, uint32_t action, int ec_type) +{ + float cor_prob; + uint32_t cor_type; + uint32_t cor_action; + + if (ec_type == WARM_START) + { + cor_prob = data.cor_prob_ws; + cor_type = data.cor_type_ws; + } + else + { + cor_prob = data.cor_prob_inter; + cor_type = data.cor_type_inter; + } + + float randf = merand48(data.all->random_state); + if (randf < cor_prob) + { + if (cor_type == UAR) + cor_action = generate_uar_action(data); + else if (cor_type == OVERWRITE) + cor_action = data.overwrite_label; + else + cor_action = (action % data.num_actions) + 1; + } + else + cor_action = action; + return cor_action; +} + +bool ind_update(warm_cb& data, int ec_type) +{ + if (ec_type == WARM_START) + return data.upd_ws; + else + return data.upd_inter; +} + +float compute_weight_multiplier(warm_cb& data, size_t i, int ec_type) +{ + float weight_multiplier; + float ws_train_size = data.ws_train_size; + float inter_train_size = data.inter_period; + float total_train_size = ws_train_size + inter_train_size; + float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * inter_train_size; + + //cout< +void predict_or_learn(warm_cb& data, single_learner& base, example& ec) +{ + //Store the multiclass or cost-sensitive input label + MULTICLASS::label_t ld; + COST_SENSITIVE::label csl; + if (use_cs) + csl = ec.l.cs; + else + ld = ec.l.multi; + + data.cb_label.costs.clear(); + ec.l.cb = data.cb_label; + ec.pred.a_s = data.a_s; + + //Call the cb_explore algorithm. 
It returns a vector of probabilities for each action + base.predict(ec); + //data.probs = ec.pred.scalars; + + uint32_t chosen_action; + if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(ec.pred.a_s), end_scores(ec.pred.a_s), chosen_action)) + THROW("Failed to sample from pdf"); + + CB::cb_class cl; + cl.action = chosen_action + 1; + cl.probability = ec.pred.a_s[chosen_action].score; + + if(!cl.action) + THROW("No action with non-zero probability found!"); + if (use_cs) + cl.cost = loss_cs(data, csl.costs, cl.action); + else + cl.cost = loss(data, ld.label, cl.action); + + //Create a new cb label + data.cb_label.costs.push_back(cl); + ec.l.cb = data.cb_label; + base.learn(ec); + data.a_s.clear(); + data.a_s = ec.pred.a_s; + if (use_cs) + ec.l.cs = csl; + else + ec.l.multi = ld; + ec.pred.multiclass = cl.action; +} + +uint32_t predict_sublearner_adf(warm_cb& data, multi_learner& base, example& ec, uint32_t i) +{ + //cout<<"predict using sublearner "<< i <ft_offset; + //multiline_learn_or_predict(base, data.adf_data.ecs, offset, i); + base.predict(data.adf_data.ecs, i); + //cout<<"greedy label = " << data.adf_data.ecs[0]->pred.a_s[0].action+1 << endl; + return data.adf_data.ecs[0]->pred.a_s[0].action+1; +} + +void accumu_costs_iv_adf(warm_cb& data, multi_learner& base, example& ec) +{ + CB::cb_class& cl = data.cl_adf; + //IPS for approximating the cumulative costs for all lambdas + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + uint32_t action = predict_sublearner_adf(data, base, ec, i); + + if (action == cl.action) + data.cumulative_costs[i] += cl.cost / cl.probability; + //cout< +void accumu_costs_wsv_adf(warm_cb& data, multi_learner& base) +{ + uint32_t ws_vali_size = data.ws_vali_size; + //only update cumulative costs every warm_start_period iterations + if ( data.inter_iter >= 1 && abs( log2(data.inter_iter+1) - floor(log2(data.inter_iter+1)) ) < 1e-4 ) + { + for (uint32_t i = 0; i < data.choices_lambda; i++) + data.cumulative_costs[i] = 0; + + uint32_t num_epochs = ceil(log2(data.inter_period)); + uint32_t epoch = log2(data.inter_iter+1) - 1; + float batch_vali_size = ((float) ws_vali_size) / num_epochs; + uint32_t lb, ub; + + if (data.vali_method == WS_VALI_SPLIT) + { + lb = ceil(batch_vali_size * epoch); + ub = ceil(batch_vali_size * (epoch + 1)); + } + else + { + lb = 0; + ub = ws_vali_size; + } + //cout<<"validation at iteration "<l.cs.costs, pred_label); + else + data.cumulative_costs[i] += loss(data, ec_vali->l.multi.label, pred_label); + + //cout< +void add_to_vali(warm_cb& data, example& ec) +{ + //TODO: set the first parameter properly + example* ec_copy = VW::alloc_examples(sizeof(polylabel), 1); + + if (use_cs) + VW::copy_example_data(false, ec_copy, &ec, 0, COST_SENSITIVE::cs_label.copy_label); + else + VW::copy_example_data(false, ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); + + data.ws_vali.push_back(ec_copy); +} + +uint32_t predict_sup_adf(warm_cb& data, multi_learner& base, example& ec) +{ + uint32_t argmin = find_min(data.cumulative_costs); + return predict_sublearner_adf(data, base, ec, argmin); +} + +template +void learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, int ec_type) +{ + copy_example_to_adf(data, ec); + //generate cost-sensitive label (for CSOAA's temporary use) + auto& csls = data.csls; + auto& cbls = data.cbls; + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + csls[a].costs[0].class_index = a+1; + if (use_cs) + csls[a].costs[0].x = loss_cs(data, ec.l.cs.costs, a+1); + else + 
csls[a].costs[0].x = loss(data, ec.l.multi.label, a+1); + } + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + cbls[a] = data.adf_data.ecs[a]->l.cb; + data.adf_data.ecs[a]->l.cs = csls[a]; + //cout< old_weights; + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + old_weights.push_back(data.adf_data.ecs[a]->weight); + + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + float weight_multiplier = compute_weight_multiplier(data, i, ec_type); + //cout<<"weight multiplier in sup = "<weight = old_weights[a] * weight_multiplier; + multi_learner* cs_learner = as_multiline(data.all->cost_sensitive); + cs_learner->learn(data.adf_data.ecs, i); + + //cout<<"cost-sensitive increment = "<increment<weight = old_weights[a]; + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.adf_data.ecs[a]->l.cb = cbls[a]; +} + +template +void predict_or_learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, int ec_type) +{ + uint32_t action = predict_sup_adf(data, base, ec); + + if (ind_update(data, ec_type)) + learn_sup_adf(data, base, ec, ec_type); + + ec.pred.multiclass = action; +} + +uint32_t predict_bandit_adf(warm_cb& data, multi_learner& base, example& ec) +{ + uint32_t argmin = find_min(data.cumulative_costs); + + copy_example_to_adf(data, ec); + base.predict(data.adf_data.ecs, argmin); + + auto& out_ec = *data.adf_data.ecs[0]; + uint32_t chosen_action; + if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(out_ec.pred.a_s), end_scores(out_ec.pred.a_s), chosen_action)) + THROW("Failed to sample from pdf"); + + //cout<<"predict using sublearner "<< argmin <weight = old_weights[a] * weight_multiplier; + base.learn(data.adf_data.ecs, i); + + //cout<<"cb-explore increment = "<ft_offset; + //multiline_learn_or_predict(base, data.adf_data.ecs, offset, i); + } + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.adf_data.ecs[a]->weight = old_weights[a]; +} + +template +void predict_or_learn_bandit_adf(warm_cb& data, multi_learner& base, example& ec, int ec_type) +{ + uint32_t chosen_action = predict_bandit_adf(data, base, ec); + + auto& cl = data.cl_adf; + auto& a_s = data.a_s_adf; + cl.action = a_s[chosen_action].action + 1; + cl.probability = a_s[chosen_action].score; + + //cout<(data, base); + + ec.pred.multiclass = cl.action; +} + +void accumu_var_adf(warm_cb& data, multi_learner& base, example& ec) +{ + size_t pred_best_approx = predict_sup_adf(data, base, ec); + float temp_var; + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + if (pred_best_approx == data.a_s_adf[a].action + 1) + temp_var = 1.0 / data.a_s_adf[a].score; + + data.cumu_var += temp_var; + + //cout<<"variance at bandit round "<< data.inter_iter << " = " << temp_var << endl; + //cout< +void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec) +{ + // Corrupt labels (only corrupting multiclass labels as of now) + + if (use_cs) + data.cs_label = ec.l.cs; + else + { + data.mc_label = ec.l.multi; + if (data.ws_iter < data.ws_period) + ec.l.multi.label = corrupt_action(data, data.mc_label.label, WARM_START); + else if (data.inter_iter < data.inter_period) + ec.l.multi.label = corrupt_action(data, data.mc_label.label, INTERACTION); + } + + // Warm start phase + if (data.ws_iter < data.ws_period) + { + if (data.ws_iter < data.ws_train_size) + { + if (data.ws_type == SUPERVISED_WS) + predict_or_learn_sup_adf(data, base, ec, WARM_START); + else if (data.ws_type == BANDIT_WS) + predict_or_learn_bandit_adf(data, base, ec, WARM_START); + } + else + 
add_to_vali(data, ec); + ec.weight = 0; + data.ws_iter++; + } + // Interaction phase + else if (data.inter_iter < data.inter_period) + { + predict_or_learn_bandit_adf(data, base, ec, INTERACTION); + accumu_var_adf(data, base, ec); + data.a_s_adf.clear(); + data.inter_iter++; + } + // Skipping the rest of the examples + else + ec.weight = 0; + + // Store the original labels back + if (use_cs) + ec.l.cs = data.cs_label; + else + ec.l.multi = data.mc_label; + +} + +void init_adf_data(warm_cb& data, const size_t num_actions) +{ + auto& adf_data = data.adf_data; + adf_data.num_actions = num_actions; + + adf_data.ecs.resize(num_actions); + for (size_t a=0; a < num_actions; ++a) + { + adf_data.ecs[a] = VW::alloc_examples(CB::cb_label.label_size, 1); + auto& lab = adf_data.ecs[a]->l.cb; + CB::cb_label.default_label(&lab); + } + + // The rest of the initialization is for warm start CB + data.csls = calloc_or_throw(num_actions); + for (uint32_t a=0; a < num_actions; ++a) + { + COST_SENSITIVE::cs_label.default_label(&data.csls[a]); + data.csls[a].costs.push_back({0, a+1, 0, 0}); + } + data.cbls = calloc_or_throw(num_actions); + + if (data.vali_method == WS_VALI_SPLIT || data.vali_method == WS_VALI_NOSPLIT) + { + data.ws_train_size = ceil(data.ws_period / 2.0); + data.ws_vali_size = data.ws_period - data.ws_train_size; + } + else + { + data.ws_train_size = data.ws_period; + data.ws_vali_size = 0; + } + data.ws_iter = 0; + data.inter_iter = 0; + + setup_lambdas(data); + for (uint32_t i = 0; i < data.choices_lambda; i++) + data.cumulative_costs.push_back(0.f); + data.cumu_var = 0.f; +} + +base_learner* warm_cb_setup(arguments& arg) +{ + uint32_t num_actions=0; + auto data = scoped_calloc_or_throw(); + bool use_cs; + + if (arg.new_options("Make Multiclass into Contextual Bandit") + .critical("warm_cb", num_actions, "Convert multiclass on classes into a contextual bandit problem") + (use_cs, "warm_cb_cs", "consume cost-sensitive classification examples instead of multiclass") + ("loss0", data->loss0, 0.f, "loss for correct label") + ("loss1", data->loss1, 1.f, "loss for incorrect label") + ("warm_start", data->ws_period, 0U, "number of training examples for warm start phase") + ("interaction", data->inter_period, UINT32_MAX, "number of examples for the interactive contextual bandit learning phase") + ("warm_start_update", data->upd_ws, true, "indicator of warm start updates") + ("interaction_update", data->upd_inter, true, "indicator of interaction updates") + ("corrupt_type_warm_start", data->cor_type_ws, UAR, "type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") + ("corrupt_prob_warm_start", data->cor_prob_ws, 0.f, "probability of label corruption in the warm start phase") + ("corrupt_type_interaction", data->cor_type_inter, UAR, "type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") + ("corrupt_prob_interaction", data->cor_prob_inter, 0.f, "probability of label corruption in the interaction phase") + ("choices_lambda", data->choices_lambda, 1U, "the number of candidate lambdas to aggregate (lambda is the importance weight parameter between the two sources) ") + ("lambda_scheme", data->lambda_scheme, ABS_CENTRAL, "The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min lambda=0, max lambda=1, 3: center lambda=epsilon/(#actions+epsilon), 4: center lambda=epsilon/(#actions+epsilon), min lambda=0, max lambda=1); the rest of 
candidate lambda values are generated using a doubling scheme") + ("weighting_scheme", data->wt_scheme, INSTANCE_WT, "weighting scheme (1: per instance weighting, where for every lambda, each contextual bandit example have weight lambda/(1-lambda) times that of each warm start example, 2: per dataset weighting, where for every lambda, the contextual bandit dataset has total weight lambda/(1-lambda) times that of the warm start dataset)") + ("validation_method", data->vali_method, INTER_VALI, "lambda selection criterion (1: using contextual bandit examples with progressive validation, 2: using warm start examples, with fresh validation examples at each epoch, 3: using warm start examples, with a single validation set throughout)") + ("overwrite_label", data->overwrite_label, 1U, "the label used by type 3 corruptions (overwriting)") + ("warm_start_type", data->ws_type, SUPERVISED_WS, "update method of utilizing warm start examples (1: using supervised updates, 2: using contextual bandit updates)").missing()) + return nullptr; + + data->use_adf = count(arg.args.begin(), arg.args.end(),"--cb_explore_adf") > 0; + data->app_seed = uniform_hash("vw", 2, 0); + data->a_s = v_init(); + data->all = arg.all; + + data->num_actions = num_actions; + data->use_cs = use_cs; + + if (data->use_adf) + init_adf_data(*data.get(), num_actions); + + if (count(arg.args.begin(), arg.args.end(),"--cb_explore") == 0 && !data->use_adf) + { + arg.args.push_back("--cb_explore"); + stringstream ss; + ss << num_actions; + arg.args.push_back(ss.str()); + } + if (data->use_adf) + { + arg.args.push_back("--cb_min_cost"); + arg.args.push_back(to_string(data->loss0)); + arg.args.push_back("--cb_max_cost"); + arg.args.push_back(to_string(data->loss1)); + } + if (count(arg.args.begin(), arg.args.end(), "--baseline")) + { + arg.args.push_back("--lr_multiplier"); + stringstream ss; + ss << max(abs(data->loss0), abs(data->loss1)) / (data->loss1 - data->loss0); + arg.args.push_back(ss.str()); + } + + learner* l; + + if (data->use_adf) + { + multi_learner* base = as_multiline(setup_base(arg)); + // Note: the current version of warm start CB can only support epsilon greedy exploration + // algorithm - we need to wait for the default epsilon value to be passed from cb_explore + // is there is one + //cout<<"count: "<epsilon = arg.vm.count("epsilon") > 0 ? arg.vm["epsilon"].as() : 0.0f; + + if (use_cs) + l = &init_cost_sensitive_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); + else + l = &init_multiclass_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); + + //cout<<"warm_cb increment = "<increment<, predict_or_learn, arg.all->p, 1); + else + l = &init_multiclass_learner(data, base, predict_or_learn, predict_or_learn, arg.all->p, 1); + } + l->set_finish(finish); + arg.all->delete_prediction = nullptr; + + return make_base(*l); +} diff --git a/vowpalwabbit/warm_cb.h b/vowpalwabbit/warm_cb.h new file mode 100644 index 00000000000..2d645774f88 --- /dev/null +++ b/vowpalwabbit/warm_cb.h @@ -0,0 +1 @@ +LEARNER::base_learner* warm_cb_setup(arguments& arg); From 3f037851ec374554e456956fa75d176dab0133c3 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 14:34:52 -0400 Subject: [PATCH 108/127] . 
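
Use a plain "hash.h" include in the new warm_cb.cc instead of the
relative ../explore/hash.h path, matching the identical include fix
applied to cbify.cc when warm_cb was split out in the previous commit:

    #include "hash.h"   // was: #include "../explore/hash.h"

so both files resolve the hash header the same way.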
--- vowpalwabbit/warm_cb.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc index 389359539e1..180d7aa19f6 100644 --- a/vowpalwabbit/warm_cb.cc +++ b/vowpalwabbit/warm_cb.cc @@ -4,7 +4,7 @@ #include "rand48.h" #include "bs.h" #include "vw.h" -#include "../explore/hash.h" +#include "hash.h" #include "explore.h" #include From 452e4aa1b5cc577964c646c9334867cc8734f4b8 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 14:58:43 -0400 Subject: [PATCH 109/127] removed part on non-adf --- vowpalwabbit/warm_cb.cc | 265 ++++++++++++++-------------------------- 1 file changed, 93 insertions(+), 172 deletions(-) diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc index 180d7aa19f6..bdded4c25f8 100644 --- a/vowpalwabbit/warm_cb.cc +++ b/vowpalwabbit/warm_cb.cc @@ -38,14 +38,6 @@ using namespace std; #define MINIMAX_CENTRAL_ZEROONE 4 -struct warm_cb; - -struct warm_cb_adf_data -{ - multi_ex ecs; - size_t num_actions; -}; - struct warm_cb { CB::label cb_label; @@ -54,8 +46,7 @@ struct warm_cb // used as the seed size_t example_counter; vw* all; - bool use_adf; // if true, reduce to cb_explore_adf instead of cb_explore - warm_cb_adf_data adf_data; + multi_ex ecs; float loss0; float loss1; @@ -140,53 +131,50 @@ void finish(warm_cb& data) { CB::cb_label.delete_label(&data.cb_label); data.a_s.delete_v(); - if (data.use_adf) - { - cout<<"average variance estimate = "<pred.a_s.delete_v(); - VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.ecs[a]); - free_it(data.adf_data.ecs[a]); - } - data.adf_data.ecs.~vector(); + for (size_t a = 0; a < data.num_actions; ++a) + { + COST_SENSITIVE::cs_label.delete_label(&data.csls[a]); + } + free(data.csls); + free(data.cbls); - data.lambdas.~vector(); - data.cumulative_costs.~vector(); + for (size_t a = 0; a < data.num_actions; ++a) + { + data.ecs[a]->pred.a_s.delete_v(); + VW::dealloc_example(CB::cb_label.delete_label, *data.ecs[a]); + free_it(data.ecs[a]); + } + data.ecs.~vector(); - data.a_s_adf.delete_v(); - for (size_t i = 0; i < data.ws_vali.size(); ++i) - { - if (data.use_cs) - VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, *data.ws_vali[i]); - else - VW::dealloc_example(MULTICLASS::mc_label.delete_label, *data.ws_vali[i]); - free(data.ws_vali[i]); - } - data.ws_vali.~vector(); - } + data.lambdas.~vector(); + data.cumulative_costs.~vector(); + + data.a_s_adf.delete_v(); + for (size_t i = 0; i < data.ws_vali.size(); ++i) + { + if (data.use_cs) + VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, *data.ws_vali[i]); + else + VW::dealloc_example(MULTICLASS::mc_label.delete_label, *data.ws_vali[i]); + free(data.ws_vali[i]); + } + data.ws_vali.~vector(); } void copy_example_to_adf(warm_cb& data, example& ec) { - auto& adf_data = data.adf_data; const uint64_t ss = data.all->weights.stride_shift(); const uint64_t mask = data.all->weights.mask(); - for (size_t a = 0; a < adf_data.num_actions; ++a) + for (size_t a = 0; a < data.num_actions; ++a) { - auto& eca = *adf_data.ecs[a]; + auto& eca = *data.ecs[a]; // clear label auto& lab = eca.l.cb; CB::cb_label.default_label(&lab); @@ -345,63 +333,15 @@ float compute_weight_multiplier(warm_cb& data, size_t i, int ec_type) return weight_multiplier; } - -template -void predict_or_learn(warm_cb& data, single_learner& base, example& ec) -{ - //Store the multiclass or cost-sensitive input label - MULTICLASS::label_t ld; - COST_SENSITIVE::label csl; - if (use_cs) - csl = ec.l.cs; - else - ld = ec.l.multi; - - 
data.cb_label.costs.clear(); - ec.l.cb = data.cb_label; - ec.pred.a_s = data.a_s; - - //Call the cb_explore algorithm. It returns a vector of probabilities for each action - base.predict(ec); - //data.probs = ec.pred.scalars; - - uint32_t chosen_action; - if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(ec.pred.a_s), end_scores(ec.pred.a_s), chosen_action)) - THROW("Failed to sample from pdf"); - - CB::cb_class cl; - cl.action = chosen_action + 1; - cl.probability = ec.pred.a_s[chosen_action].score; - - if(!cl.action) - THROW("No action with non-zero probability found!"); - if (use_cs) - cl.cost = loss_cs(data, csl.costs, cl.action); - else - cl.cost = loss(data, ld.label, cl.action); - - //Create a new cb label - data.cb_label.costs.push_back(cl); - ec.l.cb = data.cb_label; - base.learn(ec); - data.a_s.clear(); - data.a_s = ec.pred.a_s; - if (use_cs) - ec.l.cs = csl; - else - ec.l.multi = ld; - ec.pred.multiclass = cl.action; -} - uint32_t predict_sublearner_adf(warm_cb& data, multi_learner& base, example& ec, uint32_t i) { //cout<<"predict using sublearner "<< i <ft_offset; - //multiline_learn_or_predict(base, data.adf_data.ecs, offset, i); - base.predict(data.adf_data.ecs, i); - //cout<<"greedy label = " << data.adf_data.ecs[0]->pred.a_s[0].action+1 << endl; - return data.adf_data.ecs[0]->pred.a_s[0].action+1; + //uint32_t offset = data.ecs[0]->ft_offset; + //multiline_learn_or_predict(base, data.ecs, offset, i); + base.predict(data.ecs, i); + //cout<<"greedy label = " << data.ecs[0]->pred.a_s[0].action+1 << endl; + return data.ecs[0]->pred.a_s[0].action+1; } void accumu_costs_iv_adf(warm_cb& data, multi_learner& base, example& ec) @@ -493,7 +433,7 @@ void learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, int ec_type) //generate cost-sensitive label (for CSOAA's temporary use) auto& csls = data.csls; auto& cbls = data.cbls; - for (size_t a = 0; a < data.adf_data.num_actions; ++a) + for (size_t a = 0; a < data.num_actions; ++a) { csls[a].costs[0].class_index = a+1; if (use_cs) @@ -501,36 +441,36 @@ void learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, int ec_type) else csls[a].costs[0].x = loss(data, ec.l.multi.label, a+1); } - for (size_t a = 0; a < data.adf_data.num_actions; ++a) + for (size_t a = 0; a < data.num_actions; ++a) { - cbls[a] = data.adf_data.ecs[a]->l.cb; - data.adf_data.ecs[a]->l.cs = csls[a]; + cbls[a] = data.ecs[a]->l.cb; + data.ecs[a]->l.cs = csls[a]; //cout< old_weights; - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - old_weights.push_back(data.adf_data.ecs[a]->weight); + for (size_t a = 0; a < data.num_actions; ++a) + old_weights.push_back(data.ecs[a]->weight); for (uint32_t i = 0; i < data.choices_lambda; i++) { float weight_multiplier = compute_weight_multiplier(data, i, ec_type); //cout<<"weight multiplier in sup = "<weight = old_weights[a] * weight_multiplier; + for (size_t a = 0; a < data.num_actions; ++a) + data.ecs[a]->weight = old_weights[a] * weight_multiplier; multi_learner* cs_learner = as_multiline(data.all->cost_sensitive); - cs_learner->learn(data.adf_data.ecs, i); + cs_learner->learn(data.ecs, i); //cout<<"cost-sensitive increment = "<increment<weight = old_weights[a]; + for (size_t a = 0; a < data.num_actions; ++a) + data.ecs[a]->weight = old_weights[a]; - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - data.adf_data.ecs[a]->l.cb = cbls[a]; + for (size_t a = 0; a < data.num_actions; ++a) + data.ecs[a]->l.cb = cbls[a]; } template @@ -549,15 +489,15 @@ uint32_t 
predict_bandit_adf(warm_cb& data, multi_learner& base, example& ec) uint32_t argmin = find_min(data.cumulative_costs); copy_example_to_adf(data, ec); - base.predict(data.adf_data.ecs, argmin); + base.predict(data.ecs, argmin); - auto& out_ec = *data.adf_data.ecs[0]; + auto& out_ec = *data.ecs[0]; uint32_t chosen_action; if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(out_ec.pred.a_s), end_scores(out_ec.pred.a_s), chosen_action)) THROW("Failed to sample from pdf"); //cout<<"predict using sublearner "<< argmin <weight = old_weights[a] * weight_multiplier; - base.learn(data.adf_data.ecs, i); + for (size_t a = 0; a < data.num_actions; ++a) + data.ecs[a]->weight = old_weights[a] * weight_multiplier; + base.learn(data.ecs, i); //cout<<"cb-explore increment = "<ft_offset; - //multiline_learn_or_predict(base, data.adf_data.ecs, offset, i); + //uint32_t offset = data.ecs[0]->ft_offset; + //multiline_learn_or_predict(base, data.ecs, offset, i); } - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - data.adf_data.ecs[a]->weight = old_weights[a]; + for (size_t a = 0; a < data.num_actions; ++a) + data.ecs[a]->weight = old_weights[a]; } template @@ -634,9 +574,9 @@ void predict_or_learn_bandit_adf(warm_cb& data, multi_learner& base, example& ec void accumu_var_adf(warm_cb& data, multi_learner& base, example& ec) { size_t pred_best_approx = predict_sup_adf(data, base, ec); - float temp_var; + float temp_var = 0.f; - for (size_t a = 0; a < data.adf_data.num_actions; ++a) + for (size_t a = 0; a < data.num_actions; ++a) if (pred_best_approx == data.a_s_adf[a].action + 1) temp_var = 1.0 / data.a_s_adf[a].score; @@ -699,14 +639,12 @@ void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec) void init_adf_data(warm_cb& data, const size_t num_actions) { - auto& adf_data = data.adf_data; - adf_data.num_actions = num_actions; - - adf_data.ecs.resize(num_actions); + data.num_actions = num_actions; + data.ecs.resize(num_actions); for (size_t a=0; a < num_actions; ++a) { - adf_data.ecs[a] = VW::alloc_examples(CB::cb_label.label_size, 1); - auto& lab = adf_data.ecs[a]->l.cb; + data.ecs[a] = VW::alloc_examples(CB::cb_label.label_size, 1); + auto& lab = data.ecs[a]->l.cb; CB::cb_label.default_label(&lab); } @@ -765,31 +703,18 @@ base_learner* warm_cb_setup(arguments& arg) ("warm_start_type", data->ws_type, SUPERVISED_WS, "update method of utilizing warm start examples (1: using supervised updates, 2: using contextual bandit updates)").missing()) return nullptr; - data->use_adf = count(arg.args.begin(), arg.args.end(),"--cb_explore_adf") > 0; data->app_seed = uniform_hash("vw", 2, 0); data->a_s = v_init(); data->all = arg.all; - - data->num_actions = num_actions; data->use_cs = use_cs; - if (data->use_adf) - init_adf_data(*data.get(), num_actions); + init_adf_data(*data.get(), num_actions); + + arg.args.push_back("--cb_min_cost"); + arg.args.push_back(to_string(data->loss0)); + arg.args.push_back("--cb_max_cost"); + arg.args.push_back(to_string(data->loss1)); - if (count(arg.args.begin(), arg.args.end(),"--cb_explore") == 0 && !data->use_adf) - { - arg.args.push_back("--cb_explore"); - stringstream ss; - ss << num_actions; - arg.args.push_back(ss.str()); - } - if (data->use_adf) - { - arg.args.push_back("--cb_min_cost"); - arg.args.push_back(to_string(data->loss0)); - arg.args.push_back("--cb_max_cost"); - arg.args.push_back(to_string(data->loss1)); - } if (count(arg.args.begin(), arg.args.end(), "--baseline")) { arg.args.push_back("--lr_multiplier"); @@ -800,30 
+725,26 @@ base_learner* warm_cb_setup(arguments& arg) learner* l; - if (data->use_adf) + multi_learner* base = as_multiline(setup_base(arg)); + // Note: the current version of warm start CB can only support epsilon greedy exploration + // algorithm - we need to wait for the default epsilon value to be passed from cb_explore + // is there is one + //cout<<"count: "<epsilon = arg.vm.count("epsilon") > 0 ? arg.vm["epsilon"].as() : 0.0f; - - if (use_cs) - l = &init_cost_sensitive_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); - else - l = &init_multiclass_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); - - //cout<<"warm_cb increment = "<increment<epsilon = 0.05f; } else - { - single_learner* base = as_singleline(setup_base(arg)); - if (use_cs) - l = &init_cost_sensitive_learner(data, base, predict_or_learn, predict_or_learn, arg.all->p, 1); - else - l = &init_multiclass_learner(data, base, predict_or_learn, predict_or_learn, arg.all->p, 1); - } + data->epsilon = arg.vm["epsilon"].as(); + + if (use_cs) + l = &init_cost_sensitive_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); + else + l = &init_multiclass_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); + + //cout<<"warm_cb increment = "<increment<set_finish(finish); arg.all->delete_prediction = nullptr; From aa9e9f7d44572108f3273884d87d01194c45d1e1 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 15:25:06 -0400 Subject: [PATCH 110/127] redoing the importance weight scaling by a factor of 1/k --- vowpalwabbit/cb_adf.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index a5291eb24a2..67faafbf129 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc @@ -113,7 +113,7 @@ void learn_MTR(cb_adf& mydata, multi_learner& base, multi_ex& examples) gen_cs_example_mtr(mydata.gen_cs, examples, mydata.cs_labels); uint32_t nf = (uint32_t)examples[mydata.gen_cs.mtr_example]->num_features; float old_weight = examples[mydata.gen_cs.mtr_example]->weight; - examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum); + examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / examples.size()); GEN_CS::call_cs_ldf(base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset); examples[mydata.gen_cs.mtr_example]->num_features = nf; examples[mydata.gen_cs.mtr_example]->weight = old_weight; From 52439aa4ebbe79e9ffaf6442459a60b0f532c1e4 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 15:32:52 -0400 Subject: [PATCH 111/127] . 
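
Make the float conversion explicit in the 1/k scaling of the MTR
example weight introduced by the previous commit. Writing
k = examples.size() and p = the logged probability of the chosen
action, the intended update in learn_MTR is (a sketch of the full
expression, with the long member names such as
mydata.gen_cs.mtr_example abbreviated):

    examples[mtr_example]->weight *=
        (1.f / p) * ((float)event_sum / (float)action_sum)
        * (1.f / (float)k);

i.e. an inverse-propensity weight, rescaled by the event-to-action
ratio and by 1/k.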
--- vowpalwabbit/cb_adf.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index 67faafbf129..374dabd9e15 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc @@ -113,7 +113,7 @@ void learn_MTR(cb_adf& mydata, multi_learner& base, multi_ex& examples) gen_cs_example_mtr(mydata.gen_cs, examples, mydata.cs_labels); uint32_t nf = (uint32_t)examples[mydata.gen_cs.mtr_example]->num_features; float old_weight = examples[mydata.gen_cs.mtr_example]->weight; - examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / examples.size()); + examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / (float)examples.size()) GEN_CS::call_cs_ldf(base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset); examples[mydata.gen_cs.mtr_example]->num_features = nf; examples[mydata.gen_cs.mtr_example]->weight = old_weight; From e5db84414f95fc71d78a3d23abe6285ebbdeb7e4 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 16:14:11 -0400 Subject: [PATCH 112/127] comma typo --- test/RunTests | 16 ++++++++-------- vowpalwabbit/cb_adf.cc | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/test/RunTests b/test/RunTests index 833b295b9ab..f33df041fa0 100755 --- a/test/RunTests +++ b/test/RunTests @@ -1637,34 +1637,34 @@ echo "1 | feature:1" | {VW} -a --initial_weight 0.1 --initial_t 0.3 {VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 -d train-sets/multiclass train-sets/ref/cbify_ws.stderr -# Test 176 cbify warm start with lambda set containing 0/1 +# Test 176 warm_cb warm start with lambda set containing 0/1 {VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --lambda_scheme 2 -d train-sets/multiclass train-sets/ref/cbify_ws_lambda_zeroone.stderr -# Test 177 cbify warm start with warm start update turned off +# Test 177 warm_cb warm start with warm start update turned off {VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_update false -d train-sets/multiclass train-sets/ref/cbify_ws_no_ws_upd.stderr -# Test 178 cbify warm start with interaction update turned off +# Test 178 warm_cb warm start with interaction update turned off {VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --interaction_update false -d train-sets/multiclass train-sets/ref/cbify_ws_no_int_upd.stderr -# Test 179 cbify warm start with bandit warm start type (Sim-Bandit) +# Test 179 warm_cb warm start with bandit warm start type (Sim-Bandit) {VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_type 2 -d train-sets/multiclass train-sets/ref/cbify_ws_simbandit.stderr -# Test 180 cbify warm start with UAR supervised corruption +# Test 180 warm_cb warm start with UAR supervised corruption {VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 1 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass 
train-sets/ref/cbify_ws_uar.stderr -# Test 181 cbify warm start with CYC supervised corruption +# Test 181 warm_cb warm start with CYC supervised corruption {VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 2 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass train-sets/ref/cbify_ws_cyc.stderr -# Test 182 cbify warm start with MAJ supervised corruption +# Test 182 warm_cb warm start with MAJ supervised corruption {VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 3 --corrupt_prob_warm_start 0.5 --overwrite_label 1 -d train-sets/multiclass train-sets/ref/cbify_ws_maj.stderr -# Test 183 cbify warm start with warm start distribution being the ground truth +# Test 183 warm_cb warm start with warm start distribution being the ground truth {VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_interaction 2 --corrupt_prob_interaction 0.5 --weighting_scheme 2 --validation_method 2 -d train-sets/multiclass train-sets/ref/cbify_ws_wsgt.stderr diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index 374dabd9e15..8da4f76c5b4 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc @@ -113,7 +113,7 @@ void learn_MTR(cb_adf& mydata, multi_learner& base, multi_ex& examples) gen_cs_example_mtr(mydata.gen_cs, examples, mydata.cs_labels); uint32_t nf = (uint32_t)examples[mydata.gen_cs.mtr_example]->num_features; float old_weight = examples[mydata.gen_cs.mtr_example]->weight; - examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / (float)examples.size()) + examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / (float)examples.size()); GEN_CS::call_cs_ldf(base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset); examples[mydata.gen_cs.mtr_example]->num_features = nf; examples[mydata.gen_cs.mtr_example]->weight = old_weight; From 2011b7ac97a1ad0a307b5dd59c14736305762bd7 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Thu, 16 Aug 2018 14:16:49 -0400 Subject: [PATCH 113/127] removed redundant comments --- vowpalwabbit/warm_cb.cc | 68 ++++++----------------------------------- 1 file changed, 10 insertions(+), 58 deletions(-) diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc index bdded4c25f8..492ab7198e4 100644 --- a/vowpalwabbit/warm_cb.cc +++ b/vowpalwabbit/warm_cb.cc @@ -117,7 +117,6 @@ uint32_t find_min(vector arr) for (uint32_t i = 0; i < arr.size(); i++) { - //cout<ft_offset; - //multiline_learn_or_predict(base, data.ecs, offset, i); base.predict(data.ecs, i); - //cout<<"greedy label = " << data.ecs[0]->pred.a_s[0].action+1 << endl; return data.ecs[0]->pred.a_s[0].action+1; } @@ -354,16 +340,14 @@ void accumu_costs_iv_adf(warm_cb& data, multi_learner& base, example& ec) if (action == cl.action) data.cumulative_costs[i] += cl.cost / cl.probability; - //cout< void accumu_costs_wsv_adf(warm_cb& data, multi_learner& base) { uint32_t ws_vali_size = data.ws_vali_size; - //only update cumulative costs every warm_start_period iterations + //only update cumulative costs at the end of every epoch if ( 
data.inter_iter >= 1 && abs( log2(data.inter_iter+1) - floor(log2(data.inter_iter+1)) ) < 1e-4 ) { for (uint32_t i = 0; i < data.choices_lambda; i++) @@ -384,9 +368,7 @@ void accumu_costs_wsv_adf(warm_cb& data, multi_learner& base) lb = 0; ub = ws_vali_size; } - //cout<<"validation at iteration "<l.cs.costs, pred_label); else data.cumulative_costs[i] += loss(data, ec_vali->l.multi.label, pred_label); - - //cout< void learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, int ec_type) { copy_example_to_adf(data, ec); - //generate cost-sensitive label (for CSOAA's temporary use) + //generate cost-sensitive label (for cost-sensitive learner's temporary use) auto& csls = data.csls; auto& cbls = data.cbls; for (size_t a = 0; a < data.num_actions; ++a) @@ -445,7 +424,6 @@ void learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, int ec_type) { cbls[a] = data.ecs[a]->l.cb; data.ecs[a]->l.cs = csls[a]; - //cout< old_weights; @@ -455,17 +433,12 @@ void learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, int ec_type) for (uint32_t i = 0; i < data.choices_lambda; i++) { float weight_multiplier = compute_weight_multiplier(data, i, ec_type); - //cout<<"weight multiplier in sup = "<weight = old_weights[a] * weight_multiplier; multi_learner* cs_learner = as_multiline(data.all->cost_sensitive); cs_learner->learn(data.ecs, i); - - //cout<<"cost-sensitive increment = "<increment<weight = old_weights[a]; @@ -496,10 +469,6 @@ uint32_t predict_bandit_adf(warm_cb& data, multi_learner& base, example& ec) if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(out_ec.pred.a_s), end_scores(out_ec.pred.a_s), chosen_action)) THROW("Failed to sample from pdf"); - //cout<<"predict using sublearner "<< argmin <weight = old_weights[a] * weight_multiplier; base.learn(data.ecs, i); - - //cout<<"cb-explore increment = "<ft_offset; - //multiline_learn_or_predict(base, data.ecs, offset, i); } for (size_t a = 0; a < data.num_actions; ++a) @@ -547,8 +510,6 @@ void predict_or_learn_bandit_adf(warm_cb& data, multi_learner& base, example& ec cl.action = a_s[chosen_action].action + 1; cl.probability = a_s[chosen_action].score; - //cout< void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec) { // Corrupt labels (only corrupting multiclass labels as of now) - if (use_cs) data.cs_label = ec.l.cs; else @@ -629,7 +584,7 @@ void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec) else ec.weight = 0; - // Store the original labels back + // Restore the original labels if (use_cs) ec.l.cs = data.cs_label; else @@ -726,10 +681,9 @@ base_learner* warm_cb_setup(arguments& arg) learner* l; multi_learner* base = as_multiline(setup_base(arg)); - // Note: the current version of warm start CB can only support epsilon greedy exploration - // algorithm - we need to wait for the default epsilon value to be passed from cb_explore - // is there is one - //cout<<"count: "<, predict_or_learn_adf, arg.all->p, data->choices_lambda); - //cout<<"warm_cb increment = "<increment<set_finish(finish); arg.all->delete_prediction = nullptr; From 4d8811de01733ada04abdc0acb2c5cc22f5b1868 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Wed, 6 Feb 2019 23:54:59 -0500 Subject: [PATCH 114/127] resolve conflicts --- vowpalwabbit/cb_adf.cc | 7 ------- vowpalwabbit/learner.h | 10 ---------- 2 files changed, 17 deletions(-) diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index e3e3cff9692..c632b57c77e 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc 
@@ -111,15 +111,8 @@ void learn_MTR(cb_adf& mydata, multi_learner& base, multi_ex& examples) gen_cs_example_mtr(mydata.gen_cs, examples, mydata.cs_labels); uint32_t nf = (uint32_t)examples[mydata.gen_cs.mtr_example]->num_features; float old_weight = examples[mydata.gen_cs.mtr_example]->weight; -<<<<<<< HEAD examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / (float)examples.size()); GEN_CS::call_cs_ldf(base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset); -======= - examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * - ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum); - GEN_CS::call_cs_ldf( - base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset); ->>>>>>> c9110426d9e8585e8410403d12f0194d5e6673fa examples[mydata.gen_cs.mtr_example]->num_features = nf; examples[mydata.gen_cs.mtr_example]->weight = old_weight; swap(examples[0]->pred.a_s, mydata.a_s); diff --git a/vowpalwabbit/learner.h b/vowpalwabbit/learner.h index db1ef826dcf..b6bd1b5b61c 100644 --- a/vowpalwabbit/learner.h +++ b/vowpalwabbit/learner.h @@ -406,19 +406,9 @@ learner& init_learner(free_ptr& dat, L* base, void (*learn)(T&, L&, E&) { auto ret = &learner::init_learner(dat.get(), base, learn, predict, 1, base->pred_type); -<<<<<<< HEAD - template - void multiline_learn_or_predict(multi_learner& base, multi_ex& examples, const uint64_t offset, const uint32_t id = 0) - { std::vector saved_offsets; - for (auto ec : examples) - { saved_offsets.push_back(ec->ft_offset); - ec->ft_offset = offset; - } -======= dat.release(); return *ret; } ->>>>>>> c9110426d9e8585e8410403d12f0194d5e6673fa // Reduction with no data. 
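// [Editor's note] A hedged sketch of the handoff kept in the resolved init_learner
// hunk above: the learner is constructed from the raw pointer first, and the smart
// pointer releases only afterwards, so the data block has exactly one owner at
// every point. Toy types below, not the VW API.
#include <memory>

struct toy_learner
{
  void* data;
  explicit toy_learner(void* d) : data(d) {}
};

template <typename T>
toy_learner make_toy_learner(std::unique_ptr<T>& dat)
{
  toy_learner ret(dat.get());  // construct while dat still owns the allocation
  dat.release();               // then hand ownership to the learner just built
  return ret;
}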
template From 7bc56af05f3325bb8c61a10287b067767645b24c Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Thu, 7 Feb 2019 00:36:13 -0500 Subject: [PATCH 115/127] compile error on peeking epsilon in warm_cb.cc --- vowpalwabbit/CMakeLists.txt | 4 +- vowpalwabbit/cb_explore_adf.cc | 5 +-- vowpalwabbit/warm_cb.cc | 75 ++++++++++++++++++---------------- vowpalwabbit/warm_cb.h | 3 +- 4 files changed, 45 insertions(+), 42 deletions(-) diff --git a/vowpalwabbit/CMakeLists.txt b/vowpalwabbit/CMakeLists.txt index 06f580bbcf4..e45eb19b0a5 100644 --- a/vowpalwabbit/CMakeLists.txt +++ b/vowpalwabbit/CMakeLists.txt @@ -32,7 +32,7 @@ set(vw_all_headers binary.h nn.h boosting.h ftrl.h no_label.h spanning_tree.h bs.h gd.h noop.h stable_unique.h cache.h gd_mf.h oaa.h stagewise_poly.h cb_adf.h gd_predict.h OjaNewton.h svrg.h cb_algs.h gen_cs_example.h parse_args.h topk.h cb_explore_adf.h parse_dispatch_loop.h unique_sort.h - interact.h interactions.h parse_example_json.h cbify.h interactions_predict.h vw_allreduce.h + interact.h interactions.h parse_example_json.h cbify.h warm_cb.h interactions_predict.h vw_allreduce.h classweight.h parse_regressor.h kernel_svm.h confidence.h label_dictionary.h config.h.in primitives.h lda_core.h print.h vw_versions.h ) @@ -46,7 +46,7 @@ set(vw_all_sources search_multiclasstask.cc search_entityrelationtask.cc search_graph.cc parse_example.cc scorer.cc network.cc parse_args.cc accumulate.cc gd.cc learner.cc mwt.cc lda_core.cc gd_mf.cc mf.cc bfgs.cc noop.cc print.cc example.cc parser.cc loss_functions.cc sender.cc nn.cc confidence.cc bs.cc - cbify.cc explore_eval.cc topk.cc stagewise_poly.cc log_multi.cc recall_tree.cc active.cc + cbify.cc warm_cb.cc explore_eval.cc topk.cc stagewise_poly.cc log_multi.cc recall_tree.cc active.cc active_cover.cc cs_active.cc kernel_svm.cc best_constant.cc ftrl.cc svrg.cc lrqfa.cc interact.cc comp_io.cc interactions.cc vw_validate.cc audit_regressor.cc gen_cs_example.cc cb_explore.cc action_score.cc cb_explore_adf.cc OjaNewton.cc parse_example_json.cc baseline.cc classweight.cc diff --git a/vowpalwabbit/cb_explore_adf.cc b/vowpalwabbit/cb_explore_adf.cc index b38d96427d4..e40f27a944c 100644 --- a/vowpalwabbit/cb_explore_adf.cc +++ b/vowpalwabbit/cb_explore_adf.cc @@ -269,12 +269,9 @@ void predict_or_learn_first(cb_explore_adf& data, multi_learner& base, multi_ex& template void predict_or_learn_greedy(cb_explore_adf& data, multi_learner& base, multi_ex& examples) { -<<<<<<< HEAD data.offset = examples[0]->ft_offset; //Explore uniform random an epsilon fraction of the time. -======= - // Explore uniform random an epsilon fraction of the time. 
->>>>>>> c9110426d9e8585e8410403d12f0194d5e6673fa + if (is_learn && test_adf_sequence(examples) != nullptr) multiline_learn_or_predict(base, examples, data.offset); else diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc index 492ab7198e4..89ab71504d3 100644 --- a/vowpalwabbit/warm_cb.cc +++ b/vowpalwabbit/warm_cb.cc @@ -13,6 +13,7 @@ using namespace LEARNER; using namespace exploration; using namespace ACTION_SCORE; using namespace std; +using namespace VW::config; #define WARM_START 1 #define INTERACTION 2 @@ -631,74 +632,78 @@ void init_adf_data(warm_cb& data, const size_t num_actions) data.cumu_var = 0.f; } -base_learner* warm_cb_setup(arguments& arg) +base_learner* warm_cb_setup(options_i& options, vw& all) { uint32_t num_actions=0; auto data = scoped_calloc_or_throw(); bool use_cs; - if (arg.new_options("Make Multiclass into Contextual Bandit") - .critical("warm_cb", num_actions, "Convert multiclass on classes into a contextual bandit problem") - (use_cs, "warm_cb_cs", "consume cost-sensitive classification examples instead of multiclass") - ("loss0", data->loss0, 0.f, "loss for correct label") - ("loss1", data->loss1, 1.f, "loss for incorrect label") - ("warm_start", data->ws_period, 0U, "number of training examples for warm start phase") - ("interaction", data->inter_period, UINT32_MAX, "number of examples for the interactive contextual bandit learning phase") - ("warm_start_update", data->upd_ws, true, "indicator of warm start updates") - ("interaction_update", data->upd_inter, true, "indicator of interaction updates") - ("corrupt_type_warm_start", data->cor_type_ws, UAR, "type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") - ("corrupt_prob_warm_start", data->cor_prob_ws, 0.f, "probability of label corruption in the warm start phase") - ("corrupt_type_interaction", data->cor_type_inter, UAR, "type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") - ("corrupt_prob_interaction", data->cor_prob_inter, 0.f, "probability of label corruption in the interaction phase") - ("choices_lambda", data->choices_lambda, 1U, "the number of candidate lambdas to aggregate (lambda is the importance weight parameter between the two sources) ") - ("lambda_scheme", data->lambda_scheme, ABS_CENTRAL, "The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min lambda=0, max lambda=1, 3: center lambda=epsilon/(#actions+epsilon), 4: center lambda=epsilon/(#actions+epsilon), min lambda=0, max lambda=1); the rest of candidate lambda values are generated using a doubling scheme") - ("weighting_scheme", data->wt_scheme, INSTANCE_WT, "weighting scheme (1: per instance weighting, where for every lambda, each contextual bandit example have weight lambda/(1-lambda) times that of each warm start example, 2: per dataset weighting, where for every lambda, the contextual bandit dataset has total weight lambda/(1-lambda) times that of the warm start dataset)") - ("validation_method", data->vali_method, INTER_VALI, "lambda selection criterion (1: using contextual bandit examples with progressive validation, 2: using warm start examples, with fresh validation examples at each epoch, 3: using warm start examples, with a single validation set throughout)") - ("overwrite_label", data->overwrite_label, 1U, "the label used by type 3 corruptions (overwriting)") - ("warm_start_type", data->ws_type, SUPERVISED_WS, "update method of 
utilizing warm start examples (1: using supervised updates, 2: using contextual bandit updates)").missing()) - return nullptr; + option_group_definition new_options("Make Multiclass into Warm-starting Contextual Bandit"); + + new_options + .add(make_option("warm_cb", num_actions) + .keep() + .help("Convert multiclass on classes into a contextual bandit problem")) + .add(make_option("warm_cb_cs", use_cs).help("consume cost-sensitive classification examples instead of multiclass")) + .add(make_option("loss0", data->loss0).default_value(0.f).help("loss for correct label")) + .add(make_option("loss1", data->loss1).default_value(1.f).help("loss for incorrect label")) + .add(make_option("warm_start", data->ws_period).default_value(0U).help("number of training examples for warm start phase")) + .add(make_option("interaction", data->inter_period).default_value(UINT32_MAX).help("number of examples for the interactive contextual bandit learning phase")) + .add(make_option("warm_start_update", data->upd_ws).default_value(true).help("indicator of warm start updates")) + .add(make_option("interaction_update", data->upd_inter).default_value(true).help("indicator of interaction updates")) + .add(make_option("corrupt_type_warm_start", data->cor_type_ws).default_value(UAR).help("type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)")) + .add(make_option("corrupt_prob_warm_start", data->cor_prob_ws).default_value(0.f).help("probability of label corruption in the warm start phase")) + .add(make_option("corrupt_type_interaction", data->cor_type_inter).default_value(UAR).help("type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)")) + .add(make_option("corrupt_prob_interaction", data->cor_prob_inter).default_value(0.f).help("probability of label corruption in the interaction phase")) + .add(make_option("choices_lambda", data->choices_lambda).default_value(1U).help("the number of candidate lambdas to aggregate (lambda is the importance weight parameter between the two sources)")) + .add(make_option("lambda_scheme", data->lambda_scheme).default_value(ABS_CENTRAL).help("The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min lambda=0, max lambda=1, 3: center lambda=epsilon/(#actions+epsilon), 4: center lambda=epsilon/(#actions+epsilon), min lambda=0, max lambda=1); the rest of candidate lambda values are generated using a doubling scheme")) + .add(make_option("weighting_scheme", data->wt_scheme).default_value(INSTANCE_WT).help("weighting scheme (1: per instance weighting, where for every lambda, each contextual bandit example have weight lambda/(1-lambda) times that of each warm start example, 2: per dataset weighting, where for every lambda, the contextual bandit dataset has total weight lambda/(1-lambda) times that of the warm start dataset)")) + .add(make_option("validation_method", data->vali_method).default_value(INTER_VALI).help("lambda selection criterion (1: using contextual bandit examples with progressive validation, 2: using warm start examples, with fresh validation examples at each epoch, 3: using warm start examples, with a single validation set throughout)")) + .add(make_option("overwrite_label", data->overwrite_label).default_value(1U).help("the label used by type 3 corruptions (overwriting)")) + .add(make_option("warm_start_type", data->ws_type).default_value(SUPERVISED_WS).help("update method of utilizing warm start 
examples (1: using supervised updates, 2: using contextual bandit updates)")); + + options.add_and_parse(new_options); data->app_seed = uniform_hash("vw", 2, 0); data->a_s = v_init(); - data->all = arg.all; + data->all = &all; data->use_cs = use_cs; init_adf_data(*data.get(), num_actions); - arg.args.push_back("--cb_min_cost"); - arg.args.push_back(to_string(data->loss0)); - arg.args.push_back("--cb_max_cost"); - arg.args.push_back(to_string(data->loss1)); + options.insert("cb_min_cost", to_string(data->loss0)); + options.insert("cb_max_cost", to_string(data->loss1)); - if (count(arg.args.begin(), arg.args.end(), "--baseline")) + if (options.was_supplied("baseline")) { - arg.args.push_back("--lr_multiplier"); stringstream ss; ss << max(abs(data->loss0), abs(data->loss1)) / (data->loss1 - data->loss0); - arg.args.push_back(ss.str()); + options.insert("lr_multiplier", ss.str()); } learner* l; - multi_learner* base = as_multiline(setup_base(arg)); + multi_learner* base = as_multiline(setup_base(options, all)); // Note: the current version of warm start CB can only support epsilon-greedy exploration // We need to wait for the epsilon value to be passed from the base // cb_explore learner, if there is one - if (arg.vm.count("epsilon") == 0) + + + if (!options.was_supplied("epsilon")) { cerr<<"Warning: no epsilon (greedy parameter) specified; resetting to 0.05"<epsilon = 0.05f; } else - data->epsilon = arg.vm["epsilon"].as(); + data->epsilon = *options.get_option("epsilon"); if (use_cs) - l = &init_cost_sensitive_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); + l = &init_cost_sensitive_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, data->choices_lambda); else - l = &init_multiclass_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); + l = &init_multiclass_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, data->choices_lambda); l->set_finish(finish); - arg.all->delete_prediction = nullptr; + all.delete_prediction = nullptr; return make_base(*l); } diff --git a/vowpalwabbit/warm_cb.h b/vowpalwabbit/warm_cb.h index 2d645774f88..1f211135d85 100644 --- a/vowpalwabbit/warm_cb.h +++ b/vowpalwabbit/warm_cb.h @@ -1 +1,2 @@ -LEARNER::base_learner* warm_cb_setup(arguments& arg); +//LEARNER::base_learner* warm_cb_setup(arguments& arg); +LEARNER::base_learner* warm_cb_setup(VW::config::options_i& options, vw& all); From 49714779ce011aee8d924ca0a8d3d8492f002d8d Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Thu, 7 Feb 2019 11:42:48 -0500 Subject: [PATCH 116/127] fixed sim-bandit option, disallow cost-sensitive corruption --- vowpalwabbit/learner.h | 2 +- vowpalwabbit/warm_cb.cc | 36 ++++++++++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/vowpalwabbit/learner.h b/vowpalwabbit/learner.h index b6bd1b5b61c..902f86b09c4 100644 --- a/vowpalwabbit/learner.h +++ b/vowpalwabbit/learner.h @@ -468,7 +468,7 @@ single_learner* as_singleline(learner* l) template void multiline_learn_or_predict(multi_learner& base, multi_ex& examples, const uint64_t offset, const uint32_t id = 0) { - std::vector saved_offsets(examples.size()); + std::vector saved_offsets; for (auto ec : examples) { saved_offsets.push_back(ec->ft_offset); diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc index 89ab71504d3..cd4378be91b 100644 --- a/vowpalwabbit/warm_cb.cc +++ b/vowpalwabbit/warm_cb.cc @@ -6,6 +6,7 @@ #include "vw.h" #include "hash.h" 
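// [Editor's note] A small self-contained illustration of the saved_offsets fix in
// the learner.h hunk above (toy helpers, not VW source). Constructing the vector
// with a size and then calling push_back appends AFTER the default-initialized
// elements, so the restore loop would first walk offsets.size() zeros:
#include <cstdint>
#include <vector>

inline std::vector<uint64_t> save_offsets_buggy(const std::vector<uint64_t>& offsets)
{
  std::vector<uint64_t> saved(offsets.size());    // already holds offsets.size() zeros
  for (uint64_t o : offsets) saved.push_back(o);  // grows to twice the intended length
  return saved;
}

inline std::vector<uint64_t> save_offsets_fixed(const std::vector<uint64_t>& offsets)
{
  std::vector<uint64_t> saved;                    // start empty, as in the fix
  saved.reserve(offsets.size());                  // optional: avoids reallocations
  for (uint64_t o : offsets) saved.push_back(o);
  return saved;
}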
#include "explore.h" +#include "vw_exception.h" #include @@ -66,6 +67,7 @@ struct warm_cb int lambda_scheme; uint32_t overwrite_label; int ws_type; + bool sim_bandit; //auxiliary variables uint32_t num_actions; @@ -132,10 +134,14 @@ void finish(warm_cb& data) CB::cb_label.delete_label(&data.cb_label); data.a_s.delete_v(); - cout<<"average variance estimate = "<quiet) + { + cerr << "average variance estimate = " << data.cumu_var / data.inter_iter << endl; + cerr << "theoretical average variance = " << data.num_actions / data.epsilon << endl; + cerr << "last lambda chosen = " << data.lambdas[argmin] << " among lambdas ranging from " << data.lambdas[0] << " to " << data.lambdas[data.choices_lambda-1] << endl; + } for (size_t a = 0; a < data.num_actions; ++a) { @@ -596,6 +602,10 @@ void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec) void init_adf_data(warm_cb& data, const size_t num_actions) { data.num_actions = num_actions; + if (data.sim_bandit) + data.ws_type = BANDIT_WS; + else + data.ws_type = SUPERVISED_WS; data.ecs.resize(num_actions); for (size_t a=0; a < num_actions; ++a) { @@ -648,9 +658,10 @@ base_learner* warm_cb_setup(options_i& options, vw& all) .add(make_option("loss0", data->loss0).default_value(0.f).help("loss for correct label")) .add(make_option("loss1", data->loss1).default_value(1.f).help("loss for incorrect label")) .add(make_option("warm_start", data->ws_period).default_value(0U).help("number of training examples for warm start phase")) + .add(make_option("epsilon", data->epsilon).keep().help("epsilon-greedy exploration")) .add(make_option("interaction", data->inter_period).default_value(UINT32_MAX).help("number of examples for the interactive contextual bandit learning phase")) - .add(make_option("warm_start_update", data->upd_ws).default_value(true).help("indicator of warm start updates")) - .add(make_option("interaction_update", data->upd_inter).default_value(true).help("indicator of interaction updates")) + .add(make_option("warm_start_update", data->upd_ws).help("indicator of warm start updates")) + .add(make_option("interaction_update", data->upd_inter).help("indicator of interaction updates")) .add(make_option("corrupt_type_warm_start", data->cor_type_ws).default_value(UAR).help("type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)")) .add(make_option("corrupt_prob_warm_start", data->cor_prob_ws).default_value(0.f).help("probability of label corruption in the warm start phase")) .add(make_option("corrupt_type_interaction", data->cor_type_inter).default_value(UAR).help("type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)")) @@ -660,10 +671,21 @@ base_learner* warm_cb_setup(options_i& options, vw& all) .add(make_option("weighting_scheme", data->wt_scheme).default_value(INSTANCE_WT).help("weighting scheme (1: per instance weighting, where for every lambda, each contextual bandit example have weight lambda/(1-lambda) times that of each warm start example, 2: per dataset weighting, where for every lambda, the contextual bandit dataset has total weight lambda/(1-lambda) times that of the warm start dataset)")) .add(make_option("validation_method", data->vali_method).default_value(INTER_VALI).help("lambda selection criterion (1: using contextual bandit examples with progressive validation, 2: using warm start examples, with fresh validation examples at each epoch, 3: using warm start examples, with a single 
validation set throughout)")) .add(make_option("overwrite_label", data->overwrite_label).default_value(1U).help("the label used by type 3 corruptions (overwriting)")) - .add(make_option("warm_start_type", data->ws_type).default_value(SUPERVISED_WS).help("update method of utilizing warm start examples (1: using supervised updates, 2: using contextual bandit updates)")); + .add(make_option("sim_bandit", data->sim_bandit).help("simulate contextual bandit updates on warm start examples")); options.add_and_parse(new_options); + if(use_cs && (options.was_supplied("corrupt_type_warm_start") || options.was_supplied("corrupt_prob_warm_start") || options.was_supplied("corrupt_type_interaction") || options.was_supplied("corrupt_prob_interaction") )) + { + THROW("label corruption on cost-sensitive examples not currently supported"); + } + + + if(!options.was_supplied("warm_cb")) + { + return nullptr; + } + data->app_seed = uniform_hash("vw", 2, 0); data->a_s = v_init(); data->all = &all; @@ -694,8 +716,6 @@ base_learner* warm_cb_setup(options_i& options, vw& all) cerr<<"Warning: no epsilon (greedy parameter) specified; resetting to 0.05"<epsilon = 0.05f; } - else - data->epsilon = *options.get_option("epsilon"); if (use_cs) l = &init_cost_sensitive_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, data->choices_lambda); From db1da5e55c5fd1ba94deae6820d6843cd50b7833 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Thu, 7 Feb 2019 22:06:50 -0500 Subject: [PATCH 117/127] begin fixing importance weight in cs examples --- vowpalwabbit/cost_sensitive.cc | 203 ++++++++++++++++----------------- vowpalwabbit/warm_cb.cc | 120 ++++--------------- 2 files changed, 118 insertions(+), 205 deletions(-) diff --git a/vowpalwabbit/cost_sensitive.cc b/vowpalwabbit/cost_sensitive.cc index 2d239e6d612..80e8e5c4438 100644 --- a/vowpalwabbit/cost_sensitive.cc +++ b/vowpalwabbit/cost_sensitive.cc @@ -6,42 +6,43 @@ using namespace std; namespace COST_SENSITIVE { -void name_value(substring& s, v_array& name, float& v) + +void name_value(substring &s, v_array& name, float &v) { tokenize(':', s, name); switch (name.size()) { - case 0: - case 1: - v = 1.; - break; - case 2: - v = float_of_substring(name[1]); - if (nanpattern(v)) - THROW("error NaN value for: " << name[0]); - break; - default: - cerr << "example with a wierd name. What is '"; - cerr.write(s.begin, s.end - s.begin); - cerr << "'?\n"; + case 0: + case 1: + v = 1.; + break; + case 2: + v = float_of_substring(name[1]); + if (nanpattern(v)) + THROW("error NaN value for: " << name[0]); + break; + default: + cerr << "example with a wierd name. 
What is '"; + cerr.write(s.begin, s.end - s.begin); + cerr << "'?\n"; } } char* bufread_label(label* ld, char* c, io_buf& cache) { - size_t num = *(size_t*)c; + size_t num = *(size_t *)c; ld->costs.clear(); c += sizeof(size_t); - size_t total = sizeof(wclass) * num; - if (cache.buf_read(c, (int)total) < total) + size_t total = sizeof(wclass)*num; + if (buf_read(cache, c, (int)total) < total) { cout << "error in demarshal of cost data" << endl; return c; } - for (size_t i = 0; i < num; i++) + for (size_t i = 0; icosts.push_back(temp); } @@ -51,26 +52,29 @@ char* bufread_label(label* ld, char* c, io_buf& cache) size_t read_cached_label(shared_data*, void* v, io_buf& cache) { - label* ld = (label*)v; + label* ld = (label*) v; ld->costs.clear(); - char* c; + char *c; size_t total = sizeof(size_t); - if (cache.buf_read(c, (int)total) < total) + if (buf_read(cache, c, (int)total) < total) return 0; - bufread_label(ld, c, cache); + bufread_label(ld,c, cache); return total; } -float weight(void*) { return 1.; } +float weight(void*) +{ + return 1.; +} char* bufcache_label(label* ld, char* c) { - *(size_t*)c = ld->costs.size(); + *(size_t *)c = ld->costs.size(); c += sizeof(size_t); - for (unsigned int i = 0; i < ld->costs.size(); i++) + for (unsigned int i = 0; i< ld->costs.size(); i++) { - *(wclass*)c = ld->costs[i]; + *(wclass *)c = ld->costs[i]; c += sizeof(wclass); } return c; @@ -78,37 +82,36 @@ char* bufcache_label(label* ld, char* c) void cache_label(void* v, io_buf& cache) { - char* c; - label* ld = (label*)v; - cache.buf_write(c, sizeof(size_t) + sizeof(wclass) * ld->costs.size()); - bufcache_label(ld, c); + char *c; + label* ld = (label*) v; + buf_write(cache, c, sizeof(size_t)+sizeof(wclass)*ld->costs.size()); + bufcache_label(ld,c); } void default_label(void* v) { - label* ld = (label*)v; + label* ld = (label*) v; ld->costs.clear(); } bool test_label(void* v) { - label* ld = (label*)v; + label* ld = (label*) v; if (ld->costs.size() == 0) return true; - for (unsigned int i = 0; i < ld->costs.size(); i++) + for (unsigned int i=0; icosts.size(); i++) if (FLT_MAX != ld->costs[i].x) return false; return true; } -void delete_label(void* v) + void delete_label(void* v) { label* ld = (label*)v; - if (ld) - ld->costs.delete_v(); + if (ld) ld->costs.delete_v(); } -void copy_label(void* dst, void* src) +void copy_label(void*dst, void*src) { if (dst && src) { @@ -120,14 +123,13 @@ void copy_label(void* dst, void* src) bool substring_eq(substring ss, const char* str) { - size_t len_ss = ss.end - ss.begin; + size_t len_ss = ss.end - ss.begin; size_t len_str = strlen(str); - if (len_ss != len_str) - return false; + if (len_ss != len_str) return false; return (strncmp(ss.begin, str, len_ss) == 0); } -void parse_label(parser* p, shared_data* sd, void* v, v_array& words) +void parse_label(parser* p, shared_data*sd, void* v, v_array& words) { label* ld = (label*)v; ld->costs.clear(); @@ -138,31 +140,29 @@ void parse_label(parser* p, shared_data* sd, void* v, v_array& words) float fx; name_value(words[0], p->parse_name, fx); bool eq_shared = substring_eq(p->parse_name[0], "***shared***"); - bool eq_label = substring_eq(p->parse_name[0], "***label***"); - if (!sd->ldict) + bool eq_label = substring_eq(p->parse_name[0], "***label***"); + if (! 
sd->ldict) { eq_shared |= substring_eq(p->parse_name[0], "shared"); - eq_label |= substring_eq(p->parse_name[0], "label"); + eq_label |= substring_eq(p->parse_name[0], "label"); } if (eq_shared || eq_label) { if (eq_shared) { - if (p->parse_name.size() != 1) - cerr << "shared feature vectors should not have costs on: " << words[0] << endl; + if (p->parse_name.size() != 1) cerr << "shared feature vectors should not have costs on: " << words[0] << endl; else { - wclass f = {-FLT_MAX, 0, 0., 0.}; + wclass f = { -FLT_MAX, 0, 0., 0.}; ld->costs.push_back(f); } } if (eq_label) { - if (p->parse_name.size() != 2) - cerr << "label feature vectors should have exactly one cost on: " << words[0] << endl; + if (p->parse_name.size() != 2) cerr << "label feature vectors should have exactly one cost on: " << words[0] << endl; else { - wclass f = {float_of_substring(p->parse_name[1]), 0, 0., 0.}; + wclass f = { float_of_substring(p->parse_name[1]), 0, 0., 0.}; ld->costs.push_back(f); } } @@ -173,7 +173,7 @@ void parse_label(parser* p, shared_data* sd, void* v, v_array& words) // otherwise this is a "real" example for (unsigned int i = 0; i < words.size(); i++) { - wclass f = {0., 0, 0., 0.}; + wclass f = {0.,0,0.,0.}; name_value(words[i], p->parse_name, f.x); if (p->parse_name.size() == 0) @@ -181,8 +181,7 @@ void parse_label(parser* p, shared_data* sd, void* v, v_array& words) if (p->parse_name.size() == 1 || p->parse_name.size() == 2 || p->parse_name.size() == 3) { - f.class_index = - sd->ldict ? (uint32_t)sd->ldict->get(p->parse_name[0]) : (uint32_t)hashstring(p->parse_name[0], 0); + f.class_index = sd->ldict ? (uint32_t)sd->ldict->get(p->parse_name[0]) : (uint32_t)hashstring(p->parse_name[0], 0); if (p->parse_name.size() == 1 && f.x >= 0) // test examples are specified just by un-valued class #s f.x = FLT_MAX; } @@ -193,8 +192,13 @@ void parse_label(parser* p, shared_data* sd, void* v, v_array& words) } } -label_parser cs_label = {default_label, parse_label, cache_label, read_cached_label, delete_label, weight, copy_label, - test_label, sizeof(label)}; +label_parser cs_label = {default_label, parse_label, + cache_label, read_cached_label, + delete_label, weight, + copy_label, + test_label, + sizeof(label) + }; void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool action_scores, uint32_t prediction) { @@ -213,10 +217,10 @@ void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool act const example& first_ex = *(*ec_seq)[0]; v_array costs = first_ex.l.cs.costs; - if (costs.size() == 1 && costs[0].class_index == 0 && costs[0].x < 0) - ecc++; + if (costs.size() == 1 && costs[0].class_index == 0 && costs[0].x < 0) ecc++; - for (; ecc != &(*ec_seq->cend()); ecc++) num_current_features += (*ecc)->num_features; + for (; ecc!=&(*ec_seq->cend()); ecc++) + num_current_features += (*ecc)->num_features; } std::string label_buf; @@ -232,22 +236,17 @@ void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool act pred_buf << std::setw(all.sd->col_current_predict) << std::right << std::setfill(' '); if (all.sd->ldict) { - if (action_scores) - pred_buf << all.sd->ldict->get(ec.pred.a_s[0].action); - else - pred_buf << all.sd->ldict->get(prediction); + if (action_scores) pred_buf << all.sd->ldict->get(ec.pred.a_s[0].action); + else pred_buf << all.sd->ldict->get(prediction); } - else - pred_buf << ec.pred.a_s[0].action; - if (action_scores) - pred_buf << "....."; - all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, pred_buf.str(), 
num_current_features, - all.progress_add, all.progress_arg); - ; + else pred_buf << ec.pred.a_s[0].action; + if (action_scores) pred_buf <<"....."; + all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, pred_buf.str(), + num_current_features, all.progress_add, all.progress_arg);; } else - all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, prediction, num_current_features, - all.progress_add, all.progress_arg); + all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, prediction, + num_current_features, all.progress_add, all.progress_arg); } } @@ -257,31 +256,31 @@ void output_example(vw& all, example& ec) float loss = 0.; if (!test_label(&ld)) - { - // need to compute exact loss - size_t pred = (size_t)ec.pred.multiclass; - - float chosen_loss = FLT_MAX; - float min = FLT_MAX; - for (auto& cl : ld.costs) { - if (cl.class_index == pred) - chosen_loss = cl.x; - if (cl.x < min) - min = cl.x; - } - if (chosen_loss == FLT_MAX) - cerr << "warning: csoaa predicted an invalid class. Are all multi-class labels in the {1..k} range?" << endl; + //need to compute exact loss + size_t pred = (size_t)ec.pred.multiclass; - loss = chosen_loss - min; - // TODO(alberto): add option somewhere to allow using absolute loss instead? - // loss = chosen_loss; - } + float chosen_loss = FLT_MAX; + float min = FLT_MAX; + for (auto& cl : ld.costs) + { + if (cl.class_index == pred) + chosen_loss = cl.x; + if (cl.x < min) + min = cl.x; + } + if (chosen_loss == FLT_MAX) + cerr << "warning: csoaa predicted an invalid class. Are all multi-class labels in the {1..k} range?" << endl; + + loss = chosen_loss - min; + // TODO(alberto): add option somewhere to allow using absolute loss instead? + // loss = chosen_loss; + } all.sd->update(ec.test_only, !test_label(&ld), loss, 1.f, ec.num_features); for (int sink : all.final_prediction_sink) - if (!all.sd->ldict) + if (! 
all.sd->ldict) all.print(sink, (float)ec.pred.multiclass, 0, ec.tag); else { @@ -295,8 +294,7 @@ void output_example(vw& all, example& ec) for (unsigned int i = 0; i < ld.costs.size(); i++) { wclass cl = ld.costs[i]; - if (i > 0) - outputStringStream << ' '; + if (i > 0) outputStringStream << ' '; outputStringStream << cl.class_index << ':' << cl.partial_prediction; } all.print_text(all.raw_prediction, outputStringStream.str(), ec.tag); @@ -314,23 +312,18 @@ void finish_example(vw& all, example& ec) bool example_is_test(example& ec) { v_array costs = ec.l.cs.costs; - if (costs.size() == 0) - return true; - for (size_t j = 0; j < costs.size(); j++) - if (costs[j].x != FLT_MAX) - return false; + if (costs.size() == 0) return true; + for (size_t j=0; j costs = ec.l.cs.costs; - if (costs.size() != 1) - return false; - if (costs[0].class_index != 0) - return false; - if (costs[0].x != -FLT_MAX) - return false; + if (costs.size() != 1) return false; + if (costs[0].class_index != 0) return false; + if (costs[0].x != -FLT_MAX) return false; return true; } -} // namespace COST_SENSITIVE +} diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc index cd4378be91b..a796708dcbf 100644 --- a/vowpalwabbit/warm_cb.cc +++ b/vowpalwabbit/warm_cb.cc @@ -27,13 +27,6 @@ using namespace VW::config; #define CIRCULAR 2 #define OVERWRITE 3 -#define INTER_VALI 1 -#define WS_VALI_SPLIT 2 -#define WS_VALI_NOSPLIT 3 - -#define INSTANCE_WT 1 -#define DATASET_WT 2 - #define ABS_CENTRAL 1 #define ABS_CENTRAL_ZEROONE 2 #define MINIMAX_CENTRAL 3 @@ -60,8 +53,6 @@ struct warm_cb bool upd_inter; int cor_type_ws; float cor_prob_ws; - int cor_type_inter; - float cor_prob_inter; int vali_method; int wt_scheme; int lambda_scheme; @@ -205,7 +196,7 @@ void copy_example_to_adf(warm_cb& data, example& ec) } } -float minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period, size_t interaction_period) +float minimax_lambda(float epsilon, size_t num_actions) { return epsilon / (num_actions + epsilon); } @@ -238,7 +229,7 @@ void setup_lambdas(warm_cb& data) if (data.lambda_scheme == ABS_CENTRAL || data.lambda_scheme == ABS_CENTRAL_ZEROONE) lambdas[mid] = 0.5; else - lambdas[mid] = minimax_lambda(data.epsilon, data.num_actions, data.ws_period, data.inter_period); + lambdas[mid] = minimax_lambda(data.epsilon, data.num_actions); for (uint32_t i = mid; i > 0; i--) lambdas[i-1] = lambdas[i] / 2.0; @@ -276,11 +267,6 @@ uint32_t corrupt_action(warm_cb& data, uint32_t action, int ec_type) cor_prob = data.cor_prob_ws; cor_type = data.cor_type_ws; } - else - { - cor_prob = data.cor_prob_inter; - cor_type = data.cor_type_inter; - } float randf = merand48(data.all->random_state); if (randf < cor_prob) @@ -313,20 +299,11 @@ float compute_weight_multiplier(warm_cb& data, size_t i, int ec_type) float total_train_size = ws_train_size + inter_train_size; float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * inter_train_size; - if (data.wt_scheme == INSTANCE_WT) - { - if (ec_type == WARM_START) - weight_multiplier = (1-data.lambdas[i]) * total_train_size / (total_weight + FLT_MIN); - else - weight_multiplier = data.lambdas[i] * total_train_size / (total_weight + FLT_MIN); - } + if (ec_type == WARM_START) + weight_multiplier = (1-data.lambdas[i]) * total_train_size / (total_weight + FLT_MIN); else - { - if (ec_type == WARM_START) - weight_multiplier = (1-data.lambdas[i]) * total_train_size / ws_train_size; - else - weight_multiplier = data.lambdas[i] * total_train_size / inter_train_size; - } + 
weight_multiplier = data.lambdas[i] * total_train_size / (total_weight + FLT_MIN); + return weight_multiplier; } @@ -350,48 +327,6 @@ void accumu_costs_iv_adf(warm_cb& data, multi_learner& base, example& ec) } } -template -void accumu_costs_wsv_adf(warm_cb& data, multi_learner& base) -{ - uint32_t ws_vali_size = data.ws_vali_size; - //only update cumulative costs at the end of every epoch - if ( data.inter_iter >= 1 && abs( log2(data.inter_iter+1) - floor(log2(data.inter_iter+1)) ) < 1e-4 ) - { - for (uint32_t i = 0; i < data.choices_lambda; i++) - data.cumulative_costs[i] = 0; - - uint32_t num_epochs = ceil(log2(data.inter_period)); - uint32_t epoch = log2(data.inter_iter+1) - 1; - float batch_vali_size = ((float) ws_vali_size) / num_epochs; - uint32_t lb, ub; - - if (data.vali_method == WS_VALI_SPLIT) - { - lb = ceil(batch_vali_size * epoch); - ub = ceil(batch_vali_size * (epoch + 1)); - } - else - { - lb = 0; - ub = ws_vali_size; - } - - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - for (uint32_t j = lb; j < ub; j++) - { - example* ec_vali = data.ws_vali[j]; - uint32_t pred_label = predict_sublearner_adf(data, base, *ec_vali, i); - - if (use_cs) - data.cumulative_costs[i] += loss_cs(data, ec_vali->l.cs.costs, pred_label); - else - data.cumulative_costs[i] += loss(data, ec_vali->l.multi.label, pred_label); - } - } - } -} - template void add_to_vali(warm_cb& data, example& ec) { @@ -413,7 +348,7 @@ uint32_t predict_sup_adf(warm_cb& data, multi_learner& base, example& ec) } template -void learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, int ec_type) +void learn_sup_adf(warm_cb& data, example& ec, int ec_type) { copy_example_to_adf(data, ec); //generate cost-sensitive label (for cost-sensitive learner's temporary use) @@ -459,7 +394,7 @@ void predict_or_learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, i uint32_t action = predict_sup_adf(data, base, ec); if (ind_update(data, ec_type)) - learn_sup_adf(data, base, ec, ec_type); + learn_sup_adf(data, ec, ec_type); ec.pred.multiclass = action; } @@ -525,15 +460,12 @@ void predict_or_learn_bandit_adf(warm_cb& data, multi_learner& base, example& ec else cl.cost = loss(data, ec.l.multi.label, cl.action); - if (ec_type == INTERACTION && data.vali_method == INTER_VALI) + if (ec_type == INTERACTION) accumu_costs_iv_adf(data, base, ec); if (ind_update(data, ec_type)) learn_bandit_adf(data, base, ec, ec_type); - if (ec_type == INTERACTION && (data.vali_method == WS_VALI_SPLIT || data.vali_method == WS_VALI_NOSPLIT)) - accumu_costs_wsv_adf(data, base); - ec.pred.multiclass = cl.action; } @@ -560,22 +492,16 @@ void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec) data.mc_label = ec.l.multi; if (data.ws_iter < data.ws_period) ec.l.multi.label = corrupt_action(data, data.mc_label.label, WARM_START); - else if (data.inter_iter < data.inter_period) - ec.l.multi.label = corrupt_action(data, data.mc_label.label, INTERACTION); } // Warm start phase if (data.ws_iter < data.ws_period) { - if (data.ws_iter < data.ws_train_size) - { if (data.ws_type == SUPERVISED_WS) predict_or_learn_sup_adf(data, base, ec, WARM_START); else if (data.ws_type == BANDIT_WS) predict_or_learn_bandit_adf(data, base, ec, WARM_START); - } - else - add_to_vali(data, ec); + ec.weight = 0; data.ws_iter++; } @@ -589,7 +515,10 @@ void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec) } // Skipping the rest of the examples else - ec.weight = 0; + { + ec.weight = 0; + ec.pred.multiclass = 1; + } // Restore the 
original labels if (use_cs) @@ -597,6 +526,8 @@ void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec) else ec.l.multi = data.mc_label; + cout<(num_actions); - if (data.vali_method == WS_VALI_SPLIT || data.vali_method == WS_VALI_NOSPLIT) - { - data.ws_train_size = ceil(data.ws_period / 2.0); - data.ws_vali_size = data.ws_period - data.ws_train_size; - } - else - { - data.ws_train_size = data.ws_period; - data.ws_vali_size = 0; - } + data.ws_train_size = data.ws_period; + data.ws_vali_size = 0; + data.ws_iter = 0; data.inter_iter = 0; @@ -664,18 +588,14 @@ base_learner* warm_cb_setup(options_i& options, vw& all) .add(make_option("interaction_update", data->upd_inter).help("indicator of interaction updates")) .add(make_option("corrupt_type_warm_start", data->cor_type_ws).default_value(UAR).help("type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)")) .add(make_option("corrupt_prob_warm_start", data->cor_prob_ws).default_value(0.f).help("probability of label corruption in the warm start phase")) - .add(make_option("corrupt_type_interaction", data->cor_type_inter).default_value(UAR).help("type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)")) - .add(make_option("corrupt_prob_interaction", data->cor_prob_inter).default_value(0.f).help("probability of label corruption in the interaction phase")) .add(make_option("choices_lambda", data->choices_lambda).default_value(1U).help("the number of candidate lambdas to aggregate (lambda is the importance weight parameter between the two sources)")) .add(make_option("lambda_scheme", data->lambda_scheme).default_value(ABS_CENTRAL).help("The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min lambda=0, max lambda=1, 3: center lambda=epsilon/(#actions+epsilon), 4: center lambda=epsilon/(#actions+epsilon), min lambda=0, max lambda=1); the rest of candidate lambda values are generated using a doubling scheme")) - .add(make_option("weighting_scheme", data->wt_scheme).default_value(INSTANCE_WT).help("weighting scheme (1: per instance weighting, where for every lambda, each contextual bandit example have weight lambda/(1-lambda) times that of each warm start example, 2: per dataset weighting, where for every lambda, the contextual bandit dataset has total weight lambda/(1-lambda) times that of the warm start dataset)")) - .add(make_option("validation_method", data->vali_method).default_value(INTER_VALI).help("lambda selection criterion (1: using contextual bandit examples with progressive validation, 2: using warm start examples, with fresh validation examples at each epoch, 3: using warm start examples, with a single validation set throughout)")) .add(make_option("overwrite_label", data->overwrite_label).default_value(1U).help("the label used by type 3 corruptions (overwriting)")) .add(make_option("sim_bandit", data->sim_bandit).help("simulate contextual bandit updates on warm start examples")); options.add_and_parse(new_options); - if(use_cs && (options.was_supplied("corrupt_type_warm_start") || options.was_supplied("corrupt_prob_warm_start") || options.was_supplied("corrupt_type_interaction") || options.was_supplied("corrupt_prob_interaction") )) + if( use_cs && ( options.was_supplied("corrupt_type_warm_start") || options.was_supplied("corrupt_prob_warm_start") ) ) { THROW("label corruption on cost-sensitive examples not currently supported"); } From 
12b36b99ad73495cae2883ab1ef823dbf5f72869 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Thu, 7 Feb 2019 22:10:27 -0500 Subject: [PATCH 118/127] revert cost_sensitive.cc --- vowpalwabbit/cost_sensitive.cc | 203 +++++++++++++++++---------------- 1 file changed, 105 insertions(+), 98 deletions(-) diff --git a/vowpalwabbit/cost_sensitive.cc b/vowpalwabbit/cost_sensitive.cc index 80e8e5c4438..2d239e6d612 100644 --- a/vowpalwabbit/cost_sensitive.cc +++ b/vowpalwabbit/cost_sensitive.cc @@ -6,43 +6,42 @@ using namespace std; namespace COST_SENSITIVE { - -void name_value(substring &s, v_array& name, float &v) +void name_value(substring& s, v_array& name, float& v) { tokenize(':', s, name); switch (name.size()) { - case 0: - case 1: - v = 1.; - break; - case 2: - v = float_of_substring(name[1]); - if (nanpattern(v)) - THROW("error NaN value for: " << name[0]); - break; - default: - cerr << "example with a wierd name. What is '"; - cerr.write(s.begin, s.end - s.begin); - cerr << "'?\n"; + case 0: + case 1: + v = 1.; + break; + case 2: + v = float_of_substring(name[1]); + if (nanpattern(v)) + THROW("error NaN value for: " << name[0]); + break; + default: + cerr << "example with a wierd name. What is '"; + cerr.write(s.begin, s.end - s.begin); + cerr << "'?\n"; } } char* bufread_label(label* ld, char* c, io_buf& cache) { - size_t num = *(size_t *)c; + size_t num = *(size_t*)c; ld->costs.clear(); c += sizeof(size_t); - size_t total = sizeof(wclass)*num; - if (buf_read(cache, c, (int)total) < total) + size_t total = sizeof(wclass) * num; + if (cache.buf_read(c, (int)total) < total) { cout << "error in demarshal of cost data" << endl; return c; } - for (size_t i = 0; icosts.push_back(temp); } @@ -52,29 +51,26 @@ char* bufread_label(label* ld, char* c, io_buf& cache) size_t read_cached_label(shared_data*, void* v, io_buf& cache) { - label* ld = (label*) v; + label* ld = (label*)v; ld->costs.clear(); - char *c; + char* c; size_t total = sizeof(size_t); - if (buf_read(cache, c, (int)total) < total) + if (cache.buf_read(c, (int)total) < total) return 0; - bufread_label(ld,c, cache); + bufread_label(ld, c, cache); return total; } -float weight(void*) -{ - return 1.; -} +float weight(void*) { return 1.; } char* bufcache_label(label* ld, char* c) { - *(size_t *)c = ld->costs.size(); + *(size_t*)c = ld->costs.size(); c += sizeof(size_t); - for (unsigned int i = 0; i< ld->costs.size(); i++) + for (unsigned int i = 0; i < ld->costs.size(); i++) { - *(wclass *)c = ld->costs[i]; + *(wclass*)c = ld->costs[i]; c += sizeof(wclass); } return c; @@ -82,36 +78,37 @@ char* bufcache_label(label* ld, char* c) void cache_label(void* v, io_buf& cache) { - char *c; - label* ld = (label*) v; - buf_write(cache, c, sizeof(size_t)+sizeof(wclass)*ld->costs.size()); - bufcache_label(ld,c); + char* c; + label* ld = (label*)v; + cache.buf_write(c, sizeof(size_t) + sizeof(wclass) * ld->costs.size()); + bufcache_label(ld, c); } void default_label(void* v) { - label* ld = (label*) v; + label* ld = (label*)v; ld->costs.clear(); } bool test_label(void* v) { - label* ld = (label*) v; + label* ld = (label*)v; if (ld->costs.size() == 0) return true; - for (unsigned int i=0; icosts.size(); i++) + for (unsigned int i = 0; i < ld->costs.size(); i++) if (FLT_MAX != ld->costs[i].x) return false; return true; } - void delete_label(void* v) +void delete_label(void* v) { label* ld = (label*)v; - if (ld) ld->costs.delete_v(); + if (ld) + ld->costs.delete_v(); } -void copy_label(void*dst, void*src) +void copy_label(void* dst, void* src) { if (dst && 
src) {
@@ -123,13 +120,14 @@ void copy_label(void*dst, void*src)
 bool substring_eq(substring ss, const char* str)
 {
-  size_t len_ss = ss.end - ss.begin;
+  size_t len_ss = ss.end - ss.begin;
   size_t len_str = strlen(str);
-  if (len_ss != len_str) return false;
+  if (len_ss != len_str)
+    return false;
   return (strncmp(ss.begin, str, len_ss) == 0);
 }
 
-void parse_label(parser* p, shared_data*sd, void* v, v_array<substring>& words)
+void parse_label(parser* p, shared_data* sd, void* v, v_array<substring>& words)
 {
   label* ld = (label*)v;
   ld->costs.clear();
@@ -140,29 +138,31 @@ void parse_label(parser* p, shared_data*sd, void* v, v_array<substring>& words)
     float fx;
     name_value(words[0], p->parse_name, fx);
     bool eq_shared = substring_eq(p->parse_name[0], "***shared***");
-    bool eq_label = substring_eq(p->parse_name[0], "***label***");
-    if (! sd->ldict)
+    bool eq_label = substring_eq(p->parse_name[0], "***label***");
+    if (!sd->ldict)
     {
       eq_shared |= substring_eq(p->parse_name[0], "shared");
-      eq_label |= substring_eq(p->parse_name[0], "label");
+      eq_label |= substring_eq(p->parse_name[0], "label");
     }
     if (eq_shared || eq_label)
     {
       if (eq_shared)
       {
-        if (p->parse_name.size() != 1) cerr << "shared feature vectors should not have costs on: " << words[0] << endl;
+        if (p->parse_name.size() != 1)
+          cerr << "shared feature vectors should not have costs on: " << words[0] << endl;
         else
         {
-          wclass f = { -FLT_MAX, 0, 0., 0.};
+          wclass f = {-FLT_MAX, 0, 0., 0.};
           ld->costs.push_back(f);
         }
       }
       if (eq_label)
       {
-        if (p->parse_name.size() != 2) cerr << "label feature vectors should have exactly one cost on: " << words[0] << endl;
+        if (p->parse_name.size() != 2)
+          cerr << "label feature vectors should have exactly one cost on: " << words[0] << endl;
         else
         {
-          wclass f = { float_of_substring(p->parse_name[1]), 0, 0., 0.};
+          wclass f = {float_of_substring(p->parse_name[1]), 0, 0., 0.};
           ld->costs.push_back(f);
         }
       }
@@ -173,7 +173,7 @@ void parse_label(parser* p, shared_data*sd, void* v, v_array<substring>& words)
   // otherwise this is a "real" example
   for (unsigned int i = 0; i < words.size(); i++)
   {
-    wclass f = {0.,0,0.,0.};
+    wclass f = {0., 0, 0., 0.};
     name_value(words[i], p->parse_name, f.x);
 
     if (p->parse_name.size() == 0)
@@ -181,7 +181,8 @@
 
     if (p->parse_name.size() == 1 || p->parse_name.size() == 2 || p->parse_name.size() == 3)
     {
-      f.class_index = sd->ldict ? (uint32_t)sd->ldict->get(p->parse_name[0]) : (uint32_t)hashstring(p->parse_name[0], 0);
+      f.class_index =
+          sd->ldict ? (uint32_t)sd->ldict->get(p->parse_name[0]) : (uint32_t)hashstring(p->parse_name[0], 0);
       if (p->parse_name.size() == 1 && f.x >= 0)  // test examples are specified just by un-valued class #s
         f.x = FLT_MAX;
     }
@@ -192,13 +193,8 @@
   }
 }
 
-label_parser cs_label = {default_label, parse_label,
-                         cache_label, read_cached_label,
-                         delete_label, weight,
-                         copy_label,
-                         test_label,
-                         sizeof(label)
-                        };
+label_parser cs_label = {default_label, parse_label, cache_label, read_cached_label, delete_label, weight, copy_label,
+    test_label, sizeof(label)};
 
 void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool action_scores, uint32_t prediction)
 {
@@ -217,10 +213,10 @@ void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool act
       const example& first_ex = *(*ec_seq)[0];
 
       v_array<wclass> costs = first_ex.l.cs.costs;
-      if (costs.size() == 1 && costs[0].class_index == 0 && costs[0].x < 0) ecc++;
+      if (costs.size() == 1 && costs[0].class_index == 0 && costs[0].x < 0)
+        ecc++;
 
-      for (; ecc!=&(*ec_seq->cend()); ecc++)
-        num_current_features += (*ecc)->num_features;
+      for (; ecc != &(*ec_seq->cend()); ecc++) num_current_features += (*ecc)->num_features;
     }
 
     std::string label_buf;
@@ -236,17 +232,22 @@ void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool act
       pred_buf << std::setw(all.sd->col_current_predict) << std::right << std::setfill(' ');
       if (all.sd->ldict)
       {
-        if (action_scores) pred_buf << all.sd->ldict->get(ec.pred.a_s[0].action);
-        else pred_buf << all.sd->ldict->get(prediction);
+        if (action_scores)
+          pred_buf << all.sd->ldict->get(ec.pred.a_s[0].action);
+        else
+          pred_buf << all.sd->ldict->get(prediction);
       }
-      else pred_buf << ec.pred.a_s[0].action;
-      if (action_scores) pred_buf <<".....";
-      all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, pred_buf.str(),
-                           num_current_features, all.progress_add, all.progress_arg);;
+      else
+        pred_buf << ec.pred.a_s[0].action;
+      if (action_scores)
+        pred_buf << ".....";
+      all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, pred_buf.str(), num_current_features,
+          all.progress_add, all.progress_arg);
+      ;
     }
     else
-      all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, prediction,
-                           num_current_features, all.progress_add, all.progress_arg);
+      all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, prediction, num_current_features,
+          all.progress_add, all.progress_arg);
   }
 }
 
@@ -256,31 +257,31 @@
   float loss = 0.;
 
   if (!test_label(&ld))
-    {
-      //need to compute exact loss
-      size_t pred = (size_t)ec.pred.multiclass;
-
-      float chosen_loss = FLT_MAX;
-      float min = FLT_MAX;
-      for (auto& cl : ld.costs)
-      {
-        if (cl.class_index == pred)
-          chosen_loss = cl.x;
-        if (cl.x < min)
-          min = cl.x;
-      }
-      if (chosen_loss == FLT_MAX)
-        cerr << "warning: csoaa predicted an invalid class. Are all multi-class labels in the {1..k} range?" << endl;
+  {
+    // need to compute exact loss
+    size_t pred = (size_t)ec.pred.multiclass;
 
-      loss = chosen_loss - min;
-      // TODO(alberto): add option somewhere to allow using absolute loss instead?
-      // loss = chosen_loss;
+    float chosen_loss = FLT_MAX;
+    float min = FLT_MAX;
+    for (auto& cl : ld.costs)
+    {
+      if (cl.class_index == pred)
+        chosen_loss = cl.x;
+      if (cl.x < min)
+        min = cl.x;
     }
+    if (chosen_loss == FLT_MAX)
+      cerr << "warning: csoaa predicted an invalid class. Are all multi-class labels in the {1..k} range?" << endl;
+
+    loss = chosen_loss - min;
+    // TODO(alberto): add option somewhere to allow using absolute loss instead?
+    // loss = chosen_loss;
+  }
 
   all.sd->update(ec.test_only, !test_label(&ld), loss, 1.f, ec.num_features);
 
   for (int sink : all.final_prediction_sink)
-    if (! all.sd->ldict)
+    if (!all.sd->ldict)
       all.print(sink, (float)ec.pred.multiclass, 0, ec.tag);
     else
     {
@@ -294,7 +295,8 @@
       for (unsigned int i = 0; i < ld.costs.size(); i++)
       {
         wclass cl = ld.costs[i];
-        if (i > 0) outputStringStream << ' ';
+        if (i > 0)
+          outputStringStream << ' ';
         outputStringStream << cl.class_index << ':' << cl.partial_prediction;
       }
       all.print_text(all.raw_prediction, outputStringStream.str(), ec.tag);
@@ -312,18 +314,23 @@ void finish_example(vw& all, example& ec)
 bool example_is_test(example& ec)
 {
   v_array<wclass> costs = ec.l.cs.costs;
-  if (costs.size() == 0) return true;
-  for (size_t j=0; j<costs.size(); j++)
-    if (costs[j].x != FLT_MAX) return false;
+  if (costs.size() == 0)
+    return true;
+  for (size_t j = 0; j < costs.size(); j++)
+    if (costs[j].x != FLT_MAX)
+      return false;
   return true;
 }
 
 bool ec_is_example_header(example& ec)
 {
   v_array<wclass> costs = ec.l.cs.costs;
-  if (costs.size() != 1) return false;
-  if (costs[0].class_index != 0) return false;
-  if (costs[0].x != -FLT_MAX) return false;
+  if (costs.size() != 1)
+    return false;
+  if (costs[0].class_index != 0)
+    return false;
+  if (costs[0].x != -FLT_MAX)
+    return false;
   return true;
 }
-}
+}  // namespace COST_SENSITIVE

From 1c0400be938a541f9aa18867e5efa48b37a63a02 Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Thu, 7 Feb 2019 22:35:01 -0500
Subject: [PATCH 119/127] fixed the weighting issue in cs examples
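warm_cb feeds warm-start supervised examples to the cost-sensitive learner
with non-unit importance weights, but csoaa's progressive validation ignored
ec.weight: every example entered the running average with weight 1. The diff
below scales the reported regret by ec.weight and passes ec.weight (rather
than 1.f) to shared_data::update. A minimal sketch of the intended
accounting, with illustrative names rather than VW's:

    // average printed loss = sum_loss / weighted_examples
    struct loss_totals { double sum_loss = 0.; double weighted_examples = 0.; };
    void record(loss_totals& t, float regret, float weight)
    {
      t.sum_loss += regret * weight;   // per-example loss scaled by its weight
      t.weighted_examples += weight;   // denominator grows by weight, not 1.f
    }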
---
 vowpalwabbit/cost_sensitive.cc | 4 ++--
 vowpalwabbit/warm_cb.cc        | 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/vowpalwabbit/cost_sensitive.cc b/vowpalwabbit/cost_sensitive.cc
index 2d239e6d612..9021308cc46 100644
--- a/vowpalwabbit/cost_sensitive.cc
+++ b/vowpalwabbit/cost_sensitive.cc
@@ -273,12 +273,12 @@ void output_example(vw& all, example& ec)
     if (chosen_loss == FLT_MAX)
       cerr << "warning: csoaa predicted an invalid class. Are all multi-class labels in the {1..k} range?" << endl;
 
-    loss = chosen_loss - min;
+    loss = (chosen_loss - min) * ec.weight;
     // TODO(alberto): add option somewhere to allow using absolute loss instead?
     // loss = chosen_loss;
   }
 
-  all.sd->update(ec.test_only, !test_label(&ld), loss, 1.f, ec.num_features);
+  all.sd->update(ec.test_only, !test_label(&ld), loss, ec.weight, ec.num_features);
 
   for (int sink : all.final_prediction_sink)
     if (!all.sd->ldict)
diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc
index a796708dcbf..2a61a1a935d 100644
--- a/vowpalwabbit/warm_cb.cc
+++ b/vowpalwabbit/warm_cb.cc
@@ -526,8 +526,6 @@ void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec)
   else
     ec.l.multi = data.mc_label;
 
-  cout<

From: Chicheng Zhang
Date: Thu, 7 Feb 2019 23:05:23 -0500
Subject: [PATCH 120/127] .
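Remove the autotools build definition vowpalwabbit/Makefile.am (deleted
below); it was the one place the new warm_cb.cc source still had to be
listed by hand, and the Visual Studio project file is updated to carry the
warm_cb sources in the next commit.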
---
 vowpalwabbit/Makefile.am | 61 ----------------------------------------
 1 file changed, 61 deletions(-)
 delete mode 100644 vowpalwabbit/Makefile.am

diff --git a/vowpalwabbit/Makefile.am b/vowpalwabbit/Makefile.am
deleted file mode 100644
index cfab1395555..00000000000
--- a/vowpalwabbit/Makefile.am
+++ /dev/null
@@ -1,61 +0,0 @@
-lib_LTLIBRARIES = liballreduce.la libvw.la libvw_c_wrapper.la
-
-liballreduce_la_SOURCES = allreduce_sockets.cc allreduce_threads.cc vw_exception.cc
-
-bin_PROGRAMS = vw active_interactor
-
-libvw_la_SOURCES = parser_helper.cc global_data.cc io_buf.cc parse_regressor.cc parse_primitives.cc unique_sort.cc cache.cc rand48.cc no_label.cc simple_label.cc multiclass.cc oaa.cc multilabel_oaa.cc boosting.cc ect.cc marginal.cc autolink.cc binary.cc lrq.cc cost_sensitive.cc multilabel.cc label_dictionary.cc csoaa.cc cb.cc cb_adf.cc cb_algs.cc search.cc search_meta.cc search_sequencetask.cc search_dep_parser.cc search_hooktask.cc search_multiclasstask.cc search_entityrelationtask.cc search_graph.cc parse_example.cc scorer.cc network.cc parse_args.cc accumulate.cc gd.cc learner.cc mwt.cc lda_core.cc gd_mf.cc mf.cc bfgs.cc noop.cc print.cc example.cc parser.cc loss_functions.cc sender.cc nn.cc confidence.cc bs.cc cbify.cc explore_eval.cc topk.cc stagewise_poly.cc log_multi.cc recall_tree.cc active.cc active_cover.cc cs_active.cc kernel_svm.cc best_constant.cc ftrl.cc svrg.cc lrqfa.cc interact.cc comp_io.cc interactions.cc vw_exception.cc vw_validate.cc audit_regressor.cc gen_cs_example.cc cb_explore.cc action_score.cc cb_explore_adf.cc OjaNewton.cc parse_example_json.cc baseline.cc classweight.cc warm_cb.cc
-
-libvw_c_wrapper_la_SOURCES = vwdll.cpp
-
-# accumulate.cc uses all_reduce
-libvw_la_LIBADD = liballreduce.la
-libvw_c_wrapper_la_LIBADD = libvw.la
-
-ACLOCAL_AMFLAGS = -I acinclude.d
-
-AM_CXXFLAGS = ${BOOST_CPPFLAGS} ${ZLIB_CPPFLAGS} ${PTHREAD_CFLAGS} -Wall -Wno-unused-local-typedefs
-LIBS = ${BOOST_LDFLAGS} ${BOOST_PROGRAM_OPTIONS_LIB} ${ZLIB_LDFLAGS} ${PTHREAD_LIBS}
-
-CXXOPTIMIZE =
-
-if PROFILE
-CXXOPTIMIZE += -pg
-endif
-
-if VWBUG
-CXXOPTIMIZE += -g -O1
-else
-CXXOPTIMIZE += -O3 -fomit-frame-pointer -DNDEBUG
-endif
-
-if NITPICK
-AM_CXXFLAGS += -Wextra -Wundef -Wshadow -Wunsafe-loop-optimizations -Wconversion -Wmissing-format-attribute
-AM_CXXFLAGS += -Wredundant-decls -ansi -Wmissing-noreturn
-endif
-
-if PARALLELIZE
-AM_CXXFLAGS += -Wno-strict-aliasing -fopenmp
-endif
-
-if FREEBSD
-AM_CXXFLAGS += -l compat
-else
-AM_CXXFLAGS += -pedantic
-endif
-
-if CLANG_LIBCXX
-AM_CXXFLAGS += -stdlib=libc++
-endif
-
-AM_CXXFLAGS += -I ../rapidjson/include -I ../explore
-
-AM_CXXFLAGS += $(CXXOPTIMIZE)
-
-vw_SOURCES = main.cc
-vw_CXXFLAGS = $(AM_CXXFLAGS)
-
-vw_LDADD = libvw.la liballreduce.la
-vw_DEPENDENCIES = libvw.la liballreduce.la
-
-active_interactor_SOURCES = active_interactor.cc

From 63d8c40b61961a6dc760596eb389e345eb375f29 Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Thu, 7 Feb 2019 23:26:11 -0500
Subject: [PATCH 121/127] edited vw_core.vcxproj

---
 vowpalwabbit/vw_core.vcxproj | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vowpalwabbit/vw_core.vcxproj b/vowpalwabbit/vw_core.vcxproj
index 7e7515c689e..a68eb8b43a0 100644
--- a/vowpalwabbit/vw_core.vcxproj
+++ b/vowpalwabbit/vw_core.vcxproj
@@ -171,6 +171,7 @@
+    <ClInclude Include="warm_cb.h" />
@@ -269,6 +270,7 @@
+    <ClCompile Include="warm_cb.cc" />
@@ -372,4 +374,4 @@
-</Project>
\ No newline at end of file
+</Project>

From 99d642b8ceb7f99d00e0d921089670a3e401fd7d Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Fri, 8 Feb 2019 10:59:35 -0500
Subject: [PATCH 122/127] added new warm cb test cases
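Rename the warm-start test references from cbify_ws* to warm_cb* and bring
the invocations in line with the current flags: --warm_start_update and
--interaction_update are now opt-in booleans passed explicitly, Sim-Bandit
warm start is requested with --sim_bandit instead of --warm_start_type 2,
and a new test feeds cost-sensitive warm-start examples via --warm_cb_cs.
The reference stderr files gain three trailer lines reporting the average
variance estimate, the theoretical average variance, and the last lambda
chosen from the candidate grid.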
---
 test/RunTests                                 | 40 ++++++++-----------
 test/train-sets/ref/cbify_ws_maj.stderr       | 19 ---------
 test/train-sets/ref/cbify_ws_uar.stderr       | 19 ---------
 test/train-sets/ref/cbify_ws_wsgt.stderr      | 19 ---------
 ...fy_ws_no_int_upd.stderr => warm_cb.stderr} |  3 ++
 test/train-sets/ref/warm_cb_cs.stderr         | 21 ++++++++++
 ...cbify_ws_cyc.stderr => warm_cb_cyc.stderr} |  3 ++
 ...e.stderr => warm_cb_lambda_zeroone.stderr} |  3 ++
 ...fy_ws.stderr => warm_cb_no_int_upd.stderr} |  3 ++
 ...ws_upd.stderr => warm_cb_no_ws_upd.stderr} |  3 ++
 ...bandit.stderr => warm_cb_simbandit.stderr} |  3 ++
 11 files changed, 55 insertions(+), 81 deletions(-)
 delete mode 100644 test/train-sets/ref/cbify_ws_maj.stderr
 delete mode 100644 test/train-sets/ref/cbify_ws_uar.stderr
 delete mode 100644 test/train-sets/ref/cbify_ws_wsgt.stderr
 rename test/train-sets/ref/{cbify_ws_no_int_upd.stderr => warm_cb.stderr} (80%)
 create mode 100644 test/train-sets/ref/warm_cb_cs.stderr
 rename test/train-sets/ref/{cbify_ws_cyc.stderr => warm_cb_cyc.stderr} (80%)
 rename test/train-sets/ref/{cbify_ws_lambda_zeroone.stderr => warm_cb_lambda_zeroone.stderr} (80%)
 rename test/train-sets/ref/{cbify_ws.stderr => warm_cb_no_int_upd.stderr} (81%)
 rename test/train-sets/ref/{cbify_ws_no_ws_upd.stderr => warm_cb_no_ws_upd.stderr} (80%)
 rename test/train-sets/ref/{cbify_ws_simbandit.stderr => warm_cb_simbandit.stderr} (80%)

diff --git a/test/RunTests b/test/RunTests
index 1679bedccbb..41cec2685a5 100755
--- a/test/RunTests
+++ b/test/RunTests
@@ -1638,37 +1638,29 @@ echo "1 | feature:1" | {VW} -a --initial_weight 0.1 --initial_t 0.3
     train-sets/ref/no_shared_features.stderr
 
 # Test 176 warm_cb warm start
-{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 -d train-sets/multiclass
-    train-sets/ref/cbify_ws.stderr
+{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_update --interaction_update -d train-sets/multiclass
+    train-sets/ref/warm_cb.stderr
 
 # Test 177 warm_cb warm start with lambda set containing 0/1
-{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --lambda_scheme 2 -d train-sets/multiclass
-    train-sets/ref/cbify_ws_lambda_zeroone.stderr
+{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --lambda_scheme 2 --warm_start_update --interaction_update -d train-sets/multiclass
+    train-sets/ref/warm_cb_lambda_zeroone.stderr
 
 # Test 178 warm_cb warm start with warm start update turned off
-{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_update false -d train-sets/multiclass
-    train-sets/ref/cbify_ws_no_ws_upd.stderr
+{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --interaction_update -d train-sets/multiclass
+    train-sets/ref/warm_cb_no_ws_upd.stderr
 
 # Test 179 warm_cb warm start with interaction update turned off
-{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --interaction_update false -d train-sets/multiclass
-    train-sets/ref/cbify_ws_no_int_upd.stderr
+{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.0 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_update -d train-sets/multiclass
+    train-sets/ref/warm_cb_no_int_upd.stderr
 
 # Test 180 warm_cb warm start with bandit warm start type (Sim-Bandit)
-{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_type 2 -d train-sets/multiclass
-    train-sets/ref/cbify_ws_simbandit.stderr
+{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 1 --warm_start_update --interaction_update --sim_bandit -d train-sets/multiclass
+    train-sets/ref/warm_cb_simbandit.stderr
 
-# Test 181 warm_cb warm start with UAR supervised corruption
-{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 1 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass
-    train-sets/ref/cbify_ws_uar.stderr
+# Test 181 warm_cb warm start with CYC supervised corruption
+{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_update --interaction_update --corrupt_type_warm_start 2 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass
+    train-sets/ref/warm_cb_cyc.stderr
 
-# Test 182 warm_cb warm start with CYC supervised corruption
-{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 2 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass
-    train-sets/ref/cbify_ws_cyc.stderr
-
-# Test 183 warm_cb warm start with MAJ supervised corruption
-{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 3 --corrupt_prob_warm_start 0.5 --overwrite_label 1 -d train-sets/multiclass
-    train-sets/ref/cbify_ws_maj.stderr
-
-# Test 184 warm_cb warm start with warm start distribution being the ground truth
-{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_interaction 2 --corrupt_prob_interaction 0.5 --weighting_scheme 2 --validation_method 2 -d train-sets/multiclass
-    train-sets/ref/cbify_ws_wsgt.stderr
+# Test 182 warm_cb warm start with input cost-sensitive examples
+{VW} --warm_cb 3 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 1 --interaction 2 --choices_lambda 8 --warm_start_update --interaction_update --warm_cb_cs -d train-sets/cs_cb
+    train-sets/ref/warm_cb_cs.stderr
diff --git a/test/train-sets/ref/cbify_ws_maj.stderr b/test/train-sets/ref/cbify_ws_maj.stderr
deleted file mode 100644
index 2a12135dfa0..00000000000
--- a/test/train-sets/ref/cbify_ws_maj.stderr
+++ /dev/null
@@ -1,19 +0,0 @@
-Num weight bits = 18
-learning rate = 0.5
-initial_t = 0
-power_t = 0.5
-using no cache
-Reading datafile = train-sets/multiclass
-num sources = 1
-average since example example current current current
-loss last counter weight label predict features
-1.000000 1.000000 4 1.0 4 1 2
-1.000000 1.000000 5 2.0 5 1 2
-1.000000 1.000000 7 4.0 7 1 2
-
-finished run
-number of examples = 10
-weighted example sum = 7.000000
-weighted label sum = 0.000000
-average loss = 1.000000
-total feature number = 20
diff --git a/test/train-sets/ref/cbify_ws_uar.stderr b/test/train-sets/ref/cbify_ws_uar.stderr
deleted file mode 100644
index 6d05ba5a0db..00000000000
--- a/test/train-sets/ref/cbify_ws_uar.stderr
+++ /dev/null
@@ -1,19 +0,0 @@
-Num weight bits = 18
-learning rate = 0.5
-initial_t = 0
-power_t = 0.5
-using no cache
-Reading datafile = train-sets/multiclass
-num sources = 1
-average since example example current current current
-loss last counter weight label predict features
-1.000000 1.000000 4 1.0 4 3 2
-1.000000 1.000000 5 2.0 5 3 2
-1.000000 1.000000 7 4.0 7 3 2
-
-finished run
-number of examples = 10
-weighted example sum = 7.000000
-weighted label sum = 0.000000
-average loss = 1.000000
-total feature number = 20
diff --git a/test/train-sets/ref/cbify_ws_wsgt.stderr b/test/train-sets/ref/cbify_ws_wsgt.stderr
deleted file mode 100644
index d05436ac3a2..00000000000
--- a/test/train-sets/ref/cbify_ws_wsgt.stderr
+++ /dev/null
@@ -1,19 +0,0 @@
-Num weight bits = 18
-learning rate = 0.5
-initial_t = 0
-power_t = 0.5
-using no cache
-Reading datafile = train-sets/multiclass
-num sources = 1
-average since example example current current current
-loss last counter weight label predict features
-1.000000 1.000000 4 1.0 4 2 2
-1.000000 1.000000 5 2.0 5 2 2
-1.000000 1.000000 7 4.0 7 2 2
-
-finished run
-number of examples = 10
-weighted example sum = 7.000000
-weighted label sum = 0.000000
-average loss = 1.000000
-total feature number = 20
diff --git a/test/train-sets/ref/cbify_ws_no_int_upd.stderr b/test/train-sets/ref/warm_cb.stderr
similarity index 80%
rename from test/train-sets/ref/cbify_ws_no_int_upd.stderr
rename to test/train-sets/ref/warm_cb.stderr
index 6d05ba5a0db..542eedca77d 100644
--- a/test/train-sets/ref/cbify_ws_no_int_upd.stderr
+++ b/test/train-sets/ref/warm_cb.stderr
@@ -17,3 +17,6 @@ weighted example sum = 7.000000
 weighted label sum = 0.000000
 average loss = 1.000000
 total feature number = 20
+average variance estimate = 171.578140
+theoretical average variance = 200.000000
+last lambda chosen = 0.031250 among lambdas ranging from 0.031250 to 0.937500
diff --git a/test/train-sets/ref/warm_cb_cs.stderr b/test/train-sets/ref/warm_cb_cs.stderr
new file mode 100644
index 00000000000..0fa13e7b3e3
--- /dev/null
+++ b/test/train-sets/ref/warm_cb_cs.stderr
@@ -0,0 +1,21 @@
+Num weight bits = 18
+learning rate = 0.5
+initial_t = 0
+power_t = 0.5
+using no cache
+Reading datafile = train-sets/cs_cb
+num sources = 1
+average since example example current current current
+loss last counter weight label predict features
+0.000000 0.000000 2 1.0 known 2 4
+0.000000 0.000000 3 2.0 known 2 4
+
+finished run
+number of examples = 3
+weighted example sum = 2.000000
+weighted label sum = 0.000000
+average loss = 0.000000
+total feature number = 12
+average variance estimate = 1.034483
+theoretical average variance = 60.000000
+last lambda chosen = 0.031250 among lambdas ranging from 0.031250 to 0.937500
diff --git a/test/train-sets/ref/cbify_ws_cyc.stderr b/test/train-sets/ref/warm_cb_cyc.stderr
similarity index 80%
rename from test/train-sets/ref/cbify_ws_cyc.stderr
rename to test/train-sets/ref/warm_cb_cyc.stderr
index a1affe4ec96..0f2fa85641b 100644
--- a/test/train-sets/ref/cbify_ws_cyc.stderr
+++ b/test/train-sets/ref/warm_cb_cyc.stderr
@@ -17,3 +17,6 @@ weighted example sum = 7.000000
 weighted label sum = 0.000000
 average loss = 0.857143
 total feature number = 20
+average variance estimate = 143.156311
+theoretical average variance = 200.000000
+last lambda chosen = 0.937500 among lambdas ranging from 0.031250 to 0.937500
diff --git a/test/train-sets/ref/cbify_ws_lambda_zeroone.stderr b/test/train-sets/ref/warm_cb_lambda_zeroone.stderr
similarity index 80%
rename from test/train-sets/ref/cbify_ws_lambda_zeroone.stderr
rename to test/train-sets/ref/warm_cb_lambda_zeroone.stderr
index 344c43a5335..0b01cc9af71 100644
--- a/test/train-sets/ref/cbify_ws_lambda_zeroone.stderr
+++ b/test/train-sets/ref/warm_cb_lambda_zeroone.stderr
@@ -17,3 +17,6 @@ weighted example sum = 7.000000
 weighted label sum = 0.000000
 average loss = 0.857143
 total feature number = 20
+average variance estimate = 60.903835
+theoretical average variance = 200.000000
+last lambda chosen = 0.000000 among lambdas ranging from 0.000000 to 1.000000
diff --git a/test/train-sets/ref/cbify_ws.stderr b/test/train-sets/ref/warm_cb_no_int_upd.stderr
similarity index 81%
rename from test/train-sets/ref/cbify_ws.stderr
rename to test/train-sets/ref/warm_cb_no_int_upd.stderr
index 6d05ba5a0db..2eb6f8c199b 100644
--- a/test/train-sets/ref/cbify_ws.stderr
+++ b/test/train-sets/ref/warm_cb_no_int_upd.stderr
@@ -17,3 +17,6 @@ weighted example sum = 7.000000
 weighted label sum = 0.000000
 average loss = 1.000000
 total feature number = 20
+average variance estimate = 1.000000
+theoretical average variance = inf
+last lambda chosen = 0.000000 among lambdas ranging from 0.000000 to 0.000000
diff --git a/test/train-sets/ref/cbify_ws_no_ws_upd.stderr b/test/train-sets/ref/warm_cb_no_ws_upd.stderr
similarity index 80%
rename from test/train-sets/ref/cbify_ws_no_ws_upd.stderr
rename to test/train-sets/ref/warm_cb_no_ws_upd.stderr
index 4b334d4e73b..9b172856ee1 100644
--- a/test/train-sets/ref/cbify_ws_no_ws_upd.stderr
+++ b/test/train-sets/ref/warm_cb_no_ws_upd.stderr
@@ -17,3 +17,6 @@ weighted example sum = 7.000000
 weighted label sum = 0.000000
 average loss = 0.714286
 total feature number = 20
+average variance estimate = 7.512840
+theoretical average variance = 200.000000
+last lambda chosen = 1.000000 among lambdas ranging from 1.000000 to 1.000000
diff --git a/test/train-sets/ref/cbify_ws_simbandit.stderr b/test/train-sets/ref/warm_cb_simbandit.stderr
similarity index 80%
rename from test/train-sets/ref/cbify_ws_simbandit.stderr
rename to test/train-sets/ref/warm_cb_simbandit.stderr
index 6d935a38a61..84e75bd328c 100644
--- a/test/train-sets/ref/cbify_ws_simbandit.stderr
+++ b/test/train-sets/ref/warm_cb_simbandit.stderr
@@ -17,3 +17,6 @@ weighted example sum = 7.000000
 weighted label sum = 0.000000
 average loss = 0.857143
 total feature number = 20
+average variance estimate = 4.685901
+theoretical average variance = 200.000000
+last lambda chosen = 0.500000 among lambdas ranging from 0.500000 to 0.500000

From 3ad0f7bdda05703368aecd9288461700e3e849a3 Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Sun, 24 Feb 2019 23:08:09 -0500
Subject: [PATCH 123/127] overwrote regcb test results, as we further divide
 importance weights of each example in the mtr reduction by 1/num_actions
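With --cb_type mtr, only the example for the logged action is trained on,
carrying an importance weight that undoes the logging probability and
applies the multiple-try correction. At this point in the series the weight
is additionally divided by the number of actions K, which shifts the
regcb/regcbopt predictions enough to change the recorded outputs. Roughly,
as a sketch (see learn_MTR in cb_adf.cc; p is the probability of the logged
action):

    // mtr importance weight as of this commit; the 1/K factor is the new part
    float mtr_weight(float p, float event_sum, float action_sum, float K)
    {
      return (1.f / p) * (event_sum / action_sum) * (1.f / K);
    }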
---
 test/train-sets/ref/cbify_regcb.stderr    |  8 ++++----
 test/train-sets/ref/cbify_regcbopt.stderr | 10 +++++-----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/test/train-sets/ref/cbify_regcb.stderr b/test/train-sets/ref/cbify_regcb.stderr
index dd5883333e1..42d1733cb12 100644
--- a/test/train-sets/ref/cbify_regcb.stderr
+++ b/test/train-sets/ref/cbify_regcb.stderr
@@ -3,18 +3,18 @@ learning rate = 0.5
 initial_t = 0
 power_t = 0.5
 using no cache
-Reading datafile = train-sets/multiclass
+Reading datafile = ../../test/train-sets/multiclass
 num sources = 1
 average since example example current current current
 loss last counter weight label predict features
 1.000000 1.000000 1 1.0 1 5 2
 1.000000 1.000000 2 2.0 2 10 2
-0.750000 0.500000 4 4.0 4 7 2
-0.750000 0.750000 8 8.0 8 4 2
+0.750000 0.500000 4 4.0 4 8 2
+0.875000 1.000000 8 8.0 8 4 2
 
 finished run
 number of examples = 10
 weighted example sum = 10.000000
 weighted label sum = 0.000000
-average loss = 0.800000
+average loss = 0.900000
 total feature number = 20
diff --git a/test/train-sets/ref/cbify_regcbopt.stderr b/test/train-sets/ref/cbify_regcbopt.stderr
index 8bb64392725..1a4a367eb37 100644
--- a/test/train-sets/ref/cbify_regcbopt.stderr
+++ b/test/train-sets/ref/cbify_regcbopt.stderr
@@ -3,18 +3,18 @@ learning rate = 0.5
 initial_t = 0
 power_t = 0.5
 using no cache
-Reading datafile = train-sets/multiclass
+Reading datafile = ../../test/train-sets/multiclass
 num sources = 1
 average since example example current current current
 loss last counter weight label predict features
 1.000000 1.000000 1 1.0 1 5 2
-1.000000 1.000000 2 2.0 2 9 2
-1.000000 1.000000 4 4.0 4 7 2
-0.875000 0.750000 8 8.0 8 4 2
+1.000000 1.000000 2 2.0 2 10 2
+0.750000 0.500000 4 4.0 4 7 2
+0.750000 0.750000 8 8.0 8 4 2
 
 finished run
 number of examples = 10
 weighted example sum = 10.000000
 weighted label sum = 0.000000
-average loss = 0.900000
+average loss = 0.800000
 total feature number = 20

From 2fa610e42f86de24c337ed816f833839696c2a8d Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Sun, 24 Feb 2019 23:25:09 -0500
Subject: [PATCH 124/127] corrected a mistake in new regcb test result

---
 test/train-sets/ref/cbify_regcb.stderr    | 2 +-
 test/train-sets/ref/cbify_regcbopt.stderr | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/train-sets/ref/cbify_regcb.stderr b/test/train-sets/ref/cbify_regcb.stderr
index 42d1733cb12..1b713f8a1e7 100644
--- a/test/train-sets/ref/cbify_regcb.stderr
+++ b/test/train-sets/ref/cbify_regcb.stderr
@@ -3,7 +3,7 @@ learning rate = 0.5
 initial_t = 0
 power_t = 0.5
 using no cache
-Reading datafile = ../../test/train-sets/multiclass
+Reading datafile = train-sets/multiclass
 num sources = 1
 average since example example current current current
 loss last counter weight label predict features
diff --git a/test/train-sets/ref/cbify_regcbopt.stderr b/test/train-sets/ref/cbify_regcbopt.stderr
index 1a4a367eb37..dd5883333e1 100644
--- a/test/train-sets/ref/cbify_regcbopt.stderr
+++ b/test/train-sets/ref/cbify_regcbopt.stderr
@@ -3,7 +3,7 @@ learning rate = 0.5
 initial_t = 0
 power_t = 0.5
 using no cache
-Reading datafile = ../../test/train-sets/multiclass
+Reading datafile = train-sets/multiclass
 num sources = 1
 average since example example current current current
 loss last counter weight label predict features

From 5e923d808c45fdf3898ed036beb645dbb80d8712 Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Wed, 20 Mar 2019 16:17:50 -0400
Subject: [PATCH 125/127] reorder reduction stack
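warm_cb invokes a cb_explore / cb_explore_adf learner as its base, so its
setup entry has to be pushed after cb_explore_setup and cb_explore_adf_setup
when the reduction stack is assembled; previously it was pushed between mwt
and cb_explore, which left it on the wrong side of the exploration
reductions it stacks on top of.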
---
 vowpalwabbit/parse_args.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vowpalwabbit/parse_args.cc b/vowpalwabbit/parse_args.cc
index 1e31cec9971..63a5a2d9fdc 100644
--- a/vowpalwabbit/parse_args.cc
+++ b/vowpalwabbit/parse_args.cc
@@ -1270,9 +1270,9 @@ void parse_reductions(options_i& options, vw& all)
   all.reduction_stack.push(cb_algs_setup);
   all.reduction_stack.push(cb_adf_setup);
   all.reduction_stack.push(mwt_setup);
-  all.reduction_stack.push(warm_cb_setup);
   all.reduction_stack.push(cb_explore_setup);
   all.reduction_stack.push(cb_explore_adf_setup);
+  all.reduction_stack.push(warm_cb_setup);
   all.reduction_stack.push(cbify_setup);
   all.reduction_stack.push(cbifyldf_setup);
   all.reduction_stack.push(explore_eval_setup);

From c71d3e3d197df1ff58cba5141818a6e237c332e5 Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Wed, 27 Mar 2019 16:16:56 -0400
Subject: [PATCH 126/127] changed the weight scaling back without 1/K; changed the central value of lambda
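Earlier in this series learn_MTR gained an extra 1/K factor on each bandit
example; this commit removes it, so bandit examples weigh K times more than
they just did. The minimax center of the candidate lambda grid moves
accordingly. One way to read the change: lambda is the weight on warm-start
examples and (1 - lambda) the weight on interaction examples, so in terms of
their ratio

    old center:  lambda / (1 - lambda) = eps / K  =>  lambda = eps / (K + eps)
    new center:  lambda / (1 - lambda) = eps      =>  lambda = eps / (1 + eps)

The in-code comment on minimax_lambda and the --lambda_scheme help text are
updated to match, and the warm_cb reference outputs pick up the new chosen
lambdas.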
---
 test/train-sets/ref/warm_cb.stderr     | 2 +-
 test/train-sets/ref/warm_cb_cyc.stderr | 2 +-
 vowpalwabbit/cb_adf.cc                 | 3 ++-
 vowpalwabbit/warm_cb.cc                | 7 +++++--
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/test/train-sets/ref/warm_cb.stderr b/test/train-sets/ref/warm_cb.stderr
index 542eedca77d..1ed30fba719 100644
--- a/test/train-sets/ref/warm_cb.stderr
+++ b/test/train-sets/ref/warm_cb.stderr
@@ -19,4 +19,4 @@ average loss = 1.000000
 total feature number = 20
 average variance estimate = 171.578140
 theoretical average variance = 200.000000
-last lambda chosen = 0.031250 among lambdas ranging from 0.031250 to 0.937500
+last lambda chosen = 0.937500 among lambdas ranging from 0.031250 to 0.937500
diff --git a/test/train-sets/ref/warm_cb_cyc.stderr b/test/train-sets/ref/warm_cb_cyc.stderr
index 0f2fa85641b..4254bb45d3f 100644
--- a/test/train-sets/ref/warm_cb_cyc.stderr
+++ b/test/train-sets/ref/warm_cb_cyc.stderr
@@ -19,4 +19,4 @@ average loss = 0.857143
 total feature number = 20
 average variance estimate = 143.156311
 theoretical average variance = 200.000000
-last lambda chosen = 0.937500 among lambdas ranging from 0.031250 to 0.937500
+last lambda chosen = 0.750000 among lambdas ranging from 0.031250 to 0.937500
diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc
index c632b57c77e..f0634489180 100644
--- a/vowpalwabbit/cb_adf.cc
+++ b/vowpalwabbit/cb_adf.cc
@@ -39,6 +39,7 @@ struct cb_adf
   uint64_t offset;
   bool no_predict;
   bool rank_all;
+
 };
 
 CB::cb_class get_observed_cost(multi_ex& examples)
@@ -111,7 +112,7 @@ void learn_MTR(cb_adf& mydata, multi_learner& base, multi_ex& examples)
   gen_cs_example_mtr(mydata.gen_cs, examples, mydata.cs_labels);
   uint32_t nf = (uint32_t)examples[mydata.gen_cs.mtr_example]->num_features;
   float old_weight = examples[mydata.gen_cs.mtr_example]->weight;
-  examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / (float)examples.size());
+  examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum);
   GEN_CS::call_cs_ldf(base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset);
   examples[mydata.gen_cs.mtr_example]->num_features = nf;
   examples[mydata.gen_cs.mtr_example]->weight = old_weight;
diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc
index 2a61a1a935d..b8af0c4c489 100644
--- a/vowpalwabbit/warm_cb.cc
+++ b/vowpalwabbit/warm_cb.cc
@@ -196,9 +196,12 @@ void copy_example_to_adf(warm_cb& data, example& ec)
   }
 }
 
+// Changing the minimax value from eps/(K+eps)
+// to eps/(1+eps) to accomodate for
+// weight scaling of bandit examples by factor 1/K in mtr reduction
 float minimax_lambda(float epsilon, size_t num_actions)
 {
-  return epsilon / (num_actions + epsilon);
+  return epsilon / (1.0f + epsilon);
 }
 
 void setup_lambdas(warm_cb& data)
@@ -587,7 +590,7 @@ base_learner* warm_cb_setup(options_i& options, vw& all)
       .add(make_option("corrupt_type_warm_start", data->cor_type_ws).default_value(UAR).help("type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)"))
       .add(make_option("corrupt_prob_warm_start", data->cor_prob_ws).default_value(0.f).help("probability of label corruption in the warm start phase"))
       .add(make_option("choices_lambda", data->choices_lambda).default_value(1U).help("the number of candidate lambdas to aggregate (lambda is the importance weight parameter between the two sources)"))
-      .add(make_option("lambda_scheme", data->lambda_scheme).default_value(ABS_CENTRAL).help("The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min lambda=0, max lambda=1, 3: center lambda=epsilon/(#actions+epsilon), 4: center lambda=epsilon/(#actions+epsilon), min lambda=0, max lambda=1); the rest of candidate lambda values are generated using a doubling scheme"))
+      .add(make_option("lambda_scheme", data->lambda_scheme).default_value(ABS_CENTRAL).help("The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min lambda=0, max lambda=1, 3: center lambda=epsilon/(1+epsilon), 4: center lambda=epsilon/(1+epsilon), min lambda=0, max lambda=1); the rest of candidate lambda values are generated using a doubling scheme"))
       .add(make_option("overwrite_label", data->overwrite_label).default_value(1U).help("the label used by type 3 corruptions (overwriting)"))
       .add(make_option("sim_bandit", data->sim_bandit).help("simulate contextual bandit updates on warm start examples"));

From 13bf77c584ef525e121fa7784410d41fa8522ce6 Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Wed, 27 Mar 2019 16:34:12 -0400
Subject: [PATCH 127/127] changed back regcbopt test results; undo changes in
 cb_adf.cc
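With the 1/K factor gone again, the regcb and regcbopt reference outputs
return to their values from before patch 123, and the learn_MTR hunk in
cb_adf.cc goes back to the upstream line wrapping, so the file carries only
the intended behavior and no leftover formatting drift.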
---
 test/train-sets/ref/cbify_regcb.stderr    | 6 +++---
 test/train-sets/ref/cbify_regcbopt.stderr | 8 ++++----
 vowpalwabbit/cb_adf.cc                    | 7 ++++---
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/test/train-sets/ref/cbify_regcb.stderr b/test/train-sets/ref/cbify_regcb.stderr
index 1b713f8a1e7..dd5883333e1 100644
--- a/test/train-sets/ref/cbify_regcb.stderr
+++ b/test/train-sets/ref/cbify_regcb.stderr
@@ -9,12 +9,12 @@ average since example example current current current
 loss last counter weight label predict features
 1.000000 1.000000 1 1.0 1 5 2
 1.000000 1.000000 2 2.0 2 10 2
-0.750000 0.500000 4 4.0 4 8 2
-0.875000 1.000000 8 8.0 8 4 2
+0.750000 0.500000 4 4.0 4 7 2
+0.750000 0.750000 8 8.0 8 4 2
 
 finished run
 number of examples = 10
 weighted example sum = 10.000000
 weighted label sum = 0.000000
-average loss = 0.900000
+average loss = 0.800000
 total feature number = 20
diff --git a/test/train-sets/ref/cbify_regcbopt.stderr b/test/train-sets/ref/cbify_regcbopt.stderr
index dd5883333e1..8bb64392725 100644
--- a/test/train-sets/ref/cbify_regcbopt.stderr
+++ b/test/train-sets/ref/cbify_regcbopt.stderr
@@ -8,13 +8,13 @@ num sources = 1
 average since example example current current current
 loss last counter weight label predict features
 1.000000 1.000000 1 1.0 1 5 2
-1.000000 1.000000 2 2.0 2 10 2
-0.750000 0.500000 4 4.0 4 7 2
-0.750000 0.750000 8 8.0 8 4 2
+1.000000 1.000000 2 2.0 2 9 2
+1.000000 1.000000 4 4.0 4 7 2
+0.875000 0.750000 8 8.0 8 4 2
 
 finished run
 number of examples = 10
 weighted example sum = 10.000000
 weighted label sum = 0.000000
-average loss = 0.800000
+average loss = 0.900000
 total feature number = 20
diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc
index f0634489180..ccc9cefe131 100644
--- a/vowpalwabbit/cb_adf.cc
+++ b/vowpalwabbit/cb_adf.cc
@@ -39,7 +39,6 @@ struct cb_adf
   uint64_t offset;
   bool no_predict;
   bool rank_all;
-
 };
 
 CB::cb_class get_observed_cost(multi_ex& examples)
@@ -111,7 +112,7 @@ void learn_MTR(cb_adf& mydata, multi_learner& base, multi_ex& examples)
   gen_cs_example_mtr(mydata.gen_cs, examples, mydata.cs_labels);
   uint32_t nf = (uint32_t)examples[mydata.gen_cs.mtr_example]->num_features;
   float old_weight = examples[mydata.gen_cs.mtr_example]->weight;
-  examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum);
-  GEN_CS::call_cs_ldf(base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset);
+  examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability *
+      ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum);
+  GEN_CS::call_cs_ldf(
+      base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset);
   examples[mydata.gen_cs.mtr_example]->num_features = nf;
   examples[mydata.gen_cs.mtr_example]->weight = old_weight;
   swap(examples[0]->pred.a_s, mydata.a_s);