From c891ae820f4cca2874d7e398a8cc165e069f0b78 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Thu, 25 Jan 2018 18:47:51 -0500 Subject: [PATCH 001/127] / --- vowpalwabbit/cb_explore.cc | 186 ++++++++++++++++++++++++++++++++- vowpalwabbit/gen_cs_example.cc | 1 + 2 files changed, 185 insertions(+), 2 deletions(-) diff --git a/vowpalwabbit/cb_explore.cc b/vowpalwabbit/cb_explore.cc index c3b2a07cffd..27d7953efbe 100644 --- a/vowpalwabbit/cb_explore.cc +++ b/vowpalwabbit/cb_explore.cc @@ -20,6 +20,8 @@ struct cb_explore cb_to_cs cbcs; v_array preds; v_array cover_probs; + v_array cost_lambda; + v_array lambdas; CB::label cb_label; COST_SENSITIVE::label cs_label; @@ -32,6 +34,8 @@ struct cb_explore size_t bag_size; size_t cover_size; float psi; + size_t lambda_size; + float n_2; size_t counter; @@ -117,6 +121,146 @@ void predict_or_learn_bag(cb_explore& data, base_learner& base, example& ec) ec.pred.a_s = probs; } +/* +template +void predict_or_learn_lambda(cb_explore& data, base_learner& base, example& ec) +{ + v_array probs = ec.pred.a_s; + probs.erase(); + + for (uint32_t i = 0; i < data.cbcs.num_actions; i++) + probs.push_back({i,0.}); + float prob = 1.f/(float)data.lambda_size; + for (size_t i = 0; i < data.lambda_size; i++) + { + if (is_learn && n_1 > 0) + { + //learn with lambda learning rate + n_1--; + } + else if (is_learn && n_1 <= 0) + { + //learn with 1-lambda learning rate + n_1--; + } + else + { + // predict + // select the lambda that has the minimum cumulative cost + base.predict(ec, i); + uint32_t chosen = ec.pred.multiclass-1; + probs[chosen].score += prob; + } + } + ec.pred.a_s = probs; + +} +*/ +float find_min(v_array arr) +{ + float min_val = FLT_MAX; + uint32_t argmin = -1; + + for (uint32_t i = 0; i < arr.size(); i++) + { + if (arr[i] < min_val) + { + min_val = arr[i]; + argmin = i; + } + } + + return argmin; +} + + + +template +void predict_or_learn_lambda(cb_explore& data, base_learner& base, example& ec) +{ + v_array probs = ec.pred.a_s; + probs.erase(); + + data.cs_label.costs.erase(); + + for (uint32_t j = 0; j < data.cbcs.num_actions; j++) + data.cs_label.costs.push_back({FLT_MAX,j+1,0.,0.}); + + data.cb_label = ec.l.cb; + + ec.l.cs = data.cs_label; + + + // learn + if (is_learn) + { + + // get the cost vector + //data.cs_label.costs.erase(); + //data.cb_label = ec.l.cb; + //data.cbcs.known_cost = get_observed_cost(data.cb_label); + //gen_cs_example(data.cbcs, ec, data.cb_label, data.cs_label); + + ec.l.cb = data.cb_label; + base.learn(ec); + + cout<(data.cbcs, ec, data.cb_label, data.cs_label); + + + for (size_t i = 0; i < data.cbcs.num_actions; i++) + cout<<"action "<() ,"epsilon-greedy exploration") ("bag",po::value() ,"bagging-based exploration") ("cover",po::value() ,"Online cover based exploration") - ("psi", po::value(), "disagreement parameter for cover"); + ("psi", po::value(), "disagreement parameter for cover") + ("lambda",po::value() ,"Online weighting based exploration") + ("n_2", po::value(), "dataset size of source 2"); add_options(all); po::variables_map& vm = all.vm; @@ -388,13 +538,45 @@ base_learner* cb_explore_setup(vw& all) *all.file_options << " --psi " << type_string; l = &init_learner(&data, base, predict_or_learn_cover, predict_or_learn_cover, data.cover_size + 1, prediction_type::action_probs); } + else if (vm.count("lambda")) + { + data.lambda_size = (uint32_t)vm["lambda"].as(); + data.cs = all.cost_sensitive; + data.cost_lambda = v_init(); + for (uint32_t i = 0; i < data.lambda_size; i++) + data.cost_lambda.push_back(0.); + + 
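+      // cost_lambda[i] starts at zero and (per the commented-out
+      // predict_or_learn_lambda above, which selects the lambda with the
+      // minimum cumulative cost) is meant to accumulate an estimate of
+      // sublearner i's cost so the best lambda can be picked online.  The
+      // lambdas themselves are laid out next as a uniform grid:
+      // lambda_i = i / lambda_size over [0, 1).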
data.lambdas = v_init(); + for (uint32_t i = 0; i < data.lambda_size; i++) + data.lambdas.push_back(((float) i )/ data.lambda_size); + + data.second_cs_label.costs.resize(num_actions); + data.second_cs_label.costs.end() = data.second_cs_label.costs.begin()+num_actions; + *all.file_options << " --lambda "<< data.lambda_size; + + if (vm.count("epsilon")) + data.epsilon = vm["epsilon"].as(); + else + data.epsilon = 0.05f; + //cout<<"epsilon = "<(); + //data.preds = v_init(); + //data.preds.resize(data.lambda_size); + //sprintf(type_string, "%f", data.n_2); + //*all.file_options << " --phi " << type_string; + l = &init_learner(&data, base, predict_or_learn_lambda, predict_or_learn_lambda, data.lambda_size + 1, prediction_type::action_probs); + + } else if (vm.count("bag")) { data.bag_size = (uint32_t)vm["bag"].as(); *all.file_options << " --bag "<< data.bag_size; l = &init_learner(&data, base, predict_or_learn_bag, predict_or_learn_bag, data.bag_size, prediction_type::action_probs); } - else if (vm.count("first") ) + else if (vm.count("first")) { data.tau = (uint32_t)vm["first"].as(); *all.file_options << " --first "<< data.tau; diff --git a/vowpalwabbit/gen_cs_example.cc b/vowpalwabbit/gen_cs_example.cc index 4660d8d6a7d..fa2bc21440b 100644 --- a/vowpalwabbit/gen_cs_example.cc +++ b/vowpalwabbit/gen_cs_example.cc @@ -112,6 +112,7 @@ void gen_cs_test_example(v_array examples, COST_SENSITIVE::label& cs_l //single line version void gen_cs_example_ips(cb_to_cs& c, CB::label& ld, COST_SENSITIVE::label& cs_ld) { + //std::cout<<"-------"<action< Date: Mon, 29 Jan 2018 17:45:41 -0500 Subject: [PATCH 002/127] not sure if the cost vector retrieved is correct --- vowpalwabbit/cb_explore.cc | 92 ++++++++++++++++++++++++++++++---- vowpalwabbit/gen_cs_example.cc | 2 + vowpalwabbit/gen_cs_example.h | 1 + 3 files changed, 85 insertions(+), 10 deletions(-) diff --git a/vowpalwabbit/cb_explore.cc b/vowpalwabbit/cb_explore.cc index 27d7953efbe..eb43b392fcc 100644 --- a/vowpalwabbit/cb_explore.cc +++ b/vowpalwabbit/cb_explore.cc @@ -173,7 +173,57 @@ float find_min(v_array arr) return argmin; } +/* + //Randomize over predictions from a base set of predictors + //Use cost sensitive oracle to cover actions to form distribution. + + uint32_t num_actions = data.cbcs.num_actions; + + v_array probs = ec.pred.a_s; + probs.erase(); + data.cs_label.costs.erase(); + + for (uint32_t j = 0; j < num_actions; j++) + data.cs_label.costs.push_back({FLT_MAX,j+1,0.,0.}); + + size_t cover_size = data.cover_size; + size_t counter = data.counter; + v_array& probabilities = data.cover_probs; + v_array& predictions = data.preds; + + float additive_probability = 1.f / (float)cover_size; + + float min_prob = min(1.f / num_actions, 1.f / (float)sqrt(counter * num_actions)); + + data.cb_label = ec.l.cb; + + ec.l.cs = data.cs_label; + get_cover_probabilities(data, base, ec, probs); + + if (is_learn) + { + ec.l.cb = data.cb_label; + base.learn(ec); + + //Now update oracles + + //1. 
Compute loss vector + data.cs_label.costs.erase(); + float norm = min_prob * num_actions; + ec.l.cb = data.cb_label; + + data.cbcs.known_cost = get_observed_cost(data.cb_label); + for (size_t i = 0; i < data.cbcs.num_actions; i++) + cout<<"action "<(data.cbcs, ec, data.cb_label, data.cs_label); + for(uint32_t i = 0; i < num_actions; i++) + probabilities[i] = 0; + + for (size_t i = 0; i < data.cbcs.num_actions; i++) + cout<<"action "< void predict_or_learn_lambda(cb_explore& data, base_learner& base, example& ec) @@ -204,21 +254,23 @@ void predict_or_learn_lambda(cb_explore& data, base_learner& base, example& ec) ec.l.cb = data.cb_label; base.learn(ec); - cout<(data.cbcs, ec, data.cb_label, data.cs_label); - for (size_t i = 0; i < data.cbcs.num_actions; i++) - cout<<"action "<(data.cbcs, ec, data.cb_label, data.cs_label); for(uint32_t i = 0; i < num_actions; i++) probabilities[i] = 0; @@ -547,8 +605,19 @@ base_learner* cb_explore_setup(vw& all) data.cost_lambda.push_back(0.); data.lambdas = v_init(); - for (uint32_t i = 0; i < data.lambda_size; i++) - data.lambdas.push_back(((float) i )/ data.lambda_size); + for (uint32_t i = 0; i < data.lambda_size; i++) + if (i%2 == 0) + { + data.lambdas.push_back(pow(0.5f, floor(i/2) + 1)); + //cout<(); else data.epsilon = 0.05f; + + + //data.lambdas.push_back(((float) i )/ data.lambda_size); //cout<<"epsilon = "<(c.scorer, c.known_cost, ec, action, c.num_actions); + //std::cout<<"wc.x = "< Date: Mon, 29 Jan 2018 18:35:45 -0500 Subject: [PATCH 003/127] not sure if the cost vector retrieved is correct --- vowpalwabbit/cb_explore.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vowpalwabbit/cb_explore.cc b/vowpalwabbit/cb_explore.cc index eb43b392fcc..beac5556048 100644 --- a/vowpalwabbit/cb_explore.cc +++ b/vowpalwabbit/cb_explore.cc @@ -269,8 +269,8 @@ void predict_or_learn_lambda(cb_explore& data, base_learner& base, example& ec) gen_cs_example(data.cbcs, ec, data.cb_label, data.cs_label); - //for (size_t i = 0; i < data.cbcs.num_actions; i++) - // cout<<"action "<(data.cbcs, ec, data.cb_label, data.cs_label); From 539b1e435bb579efd94e77fed65556562b6541ed Mon Sep 17 00:00:00 2001 From: chicheng Date: Thu, 8 Feb 2018 18:21:55 -0500 Subject: [PATCH 004/127] added cbify warm start code --- vowpalwabbit/cbify.cc | 171 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 140 insertions(+), 31 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 25d03db58b7..386d511a5a2 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -52,6 +52,13 @@ struct cbify cbify_adf_data adf_data; float loss0; float loss1; + + size_t choices_lambda; + size_t warm_start_period; + v_array cumulative_costs; + v_array lambdas; + size_t num_actions; + }; vector vw_scorer::Score_Actions(example& ctx) @@ -126,37 +133,109 @@ void copy_example_to_adf(cbify& data, example& ec) } } +uint32_t find_min(v_array arr) +{ + float min_val = FLT_MAX; + uint32_t argmin = 0; + + for (uint32_t i = 0; i < arr.size(); i++) + { + if (arr[i] < min_val) + { + min_val = arr[i]; + argmin = i; + } + } + + return argmin; +} + template void predict_or_learn(cbify& data, base_learner& base, example& ec) { - //Store the multiclass input label - MULTICLASS::label_t ld = ec.l.multi; - data.cb_label.costs.erase(); - ec.l.cb = data.cb_label; - ec.pred.a_s = data.a_s; - - //Call the cb_explore algorithm. 
It returns a vector of probabilities for each action - base.predict(ec); - //data.probs = ec.pred.scalars; - - uint32_t action = data.mwt_explorer->Choose_Action(*data.generic_explorer, StringUtils::to_string(data.example_counter++), ec); - - CB::cb_class cl; - cl.action = action; - cl.probability = ec.pred.a_s[action-1].score; - - if(!cl.action) - THROW("No action with non-zero probability found!"); - cl.cost = loss(data, ld.label, cl.action); - - //Create a new cb label - data.cb_label.costs.push_back(cl); - ec.l.cb = data.cb_label; - base.learn(ec); - data.a_s.erase(); - data.a_s = ec.pred.a_s; - ec.l.multi = ld; - ec.pred.multiclass = action; + bool is_supervised; + + if (data.warm_start_period > 0) + { + is_supervised = true; + data.warm_start_period--; + } + else + is_supervised = false; + + uint32_t argmin; + argmin = find_min(data.cumulative_costs); + if (argmin != 0) + cout<<"argmin is not zero"<cost_sensitive->predict(ec, argmin); + //uint32_t chosen = ec.pred.multiclass-1; + //cout<cost_sensitive->learn(ec, i); + } + ec.l.multi = ld; + } + else //Call the cb_explore algorithm. It returns a vector of probabilities for each action + { + data.cb_label.costs.erase(); + ec.l.cb = data.cb_label; + ec.pred.a_s = data.a_s; + + base.predict(ec, argmin); + //base.predict(ec); + //data.probs = ec.pred.scalars; + + uint32_t action = data.mwt_explorer->Choose_Action(*data.generic_explorer, StringUtils::to_string(data.example_counter++), ec); + + CB::cb_class cl; + cl.action = action; + cl.probability = ec.pred.a_s[action-1].score; + + if(!cl.action) + THROW("No action with non-zero probability found!"); + cl.cost = loss(data, ld.label, cl.action); + + //Create a new cb label + data.cb_label.costs.push_back(cl); + ec.l.cb = data.cb_label; + //base.learn(ec); + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + ec.weight = data.lambdas[i] / (1-data.lambdas[i]); + base.learn(ec, i); + data.cumulative_costs[i] += 0; + } + + data.a_s.erase(); + data.a_s = ec.pred.a_s; + ec.l.multi = ld; + ec.pred.multiclass = action; + } } template @@ -213,6 +292,22 @@ void init_adf_data(cbify& data, const size_t num_actions) adf_data.empty_example->in_use = true; } +void generate_lambdas(v_array& lambdas, size_t lambda_size) +{ + lambdas = v_init(); + for (uint32_t i = 0; i < lambda_size; i++) + if (i%2 == 0) + { + lambdas.push_back(pow(0.5f, floor(i/2) + 1)); + //cout<(), "loss for correct label") - ("loss1", po::value(), "loss for incorrect label"); + ("loss1", po::value(), "loss for incorrect label") + ("warm_start", po::value(), "number of training examples for fully-supervised warm start") + ("choices_lambda", po::value(), "numbers of lambdas importance weights to aggregate"); add_options(all); po::variables_map& vm = all.vm; @@ -238,6 +335,18 @@ base_learner* cbify_setup(vw& all) data.generic_explorer = new GenericExplorer(*data.scorer, (u32)num_actions); data.all = &all; + cout<() : 0; + cout<() : 1; + + generate_lambdas(data.lambdas, data.choices_lambda); + + for (size_t i = 0; i < data.choices_lambda; i++) + data.cumulative_costs.push_back(0.); + + data.num_actions = num_actions; + if (data.use_adf) { init_adf_data(data, num_actions); @@ -263,11 +372,11 @@ base_learner* cbify_setup(vw& all) learner* l; if (data.use_adf) { - l = &init_multiclass_learner(&data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, 1); + l = &init_multiclass_learner(&data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, data.choices_lambda); } else { - l = &init_multiclass_learner(&data, base, 
predict_or_learn, predict_or_learn, all.p, 1); + l = &init_multiclass_learner(&data, base, predict_or_learn, predict_or_learn, all.p, data.choices_lambda); } l->set_finish(finish); From 961a5a583366aa135dcbd60bc99820a4d3f5de2e Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 9 Feb 2018 14:28:52 -0500 Subject: [PATCH 005/127] commented out the multiple lambda code in cbify --- vowpalwabbit/cbify.cc | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 386d511a5a2..2fb6169153b 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -140,12 +140,14 @@ uint32_t find_min(v_array arr) for (uint32_t i = 0; i < arr.size(); i++) { + //cout<cost_sensitive->predict(ec, argmin); + //data.all->cost_sensitive->predict(ec, argmin); + data.all->cost_sensitive->predict(ec); //uint32_t chosen = ec.pred.multiclass-1; //cout<Choose_Action(*data.generic_explorer, StringUtils::to_string(data.example_counter++), ec); @@ -223,13 +227,25 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //Create a new cb label data.cb_label.costs.push_back(cl); ec.l.cb = data.cb_label; - //base.learn(ec); + + //IPS for approximating the cumulative costs for all lambdas + /* + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + example ec2 = ec; + data.all->cost_sensitive->predict(ec2, i); + if (ec2.pred.multiclass == cl.action) + data.cumulative_costs[i] += cl.cost / cl.probability; + //cout<(*data.scorer, (u32)num_actions); data.all = &all; - cout<() : 0; - cout<() : 1; generate_lambdas(data.lambdas, data.choices_lambda); From 0fbc26afb91c980adf31d0183d9818c103400f81 Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 9 Feb 2018 14:30:21 -0500 Subject: [PATCH 006/127] commented out the multiple lambda code in cbify --- vowpalwabbit/cbify.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 2fb6169153b..64474fa739b 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -388,11 +388,11 @@ base_learner* cbify_setup(vw& all) learner* l; if (data.use_adf) { - l = &init_multiclass_learner(&data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, data.choices_lambda); + l = &init_multiclass_learner(&data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, 1); } else { - l = &init_multiclass_learner(&data, base, predict_or_learn, predict_or_learn, all.p, data.choices_lambda); + l = &init_multiclass_learner(&data, base, predict_or_learn, predict_or_learn, all.p, 1); } l->set_finish(finish); From 369b3ea24e9ca2b0b4471c82f0d73f5349544093 Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 9 Feb 2018 15:02:04 -0500 Subject: [PATCH 007/127] the cbexplore approach seems not working, as the first stage cannot prepare multiple copies of weights --- vowpalwabbit/cb_explore.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/vowpalwabbit/cb_explore.cc b/vowpalwabbit/cb_explore.cc index beac5556048..28f7d748811 100644 --- a/vowpalwabbit/cb_explore.cc +++ b/vowpalwabbit/cb_explore.cc @@ -269,8 +269,8 @@ void predict_or_learn_lambda(cb_explore& data, base_learner& base, example& ec) gen_cs_example(data.cbcs, ec, data.cb_label, data.cs_label); - for (size_t i = 0; i < data.cbcs.num_actions; i++) - cout<<"action "<, predict_or_learn_lambda, data.lambda_size + 1, prediction_type::action_probs); + l = &init_learner(&data, base, predict_or_learn_lambda, predict_or_learn_lambda, 
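        // Note: the count argument that follows tells init_learner how many
        // copies of the base weights to allocate -- one per lambda value,
        // addressed by the index passed to learn()/predict().  This revision
        // keeps exactly lambda_size sublearners (the earlier "+ 1" is gone).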
data.lambda_size, prediction_type::action_probs); } else if (vm.count("bag")) From 8f096a5fb78b1c2aae7a870ac33bc90adc512b48 Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 9 Feb 2018 17:30:04 -0500 Subject: [PATCH 008/127] . --- vowpalwabbit/cbify.cc | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 64474fa739b..46e12a28d76 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -165,8 +165,8 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) else is_supervised = false; - //uint32_t argmin; - //argmin = find_min(data.cumulative_costs); + uint32_t argmin; + argmin = find_min(data.cumulative_costs); //cout<cost_sensitive->predict(ec, argmin); - data.all->cost_sensitive->predict(ec); + data.all->cost_sensitive->predict(ec, argmin); //uint32_t chosen = ec.pred.multiclass-1; //cout<Choose_Action(*data.generic_explorer, StringUtils::to_string(data.example_counter++), ec); @@ -229,23 +228,23 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) ec.l.cb = data.cb_label; //IPS for approximating the cumulative costs for all lambdas - /* for (uint32_t i = 0; i < data.choices_lambda; i++) { - example ec2 = ec; - data.all->cost_sensitive->predict(ec2, i); - if (ec2.pred.multiclass == cl.action) + //example ec2 = ec; + data.all->cost_sensitive->predict(ec, i); + //cout<* l; if (data.use_adf) { - l = &init_multiclass_learner(&data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, 1); + l = &init_multiclass_learner(&data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, data.choices_lambda); } else { - l = &init_multiclass_learner(&data, base, predict_or_learn, predict_or_learn, all.p, 1); + l = &init_multiclass_learner(&data, base, predict_or_learn, predict_or_learn, all.p, data.choices_lambda); } l->set_finish(finish); From 904134f0150dfa35c3849282c7bb9b863067e0f3 Mon Sep 17 00:00:00 2001 From: chicheng Date: Mon, 12 Feb 2018 11:40:44 -0500 Subject: [PATCH 009/127] properly store the temp labels --- vowpalwabbit/cbify.cc | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 46e12a28d76..a298df9ba17 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -58,6 +58,7 @@ struct cbify v_array cumulative_costs; v_array lambdas; size_t num_actions; + COST_SENSITIVE::label csl; }; @@ -173,22 +174,24 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; + COST_SENSITIVE::label csd = ec.l.cs; //cout<cost_sensitive->predict(ec, argmin); @@ -202,6 +205,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.all->cost_sensitive->learn(ec, i); } ec.l.multi = ld; + ec.l.cs = csd; } else //Call the cb_explore algorithm. 
It returns a vector of probabilities for each action { @@ -249,6 +253,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.a_s.erase(); data.a_s = ec.pred.a_s; ec.l.multi = ld; + ec.l.cs = csd; ec.pred.multiclass = action; } } From e2713440212d9f7430e067590cf0b0db9ee5a2f2 Mon Sep 17 00:00:00 2001 From: chicheng Date: Mon, 12 Feb 2018 11:48:19 -0500 Subject: [PATCH 010/127] back --- vowpalwabbit/cbify.cc | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index a298df9ba17..44cce764577 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -58,7 +58,6 @@ struct cbify v_array cumulative_costs; v_array lambdas; size_t num_actions; - COST_SENSITIVE::label csl; }; @@ -174,24 +173,22 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; - COST_SENSITIVE::label csd = ec.l.cs; //cout<cost_sensitive->predict(ec, argmin); @@ -205,7 +202,6 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.all->cost_sensitive->learn(ec, i); } ec.l.multi = ld; - ec.l.cs = csd; } else //Call the cb_explore algorithm. It returns a vector of probabilities for each action { @@ -234,12 +230,12 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //IPS for approximating the cumulative costs for all lambdas for (uint32_t i = 0; i < data.choices_lambda; i++) { - //example ec2 = ec; - data.all->cost_sensitive->predict(ec, i); + example ec2 = ec; + data.all->cost_sensitive->predict(ec2, i); //cout< Date: Mon, 12 Feb 2018 18:04:17 -0500 Subject: [PATCH 011/127] . --- data_gen.py | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 data_gen.py diff --git a/data_gen.py b/data_gen.py new file mode 100644 index 00000000000..f1c15ae7716 --- /dev/null +++ b/data_gen.py @@ -0,0 +1,92 @@ +import random +import numpy as np + +classes = 10 +m = 100 + +def gen_keyword(): + + kwperclass = 20 + + keyword = np.zeros((classes, m)) + + for i in range(classes): + shuffled = range(m) + random.shuffle(shuffled) + + for j in range(kwperclass): + keyword[i,shuffled[j]] = 1 + + return keyword + + +def classify(classifier, example): + + result = classifier.dot(example) + + return np.argmax(result) + + + +if __name__ == '__main__': + + + filename = "text_lownoise" + + f = open(filename+".vw", "w") + g = open(filename+"_m.vw", "w") + + keyword = gen_keyword() + + + samples = 10000 + fprob = 0 + + cs = False + + for i in range(samples): + c = random.randint(0, classes-1) + + #generate a pair of datasets (one is cost-sensitive, the other is multiclass) + + for l in range(classes): + f.write(str(l+1)+':') + cost = 1 + if l == c: + cost = 0 + f.write(str(cost)+' ') + + g.write(str(c+1)) + + + + f.write(' | ') + g.write(' | ') + + vec = np.zeros(m) + + for j in range(m): + flip = np.random.choice([False,True],p=[1-fprob, fprob]) + if flip: + vec[j] = 2 * (1-keyword[c][j]) - 1 + else: + vec[j] = 2 * keyword[c][j] - 1 + + for j in range(m): + f.write('w'+str(j)+':') + f.write(str(vec[j])+' ') + g.write('w'+str(j)+':') + g.write(str(vec[j])+' ') + + #print 'Is the prediction equal to the class label? 
', classify(keyword, vec) == c + + f.write('\n') + g.write('\n') + + f.close() + g.close() + + + + + From ced4bbdf559410f1069a52a36b27baf2db333620 Mon Sep 17 00:00:00 2001 From: chicheng Date: Tue, 13 Feb 2018 10:51:56 -0500 Subject: [PATCH 012/127] fixed the bug with assigning cb label before cost sensitive prediction - the ec.l field is anunion --- Makefile | 2 +- vowpalwabbit/cbify.cc | 26 +++++++++++++++----------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index ffa65608520..b01ec7db8cc 100644 --- a/Makefile +++ b/Makefile @@ -71,7 +71,7 @@ FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) $(OPTIM_FLAGS) -D_ #CXX = g++ # for valgrind / gdb debugging -#FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) $(JSON_INCLUDE) -g -O0 -fPIC +FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) $(JSON_INCLUDE) -g -O0 -fPIC # for valgrind profiling: run 'valgrind --tool=callgrind PROGRAM' then 'callgrind_annotate --tree=both --inclusive=yes' #FLAGS = -std=c++0x $(CFLAGS) $(LDFLAGS) -Wall $(ARCH) -ffast-math -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) $(JSON_INCLUDE) -g -fomit-frame-pointer -ffast-math -fno-strict-aliasing -fPIC diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 44cce764577..be40e8bae24 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -179,7 +179,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) if (is_supervised) // Call the cost-sensitive learner directly { //generate cost-sensitive label - COST_SENSITIVE::label csl; + COST_SENSITIVE::label csl = calloc_or_throw(); csl.costs.resize(data.num_actions); csl.costs.end() = csl.costs.begin()+data.num_actions; for (uint32_t j = 0; j < data.num_actions; j++) @@ -192,6 +192,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //predict data.all->cost_sensitive->predict(ec, argmin); + auto old_pred = ec.pred; //uint32_t chosen = ec.pred.multiclass-1; //cout<cost_sensitive->learn(ec, i); } ec.l.multi = ld; + ec.pred = old_pred; } else //Call the cb_explore algorithm. 
It returns a vector of probabilities for each action { @@ -210,6 +212,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) ec.pred.a_s = data.a_s; base.predict(ec, argmin); + auto old_pred = ec.pred; //base.predict(ec); //data.probs = ec.pred.scalars; @@ -223,23 +226,24 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) THROW("No action with non-zero probability found!"); cl.cost = loss(data, ld.label, cl.action); - //Create a new cb label - data.cb_label.costs.push_back(cl); - ec.l.cb = data.cb_label; - //IPS for approximating the cumulative costs for all lambdas for (uint32_t i = 0; i < data.choices_lambda; i++) { - example ec2 = ec; - data.all->cost_sensitive->predict(ec2, i); + //example ec2 = ec; + //assert(0); + data.all->cost_sensitive->predict(ec, i); //cout< Date: Tue, 13 Feb 2018 14:50:06 -0500 Subject: [PATCH 013/127] the cumulative cost become diverse --- vowpalwabbit/cbify.cc | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index be40e8bae24..855d18154b0 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -192,7 +192,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //predict data.all->cost_sensitive->predict(ec, argmin); - auto old_pred = ec.pred; + //auto old_pred = ec.pred; //uint32_t chosen = ec.pred.multiclass-1; //cout<cost_sensitive->learn(ec, i); } ec.l.multi = ld; - ec.pred = old_pred; + //ec.pred = old_pred; } else //Call the cb_explore algorithm. It returns a vector of probabilities for each action { @@ -229,14 +229,14 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //IPS for approximating the cumulative costs for all lambdas for (uint32_t i = 0; i < data.choices_lambda; i++) { - //example ec2 = ec; //assert(0); data.all->cost_sensitive->predict(ec, i); - //cout< Date: Tue, 13 Feb 2018 18:12:46 -0500 Subject: [PATCH 014/127] modified csoaa so that it can take example weights now. --- vowpalwabbit/cbify.cc | 30 ++++++++++++++++-------------- vowpalwabbit/csoaa.cc | 5 ++++- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 855d18154b0..bb653d69d6c 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -234,9 +234,9 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //cout<& lambdas, size_t lambda_size) { lambdas = v_init(); + uint32_t mid = lambda_size / 2; for (uint32_t i = 0; i < lambda_size; i++) - if (i%2 == 0) - { - lambdas.push_back(pow(0.5f, floor(i/2) + 1)); - //cout< 0; i--) + lambdas[i-1] = lambdas[i] / 2; + + for (uint32_t i = mid+1; i < lambda_size; i++) + lambdas[i] = 1 - (1-lambdas[i-1]) / 2; + } base_learner* cbify_setup(vw& all) diff --git a/vowpalwabbit/csoaa.cc b/vowpalwabbit/csoaa.cc index 0abe97e640e..655b98395de 100644 --- a/vowpalwabbit/csoaa.cc +++ b/vowpalwabbit/csoaa.cc @@ -31,9 +31,12 @@ inline void inner_loop(base_learner& base, example& ec, uint32_t i, float cost, { if (is_learn) { - ec.weight = (cost == FLT_MAX) ? 0.f : 1.f; + float old_weight = ec.weight; + if (cost == FLT_MAX) ec.weight = 0.f; + //ec.weight = (cost == FLT_MAX) ? 0.f : 1.f; ec.l.simple.label = cost; base.learn(ec, i-1); + //ec.weight = old_weight; } else base.predict(ec, i-1); From e295aff2606cd445aad0bcc30af166294bc1d93f Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 16 Feb 2018 12:54:32 -0500 Subject: [PATCH 015/127] . 
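Gate the two kinds of updates behind flags and importance-weight the bandit
updates.  New options --no_supervised and --no_bandit set
data.ind_supervised / data.ind_bandit so either source of examples can be
ablated; each bandit example is replayed to every sublearner i with its
weight scaled by lambda_i / (1 - lambda_i), and csoaa's inner_loop now saves
and restores ec.weight instead of overwriting it, so the scaled weight
survives the cost-sensitive reduction.

A minimal sketch of the weighting scheme (names as in cbify.cc below;
supervised examples keep weight 1):

    // Sublearner i targets (1 - lambda_i) * supervised + lambda_i * bandit
    // loss, so relative to weight-1 supervised examples a bandit example
    // must carry weight lambda_i / (1 - lambda_i).
    float old_weight = ec.weight;
    for (uint32_t i = 0; i < data.choices_lambda; i++)
    {
      ec.weight = old_weight * data.lambdas[i] / (1 - data.lambdas[i]);
      base.learn(ec, i);
    }
    ec.weight = old_weight;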
--- vowpalwabbit/cbify.cc | 37 ++++++++++++++++++++++++------------- vowpalwabbit/csoaa.cc | 2 +- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index bb653d69d6c..5dc8f0acbb7 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -58,6 +58,9 @@ struct cbify v_array cumulative_costs; v_array lambdas; size_t num_actions; + bool ind_bandit; + bool ind_supervised; + }; @@ -196,11 +199,13 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //uint32_t chosen = ec.pred.multiclass-1; //cout<cost_sensitive->learn(ec, i); + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + ec.weight = 1; + data.all->cost_sensitive->learn(ec, i); + } } ec.l.multi = ld; //ec.pred = old_pred; @@ -244,16 +249,17 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //base.learn(ec); ec.pred = old_pred; - - for (uint32_t i = 0; i < data.choices_lambda; i++) + if (data.ind_bandit) { - ec.weight = data.lambdas[i] / (1-data.lambdas[i]); - //ec.l.cb.costs[0].cost = 0; - //cl.cost * data.lambdas[i] / (1-data.lambdas[i]); - - base.learn(ec, i); + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + ec.weight = data.lambdas[i] / (1-data.lambdas[i]); + //ec.l.cb.costs[0].cost = 0; + //cl.cost * data.lambdas[i] / (1-data.lambdas[i]); + + base.learn(ec, i); + } } - data.a_s.erase(); data.a_s = ec.pred.a_s; ec.l.multi = ld; @@ -340,7 +346,9 @@ base_learner* cbify_setup(vw& all) ("loss0", po::value(), "loss for correct label") ("loss1", po::value(), "loss for incorrect label") ("warm_start", po::value(), "number of training examples for fully-supervised warm start") - ("choices_lambda", po::value(), "numbers of lambdas importance weights to aggregate"); + ("choices_lambda", po::value(), "numbers of lambdas importance weights to aggregate") + ("no_supervised", "indicator of using supervised only") + ("no_bandit", "indicator of using bandit only"); add_options(all); po::variables_map& vm = all.vm; @@ -350,6 +358,9 @@ base_learner* cbify_setup(vw& all) data.use_adf = count(all.args.begin(), all.args.end(),"--cb_explore_adf") > 0; data.loss0 = vm.count("loss0") ? vm["loss0"].as() : 0.f; data.loss1 = vm.count("loss1") ? vm["loss1"].as() : 1.f; + data.ind_supervised = vm.count("no_supervised") ? false : true; + data.ind_bandit = vm.count("no_bandit") ? false : true; + data.recorder = new vw_recorder(); data.mwt_explorer = new MwtExplorer("vw",*data.recorder); data.scorer = new vw_scorer(); diff --git a/vowpalwabbit/csoaa.cc b/vowpalwabbit/csoaa.cc index 655b98395de..740f0af7b33 100644 --- a/vowpalwabbit/csoaa.cc +++ b/vowpalwabbit/csoaa.cc @@ -36,7 +36,7 @@ inline void inner_loop(base_learner& base, example& ec, uint32_t i, float cost, //ec.weight = (cost == FLT_MAX) ? 
0.f : 1.f; ec.l.simple.label = cost; base.learn(ec, i-1); - //ec.weight = old_weight; + ec.weight = old_weight; } else base.predict(ec, i-1); From ed2f2bf5d09b8c97645b0282c0015aaba2c47dc2 Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 16 Feb 2018 12:55:16 -0500 Subject: [PATCH 016/127] added some results of warm starting --- results.txt | 178 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 results.txt diff --git a/results.txt b/results.txt new file mode 100644 index 00000000000..5be1452c1b9 --- /dev/null +++ b/results.txt @@ -0,0 +1,178 @@ + +chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_lownoise_m.vw --warm_start 15 --choices_lambda 10 --cb_type ips +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = text_lownoise_m.vw +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 1 1.0 4 1 101 +1.000000 1.000000 2 2.0 10 4 101 +0.750000 0.500000 4 4.0 7 10 101 +0.625000 0.500000 8 8.0 8 8 101 +0.130435 0.026316 16 46.0 10 10 101 +0.629630 1.000000 18 108.0 2 10 101 +0.560345 0.500000 22 232.0 3 7 101 +0.529167 0.500000 30 480.0 9 8 101 +0.355533 0.187500 46 976.0 8 8 101 +0.365346 0.375000 78 1968.0 2 7 101 +0.480010 0.593750 142 3952.0 9 5 101 +0.517424 0.554688 270 7920.0 8 8 101 +0.496973 0.476562 526 15856.0 8 8 101 +0.472107 0.447266 1038 31728.0 2 9 101 +0.441124 0.410156 2062 63472.0 1 1 101 +0.348968 0.256836 4110 126960.0 8 1 101 +0.242348 0.135742 8206 253936.0 8 8 101 + +finished run +number of examples per pass = 10000 +passes used = 1 +weighted example sum = 309550.000000 +weighted label sum = 0.000000 +average loss = 0.209223 +total feature number = 1010000 +chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_lownoise_m.vw --warm_start 15 --choices_lambda 10 --cb_type ips --no_supervised +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = text_lownoise_m.vw +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 1 1.0 4 1 101 +1.000000 1.000000 2 2.0 10 1 101 +1.000000 1.000000 4 4.0 7 1 101 +0.875000 0.750000 8 8.0 8 1 101 +0.978261 1.000000 16 46.0 10 1 101 +0.990741 1.000000 18 108.0 2 3 101 +0.995690 1.000000 22 232.0 3 7 101 +0.933333 0.875000 30 480.0 9 7 101 +0.871926 0.812500 46 976.0 8 8 101 +0.715955 0.562500 78 1968.0 2 2 101 +0.693826 0.671875 142 3952.0 9 3 101 +0.647601 0.601562 270 7920.0 8 8 101 +0.648020 0.648438 526 15856.0 8 8 101 +0.666793 0.685547 1038 31728.0 2 6 101 +0.622936 0.579102 2062 63472.0 1 1 101 +0.513603 0.404297 4110 126960.0 8 1 101 +0.413289 0.312988 8206 253936.0 8 8 101 + +finished run +number of examples per pass = 10000 +passes used = 1 +weighted example sum = 309550.000000 +weighted label sum = 0.000000 +average loss = 0.354960 +total feature number = 1010000 +chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_lownoise_m.vw --warm_start 15 --choices_lambda 10 --cb_type ips --no_bandit +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = text_lownoise_m.vw +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 1 1.0 4 1 101 
+1.000000 1.000000 2 2.0 10 4 101 +0.750000 0.500000 4 4.0 7 10 101 +0.625000 0.500000 8 8.0 8 8 101 +0.375000 0.125000 16 16.0 10 10 101 +0.437500 0.500000 32 32.0 8 8 101 +0.406250 0.375000 64 64.0 3 5 101 +0.476562 0.546875 128 128.0 3 5 101 +0.480469 0.484375 256 256.0 10 10 101 +0.443359 0.406250 512 512.0 2 10 101 +0.445312 0.447266 1024 1024.0 1 1 101 +0.438965 0.432617 2048 2048.0 9 5 101 +0.430176 0.421387 4096 4096.0 4 4 101 +0.423340 0.416504 8192 8192.0 10 10 101 + +finished run +number of examples per pass = 10000 +passes used = 1 +weighted example sum = 10000.000000 +weighted label sum = 0.000000 +average loss = 0.426300 +total feature number = 1010000 + +chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 40 --choices_lambda 20 --cb_type ips +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = text_highnoise_m.vw +num sources = 1 +average since example example current current current +loss last counter weight label predict features +0.000000 0.000000 1 1.0 1 1 34 +0.500000 1.000000 2 2.0 7 1 41 +0.750000 1.000000 4 4.0 7 9 36 +0.750000 0.750000 8 8.0 2 5 38 +0.750000 0.750000 16 16.0 9 9 40 +0.812500 0.875000 32 32.0 8 3 45 +0.991533 0.997090 41 1063.0 1 2 39 +0.668060 0.500000 43 3109.0 5 5 33 +0.714623 0.750000 47 7201.0 7 7 35 +0.600455 0.500000 55 15385.0 10 10 42 +0.516455 0.437500 71 31753.0 9 9 32 +0.587418 0.656250 103 64489.0 7 3 42 +0.629966 0.671875 167 129961.0 6 6 41 +0.678446 0.726562 295 260905.0 2 6 37 +0.684938 0.691406 551 522793.0 6 8 42 +0.706747 0.728516 1063 1046569.0 8 9 43 +0.677090 0.647461 2087 2094121.0 2 2 37 +0.672040 0.666992 4135 4189225.0 1 1 45 +0.663167 0.654297 8231 8379433.0 10 5 33 + +finished run +number of examples per pass = 10000 +passes used = 1 +weighted example sum = 10189120.000000 +weighted label sum = 0.000000 +average loss = 0.663153 +total feature number = 390046 +chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 40 --choices_lambda 20 --cb_type ips --no_supervised +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = text_highnoise_m.vw +num sources = 1 +average since example example current current current +loss last counter weight label predict features +0.000000 0.000000 1 1.0 1 1 34 +0.500000 1.000000 2 2.0 7 1 41 +0.750000 1.000000 4 4.0 7 1 36 +0.875000 1.000000 8 8.0 2 1 38 +0.937500 1.000000 16 16.0 9 1 40 +0.937500 0.937500 32 32.0 8 1 45 +0.035748 0.007759 41 1063.0 1 1 39 +0.670312 1.000000 43 3109.0 5 2 33 +0.715595 0.750000 47 7201.0 7 4 35 +0.866883 1.000000 55 15385.0 10 4 42 +0.903285 0.937500 71 31753.0 9 5 32 +0.888927 0.875000 103 64489.0 7 2 42 +0.874039 0.859375 167 129961.0 6 6 41 +0.913731 0.953125 295 260905.0 2 2 37 +0.876718 0.839844 551 522793.0 6 7 42 +0.864128 0.851562 1063 1046569.0 8 6 43 +0.851980 0.839844 2087 2094121.0 2 4 37 +0.848841 0.845703 4135 4189225.0 1 1 45 +0.837139 0.825439 8231 8379433.0 10 5 33 + +finished run +number of examples per pass = 10000 +passes used = 1 +weighted example sum = 10189120.000000 +weighted label sum = 0.000000 +average loss = 0.834037 +total feature number = 390046 From 0da506a88b74327c66589e1c9697b273febe280c Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 16 Feb 2018 12:57:08 -0500 Subject: [PATCH 017/127] added some results of warm starting --- results.txt | 116 
++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/results.txt b/results.txt index 5be1452c1b9..4c0daef948f 100644 --- a/results.txt +++ b/results.txt @@ -176,3 +176,119 @@ weighted example sum = 10189120.000000 weighted label sum = 0.000000 average loss = 0.834037 total feature number = 390046 + + + + + + + +chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 100 --choices_lambda 20 --cb_type ips --no_supervised +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = text_highnoise_m.vw +num sources = 1 +average since example example current current current +loss last counter weight label predict features +0.000000 0.000000 1 1.0 1 1 34 +0.500000 1.000000 2 2.0 7 1 41 +0.750000 1.000000 4 4.0 7 1 36 +0.875000 1.000000 8 8.0 2 1 38 +0.937500 1.000000 16 16.0 9 1 40 +0.937500 0.937500 32 32.0 8 1 45 +0.921875 0.906250 64 64.0 6 1 37 +0.991095 0.995279 101 1123.0 8 1 31 +0.996844 1.000000 103 3169.0 7 3 42 +0.998623 1.000000 107 7261.0 4 1 40 +0.933118 0.875000 115 15445.0 2 4 40 +0.967529 1.000000 131 31813.0 8 10 42 +0.920603 0.875000 163 64549.0 10 9 46 +0.897640 0.875000 227 130021.0 4 2 32 +0.858839 0.820312 355 260965.0 5 7 42 +0.835629 0.812500 611 522853.0 8 7 34 +0.838716 0.841797 1123 1046629.0 4 9 40 +0.837326 0.835938 2147 2094181.0 9 4 43 +0.831015 0.824707 4195 4189285.0 7 1 39 +0.826152 0.821289 8291 8379493.0 7 5 39 + +finished run +number of examples per pass = 10000 +passes used = 1 +weighted example sum = 10127800.000000 +weighted label sum = 0.000000 +average loss = 0.825455 +total feature number = 390046 +chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 100 --choices_lambda 20 --cb_type ips --no_bandit +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = text_highnoise_m.vw +num sources = 1 +average since example example current current current +loss last counter weight label predict features +0.000000 0.000000 1 1.0 1 1 34 +0.500000 1.000000 2 2.0 7 1 41 +0.750000 1.000000 4 4.0 7 9 36 +0.750000 0.750000 8 8.0 2 5 38 +0.750000 0.750000 16 16.0 9 9 40 +0.812500 0.875000 32 32.0 8 3 45 +0.703125 0.593750 64 64.0 6 6 37 +0.578125 0.453125 128 128.0 10 10 36 +0.488281 0.398438 256 256.0 6 6 37 +0.443359 0.398438 512 512.0 10 10 46 +0.416992 0.390625 1024 1024.0 4 8 37 +0.395020 0.373047 2048 2048.0 9 2 39 +0.382568 0.370117 4096 4096.0 4 8 41 +0.374878 0.367188 8192 8192.0 1 1 40 + +finished run +number of examples per pass = 10000 +passes used = 1 +weighted example sum = 10000.000000 +weighted label sum = 0.000000 +average loss = 0.372700 +total feature number = 390046 +chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 100 --choices_lambda 20 --cb_type ips +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = text_highnoise_m.vw +num sources = 1 +average since example example current current current +loss last counter weight label predict features +0.000000 0.000000 1 1.0 1 1 34 +0.500000 1.000000 2 2.0 7 1 41 +0.750000 1.000000 4 4.0 7 9 36 +0.750000 0.750000 8 8.0 2 5 38 +0.750000 0.750000 16 16.0 9 9 40 +0.812500 0.875000 32 32.0 8 3 45 +0.703125 0.593750 64 64.0 6 6 37 +0.059662 0.020774 101 1123.0 8 8 31 +0.343957 0.500000 103 3169.0 7 7 
42 +0.291007 0.250000 107 7261.0 4 4 40 +0.136808 0.000000 115 15445.0 2 2 40 +0.195046 0.250000 131 31813.0 8 8 42 +0.333855 0.468750 163 64549.0 10 3 46 +0.456857 0.578125 227 130021.0 4 2 32 +0.498105 0.539062 355 260965.0 5 5 42 +0.512750 0.527344 611 522853.0 8 8 34 +0.463363 0.414062 1123 1046629.0 4 6 40 +0.542263 0.621094 2147 2094181.0 9 9 43 +0.562640 0.583008 4195 4189285.0 7 1 39 +0.484681 0.406738 8291 8379493.0 7 7 39 + +finished run +number of examples per pass = 10000 +passes used = 1 +weighted example sum = 10127800.000000 +weighted label sum = 0.000000 +average loss = 0.473638 +total feature number = 390046 + From 122c8a30c4e600a42f74cc92445c96ed18fdaa3f Mon Sep 17 00:00:00 2001 From: chicheng Date: Mon, 19 Feb 2018 16:18:48 -0500 Subject: [PATCH 018/127] before modifying cbify adf code --- vowpalwabbit/cbify.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 5dc8f0acbb7..0911adb2dd3 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -159,6 +159,7 @@ template void predict_or_learn(cbify& data, base_learner& base, example& ec) { bool is_supervised; + float old_weight; if (data.warm_start_period > 0) { @@ -249,11 +250,13 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //base.learn(ec); ec.pred = old_pred; + old_weight = ec.weight; + if (data.ind_bandit) { for (uint32_t i = 0; i < data.choices_lambda; i++) { - ec.weight = data.lambdas[i] / (1-data.lambdas[i]); + ec.weight = old_weight * data.lambdas[i] / (1-data.lambdas[i]); //ec.l.cb.costs[0].cost = 0; //cl.cost * data.lambdas[i] / (1-data.lambdas[i]); @@ -264,6 +267,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.a_s = ec.pred.a_s; ec.l.multi = ld; ec.pred.multiclass = action; + ec.weight = old_weight; } } From c01f8ccc89ae8d2151eb7bce28f304bfaca1044f Mon Sep 17 00:00:00 2001 From: chicheng Date: Mon, 19 Feb 2018 18:30:11 -0500 Subject: [PATCH 019/127] start modifying cbify adf code --- vowpalwabbit/cbify.cc | 145 +++++++++++++++++++++++++++--------------- 1 file changed, 92 insertions(+), 53 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 0911adb2dd3..d4852f959cd 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -155,6 +155,33 @@ uint32_t find_min(v_array arr) return argmin; } +void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) +{ + //IPS for approximating the cumulative costs for all lambdas + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + data.all->cost_sensitive->predict(ec, i); + if (ec.pred.multiclass == cl.action) + data.cumulative_costs[i] += cl.cost / cl.probability; + //cout< void predict_or_learn(cbify& data, base_learner& base, example& ec) { @@ -171,9 +198,6 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) uint32_t argmin; argmin = find_min(data.cumulative_costs); - //cout<cost_sensitive->predict(ec, argmin); - //auto old_pred = ec.pred; - //uint32_t chosen = ec.pred.multiclass-1; - //cout<Choose_Action(*data.generic_explorer, StringUtils::to_string(data.example_counter++), ec); @@ -232,23 +250,13 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) THROW("No action with non-zero probability found!"); cl.cost = loss(data, ld.label, cl.action); - //IPS for approximating the cumulative costs for all lambdas - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - //assert(0); - data.all->cost_sensitive->predict(ec, i); - //cout< void 
predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { + bool is_supervised; + float old_weight; + + if (data.warm_start_period > 0) + { + is_supervised = true; + data.warm_start_period--; + } + else + is_supervised = false; + + uint32_t argmin; + argmin = find_min(data.cumulative_costs); + //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; copy_example_to_adf(data, ec); - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - base.predict(data.adf_data.ecs[a]); - } - base.predict(*data.adf_data.empty_example); - // get output scores - auto& out_ec = data.adf_data.ecs[0]; - uint32_t idx = data.mwt_explorer->Choose_Action( - *data.generic_explorer, - StringUtils::to_string(data.example_counter++), out_ec) - 1; - - CB::cb_class cl; - cl.action = out_ec.pred.a_s[idx].action + 1; - cl.probability = out_ec.pred.a_s[idx].score; - - if(!cl.action) - THROW("No action with non-zero probability found!"); - cl.cost = loss(data, ld.label, cl.action); - - // add cb label to chosen action - auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; - lab.costs.push_back(cl); - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - base.learn(data.adf_data.ecs[a]); - } - base.learn(*data.adf_data.empty_example); - ec.pred.multiclass = cl.action; + + if (is_supervised) // Call the cost-sensitive learner directly + { + + } + else // call the bandit learner + { + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + base.predict(data.adf_data.ecs[a], argmin); + } + base.predict(*data.adf_data.empty_example, argmin); + // get output scores + auto& out_ec = data.adf_data.ecs[0]; + uint32_t idx = data.mwt_explorer->Choose_Action( + *data.generic_explorer, + StringUtils::to_string(data.example_counter++), out_ec) - 1; + + CB::cb_class cl; + cl.action = out_ec.pred.a_s[idx].action + 1; + cl.probability = out_ec.pred.a_s[idx].score; + + if(!cl.action) + THROW("No action with non-zero probability found!"); + cl.cost = loss(data, ld.label, cl.action); + + // accumulate the cumulative costs of lambdas + accumulate_costs_ips_adf(data, base, ec); + + + + // add cb label to chosen action + auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; + lab.costs.push_back(cl); + + + if (data.ind_bandit) + { + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + base.learn(data.adf_data.ecs[a], i); + } + base.learn(*data.adf_data.empty_example, i); + } + } + ec.pred.multiclass = cl.action; + } } void init_adf_data(cbify& data, const size_t num_actions) From 0d4d633efa5637ceece5499961699f547e6b5803 Mon Sep 17 00:00:00 2001 From: chicheng Date: Wed, 21 Feb 2018 14:41:44 -0500 Subject: [PATCH 020/127] unkwown segfault error --- vowpalwabbit/cbify.cc | 90 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 76 insertions(+), 14 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index d4852f959cd..9f663760f35 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -169,14 +169,36 @@ void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) } -void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl) +void accumulate_costs_ips_adf(cbify& data, CB::cb_class& cl) { + float best_score = FLT_MAX; + uint32_t best_action; + + //IPS for approximating the cumulative costs for all lambdas for (uint32_t i = 0; i < data.choices_lambda; i++) { + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + //data.all->cost_sensitive->predict(data.adf_data.ecs[a], i); + + 
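      // Importance-weighted (IPS) bookkeeping: sublearner i is charged
      // cl.cost / cl.probability whenever it would have chosen the logged
      // action.  Since the logged action was drawn with probability
      // cl.probability, this is an unbiased estimate of the expected cost of
      // sublearner i's policy, letting find_min pick the best lambda.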
//data.adf_data.empty_example->in_use = true; + //data.adf_data.empty_example->ft_offset = data.all->cost_sensitive->offset; + //data.all->cost_sensitive->predict(*data.adf_data.empty_example, i); + + if (data.adf_data.ecs[a].partial_prediction < best_score) + { + best_score = data.adf_data.ecs[a].partial_prediction; + best_action = a; + } + } + if (best_action == cl.action - 1) + data.cumulative_costs[i] += cl.cost / cl.probability; + //cout<(); + csl.costs.resize(data.num_actions); + csl.costs.end() = csl.costs.begin()+data.num_actions; + for (uint32_t j = 0; j < data.num_actions; j++) + { + csl.costs[j].class_index = j+1; + csl.costs[j].x = loss(data, ld.label, j+1); + } - } - else // call the bandit learner + ec.l.cs = csl; + */ + //predict + //data.all->cost_sensitive->predict(ec, argmin); + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + //data.all->cost_sensitive->predict(data.adf_data.ecs[a], argmin); + base.predict(data.adf_data.ecs[a], argmin); + } + //data.all->cost_sensitive->predict(*data.adf_data.empty_example, argmin); + base.predict(*data.adf_data.empty_example, argmin); + + + if (data.ind_supervised) + { + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + data.all->cost_sensitive->learn(data.adf_data.ecs[a], i); + } + data.all->cost_sensitive->learn(*data.adf_data.empty_example, i); + } + } + ec.l.multi = ld; + } + else// call the bandit learner { for (size_t a = 0; a < data.adf_data.num_actions; ++a) { @@ -324,27 +381,32 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) cl.cost = loss(data, ld.label, cl.action); // accumulate the cumulative costs of lambdas - accumulate_costs_ips_adf(data, base, ec); - - + accumulate_costs_ips_adf(data, cl); // add cb label to chosen action auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; lab.costs.push_back(cl); - if (data.ind_bandit) { - for (uint32_t i = 0; i < data.choices_lambda; i++) - { + //for (uint32_t i = 0; i < data.choices_lambda; i++) + //{ + //ec.weight = old_weight * data.lambdas[i] / (1-data.lambdas[i]); for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - base.learn(data.adf_data.ecs[a], i); + //old_weight = data.adf_data.ecs[a].weight; + //data.adf_data.ecs[a].weight = data.lambdas[i] / (1 - data.lambdas[i] ); + base.learn(data.adf_data.ecs[a]); + //data.adf_data.ecs[a].weight = old_weight; } - base.learn(*data.adf_data.empty_example, i); - } + + //old_weight = data.adf_data.empty_example->weight; + //data.adf_data.empty_example->weight = data.lambdas[i] / (1 - data.lambdas[i] ); + base.learn(*data.adf_data.empty_example); + //data.adf_data.empty_example->weight = old_weight; + //} } - ec.pred.multiclass = cl.action; + //ec.pred.multiclass = cl.action; } } From ded8f531be386329f6acf0fd05e81573697e1271 Mon Sep 17 00:00:00 2001 From: chicheng Date: Wed, 21 Feb 2018 17:47:16 -0500 Subject: [PATCH 021/127] everything good except for the cost sensitive learn part --- vowpalwabbit/cbify.cc | 115 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 109 insertions(+), 6 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index d4852f959cd..0c969a60c17 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -60,6 +60,7 @@ struct cbify size_t num_actions; bool ind_bandit; bool ind_supervised; + COST_SENSITIVE::label csl; }; @@ -169,15 +170,36 @@ void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) } -void accumulate_costs_ips_adf(cbify& data, example& ec, 
CB::cb_class& cl) +void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_learner& base) { + float best_score; + uint32_t best_action; + example* ecs = data.adf_data.ecs; + + //IPS for approximating the cumulative costs for all lambdas for (uint32_t i = 0; i < data.choices_lambda; i++) { + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + base.predict(ecs[a], i); + + base.predict(*data.adf_data.empty_example, i); + if ( (a == 0) || (ecs[a].partial_prediction < best_score) ) + { + best_action = a + 1; + best_score = ecs[a].partial_prediction; + } + } + + if (best_action == cl.action) + data.cumulative_costs[i] += cl.cost / cl.probability; + cout<(); + COST_SENSITIVE::label& csl = data.csl; csl.costs.resize(data.num_actions); csl.costs.end() = csl.costs.begin()+data.num_actions; for (uint32_t j = 0; j < data.num_actions; j++) @@ -251,7 +273,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) cl.cost = loss(data, ld.label, cl.action); // accumulate the cumulative costs of lambdas - accumulate_costs_ips(data, base, ec); + accumulate_costs_ips(data, ec, cl); //Create a new cb label data.cb_label.costs.push_back(cl); @@ -281,6 +303,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { bool is_supervised; float old_weight; + uint32_t argmin; if (data.warm_start_period > 0) { @@ -290,7 +313,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) else is_supervised = false; - uint32_t argmin; + argmin = find_min(data.cumulative_costs); //Store the multiclass input label @@ -300,7 +323,83 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (is_supervised) // Call the cost-sensitive learner directly { + float best_score; + uint32_t best_action; + example* ecs = data.adf_data.ecs; + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + base.predict(ecs[a], argmin); + } + base.predict(*data.adf_data.empty_example, argmin); + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + if ( (a == 0) || (ecs[a].partial_prediction < best_score) ) + { + best_action = a + 1; + best_score = ecs[a].partial_prediction; + } + } + + + //data.all->cost_sensitive->predict(ec,argmin); + + //generate cost-sensitive label + COST_SENSITIVE::label& csl = data.csl; + csl.costs.resize(data.num_actions); + csl.costs.end() = csl.costs.begin()+data.num_actions; + for (uint32_t j = 0; j < data.num_actions; j++) + { + csl.costs[j].class_index = j+1; + csl.costs[j].x = loss(data, ld.label, j+1); + } + + ec.l.cs = csl; + + + /* + if (data.ind_supervised) + { + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + COST_SENSITIVE::label& lab = ecs[a].l.cs; + lab.costs.erase(); + lab.costs.resize(1); + + lab.costs[0].class_index = a+1; + lab.costs[0].x = loss(data, ld.label, a+1); + + ecs[a].weight = 1; + //base.learn(ecs[a], i); + data.all->cost_sensitive->learn(ecs[a],i); + } + //base.learn(*data.adf_data.empty_example, i); + COST_SENSITIVE::label& lab = data.adf_data.empty_example->l.cs; + lab.costs.erase(); + COST_SENSITIVE::wclass wc = { 0., 0, 0., 0. 
}; + lab.costs.push_back(wc); + + data.all->cost_sensitive->learn(*data.adf_data.empty_example,i); + } + } + + + + if (data.ind_supervised) + { + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + data.all->cost_sensitive->learn(ec,i); + } + } + */ + + ec.pred.multiclass = best_action; + ec.l.multi = ld; } else // call the bandit learner { @@ -324,7 +423,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) cl.cost = loss(data, ld.label, cl.action); // accumulate the cumulative costs of lambdas - accumulate_costs_ips_adf(data, base, ec); + accumulate_costs_ips_adf(data, ec, cl, base); @@ -339,7 +438,10 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) { + old_weight = data.adf_data.ecs[a].weight; + data.adf_data.ecs[a].weight = data.lambdas[i] / (1- data.lambdas[i]); base.learn(data.adf_data.ecs[a], i); + data.adf_data.ecs[a].weight = old_weight; } base.learn(*data.adf_data.empty_example, i); } @@ -411,6 +513,7 @@ base_learner* cbify_setup(vw& all) //data.probs = v_init(); data.generic_explorer = new GenericExplorer(*data.scorer, (u32)num_actions); data.all = &all; + data.csl = calloc_or_throw(); //cout<() : 0; From 68d860068813546434e9fb2405f3e5b849308fb5 Mon Sep 17 00:00:00 2001 From: chicheng Date: Thu, 22 Feb 2018 11:47:41 -0500 Subject: [PATCH 022/127] . --- vowpalwabbit/cbify.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 0c969a60c17..358547ff441 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -196,9 +196,9 @@ void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_l if (best_action == cl.action) data.cumulative_costs[i] += cl.cost / cl.probability; - cout< Date: Thu, 22 Feb 2018 18:32:40 -0500 Subject: [PATCH 023/127] . 
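Flesh out the supervised path of predict_or_learn_adf: the chosen sublearner
scores every action example, the argmin of partial_prediction becomes the
multiclass prediction, and each action example is fed to the cost-sensitive
learner with a single-entry label.  Sketch of the label construction used
below (wclass fields are {x, class_index, partial_prediction, wap_value}):

    COST_SENSITIVE::wclass wc = {0, 0, 0, 0};
    csl.costs.erase();
    csl.costs.push_back(wc);          // one reusable (cost, class) slot
    for (size_t a = 0; a < data.adf_data.num_actions; ++a)
    {
      csl.costs[0].class_index = a + 1;
      csl.costs[0].x = loss(data, ld.label, a + 1);  // loss0 if correct, else loss1
      ecs[a].l.cs = csl;
      ecs[a].weight = 1;
      data.all->cost_sensitive->learn(ecs[a], i);
    }
    // The empty example that terminates the multiline block gets a zero cost.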
--- vowpalwabbit/cbify.cc | 54 ++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 358547ff441..8fb9ac55e8a 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -304,6 +304,9 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) bool is_supervised; float old_weight; uint32_t argmin; + uint32_t best_action; + float best_score; + example* ecs = data.adf_data.ecs; if (data.warm_start_period > 0) { @@ -323,10 +326,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (is_supervised) // Call the cost-sensitive learner directly { - float best_score; - uint32_t best_action; - example* ecs = data.adf_data.ecs; - + for (size_t a = 0; a < data.adf_data.num_actions; ++a) { base.predict(ecs[a], argmin); @@ -341,13 +341,15 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) best_score = ecs[a].partial_prediction; } } - + + //cout<cost_sensitive->predict(ec,argmin); //generate cost-sensitive label - COST_SENSITIVE::label& csl = data.csl; + /*COST_SENSITIVE::label& csl = data.csl; csl.costs.resize(data.num_actions); csl.costs.end() = csl.costs.begin()+data.num_actions; for (uint32_t j = 0; j < data.num_actions; j++) @@ -357,38 +359,41 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) } ec.l.cs = csl; + */ + + COST_SENSITIVE::label& csl = data.csl; + COST_SENSITIVE::wclass wc = {0, 0, 0, 0}; + + csl.costs.erase(); + csl.costs.push_back(wc); - /* if (data.ind_supervised) { for (uint32_t i = 0; i < data.choices_lambda; i++) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - COST_SENSITIVE::label& lab = ecs[a].l.cs; - lab.costs.erase(); - lab.costs.resize(1); + csl.costs[0].class_index = a+1; + csl.costs[0].x = loss(data, ld.label, a+1); - lab.costs[0].class_index = a+1; - lab.costs[0].x = loss(data, ld.label, a+1); + ecs[a].l.cs = csl; ecs[a].weight = 1; //base.learn(ecs[a], i); data.all->cost_sensitive->learn(ecs[a],i); } //base.learn(*data.adf_data.empty_example, i); - COST_SENSITIVE::label& lab = data.adf_data.empty_example->l.cs; - lab.costs.erase(); - COST_SENSITIVE::wclass wc = { 0., 0, 0., 0. 
}; - lab.costs.push_back(wc); - + + csl.costs[0].class_index = 0; + csl.costs[0].x = 0; + data.adf_data.empty_example->l.cs = csl; data.all->cost_sensitive->learn(*data.adf_data.empty_example,i); } } - + /* if (data.ind_supervised) { for (uint32_t i = 0; i < data.choices_lambda; i++) @@ -405,9 +410,20 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - base.predict(data.adf_data.ecs[a], argmin); + base.predict(ecs[a], argmin); } base.predict(*data.adf_data.empty_example, argmin); + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + if ( (a == 0) || (ecs[a].partial_prediction < best_score) ) + { + best_action = a + 1; + best_score = ecs[a].partial_prediction; + } + } + + // get output scores auto& out_ec = data.adf_data.ecs[0]; uint32_t idx = data.mwt_explorer->Choose_Action( From 41127f8de1e1820dd31571a312114433f0f3d913 Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 23 Feb 2018 18:21:35 -0500 Subject: [PATCH 024/127] fixed the bug of empty example cost wrongly set --- vowpalwabbit/cbify.cc | 190 ++++++++++++++++----------------- vowpalwabbit/cost_sensitive.cc | 1 + vowpalwabbit/csoaa.cc | 13 ++- vowpalwabbit/example.h | 1 + 4 files changed, 107 insertions(+), 98 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 8fb9ac55e8a..176152c992a 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -60,7 +60,8 @@ struct cbify size_t num_actions; bool ind_bandit; bool ind_supervised; - COST_SENSITIVE::label csl; + COST_SENSITIVE::label* csls; + COST_SENSITIVE::label* csl_empty; }; @@ -92,6 +93,10 @@ void finish(cbify& data) delete_it(data.mwt_explorer); delete_it(data.recorder); data.a_s.delete_v(); + data.lambdas.delete_v(); + data.cumulative_costs.delete_v(); + free(data.csls); + if (data.use_adf) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) @@ -101,6 +106,8 @@ void finish(cbify& data) VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.empty_example); free(data.adf_data.ecs); free(data.adf_data.empty_example); + + free(data.csl_empty); } } @@ -156,6 +163,49 @@ uint32_t find_min(v_array arr) return argmin; } +uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) +{ + uint32_t best_action, best_action_dir; + float best_score; + + example* ecs = data.adf_data.ecs; + example* empty = data.adf_data.empty_example; + + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + base.predict(ecs[a], i); + //data.all->cost_sensitive->predict(ecs[a], argmin); + } + base.predict(*empty, i); + //data.all->cost_sensitive->predict(*empty, argmin); + + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + if ( (a == 0) || (ecs[a].partial_prediction < best_score) ) + { + best_action = a + 1; + best_score = ecs[a].partial_prediction; + } + } + + /*for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + if ( ecs[a].pred.multiclass != 0 ) + best_action_dir = ecs[a].pred.multiclass; + } + + cout< 0) { @@ -218,7 +255,6 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) else is_supervised = false; - uint32_t argmin; argmin = find_min(data.cumulative_costs); //Store the multiclass input label @@ -229,7 +265,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) if (is_supervised) // Call the cost-sensitive learner directly { //generate cost-sensitive label - COST_SENSITIVE::label& csl = data.csl; + COST_SENSITIVE::label& csl = *data.csls; csl.costs.resize(data.num_actions); 
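// Warm-start label expansion (descriptive note): the loop below turns the
// multiclass label ld.label into a dense cost-sensitive label with one
// entry per class, costing loss0 for the correct class and loss1 for every
// other class (the --loss0/--loss1 options), so the same warm-start example
// can be replayed to the cost-sensitive oracle of each sub-learner.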
csl.costs.end() = csl.costs.begin()+data.num_actions; for (uint32_t j = 0; j < data.num_actions; j++) @@ -298,6 +334,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) } } + template void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { @@ -305,8 +342,8 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) float old_weight; uint32_t argmin; uint32_t best_action; - float best_score; example* ecs = data.adf_data.ecs; + example* empty_example = data.adf_data.empty_example; if (data.warm_start_period > 0) { @@ -326,47 +363,14 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (is_supervised) // Call the cost-sensitive learner directly { + best_action = predict_sublearner(data, base, argmin); - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - base.predict(ecs[a], argmin); - } - base.predict(*data.adf_data.empty_example, argmin); - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - if ( (a == 0) || (ecs[a].partial_prediction < best_score) ) - { - best_action = a + 1; - best_score = ecs[a].partial_prediction; - } - } - //cout<cost_sensitive->predict(ec,argmin); - //generate cost-sensitive label - /*COST_SENSITIVE::label& csl = data.csl; - csl.costs.resize(data.num_actions); - csl.costs.end() = csl.costs.begin()+data.num_actions; - for (uint32_t j = 0; j < data.num_actions; j++) - { - csl.costs[j].class_index = j+1; - csl.costs[j].x = loss(data, ld.label, j+1); - } - - ec.l.cs = csl; - */ - COST_SENSITIVE::label& csl = data.csl; - COST_SENSITIVE::wclass wc = {0, 0, 0, 0}; - - csl.costs.erase(); - csl.costs.push_back(wc); - + COST_SENSITIVE::label* csls = data.csls; if (data.ind_supervised) { @@ -374,35 +378,18 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - csl.costs[0].class_index = a+1; - csl.costs[0].x = loss(data, ld.label, a+1); - - ecs[a].l.cs = csl; + csls[a].costs[0].class_index = a+1; + csls[a].costs[0].x = loss(data, ld.label, a+1); - ecs[a].weight = 1; - //base.learn(ecs[a], i); + ecs[a].l.cs = csls[a]; + ecs[a].weight *= 1; + // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; data.all->cost_sensitive->learn(ecs[a],i); } - //base.learn(*data.adf_data.empty_example, i); - - csl.costs[0].class_index = 0; - csl.costs[0].x = 0; - data.adf_data.empty_example->l.cs = csl; - data.all->cost_sensitive->learn(*data.adf_data.empty_example,i); - } - } - - - /* - if (data.ind_supervised) - { - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - data.all->cost_sensitive->learn(ec,i); + empty_example->l.cs = *data.csl_empty; + data.all->cost_sensitive->learn(*empty_example,i); } } - */ - ec.pred.multiclass = best_action; ec.l.multi = ld; } @@ -412,17 +399,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { base.predict(ecs[a], argmin); } - base.predict(*data.adf_data.empty_example, argmin); - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - if ( (a == 0) || (ecs[a].partial_prediction < best_score) ) - { - best_action = a + 1; - best_score = ecs[a].partial_prediction; - } - } - + base.predict(*empty_example, argmin); // get output scores auto& out_ec = data.adf_data.ecs[0]; @@ -441,8 +418,6 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) // accumulate the cumulative costs of lambdas accumulate_costs_ips_adf(data, ec, cl, base); - - // add cb label to chosen action auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; 
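// Weighting note for the learn loop below: every sub-learner i replays this
// cb-labeled example with its weight scaled by lambdas[i] / (1 - lambdas[i]).
// Warm-start examples were taken at weight 1, so up to a common rescaling
// sub-learner i is effectively minimizing
//   (1 - lambda_i) * (warm-start loss) + lambda_i * (bandit loss),
// the mixture that the cumulative IPS costs then choose among.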
lab.costs.push_back(cl); @@ -454,12 +429,12 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - old_weight = data.adf_data.ecs[a].weight; - data.adf_data.ecs[a].weight = data.lambdas[i] / (1- data.lambdas[i]); - base.learn(data.adf_data.ecs[a], i); - data.adf_data.ecs[a].weight = old_weight; + old_weight = ecs[a].weight; + ecs[a].weight *= data.lambdas[i] / (1- data.lambdas[i]); + base.learn(ecs[a], i); + ecs[a].weight = old_weight; } - base.learn(*data.adf_data.empty_example, i); + base.learn(*empty_example, i); } } ec.pred.multiclass = cl.action; @@ -480,6 +455,21 @@ void init_adf_data(cbify& data, const size_t num_actions) } CB::cb_label.default_label(&adf_data.empty_example->l.cb); adf_data.empty_example->in_use = true; + + data.csls = calloc_or_throw(num_actions); + data.csl_empty = calloc_or_throw(1); + + data.csl_empty->costs.erase(); + data.csl_empty->costs.push_back({0, 0, 0, 0}); + data.csl_empty->costs[0].class_index = 0; + data.csl_empty->costs[0].x = FLT_MAX; + + for (size_t a = 0; a < num_actions; ++a) + { + data.csls[a].costs.erase(); + data.csls[a].costs.push_back({0, 0, 0, 0}); + } + } void generate_lambdas(v_array& lambdas, size_t lambda_size) @@ -529,7 +519,7 @@ base_learner* cbify_setup(vw& all) //data.probs = v_init(); data.generic_explorer = new GenericExplorer(*data.scorer, (u32)num_actions); data.all = &all; - data.csl = calloc_or_throw(); + //cout<() : 0; @@ -542,11 +532,17 @@ base_learner* cbify_setup(vw& all) data.cumulative_costs.push_back(0.); data.num_actions = num_actions; + if (data.use_adf) { init_adf_data(data, num_actions); } + else + { + data.csls = calloc_or_throw(1); + } + if (count(all.args.begin(), all.args.end(),"--cb_explore") == 0 && !data.use_adf) { diff --git a/vowpalwabbit/cost_sensitive.cc b/vowpalwabbit/cost_sensitive.cc index 28414218235..f8c7863fc2a 100644 --- a/vowpalwabbit/cost_sensitive.cc +++ b/vowpalwabbit/cost_sensitive.cc @@ -302,6 +302,7 @@ void output_example(vw& all, example& ec) bool example_is_test(example& ec) { v_array costs = ec.l.cs.costs; + //cout << "is_test " << costs.size() << endl; if (costs.size() == 0) return true; for (size_t j=0; j void do_actual_learning(ldf& data, base_learner& base) { + // cout << "called do_actual_learning" << endl; if (data.ec_seq.size() <= 0) return; // nothing to do /////////////////////// handle label definitions if (ec_seq_is_label_definition(data.ec_seq)) { + // cout << "length is " << data.ec_seq.size() << endl; for (size_t i=0; ifeature_space[data.ec_seq[i]->indices[0]]; @@ -794,7 +798,12 @@ void predict_or_learn(ldf& data, base_learner& base, example &ec) data.ft_offset = ec.ft_offset; bool is_test_ec = COST_SENSITIVE::example_is_test(ec); bool need_to_break = data.ec_seq.size() >= all->p->ring_size - 2; - + /*if (is_learn) + cout << "is_learn "; + else + cout << "predict ";*/ + // cout << "data.ec_seq.size() = " << data.ec_seq.size() << " is_test_ec = " << is_test_ec << endl; + // singleline is used by library/ezexample_predict if (data.is_singleline) { @@ -813,6 +822,7 @@ void predict_or_learn(ldf& data, base_learner& base, example &ec) } else if ((example_is_newline(ec) && is_test_ec) || need_to_break) { + // cout << "newline" << endl; if (need_to_break && data.first_pass) data.all->trace_message << "warning: length of sequence at " << ec.example_counter << " exceeds ring size; breaking apart" << endl; do_actual_learning(data, base); @@ -820,6 +830,7 @@ void predict_or_learn(ldf& data, base_learner& 
base, example &ec) } else { + // cout << "not newline" << endl; if (data.need_to_clear) // should only happen if we're NOT driving { data.ec_seq.erase(); diff --git a/vowpalwabbit/example.h b/vowpalwabbit/example.h index 8641653bbda..1b8e8a1457b 100644 --- a/vowpalwabbit/example.h +++ b/vowpalwabbit/example.h @@ -131,6 +131,7 @@ void free_flatten_example(flat_example* fec); inline int example_is_newline(example& ec) { // if only index is constant namespace or no index + // std::cout << "call e_i_n " << ec.indices.size() << " " << ec.tag.size() << std::endl; if (ec.tag.size() > 0) return false; return ((ec.indices.size() == 0) || ((ec.indices.size() == 1) && From 94c8103575a5ff4763323e07c61c8bc744bea080 Mon Sep 17 00:00:00 2001 From: chicheng Date: Mon, 26 Feb 2018 17:56:02 -0500 Subject: [PATCH 025/127] fixed the bug of empty example cost wrongly set --- vowpalwabbit/cbify.cc | 45 +++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 176152c992a..da43bb885a2 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -62,6 +62,7 @@ struct cbify bool ind_supervised; COST_SENSITIVE::label* csls; COST_SENSITIVE::label* csl_empty; + bool warm_start; }; @@ -99,11 +100,23 @@ void finish(cbify& data) if (data.use_adf) { - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - VW::dealloc_example(CB::cb_label.delete_label, data.adf_data.ecs[a]); - } - VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.empty_example); + if (data.warm_start) + { + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, data.adf_data.ecs[a]); + } + VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, *data.adf_data.empty_example); + } + else + { + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + VW::dealloc_example(CB::cb_label.delete_label, data.adf_data.ecs[a]); + } + VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.empty_example); + } + free(data.adf_data.ecs); free(data.adf_data.empty_example); @@ -243,17 +256,16 @@ void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_l template void predict_or_learn(cbify& data, base_learner& base, example& ec) { - bool is_supervised; float old_weight; uint32_t argmin; if (data.warm_start_period > 0) { - is_supervised = true; + data.warm_start = true; data.warm_start_period--; } else - is_supervised = false; + data.warm_start = false; argmin = find_min(data.cumulative_costs); @@ -262,7 +274,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //cout< void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { - bool is_supervised; float old_weight; uint32_t argmin; uint32_t best_action; @@ -347,11 +358,11 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.warm_start_period > 0) { - is_supervised = true; + data.warm_start = true; data.warm_start_period--; } else - is_supervised = false; + data.warm_start = false; argmin = find_min(data.cumulative_costs); @@ -361,8 +372,14 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) copy_example_to_adf(data, ec); - if (is_supervised) // Call the cost-sensitive learner directly + if (data.warm_start) // Call the cost-sensitive learner directly { + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + ecs[a].l.cs.costs.delete_v(); + } + + best_action = predict_sublearner(data, base, argmin); 
//cout<weight << endl; uint64_t old_offset = ec->ft_offset; ec->ft_offset = offset; if (is_learn) @@ -139,6 +140,7 @@ void predict_or_learn_first(cb_explore_adf& data, base_learner& base, v_array void predict_or_learn_greedy(cb_explore_adf& data, base_learner& base, v_array& examples) { + //cout << "in p_or_l_g" << endl; //Explore uniform random an epsilon fraction of the time. if (is_learn && test_adf_sequence(data.ec_seq) != nullptr) multiline_learn_or_predict(base, examples, data.offset); diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index da43bb885a2..f8ab7062ade 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -63,6 +63,7 @@ struct cbify COST_SENSITIVE::label* csls; COST_SENSITIVE::label* csl_empty; bool warm_start; + float* old_weights; }; @@ -121,6 +122,7 @@ void finish(cbify& data) free(data.adf_data.empty_example); free(data.csl_empty); + free(data.old_weights); } } @@ -178,9 +180,6 @@ uint32_t find_min(v_array arr) uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) { - uint32_t best_action, best_action_dir; - float best_score; - example* ecs = data.adf_data.ecs; example* empty = data.adf_data.empty_example; @@ -194,26 +193,19 @@ uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) //data.all->cost_sensitive->predict(*empty, argmin); - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - if ( (a == 0) || (ecs[a].partial_prediction < best_score) ) - { - best_action = a + 1; - best_score = ecs[a].partial_prediction; - } - } - - /*for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - if ( ecs[a].pred.multiclass != 0 ) - best_action_dir = ecs[a].pred.multiclass; - } - - cout< void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { - float old_weight; uint32_t argmin; uint32_t best_action; example* ecs = data.adf_data.ecs; @@ -374,12 +365,6 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.warm_start) // Call the cost-sensitive learner directly { - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - ecs[a].l.cs.costs.delete_v(); - } - - best_action = predict_sublearner(data, base, argmin); //cout<weight; + //empty_example->weight = data.lambdas[i] / (1- data.lambdas[i]); + //empty_example->weight = old_weight; + //cout << "about to finish in cbify" << endl; + //cout << "finished in cbify" << endl; } } + ec.pred.multiclass = cl.action; } } @@ -476,6 +470,8 @@ void init_adf_data(cbify& data, const size_t num_actions) data.csls = calloc_or_throw(num_actions); data.csl_empty = calloc_or_throw(1); + data.old_weights = calloc_or_throw(num_actions); + data.csl_empty->costs.erase(); data.csl_empty->costs.push_back({0, 0, 0, 0}); data.csl_empty->costs[0].class_index = 0; diff --git a/vowpalwabbit/csoaa.cc b/vowpalwabbit/csoaa.cc index 170dd0de0a6..0bc18af9e02 100644 --- a/vowpalwabbit/csoaa.cc +++ b/vowpalwabbit/csoaa.cc @@ -371,10 +371,12 @@ void do_actual_learning_oaa(ldf& data, base_learner& base, size_t start_K) simple_label.initial = 0.; float old_weight = ec->weight; + //cout << "weight = " << ec->weight << endl; if (!data.treat_as_classifier) // treat like regression simple_label.label = costs[0].x; else // treat like classification { + //cout << "here" << endl; if (costs[0].x <= min_cost) { simple_label.label = -1.; diff --git a/vowpalwabbit/gd.cc b/vowpalwabbit/gd.cc index 0fb5a552094..9d996b30f53 100644 --- a/vowpalwabbit/gd.cc +++ b/vowpalwabbit/gd.cc @@ -646,6 +646,7 @@ void learn(gd& g, base_learner& base, example& ec) 
assert(ec.l.simple.label != FLT_MAX); assert(ec.weight > 0.); g.predict(g,base,ec); + //cout << "iw = " << ec.weight << endl; update(g,base,ec); } diff --git a/vowpalwabbit/gen_cs_example.cc b/vowpalwabbit/gen_cs_example.cc index 363ce97925b..14ce7d43748 100644 --- a/vowpalwabbit/gen_cs_example.cc +++ b/vowpalwabbit/gen_cs_example.cc @@ -49,7 +49,7 @@ void gen_cs_example_ips(v_array examples, COST_SENSITIVE::label& cs_la for (uint32_t i = 0; i < examples.size()-1; i++) { CB::label ld = examples[i]->l.cb; - + //std::cout << "example weight = " << examples[i]->weight << std::endl; COST_SENSITIVE::wclass wc = {0.,i,0.,0.}; if (shared && i > 0) wc.class_index = (uint32_t)i-1; From 46d91c0e6bf9b76d82d22cca8a39332d0a297c82 Mon Sep 17 00:00:00 2001 From: chicheng Date: Wed, 28 Feb 2018 16:22:44 -0500 Subject: [PATCH 027/127] fixed memory leak bug --- vowpalwabbit/cbify.cc | 72 ++++++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 31 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index f8ab7062ade..06aee399a81 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -62,6 +62,8 @@ struct cbify bool ind_supervised; COST_SENSITIVE::label* csls; COST_SENSITIVE::label* csl_empty; + CB::label* cbls; + CB::label* cbl_empty; bool warm_start; float* old_weights; @@ -97,33 +99,36 @@ void finish(cbify& data) data.a_s.delete_v(); data.lambdas.delete_v(); data.cumulative_costs.delete_v(); - free(data.csls); + if (data.use_adf) { - if (data.warm_start) - { - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, data.adf_data.ecs[a]); - } - VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, *data.adf_data.empty_example); - } - else - { - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - VW::dealloc_example(CB::cb_label.delete_label, data.adf_data.ecs[a]); - } - VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.empty_example); - } + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + VW::dealloc_example(CB::cb_label.delete_label, data.adf_data.ecs[a]); + data.adf_data.ecs[a].pred.a_s.delete_v(); + } + VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.empty_example); + data.adf_data.empty_example->pred.a_s.delete_v(); free(data.adf_data.ecs); free(data.adf_data.empty_example); + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.csls[a].costs.delete_v(); + + data.csl_empty->costs.delete_v(); + free(data.csl_empty); + free(data.cbl_empty); + free(data.old_weights); + free(data.cbls); + } + free(data.csls); + + } void copy_example_to_adf(cbify& data, example& ec) @@ -344,7 +349,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { uint32_t argmin; uint32_t best_action; - example* ecs = data.adf_data.ecs; + example* ecs = data.adf_data.ecs; example* empty_example = data.adf_data.empty_example; if (data.warm_start_period > 0) @@ -367,12 +372,16 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { best_action = predict_sublearner(data, base, argmin); - //cout<cost_sensitive->predict(ec,argmin); //generate cost-sensitive label + // ecs[a].weight *= 1; + // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; COST_SENSITIVE::label* csls = data.csls; + COST_SENSITIVE::label* csl_empty = data.csl_empty; + CB::label* cbls = data.cbls; + CB::label* cbl_empty = data.cbl_empty; if (data.ind_supervised) { @@ -383,13 +392,17 @@ void predict_or_learn_adf(cbify& data, base_learner& base, 
example& ec) csls[a].costs[0].class_index = a+1; csls[a].costs[0].x = loss(data, ld.label, a+1); + cbls[a] = ecs[a].l.cb; ecs[a].l.cs = csls[a]; - ecs[a].weight *= 1; - // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; data.all->cost_sensitive->learn(ecs[a],i); } - empty_example->l.cs = *data.csl_empty; + *cbl_empty = empty_example->l.cb; + empty_example->l.cs = *csl_empty; data.all->cost_sensitive->learn(*empty_example,i); + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + ecs[a].l.cb = cbls[a]; + empty_example->l.cb = *cbl_empty; } } ec.pred.multiclass = best_action; @@ -439,12 +452,6 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) for (size_t a = 0; a < data.adf_data.num_actions; ++a) ecs[a].weight = data.old_weights[a]; - - //old_weight = empty_example->weight; - //empty_example->weight = data.lambdas[i] / (1- data.lambdas[i]); - //empty_example->weight = old_weight; - //cout << "about to finish in cbify" << endl; - //cout << "finished in cbify" << endl; } } @@ -466,20 +473,23 @@ void init_adf_data(cbify& data, const size_t num_actions) } CB::cb_label.default_label(&adf_data.empty_example->l.cb); adf_data.empty_example->in_use = true; + adf_data.empty_example->pred.a_s = v_init(); + data.csls = calloc_or_throw(num_actions); data.csl_empty = calloc_or_throw(1); + data.cbls = calloc_or_throw(num_actions); + data.cbl_empty = calloc_or_throw(1); - data.old_weights = calloc_or_throw(num_actions); - data.csl_empty->costs.erase(); + data.old_weights = calloc_or_throw(num_actions); + data.csl_empty->costs.push_back({0, 0, 0, 0}); data.csl_empty->costs[0].class_index = 0; data.csl_empty->costs[0].x = FLT_MAX; for (size_t a = 0; a < num_actions; ++a) { - data.csls[a].costs.erase(); data.csls[a].costs.push_back({0, 0, 0, 0}); } From fad3955543240e2be46a030ae5b50f72a02b50e4 Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 2 Mar 2018 14:55:48 -0500 Subject: [PATCH 028/127] start changing the sample size paramters --- vowpalwabbit/cb_adf.cc | 7 ++++++- vowpalwabbit/cbify.cc | 9 +++++++-- vowpalwabbit/gen_cs_example.h | 3 +++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index ad5f21ed2e4..91fce9bca4c 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc @@ -113,7 +113,9 @@ void learn_MTR(cb_adf& mydata, base_learner& base, v_array& examples) gen_cs_example_mtr(mydata.gen_cs, examples, mydata.cs_labels); uint32_t nf = (uint32_t)examples[mydata.gen_cs.mtr_example]->num_features; float old_weight = examples[mydata.gen_cs.mtr_example]->weight; - examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum); + + //adjust the importance weight to scale by a factor of 1/K (the last term) + examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / mydata.gen_cs.num_actions); GEN_CS::call_cs_ldf(base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset); examples[mydata.gen_cs.mtr_example]->num_features = nf; examples[mydata.gen_cs.mtr_example]->weight = old_weight; @@ -395,6 +397,9 @@ base_learner* cb_adf_setup(vw& all) ld.all = &all; + cb_to_cs_adf& c = ld.gen_cs; + c.num_actions = (uint32_t)(all.vm["cbify"].as()); + // number of weight vectors needed size_t problem_multiplier 
= 1;//default for IPS bool check_baseline_enabled = false; diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 06aee399a81..a959d9bef9b 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -261,8 +261,10 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.warm_start = true; data.warm_start_period--; } - else - data.warm_start = false; + else if (bandit_period > 0) + { + data.bandit = true; + } argmin = find_min(data.cumulative_costs); @@ -546,6 +548,9 @@ base_learner* cbify_setup(vw& all) //cout<() : 0; + data.bandit_period = vm.count("bandit") ? vm["bandit"].as() : UINT32_MAX; //ideally should be the size of the dataset + data.test_period = vm.count("test") ? vm["test"].as() : 0; + //cout<() : 1; diff --git a/vowpalwabbit/gen_cs_example.h b/vowpalwabbit/gen_cs_example.h index b634d04e148..84080c3bfaa 100644 --- a/vowpalwabbit/gen_cs_example.h +++ b/vowpalwabbit/gen_cs_example.h @@ -39,6 +39,9 @@ struct cb_to_cs_adf COST_SENSITIVE::label pred_scores; CB::cb_class known_cost; LEARNER::base_learner* scorer; + + //for scaling the weights of MTR + uint32_t num_actions; }; CB::cb_class* get_observed_cost(CB::label& ld); From 1351a316d371ee86e20ea52f99269cdd3a309ed4 Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 2 Mar 2018 17:15:47 -0500 Subject: [PATCH 029/127] adding the bandit period as an explicit option --- vowpalwabbit/cbify.cc | 55 ++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index a959d9bef9b..b302636e3af 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -55,6 +55,8 @@ struct cbify size_t choices_lambda; size_t warm_start_period; + size_t bandit_period; + v_array cumulative_costs; v_array lambdas; size_t num_actions; @@ -256,16 +258,6 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) float old_weight; uint32_t argmin; - if (data.warm_start_period > 0) - { - data.warm_start = true; - data.warm_start_period--; - } - else if (bandit_period > 0) - { - data.bandit = true; - } - argmin = find_min(data.cumulative_costs); //Store the multiclass input label @@ -273,8 +265,10 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //cout< 0) // Call the cost-sensitive learner directly { + data.warm_start_period--; + //generate cost-sensitive label COST_SENSITIVE::label& csl = *data.csls; csl.costs.resize(data.num_actions); @@ -299,9 +293,12 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) } } ec.l.multi = ld; + ec.weight = 0; } - else //Call the cb_explore algorithm. It returns a vector of probabilities for each action + else if (data.bandit_period > 0)//Call the cb_explore algorithm. 
It returns a vector of probabilities for each action { + data.bandit_period--; + data.cb_label.costs.erase(); ec.l.cb = data.cb_label; ec.pred.a_s = data.a_s; @@ -343,6 +340,16 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) ec.pred.multiclass = action; ec.weight = old_weight; } + else + { + //skipping + //base.predict(ec, argmin); + ec.pred.multiclass = 0; + ec.weight = 0; + + } + + } @@ -353,15 +360,6 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) uint32_t best_action; example* ecs = data.adf_data.ecs; example* empty_example = data.adf_data.empty_example; - - if (data.warm_start_period > 0) - { - data.warm_start = true; - data.warm_start_period--; - } - else - data.warm_start = false; - argmin = find_min(data.cumulative_costs); @@ -370,8 +368,10 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) copy_example_to_adf(data, ec); - if (data.warm_start) // Call the cost-sensitive learner directly + if (data.warm_start_period > 0) // Call the cost-sensitive learner directly { + data.warm_start_period--; + best_action = predict_sublearner(data, base, argmin); //data.all->cost_sensitive->predict(ec,argmin); @@ -410,8 +410,10 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) ec.pred.multiclass = best_action; ec.l.multi = ld; } - else // call the bandit learner + else if (data.bandit_period > 0) // call the bandit learner { + data.bandit_period--; + for (size_t a = 0; a < data.adf_data.num_actions; ++a) { base.predict(ecs[a], argmin); @@ -459,6 +461,11 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) ec.pred.multiclass = cl.action; } + else + { + ec.pred.multiclass = 0; + ec.weight = 0; + } } void init_adf_data(cbify& data, const size_t num_actions) @@ -522,6 +529,7 @@ base_learner* cbify_setup(vw& all) ("loss0", po::value(), "loss for correct label") ("loss1", po::value(), "loss for incorrect label") ("warm_start", po::value(), "number of training examples for fully-supervised warm start") + ("bandit", po::value(), "number of training examples for bandit processing") ("choices_lambda", po::value(), "numbers of lambdas importance weights to aggregate") ("no_supervised", "indicator of using supervised only") ("no_bandit", "indicator of using bandit only"); @@ -549,7 +557,6 @@ base_learner* cbify_setup(vw& all) //cout<() : 0; data.bandit_period = vm.count("bandit") ? vm["bandit"].as() : UINT32_MAX; //ideally should be the size of the dataset - data.test_period = vm.count("test") ? 
vm["test"].as() : 0; //cout<() : 1; From e7384bb63fafcc4b85ec62f204ec507d4f735804 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Tue, 6 Mar 2018 16:10:18 -0500 Subject: [PATCH 030/127] file reorg --- results.txt | 294 ----------------------------- data_gen.py => scripts/data_gen.py | 54 +++--- scripts/plot_warm_start.py | 141 ++++++++++++++ 3 files changed, 166 insertions(+), 323 deletions(-) delete mode 100644 results.txt rename data_gen.py => scripts/data_gen.py (69%) create mode 100644 scripts/plot_warm_start.py diff --git a/results.txt b/results.txt deleted file mode 100644 index 4c0daef948f..00000000000 --- a/results.txt +++ /dev/null @@ -1,294 +0,0 @@ - -chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_lownoise_m.vw --warm_start 15 --choices_lambda 10 --cb_type ips -Num weight bits = 18 -learning rate = 0.5 -initial_t = 0 -power_t = 0.5 -using no cache -Reading datafile = text_lownoise_m.vw -num sources = 1 -average since example example current current current -loss last counter weight label predict features -1.000000 1.000000 1 1.0 4 1 101 -1.000000 1.000000 2 2.0 10 4 101 -0.750000 0.500000 4 4.0 7 10 101 -0.625000 0.500000 8 8.0 8 8 101 -0.130435 0.026316 16 46.0 10 10 101 -0.629630 1.000000 18 108.0 2 10 101 -0.560345 0.500000 22 232.0 3 7 101 -0.529167 0.500000 30 480.0 9 8 101 -0.355533 0.187500 46 976.0 8 8 101 -0.365346 0.375000 78 1968.0 2 7 101 -0.480010 0.593750 142 3952.0 9 5 101 -0.517424 0.554688 270 7920.0 8 8 101 -0.496973 0.476562 526 15856.0 8 8 101 -0.472107 0.447266 1038 31728.0 2 9 101 -0.441124 0.410156 2062 63472.0 1 1 101 -0.348968 0.256836 4110 126960.0 8 1 101 -0.242348 0.135742 8206 253936.0 8 8 101 - -finished run -number of examples per pass = 10000 -passes used = 1 -weighted example sum = 309550.000000 -weighted label sum = 0.000000 -average loss = 0.209223 -total feature number = 1010000 -chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_lownoise_m.vw --warm_start 15 --choices_lambda 10 --cb_type ips --no_supervised -Num weight bits = 18 -learning rate = 0.5 -initial_t = 0 -power_t = 0.5 -using no cache -Reading datafile = text_lownoise_m.vw -num sources = 1 -average since example example current current current -loss last counter weight label predict features -1.000000 1.000000 1 1.0 4 1 101 -1.000000 1.000000 2 2.0 10 1 101 -1.000000 1.000000 4 4.0 7 1 101 -0.875000 0.750000 8 8.0 8 1 101 -0.978261 1.000000 16 46.0 10 1 101 -0.990741 1.000000 18 108.0 2 3 101 -0.995690 1.000000 22 232.0 3 7 101 -0.933333 0.875000 30 480.0 9 7 101 -0.871926 0.812500 46 976.0 8 8 101 -0.715955 0.562500 78 1968.0 2 2 101 -0.693826 0.671875 142 3952.0 9 3 101 -0.647601 0.601562 270 7920.0 8 8 101 -0.648020 0.648438 526 15856.0 8 8 101 -0.666793 0.685547 1038 31728.0 2 6 101 -0.622936 0.579102 2062 63472.0 1 1 101 -0.513603 0.404297 4110 126960.0 8 1 101 -0.413289 0.312988 8206 253936.0 8 8 101 - -finished run -number of examples per pass = 10000 -passes used = 1 -weighted example sum = 309550.000000 -weighted label sum = 0.000000 -average loss = 0.354960 -total feature number = 1010000 -chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_lownoise_m.vw --warm_start 15 --choices_lambda 10 --cb_type ips --no_bandit -Num weight bits = 18 -learning rate = 0.5 -initial_t = 0 -power_t = 0.5 -using no cache -Reading datafile = text_lownoise_m.vw -num sources = 1 -average since example example current current current -loss last counter 
weight label predict features -1.000000 1.000000 1 1.0 4 1 101 -1.000000 1.000000 2 2.0 10 4 101 -0.750000 0.500000 4 4.0 7 10 101 -0.625000 0.500000 8 8.0 8 8 101 -0.375000 0.125000 16 16.0 10 10 101 -0.437500 0.500000 32 32.0 8 8 101 -0.406250 0.375000 64 64.0 3 5 101 -0.476562 0.546875 128 128.0 3 5 101 -0.480469 0.484375 256 256.0 10 10 101 -0.443359 0.406250 512 512.0 2 10 101 -0.445312 0.447266 1024 1024.0 1 1 101 -0.438965 0.432617 2048 2048.0 9 5 101 -0.430176 0.421387 4096 4096.0 4 4 101 -0.423340 0.416504 8192 8192.0 10 10 101 - -finished run -number of examples per pass = 10000 -passes used = 1 -weighted example sum = 10000.000000 -weighted label sum = 0.000000 -average loss = 0.426300 -total feature number = 1010000 - -chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 40 --choices_lambda 20 --cb_type ips -Num weight bits = 18 -learning rate = 0.5 -initial_t = 0 -power_t = 0.5 -using no cache -Reading datafile = text_highnoise_m.vw -num sources = 1 -average since example example current current current -loss last counter weight label predict features -0.000000 0.000000 1 1.0 1 1 34 -0.500000 1.000000 2 2.0 7 1 41 -0.750000 1.000000 4 4.0 7 9 36 -0.750000 0.750000 8 8.0 2 5 38 -0.750000 0.750000 16 16.0 9 9 40 -0.812500 0.875000 32 32.0 8 3 45 -0.991533 0.997090 41 1063.0 1 2 39 -0.668060 0.500000 43 3109.0 5 5 33 -0.714623 0.750000 47 7201.0 7 7 35 -0.600455 0.500000 55 15385.0 10 10 42 -0.516455 0.437500 71 31753.0 9 9 32 -0.587418 0.656250 103 64489.0 7 3 42 -0.629966 0.671875 167 129961.0 6 6 41 -0.678446 0.726562 295 260905.0 2 6 37 -0.684938 0.691406 551 522793.0 6 8 42 -0.706747 0.728516 1063 1046569.0 8 9 43 -0.677090 0.647461 2087 2094121.0 2 2 37 -0.672040 0.666992 4135 4189225.0 1 1 45 -0.663167 0.654297 8231 8379433.0 10 5 33 - -finished run -number of examples per pass = 10000 -passes used = 1 -weighted example sum = 10189120.000000 -weighted label sum = 0.000000 -average loss = 0.663153 -total feature number = 390046 -chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 40 --choices_lambda 20 --cb_type ips --no_supervised -Num weight bits = 18 -learning rate = 0.5 -initial_t = 0 -power_t = 0.5 -using no cache -Reading datafile = text_highnoise_m.vw -num sources = 1 -average since example example current current current -loss last counter weight label predict features -0.000000 0.000000 1 1.0 1 1 34 -0.500000 1.000000 2 2.0 7 1 41 -0.750000 1.000000 4 4.0 7 1 36 -0.875000 1.000000 8 8.0 2 1 38 -0.937500 1.000000 16 16.0 9 1 40 -0.937500 0.937500 32 32.0 8 1 45 -0.035748 0.007759 41 1063.0 1 1 39 -0.670312 1.000000 43 3109.0 5 2 33 -0.715595 0.750000 47 7201.0 7 4 35 -0.866883 1.000000 55 15385.0 10 4 42 -0.903285 0.937500 71 31753.0 9 5 32 -0.888927 0.875000 103 64489.0 7 2 42 -0.874039 0.859375 167 129961.0 6 6 41 -0.913731 0.953125 295 260905.0 2 2 37 -0.876718 0.839844 551 522793.0 6 7 42 -0.864128 0.851562 1063 1046569.0 8 6 43 -0.851980 0.839844 2087 2094121.0 2 4 37 -0.848841 0.845703 4135 4189225.0 1 1 45 -0.837139 0.825439 8231 8379433.0 10 5 33 - -finished run -number of examples per pass = 10000 -passes used = 1 -weighted example sum = 10189120.000000 -weighted label sum = 0.000000 -average loss = 0.834037 -total feature number = 390046 - - - - - - - -chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 100 --choices_lambda 20 --cb_type ips 
--no_supervised -Num weight bits = 18 -learning rate = 0.5 -initial_t = 0 -power_t = 0.5 -using no cache -Reading datafile = text_highnoise_m.vw -num sources = 1 -average since example example current current current -loss last counter weight label predict features -0.000000 0.000000 1 1.0 1 1 34 -0.500000 1.000000 2 2.0 7 1 41 -0.750000 1.000000 4 4.0 7 1 36 -0.875000 1.000000 8 8.0 2 1 38 -0.937500 1.000000 16 16.0 9 1 40 -0.937500 0.937500 32 32.0 8 1 45 -0.921875 0.906250 64 64.0 6 1 37 -0.991095 0.995279 101 1123.0 8 1 31 -0.996844 1.000000 103 3169.0 7 3 42 -0.998623 1.000000 107 7261.0 4 1 40 -0.933118 0.875000 115 15445.0 2 4 40 -0.967529 1.000000 131 31813.0 8 10 42 -0.920603 0.875000 163 64549.0 10 9 46 -0.897640 0.875000 227 130021.0 4 2 32 -0.858839 0.820312 355 260965.0 5 7 42 -0.835629 0.812500 611 522853.0 8 7 34 -0.838716 0.841797 1123 1046629.0 4 9 40 -0.837326 0.835938 2147 2094181.0 9 4 43 -0.831015 0.824707 4195 4189285.0 7 1 39 -0.826152 0.821289 8291 8379493.0 7 5 39 - -finished run -number of examples per pass = 10000 -passes used = 1 -weighted example sum = 10127800.000000 -weighted label sum = 0.000000 -average loss = 0.825455 -total feature number = 390046 -chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 100 --choices_lambda 20 --cb_type ips --no_bandit -Num weight bits = 18 -learning rate = 0.5 -initial_t = 0 -power_t = 0.5 -using no cache -Reading datafile = text_highnoise_m.vw -num sources = 1 -average since example example current current current -loss last counter weight label predict features -0.000000 0.000000 1 1.0 1 1 34 -0.500000 1.000000 2 2.0 7 1 41 -0.750000 1.000000 4 4.0 7 9 36 -0.750000 0.750000 8 8.0 2 5 38 -0.750000 0.750000 16 16.0 9 9 40 -0.812500 0.875000 32 32.0 8 3 45 -0.703125 0.593750 64 64.0 6 6 37 -0.578125 0.453125 128 128.0 10 10 36 -0.488281 0.398438 256 256.0 6 6 37 -0.443359 0.398438 512 512.0 10 10 46 -0.416992 0.390625 1024 1024.0 4 8 37 -0.395020 0.373047 2048 2048.0 9 2 39 -0.382568 0.370117 4096 4096.0 4 8 41 -0.374878 0.367188 8192 8192.0 1 1 40 - -finished run -number of examples per pass = 10000 -passes used = 1 -weighted example sum = 10000.000000 -weighted label sum = 0.000000 -average loss = 0.372700 -total feature number = 390046 -chiczhan@chiczhan-ubuntu:~/datasets$ ~/local_vw/vowpal_wabbit/vowpalwabbit/vw --cbify 10 -d text_highnoise_m.vw --warm_start 100 --choices_lambda 20 --cb_type ips -Num weight bits = 18 -learning rate = 0.5 -initial_t = 0 -power_t = 0.5 -using no cache -Reading datafile = text_highnoise_m.vw -num sources = 1 -average since example example current current current -loss last counter weight label predict features -0.000000 0.000000 1 1.0 1 1 34 -0.500000 1.000000 2 2.0 7 1 41 -0.750000 1.000000 4 4.0 7 9 36 -0.750000 0.750000 8 8.0 2 5 38 -0.750000 0.750000 16 16.0 9 9 40 -0.812500 0.875000 32 32.0 8 3 45 -0.703125 0.593750 64 64.0 6 6 37 -0.059662 0.020774 101 1123.0 8 8 31 -0.343957 0.500000 103 3169.0 7 7 42 -0.291007 0.250000 107 7261.0 4 4 40 -0.136808 0.000000 115 15445.0 2 2 40 -0.195046 0.250000 131 31813.0 8 8 42 -0.333855 0.468750 163 64549.0 10 3 46 -0.456857 0.578125 227 130021.0 4 2 32 -0.498105 0.539062 355 260965.0 5 5 42 -0.512750 0.527344 611 522853.0 8 8 34 -0.463363 0.414062 1123 1046629.0 4 6 40 -0.542263 0.621094 2147 2094181.0 9 9 43 -0.562640 0.583008 4195 4189285.0 7 1 39 -0.484681 0.406738 8291 8379493.0 7 7 39 - -finished run -number of examples per pass = 10000 -passes used = 1 -weighted example 
sum = 10127800.000000 -weighted label sum = 0.000000 -average loss = 0.473638 -total feature number = 390046 - diff --git a/data_gen.py b/scripts/data_gen.py similarity index 69% rename from data_gen.py rename to scripts/data_gen.py index f1c15ae7716..41bdee73c8f 100644 --- a/data_gen.py +++ b/scripts/data_gen.py @@ -3,11 +3,9 @@ classes = 10 m = 100 +kwperclass = 20 def gen_keyword(): - - kwperclass = 20 - keyword = np.zeros((classes, m)) for i in range(classes): @@ -21,44 +19,26 @@ def gen_keyword(): def classify(classifier, example): - result = classifier.dot(example) - return np.argmax(result) - - -if __name__ == '__main__': - - - filename = "text_lownoise" +def gen_datasets(filename, keyword, num_samples, fprob): f = open(filename+".vw", "w") g = open(filename+"_m.vw", "w") - keyword = gen_keyword() - - - samples = 10000 - fprob = 0 - - cs = False - - for i in range(samples): + for i in range(num_samples): c = random.randint(0, classes-1) #generate a pair of datasets (one is cost-sensitive, the other is multiclass) - for l in range(classes): f.write(str(l+1)+':') cost = 1 if l == c: cost = 0 f.write(str(cost)+' ') - - g.write(str(c+1)) - + g.write(str(c+1)) f.write(' | ') g.write(' | ') @@ -70,8 +50,8 @@ def classify(classifier, example): if flip: vec[j] = 2 * (1-keyword[c][j]) - 1 else: - vec[j] = 2 * keyword[c][j] - 1 - + vec[j] = 2 * keyword[c][j] - 1 + for j in range(m): f.write('w'+str(j)+':') f.write(str(vec[j])+' ') @@ -79,14 +59,30 @@ def classify(classifier, example): g.write(str(vec[j])+' ') #print 'Is the prediction equal to the class label? ', classify(keyword, vec) == c - f.write('\n') g.write('\n') f.close() g.close() - - +if __name__ == '__main__': + + keyword = gen_keyword() + # Remember to generate a pair of datasets at the same time + # so that the class-dependent feature is retained + + + num_samples = 10000 + fprob = 0.1 + filename = "source1"+'_'+str(fprob) + + gen_datasets(filename, keyword, num_samples, fprob) + + + num_samples = 10000 + fprob = 0.1 + filename = "source2"+'_'+str(fprob) + + gen_datasets(filename, keyword, num_samples, fprob) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py new file mode 100644 index 00000000000..c0d06afd20b --- /dev/null +++ b/scripts/plot_warm_start.py @@ -0,0 +1,141 @@ +import matplotlib +import matplotlib.pyplot as plt +import subprocess +import pylab +from itertools import product + +class model: + def __init__(self): + self.no_bandit = False + self.no_supervised = False + +def collect_stats(mod): + + filename = mod.filename + # using progress parameter + # num_rows = mod.bandit / mod.progress + + + + avg_loss = [] + last_loss = [] + wt = [] + end_table = False + + f = open(filename, 'r') + linenumber = 0 + for line in f: + if not line.strip(): + end_table = True + if linenumber >= 9 and (not end_table): + items = line.split() + avg_loss.append(float(items[0])) + last_loss.append(float(items[1])) + wt.append(float(items[3])) + linenumber += 1 + + return avg_loss, last_loss, wt + +def execute_vw(mod): + + alg_option = ' ' + if mod.no_bandit: + alg_option += ' --no_bandit ' + if mod.no_supervised: + alg_option += ' --no_supervised ' + if mod.no_exploration: + alg_option += ' --epsilon 0.0 ' + if mod.cb_type == 'mtr': + mod.adf_on = True; + if mod.adf_on: + alg_option += ' --cb_explore_adf ' + + cmd_catfile = '( head -n ' + str(mod.warm_start) + ' ' + mod.dataset_supervised + ';' + ' head -n ' + str(mod.bandit) + ' ' + mod.dataset_bandit + '; )' + cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' 
--cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option + #+ ' --progress ' + str(mod.progress) + + cmd = cmd_catfile + ' | ' + cmd_vw + + print cmd + + f = open(mod.filename, 'w') + process = subprocess.Popen(cmd, shell=True, stdout=f, stderr=f) + #subprocess.check_call(cmd, shell=True) + process.wait() + f.close() + +def gen_comparison_graph(mod): + + for mod.warm_start in mod.choices_warm_start: + + config_name = str(mod.fprob1)+'_'+str(mod.fprob2)+'_'+str(mod.warm_start)+'_'+str(mod.bandit)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) + + # combined approach + mod.no_bandit = False + mod.no_supervised = False + mod.no_exploration = False + mod.filename = config_name + execute_vw(mod) + avg_loss_comb, last_loss_comb, wt_comb = collect_stats(mod) + line = plt.plot(wt_comb, avg_loss_comb, 'r', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) + + # bandit only approach + mod.no_bandit = False + mod.no_supervised = True + mod.no_exploration = False + mod.filename = config_name+'_no_supervised' + execute_vw(mod) + avg_loss_band_only, last_loss_band_only, wt_band_only = collect_stats(mod) + line = plt.plot(wt_band_only, avg_loss_band_only, 'b', label='Bandit only') + + # supervised only approach + mod.no_bandit = True + mod.no_supervised = False + mod.no_exploration = False + mod.filename = config_name+'_no_bandit' + execute_vw(mod) + avg_loss_sup_only, last_loss_sup_only, wt_sup_only = collect_stats(mod) + line = plt.plot(wt_sup_only, avg_loss_sup_only, 'g', label='Supervised only') + + pylab.legend() + pylab.xlabel('#bandit examples') + pylab.ylabel('Progressive validation error') + pylab.title('Source 1 feature flipping prob = ' + str(mod.fprob1) + '; source 2 feature flipping prob = ' + str(mod.fprob2) + 'cb_type = '+ mod.cb_type ) + pylab.savefig('figs/'+config_name +'.png') + plt.gcf().clear() + print('') + #plt.show() + + + + +if __name__ == '__main__': + + mod = model() + + mod.vw_path = './vowpalwabbit/vw' + #mod.warm_start = 50 + mod.bandit = 4096 + mod.num_classes = 10 + #mod.cb_type = 'mtr' #'ips' + #mod.choices_lambda = 10 + #mod.progress = 25 + mod.adf_on = True + + mod.choices_warm_start = [pow(2,i) for i in range(11)] #put it here in order to plot 2d mesh + # we are implicitly iterating over the bandit sample sizes + #choices_fprob1 = [0.1, 0.2, 0.3] + #choices_fprob2 = [0.1, 0.2, 0.3] + #choices_cb_types = ['mtr', 'ips'] + choices_choices_lambda = [pow(2,i) for i in range(5)] + + #for correctness test + #mod.choices_warm_start = [20] + choices_fprob1 = [0.1] + choices_fprob2 = [0.1] + + + for mod.fprob1, mod.fprob2, mod.cb_type, mod.choices_lambda in product(choices_fprob1, choices_fprob2, choices_cb_types, choices_choices_lambda): + mod.dataset_supervised = './source1_' + str(mod.fprob1) + '_m.vw' + mod.dataset_bandit = './source2_' + str(mod.fprob2) + '_m.vw' + gen_comparison_graph(mod) From 630fd5fa49bf1bfa11e39e896fd7cc21cafa7922 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Fri, 9 Mar 2018 17:10:45 -0500 Subject: [PATCH 031/127] tweak the python script --- scripts/plot_warm_start.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index c0d06afd20b..d3edec6f2dd 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -3,6 +3,7 @@ import subprocess import pylab from itertools import product +import 
os.path class model: def __init__(self): @@ -14,9 +15,6 @@ def collect_stats(mod): filename = mod.filename # using progress parameter # num_rows = mod.bandit / mod.progress - - - avg_loss = [] last_loss = [] wt = [] @@ -51,8 +49,7 @@ def execute_vw(mod): alg_option += ' --cb_explore_adf ' cmd_catfile = '( head -n ' + str(mod.warm_start) + ' ' + mod.dataset_supervised + ';' + ' head -n ' + str(mod.bandit) + ' ' + mod.dataset_bandit + '; )' - cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option - #+ ' --progress ' + str(mod.progress) + cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option + ' --progress ' + str(mod.progress) cmd = cmd_catfile + ' | ' + cmd_vw @@ -74,7 +71,7 @@ def gen_comparison_graph(mod): mod.no_bandit = False mod.no_supervised = False mod.no_exploration = False - mod.filename = config_name + mod.filename = mod.output_dir+'/'+config_name execute_vw(mod) avg_loss_comb, last_loss_comb, wt_comb = collect_stats(mod) line = plt.plot(wt_comb, avg_loss_comb, 'r', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) @@ -83,7 +80,7 @@ def gen_comparison_graph(mod): mod.no_bandit = False mod.no_supervised = True mod.no_exploration = False - mod.filename = config_name+'_no_supervised' + mod.filename = mod.output_dir+'/'+config_name+'_no_supervised' execute_vw(mod) avg_loss_band_only, last_loss_band_only, wt_band_only = collect_stats(mod) line = plt.plot(wt_band_only, avg_loss_band_only, 'b', label='Bandit only') @@ -92,7 +89,7 @@ def gen_comparison_graph(mod): mod.no_bandit = True mod.no_supervised = False mod.no_exploration = False - mod.filename = config_name+'_no_bandit' + mod.filename = mod.output_dir+'/'+config_name+'_no_bandit' execute_vw(mod) avg_loss_sup_only, last_loss_sup_only, wt_sup_only = collect_stats(mod) line = plt.plot(wt_sup_only, avg_loss_sup_only, 'g', label='Supervised only') @@ -101,7 +98,7 @@ def gen_comparison_graph(mod): pylab.xlabel('#bandit examples') pylab.ylabel('Progressive validation error') pylab.title('Source 1 feature flipping prob = ' + str(mod.fprob1) + '; source 2 feature flipping prob = ' + str(mod.fprob2) + 'cb_type = '+ mod.cb_type ) - pylab.savefig('figs/'+config_name +'.png') + pylab.savefig(mod.output_dir+'/'+config_name +'.png') plt.gcf().clear() print('') #plt.show() @@ -113,13 +110,15 @@ def gen_comparison_graph(mod): mod = model() - mod.vw_path = './vowpalwabbit/vw' + mod.vw_path = '../vowpalwabbit/vw' + mod.output_dir = '../figs' + mod.data_dir = '../data' #mod.warm_start = 50 - mod.bandit = 4096 + mod.bandit = 4000 mod.num_classes = 10 #mod.cb_type = 'mtr' #'ips' #mod.choices_lambda = 10 - #mod.progress = 25 + mod.progress = 25 mod.adf_on = True mod.choices_warm_start = [pow(2,i) for i in range(11)] #put it here in order to plot 2d mesh @@ -127,6 +126,7 @@ def gen_comparison_graph(mod): #choices_fprob1 = [0.1, 0.2, 0.3] #choices_fprob2 = [0.1, 0.2, 0.3] #choices_cb_types = ['mtr', 'ips'] + choices_cb_types = ['mtr'] choices_choices_lambda = [pow(2,i) for i in range(5)] #for correctness test @@ -134,8 +134,13 @@ def gen_comparison_graph(mod): choices_fprob1 = [0.1] choices_fprob2 = [0.1] - for mod.fprob1, mod.fprob2, mod.cb_type, mod.choices_lambda in product(choices_fprob1, 
choices_fprob2, choices_cb_types, choices_choices_lambda): - mod.dataset_supervised = './source1_' + str(mod.fprob1) + '_m.vw' - mod.dataset_bandit = './source2_' + str(mod.fprob2) + '_m.vw' + mod.dataset_supervised = mod.data_dir + '/source1_' + str(mod.fprob1) + '_m.vw' + mod.dataset_bandit = mod.data_dir + '/source2_' + str(mod.fprob2) + '_m.vw' + if not os.path.isfile(mod.dataset_supervised): + print 'The supervised dataset does not exist!' + break + if not os.path.isfile(mod.dataset_bandit): + print 'The bandit dataset does not exist!' + break gen_comparison_graph(mod) From a7d5360c9e1d4dbe6efd09178509d4c41c0ec6fb Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Tue, 13 Mar 2018 08:58:26 -0400 Subject: [PATCH 032/127] added scatterplot script --- scripts/alg_comparison.py | 64 +++++++++++ scripts/plot_warm_start.py | 216 +++++++++++++++++++++++++++---------- scripts/run_vw_job.py | 205 +++++++++++++++++++++++++++++++++++ vowpalwabbit/cbify.cc | 45 ++++---- 4 files changed, 450 insertions(+), 80 deletions(-) create mode 100644 scripts/alg_comparison.py create mode 100644 scripts/run_vw_job.py diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py new file mode 100644 index 00000000000..c0556442dc9 --- /dev/null +++ b/scripts/alg_comparison.py @@ -0,0 +1,64 @@ +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +import pylab +import os +import glob + + +def sum_files(result_path): + prevdir = os.getcwd() + os.chdir(result_path) + dss = sorted(glob.glob('*.sum')) + os.chdir(prevdir) + return dss + +def parse_sum_file(sum_filename): + f = open(sum_filename, 'r') + line = f.readline() + num_cols = len(line.split()) + f.seek(0) + results = [[] for i in range(num_cols)] + + for line in f: + splitted = line.split() + for i in range(len(splitted)): + if (i == 0): + results[i].append(splitted[i]) + else: + results[i].append(float(splitted[i])) + return results + + +if __name__ == '__main__': + results_path = '../figs/' + dss = sum_files(results_path) + + all_results = [] + for i in range(len(dss)): + result = parse_sum_file(results_path + dss[i]) + + if (i == 0): + all_results = result + else: + num_cols = len(result) + for j in range(num_cols): + all_results[j] += result[j] + + print all_results + + + + # compare combined w/ supervised + plt.plot([0,1],[0,1]) + plt.scatter(all_results[1], all_results[3]) + plt.title('combined vs supervised only') + pylab.savefig('comb_v_super' +'.png') + plt.gcf().clear() + + # compare combined w/ bandit + plt.plot([0,1],[0,1]) + plt.scatter(all_results[1], all_results[2]) + plt.title('combined vs bandit only') + pylab.savefig('comb_v_bandit' +'.png') + plt.gcf().clear() diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index c0d06afd20b..9b4be0d84c4 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -1,8 +1,16 @@ import matplotlib +matplotlib.use('Agg') import matplotlib.pyplot as plt import subprocess import pylab from itertools import product +import os +import math +import argparse +import time +import glob +import re + class model: def __init__(self): @@ -11,18 +19,17 @@ def __init__(self): def collect_stats(mod): - filename = mod.filename + vw_output_filename = mod.vw_output_filename # using progress parameter # num_rows = mod.bandit / mod.progress - - + #print vw_output_filename avg_loss = [] last_loss = [] wt = [] end_table = False - f = open(filename, 'r') + f = open(vw_output_filename, 'r') linenumber = 0 for line in f: if not line.strip(): @@ -50,15 +57,20 @@ def 
execute_vw(mod): if mod.adf_on: alg_option += ' --cb_explore_adf ' - cmd_catfile = '( head -n ' + str(mod.warm_start) + ' ' + mod.dataset_supervised + ';' + ' head -n ' + str(mod.bandit) + ' ' + mod.dataset_bandit + '; )' - cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option - #+ ' --progress ' + str(mod.progress) + # using two datasets + #cmd_catfile = '( head -n ' + str(mod.warm_start) + ' ' + mod.dataset_supervised + ';' + ' head -n ' + str(mod.bandit) + ' ' + mod.dataset_bandit + '; )' + # using only one dataset + #cmd_catfile = '( head -n ' + str(mod.warm_start + mod.bandit) + ' ' + mod.dataset + '; )' + #cmd_catfile = '( cat ' + mod.ds_path+mod.dataset + '; )' - cmd = cmd_catfile + ' | ' + cmd_vw + cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option + ' --progress ' + str(mod.progress) + ' -d ' + mod.ds_path + mod.dataset + + cmd = cmd_vw + #cmd = cmd_catfile + ' | ' + cmd_vw print cmd - f = open(mod.filename, 'w') + f = open(mod.vw_output_filename, 'w') process = subprocess.Popen(cmd, shell=True, stdout=f, stderr=f) #subprocess.check_call(cmd, shell=True) process.wait() @@ -66,76 +78,164 @@ def execute_vw(mod): def gen_comparison_graph(mod): - for mod.warm_start in mod.choices_warm_start: - - config_name = str(mod.fprob1)+'_'+str(mod.fprob2)+'_'+str(mod.warm_start)+'_'+str(mod.bandit)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) - - # combined approach - mod.no_bandit = False - mod.no_supervised = False - mod.no_exploration = False - mod.filename = config_name - execute_vw(mod) - avg_loss_comb, last_loss_comb, wt_comb = collect_stats(mod) - line = plt.plot(wt_comb, avg_loss_comb, 'r', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) - - # bandit only approach - mod.no_bandit = False - mod.no_supervised = True - mod.no_exploration = False - mod.filename = config_name+'_no_supervised' - execute_vw(mod) - avg_loss_band_only, last_loss_band_only, wt_band_only = collect_stats(mod) - line = plt.plot(wt_band_only, avg_loss_band_only, 'b', label='Bandit only') - - # supervised only approach - mod.no_bandit = True - mod.no_supervised = False - mod.no_exploration = False - mod.filename = config_name+'_no_bandit' - execute_vw(mod) - avg_loss_sup_only, last_loss_sup_only, wt_sup_only = collect_stats(mod) - line = plt.plot(wt_sup_only, avg_loss_sup_only, 'g', label='Supervised only') - - pylab.legend() - pylab.xlabel('#bandit examples') - pylab.ylabel('Progressive validation error') - pylab.title('Source 1 feature flipping prob = ' + str(mod.fprob1) + '; source 2 feature flipping prob = ' + str(mod.fprob2) + 'cb_type = '+ mod.cb_type ) - pylab.savefig('figs/'+config_name +'.png') - plt.gcf().clear() - print('') - #plt.show() - + mod.num_lines = get_num_lines(mod.ds_path+mod.dataset) + mod.warm_start = int(math.floor(mod.warm_start_frac * mod.num_lines)) + mod.bandit = mod.num_lines - mod.warm_start + mod.progress = int(math.floor(mod.bandit / mod.num_checkpoints)) + + #config_name = str(mod.dataset) + '_' + str(mod.fprob1)+'_'+str(mod.fprob2)+'_'+str(mod.warm_start)+'_'+str(mod.bandit)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) + + config_name = str(mod.dataset) + '_'+str(mod.warm_start)+ '_' + 
str(mod.cb_type) + '_' + str(mod.choices_lambda) + + # combined approach + mod.no_bandit = False + mod.no_supervised = False + mod.no_exploration = False + mod.vw_output_filename = mod.results_path+config_name+'.txt' + execute_vw(mod) + ''' + avg_loss_comb, last_loss_comb, wt_comb = collect_stats(mod) + line = plt.plot(wt_comb, avg_loss_comb, 'r', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) + ''' + avg_error_comb = avg_error(mod) + + # bandit only approach + mod.no_bandit = False + mod.no_supervised = True + mod.no_exploration = False + mod.vw_output_filename = mod.results_path+config_name+'_no_supervised'+'.txt' + execute_vw(mod) + ''' + avg_loss_band_only, last_loss_band_only, wt_band_only = collect_stats(mod) + line = plt.plot(wt_band_only, avg_loss_band_only, 'b', label='Bandit only') + ''' + avg_error_band_only = avg_error(mod) + + # supervised only approach + mod.no_bandit = True + mod.no_supervised = False + mod.no_exploration = False + mod.vw_output_filename = mod.results_path+config_name+'_no_bandit'+'.txt' + execute_vw(mod) + ''' + avg_loss_sup_only, last_loss_sup_only, wt_sup_only = collect_stats(mod) + # for supervised only, we simply plot a horizontal line using the last point + len_avg_loss = len(avg_loss_sup_only) + avg_loss = avg_loss_sup_only[len_avg_loss-1] + avg_loss_sup_only = [avg_loss for i in range(len_avg_loss)] + line = plt.plot(wt_sup_only, avg_loss_sup_only, 'g', label='Supervised only') + ''' + avg_error_sup_only = avg_error(mod) + + summary_file = open(mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum', 'a') + summary_file.write(config_name + ' ' + str(avg_error_comb) + ' ' + str(avg_error_band_only) + ' ' + str(avg_error_sup_only) + '\n') + + print('') + + ''' + pylab.legend() + pylab.xlabel('#bandit examples') + pylab.ylabel('Progressive validation error') + pylab.title(mod.dataset + ' warm_start = ' + str(mod.warm_start) + ' cb_type = ' + mod.cb_type) + #pylab.title('Source 1 feature flipping prob = ' + str(mod.fprob1) + '; source 2 feature flipping prob = ' + str(mod.fprob2) + 'cb_type = '+ mod.cb_type ) + pylab.savefig(mod.results_path+config_name +'.png') + plt.gcf().clear() + + #plt.show() + ''' + +def ds_files(ds_path): + prevdir = os.getcwd() + os.chdir(ds_path) + dss = sorted(glob.glob('*.vw.gz')) + os.chdir(prevdir) + return dss + + +def ds_per_task(dss, num_tasks, task_id): + ds_task = [] + for i in range(len(dss)): + if (i % num_tasks == task_id): + ds_task.append(dss[i]) + + return ds_task + +def get_num_lines(dataset_name): + ps = subprocess.Popen(('zcat', dataset_name), stdout=subprocess.PIPE) + output = subprocess.check_output(('wc', '-l'), stdin=ps.stdout) + ps.wait() + return int(output) + +def avg_error(mod): + vw_output = open(mod.vw_output_filename, 'r') + vw_output_text = vw_output.read() + rgx = re.compile('^average loss = (.*)$', flags=re.M) + return float(rgx.findall(vw_output_text)[0]) if __name__ == '__main__': + parser = argparse.ArgumentParser(description='vw job') + parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') + parser.add_argument('num_tasks', type=int) + parser.add_argument('--results_dir', default='../figs/') + args = parser.parse_args() + if args.task_id == 0: + if not os.path.exists(args.results_dir): + os.makedirs(args.results_dir) + import stat + os.chmod(args.results_dir, os.stat(args.results_dir).st_mode | stat.S_IWOTH) + else: + while not os.path.exists(args.results_dir): + time.sleep(1) mod = model() + mod.num_tasks = args.num_tasks + 
mod.task_id = args.task_id + + mod.ds_path = '../data/' + mod.vw_path = '../vowpalwabbit/vw' + mod.results_path = args.results_dir - mod.vw_path = './vowpalwabbit/vw' + #DIR_PATTERN = '../results/cbresults_{}/' + + mod.num_checkpoints = 100 #mod.warm_start = 50 - mod.bandit = 4096 + #mod.bandit = 4096 mod.num_classes = 10 #mod.cb_type = 'mtr' #'ips' #mod.choices_lambda = 10 #mod.progress = 25 mod.adf_on = True - mod.choices_warm_start = [pow(2,i) for i in range(11)] #put it here in order to plot 2d mesh + # use fractions instead of absolute numbers + + mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] + #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] + + #mod.choices_warm_start = [pow(2,i) for i in range(11)] #put it here in order to plot 2d mesh # we are implicitly iterating over the bandit sample sizes #choices_fprob1 = [0.1, 0.2, 0.3] #choices_fprob2 = [0.1, 0.2, 0.3] #choices_cb_types = ['mtr', 'ips'] - choices_choices_lambda = [pow(2,i) for i in range(5)] + choices_cb_types = ['mtr', 'ips'] + #choices_choices_lambda = [pow(2,i) for i in range(10,11)] + choices_choices_lambda = [i for i in range(10,11)] #for correctness test #mod.choices_warm_start = [20] - choices_fprob1 = [0.1] - choices_fprob2 = [0.1] + #choices_fprob1 = [0.1] + #choices_fprob2 = [0.1] + + dss = ds_files(mod.ds_path) + mod.ds_task = ds_per_task(dss, args.num_tasks, args.task_id) + + print mod.ds_task + # we only need to vary the warm start fraction, and there is no need to vary the bandit fraction, + # as each run of vw automatically accumulates the bandit dataset - for mod.fprob1, mod.fprob2, mod.cb_type, mod.choices_lambda in product(choices_fprob1, choices_fprob2, choices_cb_types, choices_choices_lambda): - mod.dataset_supervised = './source1_' + str(mod.fprob1) + '_m.vw' - mod.dataset_bandit = './source2_' + str(mod.fprob2) + '_m.vw' + for mod.cb_type, mod.choices_lambda, mod.dataset, mod.warm_start_frac in product(choices_cb_types, choices_choices_lambda, mod.ds_task, mod.choices_warm_start): + #mod.dataset_supervised = './vw_' + str(mod.fprob1) + '_m.vw' + #mod.dataset_bandit = './source2_' + str(mod.fprob2) + '_m.vw' gen_comparison_graph(mod) diff --git a/scripts/run_vw_job.py b/scripts/run_vw_job.py new file mode 100644 index 00000000000..d2551819f4e --- /dev/null +++ b/scripts/run_vw_job.py @@ -0,0 +1,205 @@ +import argparse +import os +import re +import subprocess +import sys +import time + +USE_ADF = True +USE_CS = False + +VW = '/scratch/clear/abietti/.local/bin/vw' +if USE_CS: + VW_DS_DIR = '/scratch/clear/abietti/cb_eval/vwshuffled_cs/' + DIR_PATTERN = '/scratch/clear/abietti/cb_eval/res_cs/cbresults_{}/' +else: + VW_DS_DIR = '/scratch/clear/abietti/cb_eval/vwshuffled/' + DIR_PATTERN = '/scratch/clear/abietti/cb_eval/res/cbresults_{}/' +# VW_DS_DIR = '/bscratch/b-albiet/vwshuffled/' +# DIR_PATTERN = '/bscratch/b-albiet/cbresults_{}/' + +rgx = re.compile('^average loss = (.*)$', flags=re.M) + + +def expand_cover(policies): + algs = [] + for psi in [0, 0.01, 0.1, 1.0]: + algs.append(('cover', policies, 'psi', psi)) + algs.append(('cover', policies, 'psi', psi, 'nounif', None)) + # algs.append(('cover', policies, 'psi', psi, 'nounifagree', None, 'agree_mellowness', 0.1)) + # algs.append(('cover', policies, 'psi', psi, 'nounifagree', None, 'agree_mellowness', 0.01)) + return algs + +params_old = { + 'alg': [ + ('supervised',), + ('epsilon', 0), + ('epsilon', 0.02), + ('epsilon', 0.05), + ('epsilon', 0.1), + ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1.0), + 
('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-2), + ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-4), + ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-6), + # agree + ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1.0), + ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-2), + ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-4), + ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-6), + ('bag', 2), + ('bag', 4), + ('bag', 8), + ('bag', 16), + ('bag', 2, 'greedify', None), + ('bag', 4, 'greedify', None), + ('bag', 8, 'greedify', None), + ('bag', 16, 'greedify', None), + ] + expand_cover(1) + expand_cover(4) + expand_cover(8) + expand_cover(16), + 'learning_rate': [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0], + 'cb_type': ['dr', 'ips', 'mtr'], + } + +params = { + 'alg': [ + ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1.0), + ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-2), + ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-4), + ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-6), + ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1.0), + ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-2), + ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-4), + ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-6), + ], + 'learning_rate': [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0], + 'cb_type': ['dr', 'ips', 'mtr'], + } + +extra_flags = None +# extra_flags = ['--loss0', '9', '--loss1', '10', '--baseline'] + +def param_grid(): + grid = [{}] + for k in params: + new_grid = [] + for g in grid: + for param in params[k]: + gg = g.copy() + gg[k] = param + new_grid.append(gg) + grid = new_grid + + return sorted(grid) + + +def ds_files(): + import glob + return sorted(glob.glob(os.path.join(VW_DS_DIR, '*.vw.gz'))) + + +def get_task_name(ds, params): + did, n_actions = os.path.basename(ds).split('.')[0].split('_')[1:] + did, n_actions = int(did), int(n_actions) + + task_name = 'ds:{}|na:{}'.format(did, n_actions) + if len(params) > 1: + task_name += '|' + '|'.join('{}:{}'.format(k, v) for k, v in sorted(params.items()) if k != 'alg') + task_name += '|' + ':'.join([str(p) for p in params['alg'] if p is not None]) + return task_name + + +def process(ds, params, results_dir): + print 'processing', ds, params + did, n_actions = os.path.basename(ds).split('.')[0].split('_')[1:] + did, n_actions = int(did), int(n_actions) + + cmd = [VW, ds, '-b', '24'] + for k, v in params.iteritems(): + if k == 'alg': + if v[0] == 'supervised': + cmd += ['--csoaa' if USE_CS else '--oaa', str(n_actions)] + else: + cmd += ['--cbify', str(n_actions)] + if USE_CS: + cmd += ['--cbify_cs'] + if extra_flags: + cmd += extra_flags + if USE_ADF: + cmd += ['--cb_explore_adf'] + assert len(v) % 2 == 0, 'params should be in pairs of (option, value)' + for i in range(len(v) / 2): + cmd += ['--{}'.format(v[2 * i])] + if v[2 * i + 1] is not None: + cmd += [str(v[2 * i + 1])] + else: + if params['alg'][0] == 'supervised' and k == 'cb_type': + pass + else: + cmd += ['--{}'.format(k), str(v)] + + print 'running', cmd + t = time.time() + output = subprocess.check_output(cmd, stderr=subprocess.STDOUT) + sys.stderr.write('\n\n{}, {}, time: {}, output:\n'.format(ds, params, time.time() - t)) + sys.stderr.write(output) + pv_loss = float(rgx.findall(output)[0]) + print 'elapsed time:', time.time() - t, 'pv loss:', pv_loss + + return pv_loss + + +if __name__ == 
'__main__': + parser = argparse.ArgumentParser(description='vw job') + parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') + parser.add_argument('num_tasks', type=int) + parser.add_argument('--task_offset', type=int, default=0, + help='offset for task_id in output filenames') + parser.add_argument('--results_dir', default=DIR_PATTERN.format('agree01')) + parser.add_argument('--name', default=None) + parser.add_argument('--test', action='store_true') + parser.add_argument('--flags', default=None, help='extra flags for cb algorithms') + args = parser.parse_args() + + if args.name is not None: + args.results_dir = DIR_PATTERN.format(args.name) + + if args.flags is not None: + extra_flags = args.flags.split() + grid = param_grid() + dss = ds_files() + tot_jobs = len(grid) * len(dss) + + if args.task_id == 0: + if not os.path.exists(args.results_dir): + os.makedirs(args.results_dir) + import stat + os.chmod(args.results_dir, os.stat(args.results_dir).st_mode | stat.S_IWOTH) + else: + while not os.path.exists(args.results_dir): + time.sleep(1) + if not args.test: + fname = os.path.join(args.results_dir, 'loss{}.txt'.format(args.task_offset + args.task_id)) + done_tasks = set() + if os.path.exists(fname): + done_tasks = set([line.split()[0] for line in open(fname).readlines()]) + loss_file = open(fname, 'a') + idx = args.task_id + while idx < tot_jobs: + ds = dss[idx / len(grid)] + params = grid[idx % len(grid)] + if args.test: + print ds, params + else: + task_name = get_task_name(ds, params) + if task_name not in done_tasks: + try: + pv_loss = process(ds, params, args.results_dir) + loss_file.write('{} {}\n'.format(task_name, pv_loss)) + loss_file.flush() + os.fsync(loss_file.fileno()) + except subprocess.CalledProcessError: + sys.stderr.write('\nERROR: TASK FAILED {} {}\n\n'.format(ds, params)) + print 'ERROR: TASK FAILED', ds, params + idx += args.num_tasks + + if not args.test: + loss_file.close() diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index b302636e3af..2ec09cce29a 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -67,7 +67,7 @@ struct cbify CB::label* cbls; CB::label* cbl_empty; bool warm_start; - float* old_weights; + float* old_weights; }; @@ -101,12 +101,12 @@ void finish(cbify& data) data.a_s.delete_v(); data.lambdas.delete_v(); data.cumulative_costs.delete_v(); - - + + if (data.use_adf) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { + { VW::dealloc_example(CB::cb_label.delete_label, data.adf_data.ecs[a]); data.adf_data.ecs[a].pred.a_s.delete_v(); } @@ -118,7 +118,7 @@ void finish(cbify& data) for (size_t a = 0; a < data.adf_data.num_actions; ++a) data.csls[a].costs.delete_v(); - + data.csl_empty->costs.delete_v(); free(data.csl_empty); @@ -129,7 +129,7 @@ void finish(cbify& data) } free(data.csls); - + } @@ -175,7 +175,7 @@ uint32_t find_min(v_array arr) { //cout<cost_sensitive->predict(*empty, argmin); - + //float best_score; //for (size_t a = 0; a < data.adf_data.num_actions; ++a) @@ -225,7 +225,7 @@ void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) { data.all->cost_sensitive->predict(ec, i); if (ec.pred.multiclass == cl.action) - data.cumulative_costs[i] += cl.cost / cl.probability; + data.cumulative_costs[i] += cl.cost / cl.probability; //cout< 0) // Call the cost-sensitive learner directly { data.warm_start_period--; @@ -302,7 +302,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.cb_label.costs.erase(); ec.l.cb = data.cb_label; ec.pred.a_s = 
data.a_s; - + base.predict(ec, argmin); auto old_pred = ec.pred; @@ -360,7 +360,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) uint32_t best_action; example* ecs = data.adf_data.ecs; example* empty_example = data.adf_data.empty_example; - + argmin = find_min(data.cumulative_costs); //Store the multiclass input label @@ -384,7 +384,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) COST_SENSITIVE::label* csl_empty = data.csl_empty; CB::label* cbls = data.cbls; CB::label* cbl_empty = data.cbl_empty; - + if (data.ind_supervised) { for (uint32_t i = 0; i < data.choices_lambda; i++) @@ -393,13 +393,13 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { csls[a].costs[0].class_index = a+1; csls[a].costs[0].x = loss(data, ld.label, a+1); - + cbls[a] = ecs[a].l.cb; ecs[a].l.cs = csls[a]; data.all->cost_sensitive->learn(ecs[a],i); } *cbl_empty = empty_example->l.cb; - empty_example->l.cs = *csl_empty; + empty_example->l.cs = *csl_empty; data.all->cost_sensitive->learn(*empty_example,i); for (size_t a = 0; a < data.adf_data.num_actions; ++a) @@ -409,6 +409,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) } ec.pred.multiclass = best_action; ec.l.multi = ld; + ec.weight = 0; } else if (data.bandit_period > 0) // call the bandit learner { @@ -441,7 +442,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; lab.costs.push_back(cl); - + if (data.ind_bandit) { for (uint32_t i = 0; i < data.choices_lambda; i++) @@ -492,14 +493,14 @@ void init_adf_data(cbify& data, const size_t num_actions) data.old_weights = calloc_or_throw(num_actions); - + data.csl_empty->costs.push_back({0, 0, 0, 0}); data.csl_empty->costs[0].class_index = 0; data.csl_empty->costs[0].x = FLT_MAX; for (size_t a = 0; a < num_actions; ++a) { - data.csls[a].costs.push_back({0, 0, 0, 0}); + data.csls[a].costs.push_back({0, 0, 0, 0}); } } @@ -552,7 +553,7 @@ base_learner* cbify_setup(vw& all) //data.probs = v_init(); data.generic_explorer = new GenericExplorer(*data.scorer, (u32)num_actions); data.all = &all; - + //cout<() : 0; @@ -567,7 +568,7 @@ base_learner* cbify_setup(vw& all) data.cumulative_costs.push_back(0.); data.num_actions = num_actions; - + if (data.use_adf) { From f2f9bb6e56df2cafe1f7c3d34317e0ac530cf00c Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Thu, 15 Mar 2018 13:11:37 -0400 Subject: [PATCH 033/127] retracted the matplotlib inclusion --- scripts/plot_warm_start.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 9b4be0d84c4..53e20114b15 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -1,6 +1,6 @@ -import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt +#import matplotlib +#matplotlib.use('Agg') +#import matplotlib.pyplot as plt import subprocess import pylab from itertools import product From 8298ec6a890bb7ded411fdaf4279d97f89d589df Mon Sep 17 00:00:00 2001 From: chicheng zhang Date: Thu, 15 Mar 2018 22:30:04 +0000 Subject: [PATCH 034/127] . 
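This commit points the scripts at the shuffled-dataset layout
(../../vwshuffled, ../../figs), rounds --progress up instead of down, and
adds debug prints around avg_error. For reference, a minimal standalone
version of the loss extraction avg_error performs, assuming vw's stderr
was captured to a file the way execute_vw does; the helper name and the
filename below are illustrative only:

    import re

    def read_avg_loss(path):
        # Illustrative helper, not part of the scripts. vw ends its
        # stderr report with a line of the form
        #   average loss = 0.123456
        # so pick out the first token after the '=' and parse it.
        text = open(path).read()
        match = re.search(r'^average loss = (\S+)', text, flags=re.M)
        if match is None:
            raise ValueError('no "average loss" line in ' + path)
        return float(match.group(1))

    print read_avg_loss('ds_223_63.vw.gz_100_mtr.txt')  # placeholder path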
--- scripts/plot_warm_start.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 53e20114b15..d7e69147fcb 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -2,7 +2,7 @@ #matplotlib.use('Agg') #import matplotlib.pyplot as plt import subprocess -import pylab +#import pylab from itertools import product import os import math @@ -81,7 +81,7 @@ def gen_comparison_graph(mod): mod.num_lines = get_num_lines(mod.ds_path+mod.dataset) mod.warm_start = int(math.floor(mod.warm_start_frac * mod.num_lines)) mod.bandit = mod.num_lines - mod.warm_start - mod.progress = int(math.floor(mod.bandit / mod.num_checkpoints)) + mod.progress = int(math.ceil(mod.bandit / mod.num_checkpoints)) #config_name = str(mod.dataset) + '_' + str(mod.fprob1)+'_'+str(mod.fprob2)+'_'+str(mod.warm_start)+'_'+str(mod.bandit)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) @@ -167,8 +167,10 @@ def get_num_lines(dataset_name): return int(output) def avg_error(mod): + print mod.vw_output_filename vw_output = open(mod.vw_output_filename, 'r') vw_output_text = vw_output.read() + print vw_output_text rgx = re.compile('^average loss = (.*)$', flags=re.M) return float(rgx.findall(vw_output_text)[0]) @@ -178,7 +180,7 @@ def avg_error(mod): parser = argparse.ArgumentParser(description='vw job') parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') parser.add_argument('num_tasks', type=int) - parser.add_argument('--results_dir', default='../figs/') + parser.add_argument('--results_dir', default='../../figs/') args = parser.parse_args() if args.task_id == 0: if not os.path.exists(args.results_dir): @@ -193,7 +195,7 @@ def avg_error(mod): mod.num_tasks = args.num_tasks mod.task_id = args.task_id - mod.ds_path = '../data/' + mod.ds_path = '../../vwshuffled/' mod.vw_path = '../vowpalwabbit/vw' mod.results_path = args.results_dir From 543bab9676f9f0f83f2b4d947b0b5c69dd500317 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Fri, 16 Mar 2018 18:00:55 -0400 Subject: [PATCH 035/127] . 
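Re-enables matplotlib/pylab and the per-run plots, closes the summary
file after each write, and fixes the checkpoint interval: under Python 2
the previous expression divided two ints, so math.ceil never saw a
fraction and small datasets ended up with --progress 0. A reduced sketch
of the corrected rounding (the function name is illustrative; the
argument names mirror the script's):

    import math

    def progress_interval(bandit, num_checkpoints=100):
        # Illustrative helper. Force float division before rounding up,
        # so vw prints at most num_checkpoints progress rows and the
        # interval can never collapse to zero.
        return int(math.ceil(float(bandit) / float(num_checkpoints)))

    assert progress_interval(250) == 3   # int division would give 2
    assert progress_interval(50) == 1    # int division would give 0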
--- scripts/plot_warm_start.py | 39 ++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index d7e69147fcb..ca9e897044e 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -1,8 +1,8 @@ -#import matplotlib -#matplotlib.use('Agg') -#import matplotlib.pyplot as plt +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt import subprocess -#import pylab +import pylab from itertools import product import os import math @@ -81,7 +81,7 @@ def gen_comparison_graph(mod): mod.num_lines = get_num_lines(mod.ds_path+mod.dataset) mod.warm_start = int(math.floor(mod.warm_start_frac * mod.num_lines)) mod.bandit = mod.num_lines - mod.warm_start - mod.progress = int(math.ceil(mod.bandit / mod.num_checkpoints)) + mod.progress = int(math.ceil(float(mod.bandit) / float(mod.num_checkpoints))) #config_name = str(mod.dataset) + '_' + str(mod.fprob1)+'_'+str(mod.fprob2)+'_'+str(mod.warm_start)+'_'+str(mod.bandit)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) @@ -93,10 +93,10 @@ def gen_comparison_graph(mod): mod.no_exploration = False mod.vw_output_filename = mod.results_path+config_name+'.txt' execute_vw(mod) - ''' + avg_loss_comb, last_loss_comb, wt_comb = collect_stats(mod) line = plt.plot(wt_comb, avg_loss_comb, 'r', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) - ''' + avg_error_comb = avg_error(mod) # bandit only approach @@ -105,10 +105,10 @@ def gen_comparison_graph(mod): mod.no_exploration = False mod.vw_output_filename = mod.results_path+config_name+'_no_supervised'+'.txt' execute_vw(mod) - ''' + avg_loss_band_only, last_loss_band_only, wt_band_only = collect_stats(mod) line = plt.plot(wt_band_only, avg_loss_band_only, 'b', label='Bandit only') - ''' + avg_error_band_only = avg_error(mod) # supervised only approach @@ -117,22 +117,22 @@ def gen_comparison_graph(mod): mod.no_exploration = False mod.vw_output_filename = mod.results_path+config_name+'_no_bandit'+'.txt' execute_vw(mod) - ''' + avg_loss_sup_only, last_loss_sup_only, wt_sup_only = collect_stats(mod) # for supervised only, we simply plot a horizontal line using the last point len_avg_loss = len(avg_loss_sup_only) avg_loss = avg_loss_sup_only[len_avg_loss-1] avg_loss_sup_only = [avg_loss for i in range(len_avg_loss)] line = plt.plot(wt_sup_only, avg_loss_sup_only, 'g', label='Supervised only') - ''' + avg_error_sup_only = avg_error(mod) summary_file = open(mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum', 'a') summary_file.write(config_name + ' ' + str(avg_error_comb) + ' ' + str(avg_error_band_only) + ' ' + str(avg_error_sup_only) + '\n') - + summary_file.close() print('') - ''' + pylab.legend() pylab.xlabel('#bandit examples') pylab.ylabel('Progressive validation error') @@ -142,7 +142,7 @@ def gen_comparison_graph(mod): plt.gcf().clear() #plt.show() - ''' + def ds_files(ds_path): prevdir = os.getcwd() @@ -167,10 +167,10 @@ def get_num_lines(dataset_name): return int(output) def avg_error(mod): - print mod.vw_output_filename + #print mod.vw_output_filename vw_output = open(mod.vw_output_filename, 'r') vw_output_text = vw_output.read() - print vw_output_text + #print vw_output_text rgx = re.compile('^average loss = (.*)$', flags=re.M) return float(rgx.findall(vw_output_text)[0]) @@ -212,7 +212,8 @@ def avg_error(mod): # use fractions instead of absolute numbers - mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] + mod.choices_warm_start = 
[0.01 * pow(2, i) for i in range(4,5)] + #mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] #mod.choices_warm_start = [pow(2,i) for i in range(11)] #put it here in order to plot 2d mesh @@ -220,7 +221,8 @@ def avg_error(mod): #choices_fprob1 = [0.1, 0.2, 0.3] #choices_fprob2 = [0.1, 0.2, 0.3] #choices_cb_types = ['mtr', 'ips'] - choices_cb_types = ['mtr', 'ips'] + #choices_cb_types = ['mtr', 'ips'] + choices_cb_types = ['mtr'] #choices_choices_lambda = [pow(2,i) for i in range(10,11)] choices_choices_lambda = [i for i in range(10,11)] @@ -232,6 +234,7 @@ def avg_error(mod): dss = ds_files(mod.ds_path) mod.ds_task = ds_per_task(dss, args.num_tasks, args.task_id) + print 'task ' + str(mod.task_id) + ' of ' + str(mod.num_tasks) + ':' print mod.ds_task # we only need to vary the warm start fraction, and there is no need to vary the bandit fraction, From c9beeb00b5b78030b9dca04693e46558588eac8b Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 19 Mar 2018 15:38:56 -0400 Subject: [PATCH 036/127] regexp based line parsing for vw output (not tested yet) --- scripts/plot_warm_start.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index ca9e897044e..183ba0b4200 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -32,9 +32,12 @@ def collect_stats(mod): f = open(vw_output_filename, 'r') linenumber = 0 for line in f: - if not line.strip(): - end_table = True - if linenumber >= 9 and (not end_table): + #if not line.strip(): + # end_table = True + #if linenumber >= 9 and (not end_table): + vw_progress_pattern = '\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+[a-zA-Z0-9]+\s+[a-zA-Z0-9]\s+\d+' + matchobj = re.match(vw_progress_patter, line) + if matchobj: items = line.split() avg_loss.append(float(items[0])) last_loss.append(float(items[1])) From 2343af46f316bd19383d8a133106aa5f04246d89 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 19 Mar 2018 19:12:13 -0400 Subject: [PATCH 037/127] . 
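Fixes the NameError left by the previous commit (vw_progress_patter vs.
vw_progress_pattern), closes the file handles opened by collect_stats and
avg_error, and enumerates task workloads over the full (cb_type,
warm_start, choices_lambda, dataset) grid. For reference, a
self-contained sketch of the progress-table matcher collect_stats now
relies on; the helper name and the sample row are illustrative, not taken
from a real run:

    import re

    # Columns of vw's progress table: average loss, since-last loss,
    # example counter, example weight, label, prediction, feature count.
    VW_PROGRESS = re.compile(
        r'\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+'
        r'[a-zA-Z0-9]+\s+[a-zA-Z0-9]+\s+\d+')

    def parse_progress(line):
        # Illustrative helper. Header, blank and summary lines fail the
        # match and are skipped; otherwise return (avg loss, last loss,
        # example weight), the three fields collect_stats keeps.
        if VW_PROGRESS.match(line):
            f = line.split()
            return float(f[0]), float(f[1]), float(f[3])
        return None

    print parse_progress('0.500000 0.400000   128   128.0   3   3   25')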
--- scripts/plot_warm_start.py | 71 +++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 24 deletions(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 183ba0b4200..91cce14b1e5 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -36,7 +36,7 @@ def collect_stats(mod): # end_table = True #if linenumber >= 9 and (not end_table): vw_progress_pattern = '\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+[a-zA-Z0-9]+\s+[a-zA-Z0-9]\s+\d+' - matchobj = re.match(vw_progress_patter, line) + matchobj = re.match(vw_progress_pattern, line) if matchobj: items = line.split() avg_loss.append(float(items[0])) @@ -44,6 +44,7 @@ def collect_stats(mod): wt.append(float(items[3])) linenumber += 1 + f.close() return avg_loss, last_loss, wt def execute_vw(mod): @@ -85,6 +86,7 @@ def gen_comparison_graph(mod): mod.warm_start = int(math.floor(mod.warm_start_frac * mod.num_lines)) mod.bandit = mod.num_lines - mod.warm_start mod.progress = int(math.ceil(float(mod.bandit) / float(mod.num_checkpoints))) + mod.num_classes = get_num_classes(mod.dataset) #config_name = str(mod.dataset) + '_' + str(mod.fprob1)+'_'+str(mod.fprob2)+'_'+str(mod.warm_start)+'_'+str(mod.bandit)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) @@ -131,7 +133,7 @@ def gen_comparison_graph(mod): avg_error_sup_only = avg_error(mod) summary_file = open(mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum', 'a') - summary_file.write(config_name + ' ' + str(avg_error_comb) + ' ' + str(avg_error_band_only) + ' ' + str(avg_error_sup_only) + '\n') + summary_file.write(config_name + ' ' + str(avg_error_comb) + ' ' + str(avg_error_band_only) + ' ' + str(avg_error_sup_only) + ' ' + str(mod.choices_lambda) + '\n') summary_file.close() print('') @@ -155,13 +157,23 @@ def ds_files(ds_path): return dss -def ds_per_task(dss, num_tasks, task_id): - ds_task = [] - for i in range(len(dss)): - if (i % num_tasks == task_id): - ds_task.append(dss[i]) +def get_num_classes(ds): + did, n_actions = os.path.basename(ds).split('.')[0].split('_')[1:] + did, n_actions = int(did), int(n_actions) + return n_actions + - return ds_task +def ds_per_task(mod): + # put dataset name to the last coordinate so that the task workloads tend to be + # allocated equally + config_all = [item for item in product(mod.choices_cb_types, mod.choices_warm_start_frac, mod.choices_choices_lambda, mod.dss)] + config_task = [] + for i in range(len(config_all)): + if (i % mod.num_tasks == mod.task_id): + config_task.append(config_all[i]) + print config_all[i] + + return config_task def get_num_lines(dataset_name): ps = subprocess.Popen(('zcat', dataset_name), stdout=subprocess.PIPE) @@ -175,15 +187,25 @@ def avg_error(mod): vw_output_text = vw_output.read() #print vw_output_text rgx = re.compile('^average loss = (.*)$', flags=re.M) - return float(rgx.findall(vw_output_text)[0]) + avge = float(rgx.findall(vw_output_text)[0]) + vw_output.close() + return avge + +def main_loop(mod): + + summary_file = open(mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum', 'w') + summary_file.close() + + for mod.cb_type, mod.warm_start_frac, mod.choices_lambda, mod.dataset in mod.config_task: + gen_comparison_graph(mod) if __name__ == '__main__': parser = argparse.ArgumentParser(description='vw job') parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') parser.add_argument('num_tasks', type=int) - parser.add_argument('--results_dir', default='../../figs/') + parser.add_argument('--results_dir', 
default='../../../figs/') args = parser.parse_args() if args.task_id == 0: if not os.path.exists(args.results_dir): @@ -198,7 +220,7 @@ def avg_error(mod): mod.num_tasks = args.num_tasks mod.task_id = args.task_id - mod.ds_path = '../../vwshuffled/' + mod.ds_path = '../../../vwshuffled/' mod.vw_path = '../vowpalwabbit/vw' mod.results_path = args.results_dir @@ -207,7 +229,7 @@ def avg_error(mod): mod.num_checkpoints = 100 #mod.warm_start = 50 #mod.bandit = 4096 - mod.num_classes = 10 + #mod.num_classes = 10 #mod.cb_type = 'mtr' #'ips' #mod.choices_lambda = 10 #mod.progress = 25 @@ -215,7 +237,7 @@ def avg_error(mod): # use fractions instead of absolute numbers - mod.choices_warm_start = [0.01 * pow(2, i) for i in range(4,5)] + mod.choices_warm_start_frac = [0.01 * pow(2, i) for i in range(3,5)] #mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] @@ -224,26 +246,27 @@ def avg_error(mod): #choices_fprob1 = [0.1, 0.2, 0.3] #choices_fprob2 = [0.1, 0.2, 0.3] #choices_cb_types = ['mtr', 'ips'] - #choices_cb_types = ['mtr', 'ips'] - choices_cb_types = ['mtr'] + mod.choices_cb_types = ['mtr', 'ips'] + #mod.choices_cb_types = ['mtr'] #choices_choices_lambda = [pow(2,i) for i in range(10,11)] - choices_choices_lambda = [i for i in range(10,11)] + mod.choices_choices_lambda = [i for i in range(1,3)] + #[i for i in range(10,11)] #for correctness test #mod.choices_warm_start = [20] #choices_fprob1 = [0.1] #choices_fprob2 = [0.1] - dss = ds_files(mod.ds_path) - mod.ds_task = ds_per_task(dss, args.num_tasks, args.task_id) + mod.dss = ds_files(mod.ds_path) + + # here, we are generating the task specific parameter settings + # by first generate all parameter setting and pick every num_tasks of them + mod.config_task = ds_per_task(mod) print 'task ' + str(mod.task_id) + ' of ' + str(mod.num_tasks) + ':' - print mod.ds_task + + #print mod.ds_task # we only need to vary the warm start fraction, and there is no need to vary the bandit fraction, # as each run of vw automatically accumulates the bandit dataset - - for mod.cb_type, mod.choices_lambda, mod.dataset, mod.warm_start_frac in product(choices_cb_types, choices_choices_lambda, mod.ds_task, mod.choices_warm_start): - #mod.dataset_supervised = './vw_' + str(mod.fprob1) + '_m.vw' - #mod.dataset_bandit = './source2_' + str(mod.fprob2) + '_m.vw' - gen_comparison_graph(mod) + main_loop(mod) From 32d33bad964f114749a587cf6346aa81286f3df7 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Tue, 20 Mar 2018 09:50:23 -0400 Subject: [PATCH 038/127] . 
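Moves the .sum aggregation in alg_comparison.py onto pandas so the
per-task files can be stacked and grouped by choices_lambda. A trimmed
sketch of the load step, using the same five columns the tasks write (the
helper name and glob pattern are placeholders); pd.concat also avoids the
quadratic cost of appending one frame at a time:

    import glob
    import pandas as pd

    def load_summaries(pattern='../../../figs/*.sum'):
        # Illustrative helper. One whitespace-separated row per
        # configuration; stacking every task's file restores the full
        # grid of results.
        cols = ['dataset', 'combined', 'bandit_only',
                'supervised_only', 'choices_lambda']
        frames = [pd.read_table(f, sep=' ', header=None, names=cols)
                  for f in sorted(glob.glob(pattern))]
        return pd.concat(frames, ignore_index=True)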
--- scripts/alg_comparison.py | 64 +++++++++++++++++--------------------- scripts/plot_warm_start.py | 11 +++++-- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index c0556442dc9..c22baa8ef17 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -4,6 +4,7 @@ import pylab import os import glob +import pandas as pd def sum_files(result_path): @@ -15,50 +16,43 @@ def sum_files(result_path): def parse_sum_file(sum_filename): f = open(sum_filename, 'r') - line = f.readline() - num_cols = len(line.split()) - f.seek(0) - results = [[] for i in range(num_cols)] - - for line in f: - splitted = line.split() - for i in range(len(splitted)): - if (i == 0): - results[i].append(splitted[i]) - else: - results[i].append(float(splitted[i])) - return results + table = pd.read_table(f, sep=' ', header=None, names=['dataset','combined','bandit_only','supervised_only','choices_lambda'], + lineterminator='\n') + return table if __name__ == '__main__': - results_path = '../figs/' + results_path = '../../../figs/' dss = sum_files(results_path) - all_results = [] + all_results = None for i in range(len(dss)): result = parse_sum_file(results_path + dss[i]) - if (i == 0): all_results = result else: - num_cols = len(result) - for j in range(num_cols): - all_results[j] += result[j] - + all_results = all_results.append(result) print all_results - - - # compare combined w/ supervised - plt.plot([0,1],[0,1]) - plt.scatter(all_results[1], all_results[3]) - plt.title('combined vs supervised only') - pylab.savefig('comb_v_super' +'.png') - plt.gcf().clear() - - # compare combined w/ bandit - plt.plot([0,1],[0,1]) - plt.scatter(all_results[1], all_results[2]) - plt.title('combined vs bandit only') - pylab.savefig('comb_v_bandit' +'.png') - plt.gcf().clear() + #choices_choices_lambda = sorted(all_results['choices_lambda'].unique()) + grouped = all_results.groupby('choices_lambda') + + for cl, results_lambda in grouped: + #results_lambda = all_results[all_results['choices_lambda'] == cl] + # compare combined w/ supervised + results_combined = results_lambda['combined'].tolist() + results_bandit = results_lambda['bandit_only'].tolist() + results_supervised = results_lambda['supervised_only'].tolist() + + # compare combined w/ bandit + plt.plot([0,1],[0,1]) + plt.scatter(results_combined, results_bandit) + plt.title('combined vs bandit only') + pylab.savefig('comb_v_bandit ' + 'choices_lambda=' + str(cl) +'.png') + plt.gcf().clear() + + plt.plot([0,1],[0,1]) + plt.scatter(results_combined, results_supervised) + plt.title('combined vs supervised only') + pylab.savefig('comb_v_supervised ' + 'choices_lambda=' + str(cl) +'.png') + plt.gcf().clear() diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 91cce14b1e5..a9a787f6138 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -168,6 +168,7 @@ def ds_per_task(mod): # allocated equally config_all = [item for item in product(mod.choices_cb_types, mod.choices_warm_start_frac, mod.choices_choices_lambda, mod.dss)] config_task = [] + print len(config_all) for i in range(len(config_all)): if (i % mod.num_tasks == mod.task_id): config_task.append(config_all[i]) @@ -237,7 +238,8 @@ def main_loop(mod): # use fractions instead of absolute numbers - mod.choices_warm_start_frac = [0.01 * pow(2, i) for i in range(3,5)] + #mod.choices_warm_start_frac = [0.01 * pow(2, i) for i in range(1)] + mod.choices_warm_start_frac = [0.01, 0.03, 0.1, 0.3] 
#mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] @@ -246,10 +248,12 @@ def main_loop(mod): #choices_fprob1 = [0.1, 0.2, 0.3] #choices_fprob2 = [0.1, 0.2, 0.3] #choices_cb_types = ['mtr', 'ips'] + #mod.choices_cb_types = ['mtr', 'ips'] mod.choices_cb_types = ['mtr', 'ips'] - #mod.choices_cb_types = ['mtr'] #choices_choices_lambda = [pow(2,i) for i in range(10,11)] - mod.choices_choices_lambda = [i for i in range(1,3)] + #mod.choices_choices_lambda = [i for i in range(1,3)] + #mod.choices_choices_lambda = [i for i in range(1,2)] + mod.choices_choices_lambda = [1, 3, 5, 7] #[i for i in range(10,11)] #for correctness test @@ -258,6 +262,7 @@ def main_loop(mod): #choices_fprob2 = [0.1] mod.dss = ds_files(mod.ds_path) + #mod.dss = mod.dss[:5] # here, we are generating the task specific parameter settings # by first generate all parameter setting and pick every num_tasks of them From f1355b74d092a19e25aeafa2d28242dd89ebd0fc Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 2 Apr 2018 17:39:09 -0400 Subject: [PATCH 039/127] tweaked the scripts --- scripts/alg_comparison.py | 79 +++++++++++++++++++++++++++----------- scripts/plot_warm_start.py | 41 ++++++++++++++------ 2 files changed, 86 insertions(+), 34 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index c22baa8ef17..29a68e136d2 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -5,6 +5,9 @@ import os import glob import pandas as pd +import scipy.stats as stats +from itertools import compress +from math import sqrt def sum_files(result_path): @@ -16,10 +19,45 @@ def sum_files(result_path): def parse_sum_file(sum_filename): f = open(sum_filename, 'r') - table = pd.read_table(f, sep=' ', header=None, names=['dataset','combined','bandit_only','supervised_only','choices_lambda'], + table = pd.read_table(f, sep=' ', header=None, names=['dataset','choices_lambda_1','choices_lambda_5','bandit_only','supervised_only','size'], lineterminator='\n') return table +def get_significance(errors_1, errors_2, sizes): + significance = [] + for i in range(len(errors_1)): + significance.append( significant(errors_1[i], errors_2[i], sizes[i]) ) + return significance + +def significant(err_1, err_2, size): + z = (err_1 - err_2) / sqrt( (err_1*(1 - err_1) + err_2*(1-err_2)) / size ) + + print z + + if (stats.norm.cdf(z) < 0.05) or (stats.norm.cdf(z) > 0.95): + return True + else: + return False + +def plot_comparison(errors_1, errors_2, sizes, title, filename): + print title + + plt.plot([0,1],[0,1]) + significance = get_significance(errors_1, errors_2, sizes) + results_signi_1 = list(compress(errors_1, significance)) + results_signi_2 = list(compress(errors_2, significance)) + plt.scatter(results_signi_1, results_signi_2, s=18, c='r') + + insignificance = [not b for b in significance] + results_insigni_1 = list(compress(errors_1, insignificance)) + results_insigni_2 = list(compress(errors_2, insignificance)) + + plt.scatter(results_insigni_1, results_insigni_2, s=2, c='k') + plt.title(title) + pylab.savefig(filename) + plt.gcf().clear() + + if __name__ == '__main__': results_path = '../../../figs/' @@ -35,24 +73,21 @@ def parse_sum_file(sum_filename): print all_results #choices_choices_lambda = sorted(all_results['choices_lambda'].unique()) - grouped = all_results.groupby('choices_lambda') - - for cl, results_lambda in grouped: - #results_lambda = all_results[all_results['choices_lambda'] == cl] - # compare combined w/ supervised - 
results_combined = results_lambda['combined'].tolist() - results_bandit = results_lambda['bandit_only'].tolist() - results_supervised = results_lambda['supervised_only'].tolist() - - # compare combined w/ bandit - plt.plot([0,1],[0,1]) - plt.scatter(results_combined, results_bandit) - plt.title('combined vs bandit only') - pylab.savefig('comb_v_bandit ' + 'choices_lambda=' + str(cl) +'.png') - plt.gcf().clear() - - plt.plot([0,1],[0,1]) - plt.scatter(results_combined, results_supervised) - plt.title('combined vs supervised only') - pylab.savefig('comb_v_supervised ' + 'choices_lambda=' + str(cl) +'.png') - plt.gcf().clear() + #grouped = all_results.groupby('choices_lambda') + + #for cl, results_lambda in grouped: + #results_lambda = all_results[all_results['choices_lambda'] == cl] + # compare combined w/ supervised + + results_choices_lambda_1 = all_results['choices_lambda_1'].tolist() + results_choices_lambda_5 = all_results['choices_lambda_5'].tolist() + results_bandit = all_results['bandit_only'].tolist() + results_supervised = all_results['supervised_only'].tolist() + dataset_sizes = all_results['size'].tolist() + + # compare combined w/ bandit + plot_comparison(results_choices_lambda_1, results_bandit, dataset_sizes, 'choices_lambda=1 vs bandit only', 'choices_lambda_1_v_bandit_only.png') + plot_comparison(results_choices_lambda_1, results_supervised, dataset_sizes, 'choices_lambda=1 vs supervised only', 'choices_lambda_1_v_supervised_only.png') + plot_comparison(results_choices_lambda_5, results_bandit, dataset_sizes, 'choices_lambda=5 vs bandit only', 'choices_lambda_5_v_bandit_only.png') + plot_comparison(results_choices_lambda_5, results_supervised, dataset_sizes, 'choices_lambda=5 vs supervised only', 'choices_lambda_5_v_supervised_only.png') + plot_comparison(results_choices_lambda_1, results_choices_lambda_5, dataset_sizes, 'choices_lambda=1 vs choices_lambda=5', 'choices_lambda_1_v_choices_lambda_5.png') diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index a9a787f6138..fffbd8c8a5b 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -90,21 +90,36 @@ def gen_comparison_graph(mod): #config_name = str(mod.dataset) + '_' + str(mod.fprob1)+'_'+str(mod.fprob2)+'_'+str(mod.warm_start)+'_'+str(mod.bandit)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) - config_name = str(mod.dataset) + '_'+str(mod.warm_start)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) + config_name = str(mod.dataset) + '_'+str(mod.warm_start)+ '_' + str(mod.cb_type) - # combined approach + # combined approach, lambdas = 1 + mod.choices_lambda = 1 mod.no_bandit = False mod.no_supervised = False mod.no_exploration = False - mod.vw_output_filename = mod.results_path+config_name+'.txt' + mod.vw_output_filename = mod.results_path+config_name+'choices_lambda='+str(mod.choices_lambda)+'.txt' execute_vw(mod) - avg_loss_comb, last_loss_comb, wt_comb = collect_stats(mod) - line = plt.plot(wt_comb, avg_loss_comb, 'r', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) + avg_loss_comb_1, last_loss_comb_1, wt_comb_1 = collect_stats(mod) + line = plt.plot(wt_comb_1, avg_loss_comb_1, 'r', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) - avg_error_comb = avg_error(mod) + avg_error_comb_1 = avg_error(mod) + + # combined approach, lambdas = 5 + mod.choices_lambda = 5 + mod.no_bandit = False + mod.no_supervised = False + mod.no_exploration = False + mod.vw_output_filename = 
mod.results_path+config_name+'choices_lambda='+str(mod.choices_lambda)+'.txt' + execute_vw(mod) + + avg_loss_comb_5, last_loss_comb_5, wt_comb_5 = collect_stats(mod) + line = plt.plot(wt_comb_5, avg_loss_comb_5, 'm', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) + + avg_error_comb_5 = avg_error(mod) # bandit only approach + mod.choices_lambda = 1 mod.no_bandit = False mod.no_supervised = True mod.no_exploration = False @@ -117,6 +132,7 @@ def gen_comparison_graph(mod): avg_error_band_only = avg_error(mod) # supervised only approach + mod.choices_lambda = 1 mod.no_bandit = True mod.no_supervised = False mod.no_exploration = False @@ -133,7 +149,7 @@ def gen_comparison_graph(mod): avg_error_sup_only = avg_error(mod) summary_file = open(mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum', 'a') - summary_file.write(config_name + ' ' + str(avg_error_comb) + ' ' + str(avg_error_band_only) + ' ' + str(avg_error_sup_only) + ' ' + str(mod.choices_lambda) + '\n') + summary_file.write(config_name + ' ' + str(avg_error_comb_1) + ' ' + str(avg_error_comb_5) + ' ' + str(avg_error_band_only) + ' ' + str(avg_error_sup_only) + ' ' + str(mod.bandit) + '\n') summary_file.close() print('') @@ -166,7 +182,7 @@ def get_num_classes(ds): def ds_per_task(mod): # put dataset name to the last coordinate so that the task workloads tend to be # allocated equally - config_all = [item for item in product(mod.choices_cb_types, mod.choices_warm_start_frac, mod.choices_choices_lambda, mod.dss)] + config_all = [item for item in product(mod.choices_cb_types, mod.choices_warm_start_frac, mod.dss)] config_task = [] print len(config_all) for i in range(len(config_all)): @@ -198,7 +214,7 @@ def main_loop(mod): summary_file = open(mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum', 'w') summary_file.close() - for mod.cb_type, mod.warm_start_frac, mod.choices_lambda, mod.dataset in mod.config_task: + for mod.cb_type, mod.warm_start_frac, mod.dataset in mod.config_task: gen_comparison_graph(mod) @@ -239,7 +255,8 @@ def main_loop(mod): # use fractions instead of absolute numbers #mod.choices_warm_start_frac = [0.01 * pow(2, i) for i in range(1)] - mod.choices_warm_start_frac = [0.01, 0.03, 0.1, 0.3] + #mod.choices_warm_start_frac = [0.01, 0.03, 0.1, 0.3] + mod.choices_warm_start_frac = [0.03] #mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] @@ -249,11 +266,11 @@ def main_loop(mod): #choices_fprob2 = [0.1, 0.2, 0.3] #choices_cb_types = ['mtr', 'ips'] #mod.choices_cb_types = ['mtr', 'ips'] - mod.choices_cb_types = ['mtr', 'ips'] + mod.choices_cb_types = ['mtr'] #choices_choices_lambda = [pow(2,i) for i in range(10,11)] #mod.choices_choices_lambda = [i for i in range(1,3)] #mod.choices_choices_lambda = [i for i in range(1,2)] - mod.choices_choices_lambda = [1, 3, 5, 7] + #mod.choices_choices_lambda = [1, 3, 5, 7] #[i for i in range(10,11)] #for correctness test From caac66e264aa4f47a58b5296574d059b92a7d812 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 2 Apr 2018 18:31:18 -0400 Subject: [PATCH 040/127] . 
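Adds a one-off shell pass that shuffles every gzipped dataset into
../vwshuffled before the grid jobs run. A rough Python equivalent of the
bash loop, kept here for reference (in-memory, so it assumes each dataset
fits in RAM; the helper name is illustrative):

    import glob, gzip, os, random

    def shuffle_dataset(src, dst_dir='../vwshuffled'):
        # Illustrative helper mirroring shuffle.sh: zcat | shuf | gzip in
        # one process -- read the example lines, permute them uniformly
        # at random, and rewrite under the same basename.
        with gzip.open(src, 'rb') as f:
            lines = f.readlines()
        random.shuffle(lines)
        dst = os.path.join(dst_dir, os.path.basename(src))
        with gzip.open(dst, 'wb') as f:
            f.writelines(lines)

    for path in sorted(glob.glob('./*.vw.gz')):
        shuffle_dataset(path)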
--- scripts/shuffle.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 scripts/shuffle.sh diff --git a/scripts/shuffle.sh b/scripts/shuffle.sh new file mode 100644 index 00000000000..69aacfc3ee5 --- /dev/null +++ b/scripts/shuffle.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +suffix=".gz" + +for filename in ./*.vw.gz; do + vw_name=$(echo "$filename" | sed -e "s/$suffix$//") + echo $vw_name + zcat $filename | shuf > ../vwshuffled/$vw_name + gzip ../vwshuffled/$vw_name +done From 9a4eef56f80904c15e7be303fa1d4c3c4bbebc42 Mon Sep 17 00:00:00 2001 From: chicheng Date: Mon, 2 Apr 2018 20:54:37 -0400 Subject: [PATCH 041/127] . --- vowpalwabbit/cbify.cc | 62 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 2ec09cce29a..0b3babf5a15 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -5,6 +5,7 @@ #include "bs.h" #include "../explore/cpp/MWTExplorer.h" #include "vw.h" +#include using namespace LEARNER; using namespace MultiWorldTesting; @@ -68,10 +69,45 @@ struct cbify CB::label* cbl_empty; bool warm_start; float* old_weights; + float label_corrupt; }; +float rand_zeroone() +{ + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<> dis(0.0, 1.0); + return dis(gen); +} + + +size_t generate_uar_action(size_t num_actions) +{ + float rand = rand_zeroone(); + for (size_t i = 1; i <= num_actions; i++) + { + if (rand <= float(i) / num_actions) + return i; + } + return num_actions; + +} + +size_t corrupt_action(size_t action, size_t num_actions, float label_corrupt) +{ + float rand = rand_zeroone(); + if (rand < label_corrupt) + return generate_uar_action(num_actions); + else + return action; + +} + + + + vector vw_scorer::Score_Actions(example& ctx) { vector probs_vec; @@ -269,6 +305,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) { data.warm_start_period--; + /* //generate cost-sensitive label COST_SENSITIVE::label& csl = *data.csls; csl.costs.resize(data.num_actions); @@ -283,9 +320,29 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //predict data.all->cost_sensitive->predict(ec, argmin); + */ + + //predict + data.all->cost_sensitive->predict(ec, argmin); + + //first, corrupt fully supervised example ec's label here + size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.label_corrupt); + + //generate cost-sensitive label + COST_SENSITIVE::label& csl = *data.csls; + csl.costs.resize(data.num_actions); + csl.costs.end() = csl.costs.begin()+data.num_actions; + for (uint32_t j = 0; j < data.num_actions; j++) + { + csl.costs[j].class_index = j+1; + csl.costs[j].x = loss(data, corrupted_label, j+1); + } + + ec.l.cs = csl; if (data.ind_supervised) { + for (uint32_t i = 0; i < data.choices_lambda; i++) { ec.weight = 1; @@ -533,7 +590,8 @@ base_learner* cbify_setup(vw& all) ("bandit", po::value(), "number of training examples for bandit processing") ("choices_lambda", po::value(), "numbers of lambdas importance weights to aggregate") ("no_supervised", "indicator of using supervised only") - ("no_bandit", "indicator of using bandit only"); + ("no_bandit", "indicator of using bandit only") + ("label_corrupt", po::value(), "probability of label corruption in the supervised datasets (when corruption happens, the new label is chosen uniformly at random)"); add_options(all); po::variables_map& vm = all.vm; @@ -562,6 +620,8 @@ base_learner* cbify_setup(vw& all) //cout<() : 1; + 
data.label_corrupt = vm.count("label_corrupt") ? vm["label_corrupt"].as() : 0.0; + generate_lambdas(data.lambdas, data.choices_lambda); for (size_t i = 0; i < data.choices_lambda; i++) From b30d98747a8146548d5b87a5dda23cb20090d1ce Mon Sep 17 00:00:00 2001 From: chicheng Date: Mon, 2 Apr 2018 21:22:49 -0400 Subject: [PATCH 042/127] label corruption code --- vowpalwabbit/cbify.cc | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 0b3babf5a15..f92fcdb454a 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -86,6 +86,8 @@ float rand_zeroone() size_t generate_uar_action(size_t num_actions) { float rand = rand_zeroone(); + //cout< vw_scorer::Score_Actions(example& ctx) { vector probs_vec; @@ -322,9 +321,6 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.all->cost_sensitive->predict(ec, argmin); */ - //predict - data.all->cost_sensitive->predict(ec, argmin); - //first, corrupt fully supervised example ec's label here size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.label_corrupt); @@ -338,7 +334,10 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) csl.costs[j].x = loss(data, corrupted_label, j+1); } - ec.l.cs = csl; + ec.l.cs = csl; + + //predict (for vw's internal reason, this step has to be put after ec's cs label is created) + data.all->cost_sensitive->predict(ec, argmin); if (data.ind_supervised) { @@ -405,8 +404,6 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) ec.weight = 0; } - - } @@ -444,12 +441,14 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.ind_supervised) { + size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.label_corrupt); + for (uint32_t i = 0; i < data.choices_lambda; i++) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) { csls[a].costs[0].class_index = a+1; - csls[a].costs[0].x = loss(data, ld.label, a+1); + csls[a].costs[0].x = loss(data, corrupted_label, a+1); cbls[a] = ecs[a].l.cb; ecs[a].l.cs = csls[a]; @@ -507,7 +506,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) for (size_t a = 0; a < data.adf_data.num_actions; ++a) { data.old_weights[a] = ecs[a].weight; - ecs[a].weight *= data.lambdas[i] / (1- data.lambdas[i]); + ecs[a].weight *= data.lambdas[i] / (1-data.lambdas[i]); base.learn(ecs[a], i); } base.learn(*empty_example, i); From 6735a024640200d1d4cd35d4193c005b0f04b43e Mon Sep 17 00:00:00 2001 From: chicheng Date: Tue, 3 Apr 2018 01:37:44 -0400 Subject: [PATCH 043/127] supervised dataset validation --- vowpalwabbit/cbify.cc | 195 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 157 insertions(+), 38 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index f92fcdb454a..523e062e048 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -69,8 +69,15 @@ struct cbify CB::label* cbl_empty; bool warm_start; float* old_weights; - float label_corrupt; + float corrupt_prob_supervised; + float corrupt_prob_bandit; + size_t corrupt_type_supervised; + size_t corrupt_type_bandit; + size_t validation_method; + size_t bandit_iter; + size_t warm_start_iter; + v_array supervised_validation; }; @@ -80,6 +87,7 @@ float rand_zeroone() std::mt19937 gen(rd()); std::uniform_real_distribution<> dis(0.0, 1.0); return dis(gen); + //return 0.5; } @@ -97,11 +105,16 @@ size_t generate_uar_action(size_t num_actions) } -size_t corrupt_action(size_t action, size_t 
num_actions, float label_corrupt) +size_t corrupt_action(size_t action, size_t num_actions, float label_corrupt, size_t type) { float rand = rand_zeroone(); if (rand < label_corrupt) - return generate_uar_action(num_actions); + { + if (type == 1) + return generate_uar_action(num_actions); + else + return (action % num_actions) + 1; + } else return action; @@ -255,33 +268,95 @@ uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) { - //IPS for approximating the cumulative costs for all lambdas - for (uint32_t i = 0; i < data.choices_lambda; i++) + // validation using bandit data + if (data.validation_method == 1) { - data.all->cost_sensitive->predict(ec, i); - if (ec.pred.multiclass == cl.action) - data.cumulative_costs[i] += cl.cost / cl.probability; - //cout<cost_sensitive->predict(ec, i); + if (ec.pred.multiclass == cl.action) + data.cumulative_costs[i] += cl.cost / cl.probability; + //cout<cost_sensitive->predict(ec_valid, i); + + //cout< 0) // Call the cost-sensitive learner directly + if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly { - data.warm_start_period--; - /* //generate cost-sensitive label COST_SENSITIVE::label& csl = *data.csls; @@ -322,10 +395,14 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) */ //first, corrupt fully supervised example ec's label here - size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.label_corrupt); + size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.corrupt_prob_supervised, data.corrupt_type_supervised); + //use this for now; I am not sure if v_array is the same as STL's array where elements are copied when brought in + ld.label = corrupted_label; //generate cost-sensitive label - COST_SENSITIVE::label& csl = *data.csls; + //COST_SENSITIVE::label& csl = *data.csls; + COST_SENSITIVE::label* cslp = calloc_or_throw(1); + COST_SENSITIVE::label csl = *cslp; csl.costs.resize(data.num_actions); csl.costs.end() = csl.costs.begin()+data.num_actions; for (uint32_t j = 0; j < data.num_actions; j++) @@ -348,13 +425,23 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.all->cost_sensitive->learn(ec, i); } } - ec.l.multi = ld; + + //ec.l.multi = ld; ec.weight = 0; + + // This is purely a hack here - need to clean up; I also did not deallocate the label and the copied example in finish() + example* ecp = calloc_or_throw(1); + VW::copy_example_data(false, ecp, &ec); + ecp->l.cs = csl; + + // I am not sure if written this way, ec will be deleted in some other stages and causes error + if (data.validation_method == 2) + data.supervised_validation.push_back(*ecp); + + data.warm_start_iter++; } - else if (data.bandit_period > 0)//Call the cb_explore algorithm. It returns a vector of probabilities for each action + else if (data.bandit_iter < data.bandit_period)//Call the cb_explore algorithm. 
It returns a vector of probabilities for each action { - data.bandit_period--; - data.cb_label.costs.erase(); ec.l.cb = data.cb_label; ec.pred.a_s = data.a_s; @@ -370,7 +457,9 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) if(!cl.action) THROW("No action with non-zero probability found!"); - cl.cost = loss(data, ld.label, cl.action); + + size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.corrupt_prob_bandit, data.corrupt_type_bandit); + cl.cost = loss(data, corrupted_label, cl.action); // accumulate the cumulative costs of lambdas accumulate_costs_ips(data, ec, cl); @@ -395,6 +484,8 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) ec.l.multi = ld; ec.pred.multiclass = action; ec.weight = old_weight; + + data.bandit_iter++; } else { @@ -422,9 +513,8 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) copy_example_to_adf(data, ec); - if (data.warm_start_period > 0) // Call the cost-sensitive learner directly + if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly { - data.warm_start_period--; best_action = predict_sublearner(data, base, argmin); @@ -439,10 +529,10 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) CB::label* cbls = data.cbls; CB::label* cbl_empty = data.cbl_empty; + size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.corrupt_prob_supervised, data.corrupt_type_supervised); + if (data.ind_supervised) { - size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.label_corrupt); - for (uint32_t i = 0; i < data.choices_lambda; i++) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) @@ -466,10 +556,21 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) ec.pred.multiclass = best_action; ec.l.multi = ld; ec.weight = 0; + + //a hack here - allocated memories not deleted + example* ecp = calloc_or_throw(1); + VW::copy_example_data(false, ecp, &ec); + ecp->l.multi.label = corrupted_label; + ecp->l.multi.weight = 1.0; + + if (data.validation_method == 2) + data.supervised_validation.push_back(*ecp); + + data.warm_start_iter++; + } - else if (data.bandit_period > 0) // call the bandit learner + else if (data.bandit_iter < data.bandit_period) // call the bandit learner { - data.bandit_period--; for (size_t a = 0; a < data.adf_data.num_actions; ++a) { @@ -489,7 +590,9 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if(!cl.action) THROW("No action with non-zero probability found!"); - cl.cost = loss(data, ld.label, cl.action); + + size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.corrupt_prob_bandit, data.corrupt_type_bandit); + cl.cost = loss(data, corrupted_label, cl.action); // accumulate the cumulative costs of lambdas accumulate_costs_ips_adf(data, ec, cl, base); @@ -517,6 +620,8 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) } ec.pred.multiclass = cl.action; + + data.bandit_iter++; } else { @@ -563,6 +668,8 @@ void init_adf_data(cbify& data, const size_t num_actions) void generate_lambdas(v_array& lambdas, size_t lambda_size) { + // The lambdas are in fact arranged in ascending order (the middle lambda is 0.5) + lambdas = v_init(); uint32_t mid = lambda_size / 2; for (uint32_t i = 0; i < lambda_size; i++) @@ -590,7 +697,11 @@ base_learner* cbify_setup(vw& all) ("choices_lambda", po::value(), "numbers of lambdas importance weights to aggregate") ("no_supervised", "indicator of using 
supervised only") ("no_bandit", "indicator of using bandit only") - ("label_corrupt", po::value(), "probability of label corruption in the supervised datasets (when corruption happens, the new label is chosen uniformly at random)"); + ("corrupt_prob_supervised", po::value(), "probability of label corruption in the supervised part") + ("corrupt_prob_bandit", po::value(), "probability of label corruption in the bandit part") + ("corrupt_type_supervised", po::value(), "type of label corruption in the supervised part (1 is uar, 2 is circular)") + ("corrupt_type_bandit", po::value(), "probability of label corruption in the bandit part (1 is uar, 2 is circular)") + ("validation_method", po::value(), "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised and amortizing)"); add_options(all); po::variables_map& vm = all.vm; @@ -619,7 +730,15 @@ base_learner* cbify_setup(vw& all) //cout<() : 1; - data.label_corrupt = vm.count("label_corrupt") ? vm["label_corrupt"].as() : 0.0; + data.corrupt_prob_supervised = vm.count("corrupt_prob_supervised") ? vm["corrupt_prob_supervised"].as() : 0.0; + data.corrupt_prob_bandit = vm.count("corrupt_prob_bandit") ? vm["corrupt_prob_bandit"].as() : 0.0; + data.corrupt_type_supervised = vm.count("corrupt_type_supervised") ? vm["corrupt_type_supervised"].as() : 1; + data.corrupt_type_bandit = vm.count("corrupt_type_bandit") ? vm["corrupt_type_bandit"].as() : 1; + data.validation_method = vm.count("validation_method") ? vm["validation_method"].as() : 1; + + data.bandit_iter = 0; + data.warm_start_iter = 0; + generate_lambdas(data.lambdas, data.choices_lambda); From 024d9cc88f1b12aea6be490e733ae541986ac4f4 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Tue, 3 Apr 2018 03:41:14 -0400 Subject: [PATCH 044/127] lambda script --- scripts/plot_warm_start.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index fffbd8c8a5b..3198b3acfc6 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -35,8 +35,9 @@ def collect_stats(mod): #if not line.strip(): # end_table = True #if linenumber >= 9 and (not end_table): - vw_progress_pattern = '\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+[a-zA-Z0-9]+\s+[a-zA-Z0-9]\s+\d+' + vw_progress_pattern = '\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+[a-zA-Z0-9]+\s+[a-zA-Z0-9]+\s+\d+' matchobj = re.match(vw_progress_pattern, line) + if matchobj: items = line.split() avg_loss.append(float(items[0])) @@ -146,6 +147,7 @@ def gen_comparison_graph(mod): avg_loss_sup_only = [avg_loss for i in range(len_avg_loss)] line = plt.plot(wt_sup_only, avg_loss_sup_only, 'g', label='Supervised only') + avg_error_sup_only = avg_error(mod) summary_file = open(mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum', 'a') @@ -279,6 +281,7 @@ def main_loop(mod): #choices_fprob2 = [0.1] mod.dss = ds_files(mod.ds_path) + #mod.dss = ["ds_223_63.vw.gz"] #mod.dss = mod.dss[:5] # here, we are generating the task specific parameter settings From bd5fe57469b7ddce35661eedc107ca816509a5b9 Mon Sep 17 00:00:00 2001 From: chicheng Date: Tue, 3 Apr 2018 05:53:40 -0400 Subject: [PATCH 045/127] weighting scheme --- vowpalwabbit/cbify.cc | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 523e062e048..5fb42fb32a4 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -77,6 +77,7 @@ struct cbify size_t validation_method; size_t bandit_iter; size_t 
+  size_t weighting_scheme;
   v_array<example> supervised_validation;
 };

@@ -475,7 +476,11 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec)
   {
     for (uint32_t i = 0; i < data.choices_lambda; i++)
     {
-      ec.weight = old_weight * data.lambdas[i] / (1-data.lambdas[i]);
+      if (data.weighting_scheme == 1)
+        ec.weight = old_weight * data.lambdas[i] / (1-data.lambdas[i]);
+      else
+        ec.weight = old_weight * data.lambdas[i] / (1-data.lambdas[i]) * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) );
+
       base.learn(ec, i);
     }
   }
@@ -609,7 +614,12 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec)
       for (size_t a = 0; a < data.adf_data.num_actions; ++a)
       {
         data.old_weights[a] = ecs[a].weight;
-        ecs[a].weight *= data.lambdas[i] / (1-data.lambdas[i]);
+
+        if (data.weighting_scheme == 1)
+          ecs[a].weight *= data.lambdas[i] / (1-data.lambdas[i]);
+        else
+          ecs[a].weight *= data.lambdas[i] / (1-data.lambdas[i]) * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) );
+
         base.learn(ecs[a], i);
       }
       base.learn(*empty_example, i);
@@ -701,7 +711,8 @@ base_learner* cbify_setup(vw& all)
     ("corrupt_prob_bandit", po::value<float>(), "probability of label corruption in the bandit part")
     ("corrupt_type_supervised", po::value<size_t>(), "type of label corruption in the supervised part (1 is uar, 2 is circular)")
     ("corrupt_type_bandit", po::value<size_t>(), "type of label corruption in the bandit part (1 is uar, 2 is circular)")
-    ("validation_method", po::value<size_t>(), "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised and amortizing)");
+    ("validation_method", po::value<size_t>(), "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised and amortizing)")
+    ("weighting_scheme", po::value<size_t>(), "weighting scheme (1 is per instance weighting, 2 is per dataset weighting (where we use a diminishing weighting scheme) )");
   add_options(all);

   po::variables_map& vm = all.vm;
@@ -735,6 +746,8 @@ base_learner* cbify_setup(vw& all)
   data.corrupt_type_supervised = vm.count("corrupt_type_supervised") ? vm["corrupt_type_supervised"].as<size_t>() : 1;
   data.corrupt_type_bandit = vm.count("corrupt_type_bandit") ? vm["corrupt_type_bandit"].as<size_t>() : 1;
   data.validation_method = vm.count("validation_method") ? vm["validation_method"].as<size_t>() : 1;
+  data.weighting_scheme = vm.count("weighting_scheme") ? vm["weighting_scheme"].as<size_t>() : 1;
+

   data.bandit_iter = 0;
   data.warm_start_iter = 0;

From 3f64541ccd3f96a663c03bcc9c1a6b9a9e097705 Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Tue, 3 Apr 2018 11:09:14 -0400
Subject: [PATCH 046/127] scripts: compare instance vs. dataset weighting in the plots
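
The comparison scripts are generalized so the two plotted arms are named by
the alg1/alg2 variables (here: instance weighting vs. dataset weighting from
the previous patch), and the warm-start plots gain the cover baseline plus
the corruption and validation flags.

For reference, a minimal standalone sketch of the two bandit-example weight
multipliers these runs compare; bandit_weight_multiplier is a hypothetical
helper written for illustration, not a function in cbify.cc:

    #include <cstddef>

    // Weight put on one bandit example for a given lambda.
    // Scheme 1 (per instance): every bandit example gets lambda/(1-lambda).
    // Scheme 2 (per dataset): the multiplier also decays with the bandit
    // round t; since sum over t of 1/((t+1)(t+2)) telescopes to 1, the whole
    // bandit stream carries total weight comparable to the warm-start set.
    float bandit_weight_multiplier(float lambda, size_t weighting_scheme,
                                   size_t warm_start_period, size_t bandit_iter)
    {
      float base = lambda / (1.f - lambda); // assumes lambda < 1
      if (weighting_scheme == 1)            // per-instance weighting
        return base;
      // per-dataset weighting: diminishing per-round factor
      return base * warm_start_period
                  / ((bandit_iter + 1) * (bandit_iter + 2));
    }
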
--- scripts/alg_comparison.py | 35 ++++++++++++++------ scripts/plot_warm_start.py | 66 +++++++++++++++++++++++++++++++++----- 2 files changed, 83 insertions(+), 18 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index 29a68e136d2..fad7c281aa6 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -9,6 +9,13 @@ from itertools import compress from math import sqrt +# this part is changable +#alg1 = 'epsilon' +#alg2 = 'cover' +#alg1 = 'choices_lambda_1' +#alg2 = 'choices_lambda_5' +alg1 = 'instance weighting' +alg2 = 'dataset weighting' def sum_files(result_path): prevdir = os.getcwd() @@ -19,8 +26,9 @@ def sum_files(result_path): def parse_sum_file(sum_filename): f = open(sum_filename, 'r') - table = pd.read_table(f, sep=' ', header=None, names=['dataset','choices_lambda_1','choices_lambda_5','bandit_only','supervised_only','size'], - lineterminator='\n') + table = pd.read_table(f, sep=' ', header=None, names=['dataset',alg1,alg2,'bandit_only','supervised_only','size'], + lineterminator='\n') + return table def get_significance(errors_1, errors_2, sizes): @@ -60,7 +68,14 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): if __name__ == '__main__': - results_path = '../../../figs/' + #results_path = '../../../lambdas/' + #results_path = '../../../warm_start_frac=0.1/' + #results_path = '../../../cover_vs_epsilon/' + #results_path = '../../../corrupt_supervised_type1_0.3/' + #results_path = '../../../corrupt_supervised_type2_0.3/' + #results_path = '../../../supervised_validation/' + results_path = '../../../weighting_schemes/' + dss = sum_files(results_path) all_results = None @@ -79,15 +94,15 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): #results_lambda = all_results[all_results['choices_lambda'] == cl] # compare combined w/ supervised - results_choices_lambda_1 = all_results['choices_lambda_1'].tolist() - results_choices_lambda_5 = all_results['choices_lambda_5'].tolist() + results_alg1 = all_results[alg1].tolist() + results_alg2 = all_results[alg2].tolist() results_bandit = all_results['bandit_only'].tolist() results_supervised = all_results['supervised_only'].tolist() dataset_sizes = all_results['size'].tolist() # compare combined w/ bandit - plot_comparison(results_choices_lambda_1, results_bandit, dataset_sizes, 'choices_lambda=1 vs bandit only', 'choices_lambda_1_v_bandit_only.png') - plot_comparison(results_choices_lambda_1, results_supervised, dataset_sizes, 'choices_lambda=1 vs supervised only', 'choices_lambda_1_v_supervised_only.png') - plot_comparison(results_choices_lambda_5, results_bandit, dataset_sizes, 'choices_lambda=5 vs bandit only', 'choices_lambda_5_v_bandit_only.png') - plot_comparison(results_choices_lambda_5, results_supervised, dataset_sizes, 'choices_lambda=5 vs supervised only', 'choices_lambda_5_v_supervised_only.png') - plot_comparison(results_choices_lambda_1, results_choices_lambda_5, dataset_sizes, 'choices_lambda=1 vs choices_lambda=5', 'choices_lambda_1_v_choices_lambda_5.png') + plot_comparison(results_alg1, results_bandit, dataset_sizes, alg1 + ' vs ' + 'bandit only', results_path + alg1 + ' vs ' + 'bandit only' + '.png') + plot_comparison(results_alg1, results_supervised, dataset_sizes, alg1 + ' vs ' + 'supervised only', results_path + alg1 + ' vs ' + 'supervised only' + '.png') + plot_comparison(results_alg2, results_bandit, dataset_sizes, alg2 + ' vs ' + 'bandit only', results_path + alg2 + ' vs ' + 'bandit only' + '.png') + plot_comparison(results_alg2, 
results_supervised, dataset_sizes, alg2 + ' vs ' + 'supervised only', results_path + alg2 + ' vs ' + 'supervised only' + '.png') + plot_comparison(results_alg1, results_alg2, dataset_sizes, alg1 + ' vs ' + alg2, results_path+alg1 + ' vs ' + alg2 + '.png') diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 3198b3acfc6..436596cb0d0 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -46,11 +46,21 @@ def collect_stats(mod): linenumber += 1 f.close() + + if len(avg_loss) == 0: + avg_loss = [0] + last_loss = [0] + wt = [0] + return avg_loss, last_loss, wt def execute_vw(mod): alg_option = ' ' + if mod.cover_on: + alg_option += ' --cb_explore ' + str(mod.num_classes) + ' --cover 5 --psi 0.01 ' + mod.cb_type = 'dr' + mod.adf_on = False if mod.no_bandit: alg_option += ' --no_bandit ' if mod.no_supervised: @@ -62,14 +72,21 @@ def execute_vw(mod): if mod.adf_on: alg_option += ' --cb_explore_adf ' + # using two datasets #cmd_catfile = '( head -n ' + str(mod.warm_start) + ' ' + mod.dataset_supervised + ';' + ' head -n ' + str(mod.bandit) + ' ' + mod.dataset_bandit + '; )' # using only one dataset #cmd_catfile = '( head -n ' + str(mod.warm_start + mod.bandit) + ' ' + mod.dataset + '; )' #cmd_catfile = '( cat ' + mod.ds_path+mod.dataset + '; )' - cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option + ' --progress ' + str(mod.progress) + ' -d ' + mod.ds_path + mod.dataset - + cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option + ' --progress ' + str(mod.progress) \ + + ' -d ' + mod.ds_path + mod.dataset \ + + ' --corrupt_type_supervised ' + str(mod.corrupt_type_supervised) \ + + ' --corrupt_prob_supervised ' + str(mod.corrupt_prob_supervised) \ + + ' --corrupt_type_bandit ' + str(mod.corrupt_type_bandit) \ + + ' --corrupt_prob_bandit ' + str(mod.corrupt_prob_bandit) \ + + ' --validation_method ' + str(mod.validation_method) \ + + ' --weighting_scheme ' + str(mod.weighting_scheme) cmd = cmd_vw #cmd = cmd_catfile + ' | ' + cmd_vw @@ -93,37 +110,53 @@ def gen_comparison_graph(mod): config_name = str(mod.dataset) + '_'+str(mod.warm_start)+ '_' + str(mod.cb_type) - # combined approach, lambdas = 1 - mod.choices_lambda = 1 + # combined approach, epsilon + mod.choices_lambda = 5 + mod.weighting_scheme = 1 mod.no_bandit = False mod.no_supervised = False mod.no_exploration = False + mod.cover_on = False mod.vw_output_filename = mod.results_path+config_name+'choices_lambda='+str(mod.choices_lambda)+'.txt' + execute_vw(mod) avg_loss_comb_1, last_loss_comb_1, wt_comb_1 = collect_stats(mod) - line = plt.plot(wt_comb_1, avg_loss_comb_1, 'r', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) + line = plt.plot(wt_comb_1, avg_loss_comb_1, 'r', label=('Combined approach, instance weighting')) + #line = plt.plot(wt_comb_1, avg_loss_comb_1, 'r', label=('Combined approach, lambda=1')) avg_error_comb_1 = avg_error(mod) - # combined approach, lambdas = 5 + # combined approach, cover + # combined approach, per-dataset weighting + #mod.choices_lambda = 1 + #mod.no_bandit = False + #mod.no_supervised = False + #mod.no_exploration = False + #mod.cover_on = True + #mod.vw_output_filename = 
mod.results_path+config_name+'choices_lambda='+str(mod.choices_lambda)+'.txt' mod.choices_lambda = 5 + mod.weighting_scheme = 2 mod.no_bandit = False mod.no_supervised = False mod.no_exploration = False + mod.cover_on = False mod.vw_output_filename = mod.results_path+config_name+'choices_lambda='+str(mod.choices_lambda)+'.txt' execute_vw(mod) avg_loss_comb_5, last_loss_comb_5, wt_comb_5 = collect_stats(mod) - line = plt.plot(wt_comb_5, avg_loss_comb_5, 'm', label=('Combined approach, #lambdas=' + str(mod.choices_lambda) )) + #line = plt.plot(wt_comb_5, avg_loss_comb_5, 'm', label=('Combined approach, lambda=5')) + line = plt.plot(wt_comb_5, avg_loss_comb_5, 'm', label=('Combined approach, dataset weighting')) avg_error_comb_5 = avg_error(mod) # bandit only approach mod.choices_lambda = 1 + mod.weighting_scheme = 1 mod.no_bandit = False mod.no_supervised = True mod.no_exploration = False + mod.cover_on = False mod.vw_output_filename = mod.results_path+config_name+'_no_supervised'+'.txt' execute_vw(mod) @@ -134,9 +167,11 @@ def gen_comparison_graph(mod): # supervised only approach mod.choices_lambda = 1 + mod.weighting_scheme = 1 mod.no_bandit = True mod.no_supervised = False mod.no_exploration = False + mod.cover_on = False mod.vw_output_filename = mod.results_path+config_name+'_no_bandit'+'.txt' execute_vw(mod) @@ -206,7 +241,13 @@ def avg_error(mod): vw_output_text = vw_output.read() #print vw_output_text rgx = re.compile('^average loss = (.*)$', flags=re.M) - avge = float(rgx.findall(vw_output_text)[0]) + + errs = rgx.findall(vw_output_text) + if not errs: + avge = 0 + else: + avge = float(errs[0]) + vw_output.close() return avge @@ -274,6 +315,15 @@ def main_loop(mod): #mod.choices_choices_lambda = [i for i in range(1,2)] #mod.choices_choices_lambda = [1, 3, 5, 7] #[i for i in range(10,11)] + #mod.corrupt_type_supervised = 2 + #mod.corrupt_prob_supervised = 0.3 + mod.corrupt_type_supervised = 1 + mod.corrupt_prob_supervised = 0.0 + + mod.corrupt_type_bandit = 1 + mod.corrupt_prob_bandit = 0.3 + + mod.validation_method = 2 #for correctness test #mod.choices_warm_start = [20] From 87f9afa57f6c53329351c00b70180be2640910f5 Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 6 Apr 2018 17:05:45 -0400 Subject: [PATCH 047/127] start properly copying the examples --- vowpalwabbit/cbify.cc | 87 +++++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 32 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 5fb42fb32a4..ff1a26837f6 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -5,7 +5,19 @@ #include "bs.h" #include "../explore/cpp/MWTExplorer.h" #include "vw.h" -#include + +#define SUPERVISED 1 +#define BANDIT 2 + +#define UAR 1 +#define CIRCULAR 2 + +#define BANDIT_VALI 1 +#define SUPERVISED_VALI 2 + +#define INSTANCE_WT 1 +#define DATASET_WT 2 + using namespace LEARNER; using namespace MultiWorldTesting; @@ -82,39 +94,51 @@ struct cbify }; -float rand_zeroone() +float rand_zeroone(vw* all) { - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution<> dis(0.0, 1.0); - return dis(gen); - //return 0.5; + float f = merand48(all->random_state); + //cout<cost_sensitive->predict(ec, argmin); */ + //Note: v_array is different STL's array; elements' references are used in v_array //first, corrupt fully supervised example ec's label here - size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.corrupt_prob_supervised, data.corrupt_type_supervised); - //use this for now; I am not sure if v_array is 
the same as STL's array where elements are copied when brought in - ld.label = corrupted_label; + size_t corrupted_label = corrupt_action(ld.label, data, SUPERVISED); //generate cost-sensitive label //COST_SENSITIVE::label& csl = *data.csls; @@ -436,7 +459,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) ecp->l.cs = csl; // I am not sure if written this way, ec will be deleted in some other stages and causes error - if (data.validation_method == 2) + if (data.validation_method == SUPERVISED_VALI) data.supervised_validation.push_back(*ecp); data.warm_start_iter++; @@ -459,7 +482,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) if(!cl.action) THROW("No action with non-zero probability found!"); - size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.corrupt_prob_bandit, data.corrupt_type_bandit); + size_t corrupted_label = corrupt_action(ld.label, data, BANDIT); cl.cost = loss(data, corrupted_label, cl.action); // accumulate the cumulative costs of lambdas @@ -476,7 +499,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) { for (uint32_t i = 0; i < data.choices_lambda; i++) { - if (data.weighting_scheme == 1) + if (data.weighting_scheme == INSTANCE_WT) ec.weight = old_weight * data.lambdas[i] / (1-data.lambdas[i]); else ec.weight = old_weight * data.lambdas[i] / (1-data.lambdas[i]) * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); @@ -534,7 +557,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) CB::label* cbls = data.cbls; CB::label* cbl_empty = data.cbl_empty; - size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.corrupt_prob_supervised, data.corrupt_type_supervised); + size_t corrupted_label = corrupt_action(ld.label, data, SUPERVISED); if (data.ind_supervised) { @@ -568,7 +591,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) ecp->l.multi.label = corrupted_label; ecp->l.multi.weight = 1.0; - if (data.validation_method == 2) + if (data.validation_method == SUPERVISED_VALI) data.supervised_validation.push_back(*ecp); data.warm_start_iter++; @@ -596,7 +619,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if(!cl.action) THROW("No action with non-zero probability found!"); - size_t corrupted_label = corrupt_action(ld.label, data.num_actions, data.corrupt_prob_bandit, data.corrupt_type_bandit); + size_t corrupted_label = corrupt_action(ld.label, data, BANDIT); cl.cost = loss(data, corrupted_label, cl.action); // accumulate the cumulative costs of lambdas @@ -615,7 +638,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { data.old_weights[a] = ecs[a].weight; - if (data.weighting_scheme == 1) + if (data.weighting_scheme == INSTANCE_WT) ecs[a].weight *= data.lambdas[i] / (1-data.lambdas[i]); else ecs[a].weight *= data.lambdas[i] / (1-data.lambdas[i]) * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); @@ -743,10 +766,10 @@ base_learner* cbify_setup(vw& all) data.corrupt_prob_supervised = vm.count("corrupt_prob_supervised") ? vm["corrupt_prob_supervised"].as() : 0.0; data.corrupt_prob_bandit = vm.count("corrupt_prob_bandit") ? vm["corrupt_prob_bandit"].as() : 0.0; - data.corrupt_type_supervised = vm.count("corrupt_type_supervised") ? vm["corrupt_type_supervised"].as() : 1; - data.corrupt_type_bandit = vm.count("corrupt_type_bandit") ? vm["corrupt_type_bandit"].as() : 1; - data.validation_method = vm.count("validation_method") ? 
vm["validation_method"].as() : 1; - data.weighting_scheme = vm.count("weighting_scheme") ? vm["weighting_scheme"].as() : 1; + data.corrupt_type_supervised = vm.count("corrupt_type_supervised") ? vm["corrupt_type_supervised"].as() : UAR; // 1 is the default value + data.corrupt_type_bandit = vm.count("corrupt_type_bandit") ? vm["corrupt_type_bandit"].as() : UAR; // 1 is the default value + data.validation_method = vm.count("validation_method") ? vm["validation_method"].as() : BANDIT_VALI; // 1 is the default value + data.weighting_scheme = vm.count("weighting_scheme") ? vm["weighting_scheme"].as() : INSTANCE_WT; // 1 is the default value data.bandit_iter = 0; From 4b54dc0f509525481f552032d097441be7233f2c Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 6 Apr 2018 18:28:12 -0400 Subject: [PATCH 048/127] model is not updating in the supervised phase --- vowpalwabbit/cbify.cc | 63 +++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 35 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index ff1a26837f6..dd2a1cd3543 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -175,6 +175,14 @@ void finish(cbify& data) data.lambdas.delete_v(); data.cumulative_costs.delete_v(); + for (size_t i = 0; i < data.warm_start_period; ++i) + { + VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, data.supervised_validation[i]); + free(&data.supervised_validation[i]); + } + + data.supervised_validation.delete_v(); + if (data.use_adf) { @@ -402,47 +410,30 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly { - /* - //generate cost-sensitive label - COST_SENSITIVE::label& csl = *data.csls; - csl.costs.resize(data.num_actions); - csl.costs.end() = csl.costs.begin()+data.num_actions; - for (uint32_t j = 0; j < data.num_actions; j++) - { - csl.costs[j].class_index = j+1; - csl.costs[j].x = loss(data, ld.label, j+1); - } - - ec.l.cs = csl; - - //predict - data.all->cost_sensitive->predict(ec, argmin); - */ - //Note: v_array is different STL's array; elements' references are used in v_array //first, corrupt fully supervised example ec's label here size_t corrupted_label = corrupt_action(ld.label, data, SUPERVISED); - //generate cost-sensitive label - //COST_SENSITIVE::label& csl = *data.csls; - COST_SENSITIVE::label* cslp = calloc_or_throw(1); - COST_SENSITIVE::label csl = *cslp; + //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) + COST_SENSITIVE::label& csl = *data.csls; + //COST_SENSITIVE::label* cslp = calloc_or_throw(1); + //COST_SENSITIVE::label csl = *cslp; + //csl.costs.end() = csl.costs.begin()+data.num_actions; + csl.costs.resize(data.num_actions); - csl.costs.end() = csl.costs.begin()+data.num_actions; for (uint32_t j = 0; j < data.num_actions; j++) { csl.costs[j].class_index = j+1; csl.costs[j].x = loss(data, corrupted_label, j+1); } - ec.l.cs = csl; + ec.l.cs = csl; //predict (for vw's internal reason, this step has to be put after ec's cs label is created) data.all->cost_sensitive->predict(ec, argmin); if (data.ind_supervised) { - for (uint32_t i = 0; i < data.choices_lambda; i++) { ec.weight = 1; @@ -450,21 +441,23 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) } } - //ec.l.multi = ld; - ec.weight = 0; - - // This is purely a hack here - need to clean up; I also did not deallocate the label and the copied example in finish() - example* ecp = calloc_or_throw(1); - 
VW::copy_example_data(false, ecp, &ec); - ecp->l.cs = csl; - - // I am not sure if written this way, ec will be deleted in some other stages and causes error - if (data.validation_method == SUPERVISED_VALI) + // NOTE WELL: for convenience in supervised validation, we intentionally use a cost-sensitive label as opposed to + // a multiclass label. This is because the csoaa learner needs a cost-sensitive label to predict (not sure why). + // I also did not deallocate the label and the copied example in finish() + if (data.validation_method == SUPERVISED_VALI) + { + example* ecp = calloc_or_throw(1); + VW::copy_example_data(false, ecp, &ec, 0, COST_SENSITIVE::cs_label.copy_label); data.supervised_validation.push_back(*ecp); + } + + //set the label of ec back to a multiclass label + ec.l.multi = ld; + ec.weight = 0; data.warm_start_iter++; } - else if (data.bandit_iter < data.bandit_period)//Call the cb_explore algorithm. It returns a vector of probabilities for each action + else if (data.bandit_iter < data.bandit_period) //Call the cb_explore learner. It returns a vector of probabilities for each action { data.cb_label.costs.erase(); ec.l.cb = data.cb_label; From 5e993af78a0d184b9d9bdbac1d3bdaa78e9a0390 Mon Sep 17 00:00:00 2001 From: chicheng Date: Sat, 7 Apr 2018 17:38:39 -0400 Subject: [PATCH 049/127] change to using proper copy example functions. Memory leak issues persist. --- vowpalwabbit/cbify.cc | 66 +++++++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 18 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index dd2a1cd3543..cb518732608 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -90,7 +90,7 @@ struct cbify size_t bandit_iter; size_t warm_start_iter; size_t weighting_scheme; - v_array supervised_validation; + example* supervised_validation; }; @@ -177,11 +177,9 @@ void finish(cbify& data) for (size_t i = 0; i < data.warm_start_period; ++i) { - VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, data.supervised_validation[i]); - free(&data.supervised_validation[i]); + //VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, data.supervised_validation[i]); } - - data.supervised_validation.delete_v(); + free(data.supervised_validation); if (data.use_adf) @@ -209,6 +207,11 @@ void finish(cbify& data) free(data.cbls); } + else + { + data.csls->costs.delete_v(); + } + free(data.csls); @@ -326,7 +329,7 @@ void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) for (uint32_t i = 0; i < data.choices_lambda; i++) { //go over the supervised validation set - for (uint32_t j = 0; j < data.supervised_validation.size(); j++) + for (uint32_t j = 0; j < data.warm_start_period; j++) { example& ec_valid = data.supervised_validation[j]; data.all->cost_sensitive->predict(ec_valid, i); @@ -337,8 +340,12 @@ void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) //cout<(1); //COST_SENSITIVE::label csl = *cslp; - //csl.costs.end() = csl.costs.begin()+data.num_actions; - + + //Note: these two lines are important, otherwise the cost sensitive vector seems to be unbounded. + //This is crucial for 1. cost-sensitive learn 2. 
label copy csl.costs.resize(data.num_actions); + csl.costs.end() = csl.costs.begin()+data.num_actions; + for (uint32_t j = 0; j < data.num_actions; j++) { csl.costs[j].class_index = j+1; @@ -429,6 +439,10 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) ec.l.cs = csl; + //cout<<"in predict or learn:"<cost_sensitive->predict(ec, argmin); @@ -446,11 +460,20 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) // I also did not deallocate the label and the copied example in finish() if (data.validation_method == SUPERVISED_VALI) { - example* ecp = calloc_or_throw(1); - VW::copy_example_data(false, ecp, &ec, 0, COST_SENSITIVE::cs_label.copy_label); - data.supervised_validation.push_back(*ecp); + example& ec_copy = data.supervised_validation[data.warm_start_iter]; + //why doesn't the following two apporaches leak memory? + VW::copy_example_data(false, &ec_copy, &ec, 0, COST_SENSITIVE::cs_label.copy_label); + //copy_array(ec_copy.l.cs.costs, ec.l.cs.costs); + //VW::copy_example_data(false, &ec_copy, &ec); + //for (uint32_t j = 0; j < data.num_actions; j++) + //{ + // ec_copy.l.cs.costs.push_back(ec.l.cs.costs[j]); + //} + //cout<<"after copying"<(1); - VW::copy_example_data(false, ecp, &ec); - ecp->l.multi.label = corrupted_label; - ecp->l.multi.weight = 1.0; + //example* ecp = calloc_or_throw(1); + //VW::copy_example_data(false, ecp, &ec); + //ecp->l.multi.label = corrupted_label; + //ecp->l.multi.weight = 1.0; + //to be corrected if (data.validation_method == SUPERVISED_VALI) - data.supervised_validation.push_back(*ecp); + VW::copy_example_data(false, &data.supervised_validation[data.warm_start_iter], &ec, 0, MULTICLASS::mc_label.copy_label); data.warm_start_iter++; @@ -765,6 +789,12 @@ base_learner* cbify_setup(vw& all) data.weighting_scheme = vm.count("weighting_scheme") ? vm["weighting_scheme"].as() : INSTANCE_WT; // 1 is the default value + if (data.validation_method == SUPERVISED_VALI) + { + data.supervised_validation = calloc_or_throw(data.warm_start_period); + } + + data.bandit_iter = 0; data.warm_start_iter = 0; From 24c79e88167ce5db9aef7d685f9b37ad7ac1cb16 Mon Sep 17 00:00:00 2001 From: chicheng Date: Mon, 9 Apr 2018 17:15:32 -0400 Subject: [PATCH 050/127] . 
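
This patch threads the cbify state into generate_lambdas and adds a first
cut of minimax_lambda for choosing the center of the lambda grid.

As a reading aid, here is a self-contained sketch of the geometric grid that
generate_lambdas builds (ascending, centered at 0.5: each step halves the
distance to 0 on the left and to 1 on the right); std::vector stands in for
vw's v_array, so this is an illustration rather than the code itself:

    #include <cstddef>
    #include <vector>

    // For lambda_size = 5 this yields {0.125, 0.25, 0.5, 0.75, 0.875}.
    std::vector<float> lambda_grid(size_t lambda_size)
    {
      std::vector<float> lambdas(lambda_size, 0.f);
      size_t mid = lambda_size / 2;
      lambdas[mid] = 0.5f;
      for (size_t i = mid; i > 0; i--)
        lambdas[i - 1] = lambdas[i] / 2.f;               // halve toward 0
      for (size_t i = mid + 1; i < lambda_size; i++)
        lambdas[i] = 1.f - (1.f - lambdas[i - 1]) / 2.f; // halve toward 1
      return lambdas;
    }
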
---
 vowpalwabbit/cbify.cc | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc
index cb518732608..bdeb590d422 100644
--- a/vowpalwabbit/cbify.cc
+++ b/vowpalwabbit/cbify.cc
@@ -716,7 +716,7 @@ void init_adf_data(cbify& data, const size_t num_actions)
 }

-void generate_lambdas(v_array<float>& lambdas, size_t lambda_size)
+void generate_lambdas(cbify& data, v_array<float>& lambdas, size_t lambda_size)
 {
   // The lambdas are in fact arranged in ascending order (the middle lambda is 0.5)

@@ -734,6 +734,23 @@ void generate_lambdas(v_array<float>& lambdas, size_t lambda_size)
 }

+float minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period, size_t bandit_period, size_t dim)
+{
+  if ( (epsilon / num_actions) * bandit_period >= dim )
+    return 1.0;
+  else
+  {
+    float z = sqrt( dim * ( (epsilon / num_actions) * bandit_period + warm_start_period) - (epsilon / num_actions) * bandit_period * warm_start_period );
+
+    float numer = (epsilon / num_actions) + warm_start_period * (epsilon / num_actions) * (1/z);
+    float denom = 1 + (epsilon / num_actions) + (warm_start_period - bandit_period) * (epsilon / num_actions) * (1/z);
+
+    return numer / denom;
+  }
+}
+
 base_learner* cbify_setup(vw& all)
 {
   //parse and set arguments
@@ -752,7 +769,8 @@ base_learner* cbify_setup(vw& all)
     ("corrupt_type_supervised", po::value<size_t>(), "type of label corruption in the supervised part (1 is uar, 2 is circular)")
     ("corrupt_type_bandit", po::value<size_t>(), "type of label corruption in the bandit part (1 is uar, 2 is circular)")
     ("validation_method", po::value<size_t>(), "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised and amortizing)")
-    ("weighting_scheme", po::value<size_t>(), "weighting scheme (1 is per instance weighting, 2 is per dataset weighting (where we use a diminishing weighting scheme) )");
+    ("weighting_scheme", po::value<size_t>(), "weighting scheme (1 is per instance weighting, 2 is per dataset weighting (where we use a diminishing weighting scheme) )")
+    ("lambda_scheme", po::value<size_t>(), "Lambda set scheme (1 is expanding based on center 0.5, 2 is expanding based on center=minimax lambda, 3 is expanding based on center=minimax lambda along with forcing 0,1 in Lambda )");
   add_options(all);

   po::variables_map& vm = all.vm;

From 502d593aef9112aeafe7d02d0496b7559fff5ca6 Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Mon, 9 Apr 2018 21:45:56 -0400
Subject: [PATCH 051/127] updated the lambda tuning scheme

---
 scripts/plot_warm_start.py |   4 +-
 vowpalwabbit/cbify.cc      | 138 +++++++++++++++++++++++--------
 2 files changed, 89 insertions(+), 53 deletions(-)

diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py
index 436596cb0d0..65fabebbaf8 100644
--- a/scripts/plot_warm_start.py
+++ b/scripts/plot_warm_start.py
@@ -320,8 +320,8 @@ def main_loop(mod):
     mod.corrupt_type_supervised = 1
     mod.corrupt_prob_supervised = 0.0

-    mod.corrupt_type_bandit = 1
-    mod.corrupt_prob_bandit = 0.3
+    mod.corrupt_type_bandit = 2
+    mod.corrupt_prob_bandit = 1.0

     mod.validation_method = 2

diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc
index bdeb590d422..cf77a36e249 100644
--- a/vowpalwabbit/cbify.cc
+++ b/vowpalwabbit/cbify.cc
@@ -18,6 +18,10 @@
 #define INSTANCE_WT 1
 #define DATASET_WT 2

+#define ABS_CENTRAL 1
+#define MINIMAX_CENTRAL 2
+#define MINIMAX_CENTRAL_ZEROONE 3
+
 using namespace LEARNER;
 using namespace MultiWorldTesting;

@@ -91,9 +95,61 @@ struct cbify
   size_t warm_start_iter;
   size_t weighting_scheme;
example* supervised_validation; + size_t lambda_scheme; + float epsilon; }; +float minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period, size_t bandit_period, size_t dim) +{ + if ( (epsilon / num_actions) * bandit_period >= dim ) + return 1.0; + else + { + float z = sqrt( dim * ( (epsilon / num_actions) * bandit_period + warm_start_period) - (epsilon / num_actions) * bandit_period * warm_start_period ); + + float numer = (epsilon / num_actions) + warm_start_period * (epsilon / num_actions) * (1/z); + float denom = 1 + (epsilon / num_actions) + (warm_start_period - bandit_period) * (epsilon / num_actions) * (1/z); + + //cout<<"z = "<random_state); @@ -111,7 +167,7 @@ size_t generate_uar_action(cbify& data) { if (rand <= float(i) / data.num_actions) return i; - } + } return data.num_actions; } @@ -137,7 +193,7 @@ size_t corrupt_action(size_t action, cbify& data, size_t data_type) { if (corrupt_type == UAR) return generate_uar_action(data); - else + else return (action % data.num_actions) + 1; } else @@ -355,7 +411,7 @@ void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_learner& base) { - + if (data.validation_method == 1) { uint32_t best_action; @@ -393,9 +449,9 @@ void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_l } //cout<(1); //COST_SENSITIVE::label csl = *cslp; - + //Note: these two lines are important, otherwise the cost sensitive vector seems to be unbounded. //This is crucial for 1. cost-sensitive learn 2. label copy csl.costs.resize(data.num_actions); @@ -454,14 +510,14 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.all->cost_sensitive->learn(ec, i); } } - - // NOTE WELL: for convenience in supervised validation, we intentionally use a cost-sensitive label as opposed to + + // NOTE WELL: for convenience in supervised validation, we intentionally use a cost-sensitive label as opposed to // a multiclass label. This is because the csoaa learner needs a cost-sensitive label to predict (not sure why). // I also did not deallocate the label and the copied example in finish() - if (data.validation_method == SUPERVISED_VALI) + if (data.validation_method == SUPERVISED_VALI) { example& ec_copy = data.supervised_validation[data.warm_start_iter]; - //why doesn't the following two apporaches leak memory? + //why doesn't the following two apporaches leak memory? 
VW::copy_example_data(false, &ec_copy, &ec, 0, COST_SENSITIVE::cs_label.copy_label); //copy_array(ec_copy.l.cs.costs, ec.l.cs.costs); //VW::copy_example_data(false, &ec_copy, &ec); @@ -473,7 +529,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) //for (uint32_t j = 0; j < data.num_actions; j++) // cout<(1); + //example* ecp = calloc_or_throw(1); //VW::copy_example_data(false, ecp, &ec); //ecp->l.multi.label = corrupted_label; //ecp->l.multi.weight = 1.0; @@ -611,11 +671,13 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.validation_method == SUPERVISED_VALI) VW::copy_example_data(false, &data.supervised_validation[data.warm_start_iter], &ec, 0, MULTICLASS::mc_label.copy_label); - data.warm_start_iter++; + data.warm_start_iter++; } else if (data.bandit_iter < data.bandit_period) // call the bandit learner { + if (data.bandit_iter == 0) + setup_lambdas(data, ec); for (size_t a = 0; a < data.adf_data.num_actions; ++a) { @@ -658,7 +720,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.weighting_scheme == INSTANCE_WT) ecs[a].weight *= data.lambdas[i] / (1-data.lambdas[i]); else - ecs[a].weight *= data.lambdas[i] / (1-data.lambdas[i]) * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); + ecs[a].weight *= data.lambdas[i] / (1-data.lambdas[i]) * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); base.learn(ecs[a], i); } @@ -670,7 +732,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) } ec.pred.multiclass = cl.action; - + data.bandit_iter++; } else @@ -716,40 +778,6 @@ void init_adf_data(cbify& data, const size_t num_actions) } -void generate_lambdas(cbify& data, v_array& lambdas, size_t lambda_size) -{ - // The lambdas are in fact arranged in ascending order (the middle lambda is 0.5) - - lambdas = v_init(); - uint32_t mid = lambda_size / 2; - for (uint32_t i = 0; i < lambda_size; i++) - lambdas.push_back(0); - - lambdas[mid] = 0.5; - for (uint32_t i = mid; i > 0; i--) - lambdas[i-1] = lambdas[i] / 2; - - for (uint32_t i = mid+1; i < lambda_size; i++) - lambdas[i] = 1 - (1-lambdas[i-1]) / 2; - -} - -void minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period, size_t bandit_period, size_t dim) -{ - if ( (epsilon / num_actions) * bandit_period >= dim ) - return 1.0; - else - { - float z = sqrt( dim * ( (epsilon / num_actions) * bandit_period + warm_start_period) - (epsilon / num_actions) * bandit_period * warm_start_period ); - - float numer = (epsilon / num_actions) + warm_start_period * (epsilon / num_actions) * (1/z); - float denom = 1 + (epsilon / num_actions) + (warm_start_period - bandit_period) * (epsilon / num_actions) * (1/z); - - return numer / denom; - - } - -} base_learner* cbify_setup(vw& all) { @@ -805,7 +833,11 @@ base_learner* cbify_setup(vw& all) data.corrupt_type_bandit = vm.count("corrupt_type_bandit") ? vm["corrupt_type_bandit"].as() : UAR; // 1 is the default value data.validation_method = vm.count("validation_method") ? vm["validation_method"].as() : BANDIT_VALI; // 1 is the default value data.weighting_scheme = vm.count("weighting_scheme") ? vm["weighting_scheme"].as() : INSTANCE_WT; // 1 is the default value + data.lambda_scheme = vm.count("lambda_scheme") ? vm["lambda_scheme"].as() : ABS_CENTRAL; + data.epsilon = vm.count("epsilon") ? 
vm["epsilon"].as() : 0.05; + //cout<<"does epsilon exist?"<set_finish_example(finish_example); return make_base(*l); } - diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index cf77a36e249..10a6e2a2315 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -102,6 +102,7 @@ struct cbify float minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period, size_t bandit_period, size_t dim) { + /* if ( (epsilon / num_actions) * bandit_period >= dim ) return 1.0; else @@ -117,6 +118,8 @@ float minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period return numer / denom; } + */ + return epsilon / (num_actions + epsilon); } void setup_lambdas(cbify& data, example& ec) From 1922659c1b12e0c6ddf38283ce2786502df864d7 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Thu, 12 Apr 2018 17:17:28 -0400 Subject: [PATCH 053/127] fixed bug on zero warm start examples on small datasets --- scripts/alg_comparison.py | 70 +++++++++++++++++++++++++------------- scripts/plot_warm_start.py | 2 +- 2 files changed, 48 insertions(+), 24 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index 703c0adac30..4509260bc06 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -8,6 +8,7 @@ import scipy.stats as stats from itertools import compress from math import sqrt +import argparse # this part is changable #alg1 = 'epsilon' @@ -26,6 +27,7 @@ def sum_files(result_path): def parse_sum_file(sum_filename): f = open(sum_filename, 'r') + #f.seek(0, 0) table = pd.read_table(f, sep=' ',lineterminator='\n') return table @@ -33,6 +35,7 @@ def parse_sum_file(sum_filename): def get_z_scores(errors_1, errors_2, sizes): z_scores = [] for i in range(len(errors_1)): + #print i z_scores.append( z_score(errors_1[i], errors_2[i], sizes[i]) ) return z_scores @@ -73,33 +76,54 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): if __name__ == '__main__': - #results_path = '../../../lambdas/' - #results_path = '../../../warm_start_frac=0.1/' - #results_path = '../../../cover_vs_epsilon/' - #results_path = '../../../corrupt_supervised_type1_0.3/' - #results_path = '../../../expt_0403/corrupt_supervised_type2_0.3/' - #results_path = '../../../expt_0403/supervised_validation/' - #results_path = '../../../weighting_schemes/' - #results_path = '../../../central_lambda/' - #results_path = '../../../central_lambda_naive/' - #results_path = '../../../central_lambda_zeroone/' - #results_path = '../../../type2_0.3/' - #results_path = '../../../type1_0.3/' - #results_path = '../../../type2_1/' - #results_path = '../../../type2_0.65/' - results_path = '../../../type2_0.3/' - - dss = sum_files(results_path) + parser = argparse.ArgumentParser(description='result summary') + parser.add_argument('--results_dir', default='../../../figs/') + args = parser.parse_args() + results_dir = args.results_dir + + #results_dir = '../../../lambdas/' + #results_dir = '../../../warm_start_frac=0.1/' + #results_dir = '../../../cover_vs_epsilon/' + #results_dir = '../../../corrupt_supervised_type1_0.3/' + #results_dir = '../../../expt_0403/corrupt_supervised_type2_0.3/' + #results_dir = '../../../expt_0403/supervised_validation/' + #results_dir = '../../../weighting_schemes/' + #results_dir = '../../../central_lambda/' + #results_dir = '../../../central_lambda_naive/' + #results_dir = '../../../central_lambda_zeroone/' + #results_dir = '../../../type2_0.3/' + #results_dir = '../../../type1_0.3/' + #results_dir = '../../../type2_1/' + #results_dir = 
'../../../type2_0.65/' + #results_dir = '../../../type2_0.3/' + + dss = sum_files(results_dir) + + #print dss[160] all_results = None + for i in range(len(dss)): - result = parse_sum_file(results_path + dss[i]) + print 'dataset name: ', dss[i] + result = parse_sum_file(results_dir + dss[i]) + if (i == 0): all_results = result else: all_results = all_results.append(result) + + + #if i >= 331 and i <= 340: + # print 'result:', result + # print 'all_results:', all_results + print all_results + + + #result = parse_sum_file(results_dir + '400of600.sum') + #print result + #choices_choices_lambda = sorted(all_results['choices_lambda'].unique()) #grouped = all_results.groupby('choices_lambda') @@ -122,8 +146,8 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): print results_alg1 # compare combined w/ bandit - plot_comparison(results_alg1, results_bandit, dataset_sizes, alg1 + ' vs ' + 'bandit only', results_path + alg1 + ' vs ' + 'bandit only' + '.png') - plot_comparison(results_alg1, results_supervised, dataset_sizes, alg1 + ' vs ' + 'supervised only', results_path + alg1 + ' vs ' + 'supervised only' + '.png') - plot_comparison(results_alg2, results_bandit, dataset_sizes, alg2 + ' vs ' + 'bandit only', results_path + alg2 + ' vs ' + 'bandit only' + '.png') - plot_comparison(results_alg2, results_supervised, dataset_sizes, alg2 + ' vs ' + 'supervised only', results_path + alg2 + ' vs ' + 'supervised only' + '.png') - plot_comparison(results_alg1, results_alg2, dataset_sizes, alg1 + ' vs ' + alg2, results_path+alg1 + ' vs ' + alg2 + '.png') + plot_comparison(results_alg1, results_bandit, dataset_sizes, alg1 + ' vs ' + 'bandit only', results_dir + alg1 + ' vs ' + 'bandit only' + '.png') + plot_comparison(results_alg1, results_supervised, dataset_sizes, alg1 + ' vs ' + 'supervised only', results_dir + alg1 + ' vs ' + 'supervised only' + '.png') + plot_comparison(results_alg2, results_bandit, dataset_sizes, alg2 + ' vs ' + 'bandit only', results_dir + alg2 + ' vs ' + 'bandit only' + '.png') + plot_comparison(results_alg2, results_supervised, dataset_sizes, alg2 + ' vs ' + 'supervised only', results_dir + alg2 + ' vs ' + 'supervised only' + '.png') + plot_comparison(results_alg1, results_alg2, dataset_sizes, alg1 + ' vs ' + alg2, results_dir+alg1 + ' vs ' + alg2 + '.png') diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 6dc38741512..c6716d4c889 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -125,7 +125,7 @@ def plot_errors(mod): def gen_comparison_graph(mod): mod.num_lines = get_num_lines(mod.ds_path+mod.dataset) - mod.warm_start = int(math.floor(mod.warm_start_frac * mod.num_lines)) + mod.warm_start = int(math.ceil(mod.warm_start_frac * mod.num_lines)) mod.bandit = mod.num_lines - mod.warm_start mod.progress = int(math.ceil(float(mod.bandit) / float(mod.num_checkpoints))) mod.num_classes = get_num_classes(mod.dataset) From f6539b5700fc53454841fa3e2cb8958a180ba466 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Tue, 24 Apr 2018 01:13:44 -0400 Subject: [PATCH 054/127] added a refined weighting scheme and cumulative var calculation (not tested yet) --- scripts/alg_comparison.py | 4 +- scripts/plot_warm_start.py | 25 +++++---- vowpalwabbit/cbify.cc | 102 ++++++++++++++++++++++++++++++++++--- 3 files changed, 113 insertions(+), 18 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index 4509260bc06..40bdc1b0972 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -35,7 +35,7 @@ def 
parse_sum_file(sum_filename): def get_z_scores(errors_1, errors_2, sizes): z_scores = [] for i in range(len(errors_1)): - #print i + print i z_scores.append( z_score(errors_1[i], errors_2[i], sizes[i]) ) return z_scores @@ -99,7 +99,7 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): dss = sum_files(results_dir) - #print dss[160] + #print dss[168] all_results = None diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index c6716d4c889..1e806f512e3 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -135,9 +135,9 @@ def gen_comparison_graph(mod): config_name = str(mod.dataset) + '_'+str(mod.warm_start)+ '_' + str(mod.cb_type) # combined approach, epsilon - mod.choices_lambda = 5 + mod.choices_lambda = 2 mod.weighting_scheme = 1 - mod.lambda_scheme = 2 + mod.lambda_scheme = 3 mod.no_bandit = False mod.no_supervised = False mod.no_exploration = False @@ -145,8 +145,8 @@ def gen_comparison_graph(mod): mod.epsilon_on = True mod.plot_color = 'r' mod.plot_flat = False - mod.vw_output_filename = mod.results_path+config_name+'central_minimax'+'.txt' - mod.plot_label = 'Central lambda: minimax' + mod.vw_output_filename = mod.results_path+config_name+'zeroone'+'.txt' + mod.plot_label = 'zeroone only' avg_error_comb_1 = plot_errors(mod) # combined approach, cover @@ -275,7 +275,7 @@ def avg_error(mod): def main_loop(mod): mod.summary_file_name = mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum' summary_file = open(mod.summary_file_name, 'w') - summary_file.write('dataset' + ' ' + 'central_minimax' + ' ' + 'central_minimax_zeroone' + ' ' + 'bandit_only' + ' ' + 'supervised_only' + ' ' + 'size' + '\n') + summary_file.write('dataset' + ' ' + 'zeroone_only' + ' ' + 'central_minimax_zeroone' + ' ' + 'bandit_only' + ' ' + 'supervised_only' + ' ' + 'size' + '\n') summary_file.close() for mod.cb_type, mod.warm_start_frac, mod.dataset in mod.config_task: @@ -287,6 +287,11 @@ def main_loop(mod): parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') parser.add_argument('num_tasks', type=int) parser.add_argument('--results_dir', default='../../../figs/') + parser.add_argument('--warm_start_fraction', type=float) + parser.add_argument('--corrupt_prob_supervised', type=float) + parser.add_argument('--corrupt_prob_bandit',type=float) + + args = parser.parse_args() if args.task_id == 0: if not os.path.exists(args.results_dir): @@ -320,7 +325,8 @@ def main_loop(mod): #mod.choices_warm_start_frac = [0.01 * pow(2, i) for i in range(1)] #mod.choices_warm_start_frac = [0.01, 0.03, 0.1, 0.3] - mod.choices_warm_start_frac = [0.03] + #mod.choices_warm_start_frac = [0.03] + mod.choices_warm_start_frac = [args.warm_start_fraction] #mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] @@ -339,10 +345,11 @@ def main_loop(mod): #mod.corrupt_type_supervised = 2 #mod.corrupt_prob_supervised = 0.3 mod.corrupt_type_supervised = 1 - mod.corrupt_prob_supervised = 0.0 + #mod.corrupt_prob_supervised = 0.3 + mod.corrupt_prob_supervised = args.corrupt_prob_supervised - mod.corrupt_type_bandit = 2 - mod.corrupt_prob_bandit = 1 + mod.corrupt_type_bandit = 1 + mod.corrupt_prob_bandit = args.corrupt_prob_bandit mod.validation_method = 2 mod.epsilon = 0.05 diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 10a6e2a2315..e9e80d79a88 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -97,6 +97,7 @@ struct cbify example* supervised_validation; 
size_t lambda_scheme; float epsilon; + float cumulative_variance; }; @@ -143,7 +144,7 @@ void setup_lambdas(cbify& data, example& ec) if (data.lambda_scheme == MINIMAX_CENTRAL_ZEROONE) { lambdas[0] = 0.0; - lambdas[data.choices_lambda-1] = 1.0 - 1e-4; + lambdas[data.choices_lambda-1] = 1.0; } //cout<<"lambdas:"<cost_sensitive->predict(ec, argmin); + + return ec.pred.multiclass; + +} template @@ -509,8 +532,14 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) { for (uint32_t i = 0; i < data.choices_lambda; i++) { - ec.weight = 1; + if (data.lambdas[i] >= 0.5) + ec.weight = (1 - data.lambdas[i]) / data.lambdas[i]; + else + ec.weight = 1; + data.all->cost_sensitive->learn(ec, i); + + ec.weight = 1; } } @@ -541,7 +570,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) } else if (data.bandit_iter < data.bandit_period) //Call the cb_explore learner. It returns a vector of probabilities for each action { - // Need to initilize the lambda vector + // Need to initialize the lambda vector if (data.bandit_iter == 0) setup_lambdas(data, ec); @@ -578,14 +607,24 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) { for (uint32_t i = 0; i < data.choices_lambda; i++) { + float weight_multiplier; + if (data.lambdas[i] >= 0.5) + weight_multiplier = 1; + else + weight_multiplier = data.lambdas[i] / (1-data.lambdas[i]); + if (data.weighting_scheme == INSTANCE_WT) - ec.weight = old_weight * data.lambdas[i] / (1-data.lambdas[i]); + ec.weight = old_weight * weight_multiplier; else - ec.weight = old_weight * data.lambdas[i] / (1-data.lambdas[i]) * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); + ec.weight = old_weight * weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); base.learn(ec, i); } } + + size_t pred_best_approx = predict_cs(data, ec); + data.cumulative_variance += 1.0 / ec.pred.a_s[pred_best_approx-1].score; + data.a_s.erase(); data.a_s = ec.pred.a_s; ec.l.multi = ld; @@ -593,6 +632,12 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) ec.weight = old_weight; data.bandit_iter++; + + if (data.bandit_iter == data.bandit_period) + { + cout<<"Ideal average variance = "<Choose_Action(*data.generic_explorer, StringUtils::to_string(data.example_counter++), ec); + + return action; + +} + +void learn_bandit(cbify& data, base_learner& base, example& ec) +{ + float old_weight = ec.weight; + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + float weight_multiplier; + if (data.lambdas[i] >= 0.5) + weight_multiplier = 1; + else + weight_multiplier = data.lambdas[i] / (1-data.lambdas[i]); + if (data.weighting_scheme == INSTANCE_WT) + ec.weight = old_weight * weight_multiplier; + else + ec.weight = old_weight * weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); + + base.learn(ec, i); + } + ec.weight = old_weight; } template void predict_or_learn(cbify& data, base_learner& base, example& ec) { - float old_weight; - uint32_t argmin; - - argmin = find_min(data.cumulative_costs); + //float old_weight; + //uint32_t argmin; + //argmin = find_min(data.cumulative_costs); //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; //cout<(1); - //COST_SENSITIVE::label csl = *cslp; - - //Note: these two lines are important, otherwise the cost sensitive vector seems to be unbounded. - //This is crucial for 1. cost-sensitive learn 2. 
label copy - csl.costs.resize(data.num_actions); - csl.costs.end() = csl.costs.begin()+data.num_actions; - - for (uint32_t j = 0; j < data.num_actions; j++) - { - csl.costs[j].class_index = j+1; - csl.costs[j].x = loss(data, corrupted_label, j+1); - } - - ec.l.cs = csl; - - //cout<<"in predict or learn:"<cost_sensitive->predict(ec, argmin); + //learn + //first, corrupt fully supervised example ec's label here + generate_corrupted_cs(data, ec, ld); if (data.ind_supervised) - { - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - if (data.lambdas[i] >= 0.5) - ec.weight = (1 - data.lambdas[i]) / data.lambdas[i]; - else - ec.weight = 1; - - data.all->cost_sensitive->learn(ec, i); - - ec.weight = 1; - } - } + learn_cs(data, ec); - // NOTE WELL: for convenience in supervised validation, we intentionally use a cost-sensitive label as opposed to - // a multiclass label. This is because the csoaa learner needs a cost-sensitive label to predict (not sure why). - // I also did not deallocate the label and the copied example in finish() if (data.validation_method == SUPERVISED_VALI) - { - example& ec_copy = data.supervised_validation[data.warm_start_iter]; - //why doesn't the following two apporaches leak memory? - VW::copy_example_data(false, &ec_copy, &ec, 0, COST_SENSITIVE::cs_label.copy_label); - //copy_array(ec_copy.l.cs.costs, ec.l.cs.costs); - //VW::copy_example_data(false, &ec_copy, &ec); - //for (uint32_t j = 0; j < data.num_actions; j++) - //{ - // ec_copy.l.cs.costs.push_back(ec.l.cs.costs[j]); - //} - //cout<<"after copying"<Choose_Action(*data.generic_explorer, StringUtils::to_string(data.example_counter++), ec); + size_t action = predict_bandit(data, base, ec); CB::cb_class cl; cl.action = action; @@ -600,36 +634,24 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.cb_label.costs.push_back(cl); ec.l.cb = data.cb_label; - ec.pred = old_pred; - old_weight = ec.weight; + ec.pred = data.pred; - if (data.ind_bandit) - { - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - float weight_multiplier; - if (data.lambdas[i] >= 0.5) - weight_multiplier = 1; - else - weight_multiplier = data.lambdas[i] / (1-data.lambdas[i]); - if (data.weighting_scheme == INSTANCE_WT) - ec.weight = old_weight * weight_multiplier; - else - ec.weight = old_weight * weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); + if (data.ind_bandit) + learn_bandit(data, base, ec); - base.learn(ec, i); - } - } + data.a_s.erase(); + data.a_s = ec.pred.a_s; size_t pred_best_approx = predict_cs(data, ec); - data.cumulative_variance += 1.0 / ec.pred.a_s[pred_best_approx-1].score; + data.cumulative_variance += 1.0 / data.a_s[pred_best_approx-1].score; + + //cout<<"variance at bandit round "<< data.bandit_iter << " = " << 1.0 / data.a_s[pred_best_approx-1].score << endl; + //cout<l.cb = *cbl_empty; @@ -811,7 +833,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) } } - size_t pred_best_approx = predict_cs(data, ec); + size_t pred_best_approx = predict_cs_adf(data, base, ec); data.cumulative_variance += 1.0 / out_ec.pred.a_s[pred_best_approx-1].score; ec.pred.multiclass = cl.action; @@ -959,6 +981,13 @@ base_learner* cbify_setup(vw& all) else { data.csls = calloc_or_throw(1); + auto& csl = data.csls[0]; + + csl.costs = v_init(); + //Note: these two lines are important, otherwise the cost sensitive vector seems to be unbounded. + //This is crucial for 1. cost-sensitive learn 2. 
label copy + csl.costs.resize(data.num_actions); + csl.costs.end() = csl.costs.begin()+data.num_actions; } From 6259c672e3eeaebd99474647095ffcac85c03e61 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Sun, 29 Apr 2018 13:48:44 -0400 Subject: [PATCH 056/127] fixed the csl label zero problem - now the label is set properly: 1,2,..K --- vowpalwabbit/cbify.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index c9da279db42..98af1430ef5 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -474,7 +474,7 @@ size_t predict_cs(cbify& data, example& ec) data.all->cost_sensitive->predict(ec, argmin); - cout<(); //Note: these two lines are important, otherwise the cost sensitive vector seems to be unbounded. - //This is crucial for 1. cost-sensitive learn 2. label copy - csl.costs.resize(data.num_actions); - csl.costs.end() = csl.costs.begin()+data.num_actions; + + for (size_t a = 0; a < num_actions; ++a) + { + csl.costs.push_back({0, a+1, 0, 0}); + } } From c3304502f5872738c5d7a078e06bcd3ecdd0ee17 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Sun, 29 Apr 2018 14:25:46 -0400 Subject: [PATCH 057/127] . --- vowpalwabbit/cbify.cc | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 98af1430ef5..1d9a36c0fd3 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -536,6 +536,18 @@ void add_to_sup_validation(cbify& data, example& ec) // cout< void predict_or_learn(cbify& data, base_learner& base, example& ec) { - //float old_weight; - //uint32_t argmin; - //argmin = find_min(data.cumulative_costs); - //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; - //cout< Date: Sun, 29 Apr 2018 14:51:27 -0400 Subject: [PATCH 058/127] make the lambda weighting more modular --- vowpalwabbit/cbify.cc | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 1d9a36c0fd3..6f8a8f56a14 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -463,6 +463,25 @@ void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_l } +float compute_weight_multiplier(cbify& data, size_t i, size_t data_type) +{ + if (data_type == SUPERVISED) + { + if (data.lambdas[i] >= 0.5) + return (1 - data.lambdas[i]) / data.lambdas[i]; + else + return 1; + } + else + { + if (data.lambdas[i] >= 0.5) + return 1; + else + return data.lambdas[i] / (1-data.lambdas[i]); + } +} + + size_t predict_cs(cbify& data, example& ec) { uint32_t argmin = find_min(data.cumulative_costs); @@ -482,17 +501,14 @@ size_t predict_cs(cbify& data, example& ec) void learn_cs(cbify& data, example& ec) { + float old_weight = ec.weight; for (uint32_t i = 0; i < data.choices_lambda; i++) { - if (data.lambdas[i] >= 0.5) - ec.weight = (1 - data.lambdas[i]) / data.lambdas[i]; - else - ec.weight = 1; - + float weight_multiplier = compute_weight_multiplier(data, i, SUPERVISED); + ec.weight = old_weight * weight_multiplier; data.all->cost_sensitive->learn(ec, i); - - ec.weight = 1; } + ec.weight = old_weight; } //Requires the csl's cost array to have num_actions elements @@ -569,11 +585,7 @@ void learn_bandit(cbify& data, base_learner& base, example& ec) float old_weight = ec.weight; for (uint32_t i = 0; i < data.choices_lambda; i++) { - float weight_multiplier; - if (data.lambdas[i] >= 0.5) - weight_multiplier = 1; - else - 
weight_multiplier = data.lambdas[i] / (1-data.lambdas[i]); + float weight_multiplier = compute_weight_multiplier(data, i, BANDIT); if (data.weighting_scheme == INSTANCE_WT) ec.weight = old_weight * weight_multiplier; From 7240acb970c572065c87043659f9fa25522cca9b Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 30 Apr 2018 00:40:41 -0400 Subject: [PATCH 059/127] make adf modular --- vowpalwabbit/cbify.cc | 261 +++++++++++++++++++++++++----------------- 1 file changed, 158 insertions(+), 103 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 6f8a8f56a14..1bf09c5c07b 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -249,10 +249,10 @@ void finish(cbify& data) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - VW::dealloc_example(CB::cb_label.delete_label, data.adf_data.ecs[a]); + //VW::dealloc_example(CB::cb_label.delete_label, data.adf_data.ecs[a]); data.adf_data.ecs[a].pred.a_s.delete_v(); } - VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.empty_example); + //VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.empty_example); data.adf_data.empty_example->pred.a_s.delete_v(); free(data.adf_data.ecs); @@ -690,86 +690,185 @@ size_t predict_cs_adf(cbify& data, base_learner& base, example& ec) return best_action; } -template -void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) +size_t predict_bandit_adf(cbify& data, base_learner& base, example& ec) { - uint32_t argmin; - uint32_t best_action; example* ecs = data.adf_data.ecs; example* empty_example = data.adf_data.empty_example; - argmin = find_min(data.cumulative_costs); + uint32_t argmin = find_min(data.cumulative_costs); - //Store the multiclass input label - MULTICLASS::label_t ld = ec.l.multi; + copy_example_to_adf(data, ec); - copy_example_to_adf(data, ec); + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + base.predict(ecs[a], argmin); + } + base.predict(*empty_example, argmin); - if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly + // get output scores + auto& out_ec = data.adf_data.ecs[0]; + uint32_t idx = data.mwt_explorer->Choose_Action( + *data.generic_explorer, + StringUtils::to_string(data.example_counter++), out_ec) - 1; + + return idx; + +} + +void multiclass_to_cs_adf(cbify& data, COST_SENSITIVE::label* csls, size_t corrupted_label) +{ + for (size_t a = 0; a < data.adf_data.num_actions; ++a) { + csls[a].costs[0].class_index = a+1; + csls[a].costs[0].x = loss(data, corrupted_label, a+1); + } - best_action = predict_sublearner(data, base, argmin); +} - //data.all->cost_sensitive->predict(ec,argmin); - //generate cost-sensitive label - // ecs[a].weight *= 1; - // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; +void generate_corrupted_cs_adf(cbify& data, example& ec, MULTICLASS::label_t ld) +{ + //suppose copy_example_data has already been called + example* ecs = data.adf_data.ecs; + example* empty_example = data.adf_data.empty_example; - COST_SENSITIVE::label* csls = data.csls; - COST_SENSITIVE::label* csl_empty = data.csl_empty; - CB::label* cbls = data.cbls; - CB::label* cbl_empty = data.cbl_empty; + size_t corrupted_label = corrupt_action(ld.label, data, SUPERVISED); + + //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) + COST_SENSITIVE::label* csls = data.csls; + COST_SENSITIVE::label* csl_empty = data.csl_empty; - size_t corrupted_label = corrupt_action(ld.label, data, SUPERVISED); + multiclass_to_cs_adf(data, 
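+  // (multiclass_to_cs_adf, defined above, fills csls[a].costs[0] with
+  // class_index a+1 and cost loss(data, corrupted_label, a+1), so the
+  // possibly-corrupted warm-start label reaches the CSOAA oracle as a
+  // full cost vector)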
csls, corrupted_label); - if (data.ind_supervised) + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + ecs[a].l.cs = csls[a]; + + empty_example->l.cs = *csl_empty; + +} + +void learn_cs_adf(cbify& data, example& ec) +{ + example* ecs = data.adf_data.ecs; + example* empty_example = data.adf_data.empty_example; + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.old_weights[a] = ecs[a].weight; + + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + float weight_multiplier = compute_weight_multiplier(data, i, SUPERVISED); + for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - float weight_multiplier; - if (data.lambdas[i] >= 0.5) - weight_multiplier = (1 - data.lambdas[i]) / data.lambdas[i]; - else - weight_multiplier = 1; + ecs[a].weight = data.old_weights[a] * weight_multiplier; + data.all->cost_sensitive->learn(ecs[a],i); + } + data.all->cost_sensitive->learn(*empty_example,i); + } - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - data.old_weights[a] = ecs[a].weight; + //Seems like we don't need to set the weights back as this example will be + //discarded anyway + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + ecs[a].weight = data.old_weights[a]; +} - csls[a].costs[0].class_index = a+1; - csls[a].costs[0].x = loss(data, corrupted_label, a+1); +void generate_corrupt_cb_adf(cbify& data, example& out_ec, CB::cb_class& cl, MULTICLASS::label_t& ld, size_t idx) +{ + cl.action = out_ec.pred.a_s[idx].action + 1; + cl.probability = out_ec.pred.a_s[idx].score; - cbls[a] = ecs[a].l.cb; - ecs[a].l.cs = csls[a]; + if(!cl.action) + THROW("No action with non-zero probability found!"); - ecs[a].weight *= weight_multiplier; + size_t corrupted_label = corrupt_action(ld.label, data, BANDIT); + cl.cost = loss(data, corrupted_label, cl.action); - data.all->cost_sensitive->learn(ecs[a],i); - } - *cbl_empty = empty_example->l.cb; - empty_example->l.cs = *csl_empty; - data.all->cost_sensitive->learn(*empty_example,i); +} - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - ecs[a].l.cb = cbls[a]; - ecs[a].weight = data.old_weights[a]; - } +void learn_bandit_adf(cbify& data, base_learner& base, example& ec) +{ + example* ecs = data.adf_data.ecs; + example* empty_example = data.adf_data.empty_example; - empty_example->l.cb = *cbl_empty; - } + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.old_weights[a] = ecs[a].weight; + + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + float weight_multiplier = compute_weight_multiplier(data, i, BANDIT); + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + if (data.weighting_scheme == INSTANCE_WT) + ecs[a].weight = data.old_weights[a] * weight_multiplier; + else + ecs[a].weight = data.old_weights[a] * weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); + + base.learn(ecs[a], i); } + base.learn(*empty_example, i); + } + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + ecs[a].weight = data.old_weights[a]; +} + +void accumulate_variance_adf(cbify& data, base_learner& base, example& ec) +{ + auto& out_ec = data.adf_data.ecs[0]; + + data.a_s.erase(); + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.a_s.push_back({out_ec.pred.a_s[a].action, out_ec.pred.a_s[a].score}); + + size_t pred_best_approx = predict_cs_adf(data, base, ec); + float temp_variance; + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + if (pred_best_approx == data.a_s[a].action + 1) + 
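+      // data.a_s[a].score is the probability the exploration distribution
+      // assigned to the action that the approximately-best policy
+      // (pred_best_approx) picks; its reciprocal is the per-round variance
+      // proxy accumulated below into data.cumulative_variance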
temp_variance = 1.0 / data.a_s[a].score; + + data.cumulative_variance += temp_variance; + + //cout<<"variance at bandit round "<< data.bandit_iter << " = " << temp_variance << endl; + //cout< +void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) +{ + + //Store the multiclass input label + MULTICLASS::label_t ld = ec.l.multi; + + if (data.warm_start_iter == 0 && data.bandit_iter == 0) + setup_lambdas(data, ec); + + //copy_example_to_adf(data, ec); + + if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly + { + + //best_action = predict_sublearner(data, base, argmin); + uint32_t best_action = predict_cs_adf(data, base, ec); + + //data.all->cost_sensitive->predict(ec,argmin); + + //generate cost-sensitive label + // ecs[a].weight *= 1; + // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; + + generate_corrupted_cs_adf(data, ec, ld); + + if (data.ind_supervised) + learn_cs_adf(data, ec); + ec.pred.multiclass = best_action; ec.l.multi = ld; ec.weight = 0; //a hack here - allocated memories not deleted - //example* ecp = calloc_or_throw(1); - //VW::copy_example_data(false, ecp, &ec); - //ecp->l.multi.label = corrupted_label; - //ecp->l.multi.weight = 1.0; - //to be corrected if (data.validation_method == SUPERVISED_VALI) VW::copy_example_data(false, &data.supervised_validation[data.warm_start_iter], &ec, 0, MULTICLASS::mc_label.copy_label); @@ -779,30 +878,13 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) } else if (data.bandit_iter < data.bandit_period) // call the bandit learner { - if (data.bandit_iter == 0) - setup_lambdas(data, ec); - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - base.predict(ecs[a], argmin); - } - base.predict(*empty_example, argmin); - - // get output scores + //size_t pred_pi = predict_cs_adf(data, base, ec); + uint32_t idx = predict_bandit_adf(data, base, ec); auto& out_ec = data.adf_data.ecs[0]; - uint32_t idx = data.mwt_explorer->Choose_Action( - *data.generic_explorer, - StringUtils::to_string(data.example_counter++), out_ec) - 1; CB::cb_class cl; - cl.action = out_ec.pred.a_s[idx].action + 1; - cl.probability = out_ec.pred.a_s[idx].score; - - if(!cl.action) - THROW("No action with non-zero probability found!"); - size_t corrupted_label = corrupt_action(ld.label, data, BANDIT); - cl.cost = loss(data, corrupted_label, cl.action); + generate_corrupt_cb_adf(data, out_ec, cl, ld, idx); // accumulate the cumulative costs of lambdas accumulate_costs_ips_adf(data, ec, cl, base); @@ -813,36 +895,9 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.ind_bandit) - { - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - float weight_multiplier; - - if (data.lambdas[i] >= 0.5) - weight_multiplier = 1; - else - weight_multiplier = data.lambdas[i] / (1-data.lambdas[i]); - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - data.old_weights[a] = ecs[a].weight; - - if (data.weighting_scheme == INSTANCE_WT) - ecs[a].weight *= weight_multiplier; - else - ecs[a].weight *= weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); - - base.learn(ecs[a], i); - } - base.learn(*empty_example, i); - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - ecs[a].weight = data.old_weights[a]; - } - } + learn_bandit_adf(data, base, ec); - size_t pred_best_approx = predict_cs_adf(data, base, ec); - data.cumulative_variance += 1.0 / out_ec.pred.a_s[pred_best_approx-1].score; + 
accumulate_variance_adf(data, base, ec); ec.pred.multiclass = cl.action; From 621b39202503d58904da880a27ff888800fd6311 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 30 Apr 2018 00:47:02 -0400 Subject: [PATCH 060/127] the version where there is an error on memory free --- vowpalwabbit/cbify.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 1bf09c5c07b..fe61f95787f 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -682,7 +682,6 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) size_t predict_cs_adf(cbify& data, base_learner& base, example& ec) { uint32_t argmin = find_min(data.cumulative_costs); - copy_example_to_adf(data, ec); size_t best_action = predict_sublearner(data, base, argmin); @@ -845,7 +844,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.warm_start_iter == 0 && data.bandit_iter == 0) setup_lambdas(data, ec); - //copy_example_to_adf(data, ec); + copy_example_to_adf(data, ec); if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly { From d1fbfd7d7c0f7c0991596d8e414771a52df2cf5d Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 30 Apr 2018 01:24:56 -0400 Subject: [PATCH 061/127] finished cleanup (need to double check the cb label swap in the adf case) --- vowpalwabbit/cbify.cc | 65 ++++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index fe61f95787f..036afc6355e 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -597,6 +597,16 @@ void learn_bandit(cbify& data, base_learner& base, example& ec) ec.weight = old_weight; } +void accumulate_variance(cbify& data, example& ec) +{ + size_t pred_best_approx = predict_cs(data, ec); + data.cumulative_variance += 1.0 / data.a_s[pred_best_approx-1].score; + + //cout<<"variance at bandit round "<< data.bandit_iter << " = " << 1.0 / data.a_s[pred_best_approx-1].score << endl; + //cout< void predict_or_learn(cbify& data, base_learner& base, example& ec) @@ -651,11 +661,7 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) data.a_s.erase(); data.a_s = ec.pred.a_s; - size_t pred_best_approx = predict_cs(data, ec); - data.cumulative_variance += 1.0 / data.a_s[pred_best_approx-1].score; - - //cout<<"variance at bandit round "<< data.bandit_iter << " = " << 1.0 / data.a_s[pred_best_approx-1].score << endl; - //cout<costs = data.adf_data.empty_example->l.cb.costs; + if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly { //best_action = predict_sublearner(data, base, argmin); - uint32_t best_action = predict_cs_adf(data, base, ec); + uint32_t best_action = predict_cs_adf(data, base); //data.all->cost_sensitive->predict(ec,argmin); @@ -858,10 +866,10 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) // ecs[a].weight *= 1; // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; - generate_corrupted_cs_adf(data, ec, ld); + generate_corrupted_cs_adf(data, ld); if (data.ind_supervised) - learn_cs_adf(data, ec); + learn_cs_adf(data); ec.pred.multiclass = best_action; ec.l.multi = ld; @@ -878,12 +886,11 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) else if (data.bandit_iter < data.bandit_period) // call the bandit learner { //size_t pred_pi = predict_cs_adf(data, base, ec); - uint32_t idx = predict_bandit_adf(data, base, ec); 
- auto& out_ec = data.adf_data.ecs[0]; + uint32_t idx = predict_bandit_adf(data, base); CB::cb_class cl; - generate_corrupt_cb_adf(data, out_ec, cl, ld, idx); + generate_corrupt_cb_adf(data, cl, ld, idx); // accumulate the cumulative costs of lambdas accumulate_costs_ips_adf(data, ec, cl, base); @@ -894,9 +901,9 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.ind_bandit) - learn_bandit_adf(data, base, ec); + learn_bandit_adf(data, base); - accumulate_variance_adf(data, base, ec); + accumulate_variance_adf(data, base); ec.pred.multiclass = cl.action; @@ -913,6 +920,10 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) ec.pred.multiclass = 0; ec.weight = 0; } + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.adf_data.ecs[a].l.cb.costs = data.cbls[a].costs; + data.adf_data.empty_example->l.cb.costs = data.cbl_empty->costs; } void init_adf_data(cbify& data, const size_t num_actions) @@ -933,6 +944,8 @@ void init_adf_data(cbify& data, const size_t num_actions) data.csls = calloc_or_throw(num_actions); + + data.csl_empty = calloc_or_throw(1); data.cbls = calloc_or_throw(num_actions); data.cbl_empty = calloc_or_throw(1); @@ -940,13 +953,15 @@ void init_adf_data(cbify& data, const size_t num_actions) data.old_weights = calloc_or_throw(num_actions); + data.csl_empty->costs = v_init(); data.csl_empty->costs.push_back({0, 0, 0, 0}); data.csl_empty->costs[0].class_index = 0; data.csl_empty->costs[0].x = FLT_MAX; for (size_t a = 0; a < num_actions; ++a) { - data.csls[a].costs.push_back({0, 0, 0, 0}); + data.csls[a].costs = v_init(); + data.csls[a].costs.push_back({0, a+1, 0, 0}); } } From 6bddc96aa77516a54ff2d1e492b0be9b0ad42033 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 30 Apr 2018 15:11:05 -0400 Subject: [PATCH 062/127] adjusted the output of the script so that it is more systematic --- scripts/alg_comparison.py | 24 ++++----- scripts/plot_warm_start.py | 103 +++++++++++++++++++++++++------------ 2 files changed, 81 insertions(+), 46 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index 40bdc1b0972..4a20fb48ce1 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -35,7 +35,7 @@ def parse_sum_file(sum_filename): def get_z_scores(errors_1, errors_2, sizes): z_scores = [] for i in range(len(errors_1)): - print i + #print i z_scores.append( z_score(errors_1[i], errors_2[i], sizes[i]) ) return z_scores @@ -104,7 +104,7 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): all_results = None for i in range(len(dss)): - print 'dataset name: ', dss[i] + print 'result file name: ', dss[i] result = parse_sum_file(results_dir + dss[i]) if (i == 0): @@ -112,13 +112,11 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): else: all_results = all_results.append(result) - - #if i >= 331 and i <= 340: - # print 'result:', result - # print 'all_results:', all_results - print all_results + #if i >= 331 and i <= 340: + # print 'result:', result + # print 'all_results:', all_results #result = parse_sum_file(results_dir + '400of600.sum') @@ -142,12 +140,12 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): results_supervised = all_results[supervised_only].tolist() dataset_sizes = all_results[sizes].tolist() - print alg1 - print results_alg1 + #print alg1 + #print results_alg1 # compare combined w/ bandit plot_comparison(results_alg1, results_bandit, dataset_sizes, alg1 + ' vs ' + 'bandit only', results_dir + alg1 + ' vs ' + 'bandit 
only' + '.png') - plot_comparison(results_alg1, results_supervised, dataset_sizes, alg1 + ' vs ' + 'supervised only', results_dir + alg1 + ' vs ' + 'supervised only' + '.png') - plot_comparison(results_alg2, results_bandit, dataset_sizes, alg2 + ' vs ' + 'bandit only', results_dir + alg2 + ' vs ' + 'bandit only' + '.png') - plot_comparison(results_alg2, results_supervised, dataset_sizes, alg2 + ' vs ' + 'supervised only', results_dir + alg2 + ' vs ' + 'supervised only' + '.png') - plot_comparison(results_alg1, results_alg2, dataset_sizes, alg1 + ' vs ' + alg2, results_dir+alg1 + ' vs ' + alg2 + '.png') + #plot_comparison(results_alg1, results_supervised, dataset_sizes, alg1 + ' vs ' + 'supervised only', results_dir + alg1 + ' vs ' + 'supervised only' + '.png') + #plot_comparison(results_alg2, results_bandit, dataset_sizes, alg2 + ' vs ' + 'bandit only', results_dir + alg2 + ' vs ' + 'bandit only' + '.png') + #plot_comparison(results_alg2, results_supervised, dataset_sizes, alg2 + ' vs ' + 'supervised only', results_dir + alg2 + ' vs ' + 'supervised only' + '.png') + #plot_comparison(results_alg1, results_alg2, dataset_sizes, alg1 + ' vs ' + alg2, results_dir+alg1 + ' vs ' + alg2 + '.png') diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 1e806f512e3..31c0880b379 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -116,10 +116,12 @@ def plot_errors(mod): avg_loss = avg_loss[len_avg_loss-1] avg_loss = [avg_loss for i in range(len_avg_loss)] - line = plt.plot(wt, avg_loss, mod.plot_color, label=(mod.plot_label)) + #line = plt.plot(wt, avg_loss, mod.plot_color, label=(mod.plot_label)) avg_error_value = avg_error(mod) + actual_var_value = actual_var(mod) + ideal_var_value = ideal_var(mod) - return avg_error_value + return avg_error_value, actual_var_value, ideal_var_value def gen_comparison_graph(mod): @@ -132,23 +134,27 @@ def gen_comparison_graph(mod): #config_name = str(mod.dataset) + '_' + str(mod.fprob1)+'_'+str(mod.fprob2)+'_'+str(mod.warm_start)+'_'+str(mod.bandit)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) - config_name = str(mod.dataset) + '_'+str(mod.warm_start)+ '_' + str(mod.cb_type) + config_name = str(mod.dataset) + ' ' \ + + str(mod.corrupt_type_supervised) + ' ' +str(mod.corrupt_prob_supervised) \ + + ' ' + str(mod.corrupt_type_bandit) + ' ' + str(mod.corrupt_prob_bandit) \ + + ' ' + str(mod.warm_start) + ' ' + str(mod.bandit) + ' ' + str(mod.cb_type) \ + + ' ' + str(mod.validation_method) + ' ' + str(mod.weighting_scheme) \ + + ' ' + str(mod.lambda_scheme) + ' ' + str(mod.choices_lambda) \ + + ' ' + str(mod.no_supervised) + ' ' + str(mod.no_bandit) # combined approach, epsilon - mod.choices_lambda = 2 - mod.weighting_scheme = 1 - mod.lambda_scheme = 3 - mod.no_bandit = False - mod.no_supervised = False - mod.no_exploration = False - mod.cover_on = False - mod.epsilon_on = True - mod.plot_color = 'r' - mod.plot_flat = False - mod.vw_output_filename = mod.results_path+config_name+'zeroone'+'.txt' - mod.plot_label = 'zeroone only' - avg_error_comb_1 = plot_errors(mod) + mod.vw_output_filename = mod.results_path+config_name+'.txt' + avg_error_value, actual_var_value, ideal_var_value = plot_errors(mod) + + result = str(avg_error_value) + ' ' + str(actual_var_value) + ' ' + str(ideal_var_value) + summary_file = open(mod.summary_file_name, 'a') + summary_file.write(config_name + ' ' + result + '\n') + summary_file.close() + print('') + + + ''' # combined approach, cover # combined approach, per-dataset weighting 
#mod.choices_lambda = 1 @@ -204,13 +210,6 @@ def gen_comparison_graph(mod): mod.plot_label = 'Supervised only' avg_error_sup_only = plot_errors(mod) - - summary_file = open(mod.summary_file_name, 'a') - summary_file.write(config_name + ' ' + str(avg_error_comb_1) + ' ' + str(avg_error_comb_2) + ' ' + str(avg_error_band_only) + ' ' + str(avg_error_sup_only) + ' ' + str(mod.bandit) + '\n') - summary_file.close() - print('') - - pylab.legend() pylab.xlabel('#bandit examples') pylab.ylabel('Progressive validation error') @@ -220,7 +219,7 @@ def gen_comparison_graph(mod): plt.gcf().clear() #plt.show() - + ''' def ds_files(ds_path): prevdir = os.getcwd() @@ -239,7 +238,7 @@ def get_num_classes(ds): def ds_per_task(mod): # put dataset name to the last coordinate so that the task workloads tend to be # allocated equally - config_all = [item for item in product(mod.choices_cb_types, mod.choices_warm_start_frac, mod.dss)] + config_all = [item for item in product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised, mod.choices_cb_types, mod.choices_warm_start_frac, mod.choices_no_supervised, mod.choices_no_bandit, mod.dss)] config_task = [] print len(config_all) for i in range(len(config_all)): @@ -256,11 +255,20 @@ def get_num_lines(dataset_name): return int(output) def avg_error(mod): + return vw_output_extract(mod, 'average loss') + +def actual_var(mod): + return vw_output_extract(mod, 'Measured average variance') + +def ideal_var(mod): + return vw_output_extract(mod, 'Ideal average variance') + +def vw_output_extract(mod, pattern): #print mod.vw_output_filename vw_output = open(mod.vw_output_filename, 'r') vw_output_text = vw_output.read() #print vw_output_text - rgx = re.compile('^average loss = (.*)$', flags=re.M) + rgx = re.compile('^'+pattern+' = (.*)$', flags=re.M) errs = rgx.findall(vw_output_text) if not errs: @@ -275,10 +283,23 @@ def avg_error(mod): def main_loop(mod): mod.summary_file_name = mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum' summary_file = open(mod.summary_file_name, 'w') - summary_file.write('dataset' + ' ' + 'zeroone_only' + ' ' + 'central_minimax_zeroone' + ' ' + 'bandit_only' + ' ' + 'supervised_only' + ' ' + 'size' + '\n') + + summary_header = 'str(mod.dataset)' + ' ' \ + + 'str(mod.corrupt_type_supervised)' + ' ' + 'str(mod.corrupt_prob_supervised)' \ + + ' ' + 'str(mod.corrupt_type_bandit)' + ' ' + 'str(mod.corrupt_prob_bandit)' \ + + ' ' + 'str(mod.warm_start)' + ' ' + 'str(mod.bandit)' + ' ' + 'str(mod.cb_type)' \ + + ' ' + 'str(mod.validation_method)' + ' ' + 'str(mod.weighting_scheme)' \ + + ' ' + 'str(mod.lambda_scheme)' + ' ' + 'str(mod.choices_lambda)' \ + + ' ' + 'str(mod.no_supervised)' + ' ' + 'str(mod.no_bandit)' \ + + ' ' + 'str(avg_error_value)' + ' ' + 'str(actual_var_value)' \ + + ' ' + 'str(ideal_var_value)' + + summary_file.write(summary_header+'\n') + + #summary_file.write('dataset' + ' ' + 'zeroone_only' + ' ' + 'central_minimax_zeroone' + ' ' + 'bandit_only' + ' ' + 'supervised_only' + ' ' + 'size' + '\n') summary_file.close() - for mod.cb_type, mod.warm_start_frac, mod.dataset in mod.config_task: + for mod.corrupt_type_supervised, mod.corrupt_prob_supervised, mod.cb_type, mod.warm_start_frac, mod.no_supervised, mod.no_bandit, mod.dataset in mod.config_task: gen_comparison_graph(mod) @@ -326,7 +347,8 @@ def main_loop(mod): #mod.choices_warm_start_frac = [0.01 * pow(2, i) for i in range(1)] #mod.choices_warm_start_frac = [0.01, 0.03, 0.1, 0.3] #mod.choices_warm_start_frac = [0.03] - mod.choices_warm_start_frac 
= [args.warm_start_fraction] + mod.choices_warm_start_frac = [0.01, 0.02, 0.04, 0.08, 0.16, 0.32] + #mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] @@ -337,6 +359,8 @@ def main_loop(mod): #choices_cb_types = ['mtr', 'ips'] #mod.choices_cb_types = ['mtr', 'ips'] mod.choices_cb_types = ['mtr'] + mod.choices_no_supervised = [False, True] + mod.choices_no_bandit = [False, True] #choices_choices_lambda = [pow(2,i) for i in range(10,11)] #mod.choices_choices_lambda = [i for i in range(1,3)] #mod.choices_choices_lambda = [i for i in range(1,2)] @@ -344,16 +368,29 @@ def main_loop(mod): #[i for i in range(10,11)] #mod.corrupt_type_supervised = 2 #mod.corrupt_prob_supervised = 0.3 - mod.corrupt_type_supervised = 1 + mod.choices_corrupt_type_supervised = [1,2] + #mod.choices_corrupt_type_supervised = [2] #mod.corrupt_prob_supervised = 0.3 - mod.corrupt_prob_supervised = args.corrupt_prob_supervised + mod.choices_corrupt_prob_supervised = [0,0.3] + #mod.choices_corrupt_prob_supervised = [0.3] mod.corrupt_type_bandit = 1 - mod.corrupt_prob_bandit = args.corrupt_prob_bandit + mod.corrupt_prob_bandit = 0 - mod.validation_method = 2 + mod.validation_method = 1 mod.epsilon = 0.05 + mod.choices_lambda = 2 + mod.weighting_scheme = 1 + mod.lambda_scheme = 3 + mod.no_bandit = False + mod.no_supervised = False + mod.no_exploration = False + mod.cover_on = False + mod.epsilon_on = True + mod.plot_color = 'r' + mod.plot_flat = False + #for correctness test #mod.choices_warm_start = [20] #choices_fprob1 = [0.1] From f529db02240c54c6ebf60153f8837b53ca4bd601 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 30 Apr 2018 17:43:19 -0400 Subject: [PATCH 063/127] a more complete summary file --- scripts/plot_warm_start.py | 224 +++++++++++++++---------------------- 1 file changed, 92 insertions(+), 132 deletions(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 31c0880b379..764ed4855b0 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -24,35 +24,60 @@ def collect_stats(mod): # num_rows = mod.bandit / mod.progress #print vw_output_filename + #avg_error_value = avg_error(mod) + mod.actual_var = actual_var(mod) + mod.ideal_var = ideal_var(mod) + avg_loss = [] last_loss = [] wt = [] end_table = False f = open(vw_output_filename, 'r') - linenumber = 0 + #linenumber = 0 + i = 0 for line in f: - #if not line.strip(): - # end_table = True - #if linenumber >= 9 and (not end_table): vw_progress_pattern = '\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+[a-zA-Z0-9]+\s+[a-zA-Z0-9]+\s+\d+' matchobj = re.match(vw_progress_pattern, line) if matchobj: - items = line.split() - avg_loss.append(float(items[0])) - last_loss.append(float(items[1])) - wt.append(float(items[3])) - linenumber += 1 + avg_loss_str, last_loss_str, counter_str, weight_str, curr_label_str, \ + curr_pred_str, curr_feat_str = line.split() + + avg_loss.append(float(avg_loss_str)) + last_loss.append(float(last_loss_str)) + wt.append(float(weight_str)) + + mod.avg_loss = float(avg_loss_str) + mod.bandit = float(weight_str) + + for mod.ratio in mod.critical_size_ratios: + if mod.bandit >= 0.99 * mod.warm_start * mod.ratio and \ + mod.bandit <= 1.01 * mod.warm_start * mod.ratio: + record_result(mod) + + + #linenumber += 1 f.close() - if len(avg_loss) == 0: - avg_loss = [0] - last_loss = [0] - wt = [0] + #if len(avg_loss) == 0: + # avg_loss = [0] + # last_loss = [0] + # wt = [0] + #return avg_loss, last_loss, wt + +def record_result(mod): + 
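+    # Appends one row to this task's .sum file: the shared problem
+    # parameters (dataset, corruption settings, warm-start size), the
+    # number of bandit examples seen so far and its ratio to the warm
+    # start, the algorithm parameters, and the measured statistics
+    # (avg_loss, actual_var, ideal_var). Called from collect_stats each
+    # time the bandit count crosses one of the critical size ratios.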
problem_params_trailer = [mod.bandit, mod.ratio] + config_name = disperse(mod.problem_params + problem_params_trailer + mod.alg_params, ' ') + + list_results = [mod.avg_loss, mod.actual_var, mod.ideal_var] + result = disperse(list_results, ' ') + + summary_file = open(mod.summary_file_name, 'a') + summary_file.write(config_name + ' ' + result + '\n') + summary_file.close() - return avg_loss, last_loss, wt def execute_vw(mod): @@ -64,7 +89,7 @@ def execute_vw(mod): if mod.cover_on: alg_option += ' --cover 5 --psi 0.01 --nounif ' - mod.cb_type = 'dr' + #mod.cb_type = 'dr' if mod.epsilon_on: alg_option += ' --epsilon ' + str(mod.epsilon) + ' ' if mod.no_bandit: @@ -76,14 +101,6 @@ def execute_vw(mod): #if mod.cb_type == 'mtr': # mod.adf_on = True; - - - # using two datasets - #cmd_catfile = '( head -n ' + str(mod.warm_start) + ' ' + mod.dataset_supervised + ';' + ' head -n ' + str(mod.bandit) + ' ' + mod.dataset_bandit + '; )' - # using only one dataset - #cmd_catfile = '( head -n ' + str(mod.warm_start + mod.bandit) + ' ' + mod.dataset + '; )' - #cmd_catfile = '( cat ' + mod.ds_path+mod.dataset + '; )' - cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option + ' --progress ' + str(mod.progress) \ + ' -d ' + mod.ds_path + mod.dataset \ + ' --corrupt_type_supervised ' + str(mod.corrupt_type_supervised) \ @@ -95,8 +112,6 @@ def execute_vw(mod): + ' --lambda_scheme ' + str(mod.lambda_scheme) cmd = cmd_vw - #cmd = cmd_catfile + ' | ' + cmd_vw - print cmd f = open(mod.vw_output_filename, 'w') @@ -105,11 +120,9 @@ def execute_vw(mod): process.wait() f.close() +''' def plot_errors(mod): - - execute_vw(mod) - avg_loss, last_loss, wt = collect_stats(mod) - + #avg_loss, last_loss, wt = if mod.plot_flat: # for supervised only, we simply plot a horizontal line using the last point len_avg_loss = len(avg_loss) @@ -122,105 +135,42 @@ def plot_errors(mod): ideal_var_value = ideal_var(mod) return avg_error_value, actual_var_value, ideal_var_value +''' + +def disperse(l, ch): + s = '' + for item in l: + s += str(item) + s += ch + return s def gen_comparison_graph(mod): mod.num_lines = get_num_lines(mod.ds_path+mod.dataset) - mod.warm_start = int(math.ceil(mod.warm_start_frac * mod.num_lines)) + mod.progress = int(math.ceil(float(mod.num_lines) / float(mod.num_checkpoints))) + mod.warm_start = mod.warm_start_multiplier * mod.progress mod.bandit = mod.num_lines - mod.warm_start - mod.progress = int(math.ceil(float(mod.bandit) / float(mod.num_checkpoints))) mod.num_classes = get_num_classes(mod.dataset) - #config_name = str(mod.dataset) + '_' + str(mod.fprob1)+'_'+str(mod.fprob2)+'_'+str(mod.warm_start)+'_'+str(mod.bandit)+ '_' + str(mod.cb_type) + '_' + str(mod.choices_lambda) + mod.problem_params = [mod.dataset, mod.num_classes, mod.num_lines, \ + mod.corrupt_type_supervised, mod.corrupt_prob_supervised, \ + mod.corrupt_type_bandit, mod.corrupt_prob_bandit, \ + mod.warm_start] - config_name = str(mod.dataset) + ' ' \ - + str(mod.corrupt_type_supervised) + ' ' +str(mod.corrupt_prob_supervised) \ - + ' ' + str(mod.corrupt_type_bandit) + ' ' + str(mod.corrupt_prob_bandit) \ - + ' ' + str(mod.warm_start) + ' ' + str(mod.bandit) + ' ' + str(mod.cb_type) \ - + ' ' + str(mod.validation_method) + ' ' + str(mod.weighting_scheme) \ - + ' ' + str(mod.lambda_scheme) + ' ' + str(mod.choices_lambda) \ - + ' ' + str(mod.no_supervised) + ' ' + 
str(mod.no_bandit) + mod.alg_params = [ mod.cb_type, \ + mod.validation_method, mod.weighting_scheme, \ + mod.lambda_scheme, mod.choices_lambda, \ + mod.no_supervised, mod.no_bandit] - # combined approach, epsilon - mod.vw_output_filename = mod.results_path+config_name+'.txt' - avg_error_value, actual_var_value, ideal_var_value = plot_errors(mod) + mod.vw_output_filename = mod.results_path + disperse(mod.problem_params+mod.alg_params, '_') + '.txt' - result = str(avg_error_value) + ' ' + str(actual_var_value) + ' ' + str(ideal_var_value) + #plot_errors(mod) + execute_vw(mod) + collect_stats(mod) - summary_file = open(mod.summary_file_name, 'a') - summary_file.write(config_name + ' ' + result + '\n') - summary_file.close() print('') - - ''' - # combined approach, cover - # combined approach, per-dataset weighting - #mod.choices_lambda = 1 - #mod.no_bandit = False - #mod.no_supervised = False - #mod.no_exploration = False - #mod.cover_on = True - #mod.vw_output_filename = mod.results_path+config_name+'choices_lambda='+str(mod.choices_lambda)+'.txt' - - mod.choices_lambda = 5 - mod.weighting_scheme = 1 - mod.lambda_scheme = 3 - mod.no_bandit = False - mod.no_supervised = False - mod.no_exploration = False - mod.cover_on = False - mod.epsilon_on = True - #'Combined approach, lambda=5' - mod.plot_color = 'm' - mod.plot_flat = False - mod.vw_output_filename = mod.results_path+config_name+'central_minimax_zeroone'+'.txt' - mod.plot_label = 'Central lambda: minimax, forcing zeroone' - avg_error_comb_2 = plot_errors(mod) - - - # bandit only approach - mod.choices_lambda = 1 - mod.weighting_scheme = 1 - mod.lambda_scheme = 1 - mod.no_bandit = False - mod.no_supervised = True - mod.no_exploration = False - mod.cover_on = False - mod.epsilon_on = True - mod.plot_color = 'b' - mod.plot_flat = False - mod.vw_output_filename = mod.results_path+config_name+'_no_supervised'+'.txt' - mod.plot_label = 'Bandit only' - avg_error_band_only = plot_errors(mod) - - # supervised only approach - mod.choices_lambda = 1 - mod.weighting_scheme = 1 - mod.lambda_scheme = 1 - mod.no_bandit = True - mod.no_supervised = False - mod.no_exploration = False - mod.cover_on = False - mod.epsilon_on = True - mod.plot_color = 'g' - mod.plot_flat = True - mod.vw_output_filename = mod.results_path+config_name+'_no_bandit'+'.txt' - mod.plot_label = 'Supervised only' - avg_error_sup_only = plot_errors(mod) - - pylab.legend() - pylab.xlabel('#bandit examples') - pylab.ylabel('Progressive validation error') - pylab.title(mod.dataset + ' warm_start = ' + str(mod.warm_start) + ' cb_type = ' + mod.cb_type) - #pylab.title('Source 1 feature flipping prob = ' + str(mod.fprob1) + '; source 2 feature flipping prob = ' + str(mod.fprob2) + 'cb_type = '+ mod.cb_type ) - pylab.savefig(mod.results_path+config_name +'.png') - plt.gcf().clear() - - #plt.show() - ''' - def ds_files(ds_path): prevdir = os.getcwd() os.chdir(ds_path) @@ -238,7 +188,14 @@ def get_num_classes(ds): def ds_per_task(mod): # put dataset name to the last coordinate so that the task workloads tend to be # allocated equally - config_all = [item for item in product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised, mod.choices_cb_types, mod.choices_warm_start_frac, mod.choices_no_supervised, mod.choices_no_bandit, mod.dss)] + config_baselines_raw = list(product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised, mod.choices_cb_types, mod.dss, mod.warm_start_multipliers, [1], [False, True], [False, True])) + + config_baselines = 
filter(lambda (x1, x2, x3, x4, x5, x6, x7, x8): x7 == True or x8 == True, config_baselines_raw) + + + config_algs = list(product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised, mod.choices_cb_types, mod.dss, mod.warm_start_multipliers, mod.choices_choices_lambda, [False], [False])) + + config_all = config_baselines + config_algs config_task = [] print len(config_all) for i in range(len(config_all)): @@ -284,22 +241,25 @@ def main_loop(mod): mod.summary_file_name = mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum' summary_file = open(mod.summary_file_name, 'w') - summary_header = 'str(mod.dataset)' + ' ' \ - + 'str(mod.corrupt_type_supervised)' + ' ' + 'str(mod.corrupt_prob_supervised)' \ - + ' ' + 'str(mod.corrupt_type_bandit)' + ' ' + 'str(mod.corrupt_prob_bandit)' \ - + ' ' + 'str(mod.warm_start)' + ' ' + 'str(mod.bandit)' + ' ' + 'str(mod.cb_type)' \ - + ' ' + 'str(mod.validation_method)' + ' ' + 'str(mod.weighting_scheme)' \ - + ' ' + 'str(mod.lambda_scheme)' + ' ' + 'str(mod.choices_lambda)' \ - + ' ' + 'str(mod.no_supervised)' + ' ' + 'str(mod.no_bandit)' \ - + ' ' + 'str(avg_error_value)' + ' ' + 'str(actual_var_value)' \ - + ' ' + 'str(ideal_var_value)' + list_header = ['dataset', 'num_classes', 'total_size', \ + 'corrupt_type_supervised', 'corrupt_prob_supervised', \ + 'corrupt_type_bandit', 'corrupt_prob_bandit', \ + 'warm_start_size', 'bandit_size', 'bandit_supervised_size_ratio', \ + 'cb_type', 'validation_method', 'weighting_scheme', \ + 'lambda_scheme', 'choices_lambda', \ + 'no_supervised', 'no_bandit', \ + 'avg_error', 'actual_variance', \ + 'ideal_variance'] - summary_file.write(summary_header+'\n') + summary_header = disperse(list_header, ' ') - #summary_file.write('dataset' + ' ' + 'zeroone_only' + ' ' + 'central_minimax_zeroone' + ' ' + 'bandit_only' + ' ' + 'supervised_only' + ' ' + 'size' + '\n') + summary_file.write(summary_header+'\n') summary_file.close() - for mod.corrupt_type_supervised, mod.corrupt_prob_supervised, mod.cb_type, mod.warm_start_frac, mod.no_supervised, mod.no_bandit, mod.dataset in mod.config_task: + for mod.corrupt_type_supervised, mod.corrupt_prob_supervised, \ + mod.cb_type, mod.dataset, mod.warm_start_multiplier, \ + mod.choices_lambda, \ + mod.no_supervised, mod.no_bandit in mod.config_task: gen_comparison_graph(mod) @@ -343,11 +303,11 @@ def main_loop(mod): mod.adf_on = True # use fractions instead of absolute numbers - + mod.warm_start_multipliers = [pow(2, i) for i in range(6)] #mod.choices_warm_start_frac = [0.01 * pow(2, i) for i in range(1)] #mod.choices_warm_start_frac = [0.01, 0.03, 0.1, 0.3] #mod.choices_warm_start_frac = [0.03] - mod.choices_warm_start_frac = [0.01, 0.02, 0.04, 0.08, 0.16, 0.32] + #mod.choices_warm_start_frac = [0.01, 0.02, 0.04, 0.08, 0.16, 0.32] #mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] @@ -361,7 +321,7 @@ def main_loop(mod): mod.choices_cb_types = ['mtr'] mod.choices_no_supervised = [False, True] mod.choices_no_bandit = [False, True] - #choices_choices_lambda = [pow(2,i) for i in range(10,11)] + mod.choices_choices_lambda = [2*i for i in range(1,5)] #mod.choices_choices_lambda = [i for i in range(1,3)] #mod.choices_choices_lambda = [i for i in range(1,2)] #mod.choices_choices_lambda = [1, 3, 5, 7] @@ -371,7 +331,7 @@ def main_loop(mod): mod.choices_corrupt_type_supervised = [1,2] #mod.choices_corrupt_type_supervised = [2] #mod.corrupt_prob_supervised = 0.3 - mod.choices_corrupt_prob_supervised 
= [0,0.3] + mod.choices_corrupt_prob_supervised = [0.0,0.3] #mod.choices_corrupt_prob_supervised = [0.3] mod.corrupt_type_bandit = 1 @@ -390,6 +350,7 @@ def main_loop(mod): mod.epsilon_on = True mod.plot_color = 'r' mod.plot_flat = False + mod.critical_size_ratios = [pow(2,i) for i in range(-5, 7)] #for correctness test #mod.choices_warm_start = [20] @@ -403,7 +364,6 @@ def main_loop(mod): # here, we are generating the task specific parameter settings # by first generate all parameter setting and pick every num_tasks of them mod.config_task = ds_per_task(mod) - print 'task ' + str(mod.task_id) + ' of ' + str(mod.num_tasks) + ':' #print mod.ds_task From e84c7d9bc685ddacdee55762477d110f3b28e614 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Wed, 2 May 2018 01:04:53 -0400 Subject: [PATCH 064/127] bring back the pairwise comparison plot --- scripts/alg_comparison.py | 185 ++++++++++++++++++++++++++++++++++--- scripts/plot_warm_start.py | 42 +++++---- vowpalwabbit/cbify.cc | 19 ++++ 3 files changed, 215 insertions(+), 31 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index 4a20fb48ce1..27057514b5c 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -9,6 +9,12 @@ from itertools import compress from math import sqrt import argparse +import numpy as np + + +class model: + def __init__(self): + pass # this part is changable #alg1 = 'epsilon' @@ -28,7 +34,7 @@ def sum_files(result_path): def parse_sum_file(sum_filename): f = open(sum_filename, 'r') #f.seek(0, 0) - table = pd.read_table(f, sep=' ',lineterminator='\n') + table = pd.read_table(f, sep='\s+',lineterminator='\n') return table @@ -40,6 +46,11 @@ def get_z_scores(errors_1, errors_2, sizes): return z_scores def z_score(err_1, err_2, size): + if (abs(err_1) < 1e-6 or abs(err_1) > 1-1e-6) and (abs(err_2) < 1e-6 or abs(err_2) > 1-1e-6): + return 0 + + #print err_1, err_2, size, sqrt( (err_1*(1 - err_1) + err_2*(1-err_2)) / size ) + z = (err_1 - err_2) / sqrt( (err_1*(1 - err_1) + err_2*(1-err_2)) / size ) return z #print z @@ -50,14 +61,13 @@ def is_significant(z): else: return False -def plot_comparison(errors_1, errors_2, sizes, title, filename): - print title - +def plot_comparison(errors_1, errors_2, sizes): + #print title plt.plot([0,1],[0,1]) z_scores = get_z_scores(errors_1, errors_2, sizes) sorted_z_scores = sorted(enumerate(z_scores), key=lambda x:x[1]) - for s in sorted_z_scores: - print s, is_significant(s[1]) + #for s in sorted_z_scores: + # print s, is_significant(s[1]) significance = map(is_significant, z_scores) results_signi_1 = list(compress(errors_1, significance)) @@ -69,17 +79,145 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): results_insigni_2 = list(compress(errors_2, insignificance)) plt.scatter(results_insigni_1, results_insigni_2, s=2, c='k') - plt.title(title) - pylab.savefig(filename) - plt.gcf().clear() + + +def normalized_score(lst): + #print lst + l = min(lst) + u = max(lst) + return [ (item - l) / (u - l + 1e-4) for item in lst ] + +def alg_str(alg_name): + if (alg_name[1] == True and alg_name[2] == True): + return 'no_update' + if (alg_name[1] == True and alg_name[2] == False): + return 'bandit_only' + if (alg_name[1] == False and alg_name[2] == True): + return 'supervised_only' + if (alg_name[1] == False and alg_name[2] == False): + return 'combined_choices_lambda='+str(alg_name[0]) + +def problem_str(name_problem): + return 'supervised_corrupt_type='+str(name_problem[0]) \ + +'_supervised_corrupt_prob='+str(name_problem[1]) \ + 
+'_bandit_supervised_size_ratio='+str(name_problem[2]) + + + +def plot_cdf(alg_name, errs): + + plt.step(np.sort(errs), np.linspace(0, 1, len(errs), endpoint=False), label=alg_str(alg_name)) + + print alg_name + print errs + print len(errs) + #raw_input("Press Enter to continue...") + +def plot_all_cdfs(alg_results, mod): + #plot all cdfs: + i = 0 + for alg_name, errs in alg_results.iteritems(): + plot_cdf(alg_name, errs) + + plt.legend() + plt.xlim(0,1) + plt.ylim(0,1) + plt.savefig(mod.fulldir+problem_str(mod.name_problem)+'.png') + plt.clf() + + +def plot_all_pair_comp(alg_results, sizes, mod): + alg_names = alg_results.keys() + + for i in range(len(alg_names)): + for j in range(len(alg_names)): + if i < j: + errs_1 = alg_results[alg_names[i]] + errs_2 = alg_results[alg_names[j]] + + print len(errs_1), len(errs_2), len(sizes) + #raw_input('Press any key to continue..') + + plot_comparison(errs_1, errs_2, sizes) + + plt.title(alg_str(alg_names[i])+' vs '+alg_str(alg_names[j])) + plt.savefig(mod.fulldir+problem_str(mod.name_problem)+'_'+alg_str(alg_names[i])+'_vs_'+alg_str(alg_names[j])+'.png') + plt.clf() + +def init_results(result_table): + alg_results = {} + for idx, row in result_table.iterrows(): + alg_name = (row['choices_lambda'], row['no_supervised'], row['no_bandit']) + alg_results[alg_name] = [] + return alg_results + +def plot_all(mod, all_results): + grouped_by_problem = all_results.groupby(['corrupt_type_supervised', + 'corrupt_prob_supervised','bandit_supervised_size_ratio']) + + #then group by dataset and warm_start size (corresponding to each point in cdf) + for name_problem, group_problem in grouped_by_problem: + normalized_results = None + unnormalized_results = None + sizes = None + mod.name_problem = name_problem + + grouped_by_dataset = group_problem.groupby(['dataset','warm_start_size']) + #then select unique combinations of (no_supervised, no_bandit, choices_lambda) + #e.g. 
(True, True, 1), (True, False, 1), (False, True, 1), (False, False, 2) + #(False, False, 8), and compute a normalized score + + for name_dataset, group_dataset in grouped_by_dataset: + result_table = group_dataset #group_dataset.groupby(['choices_lambda','no_supervised', 'no_bandit']) + + #first time - generate names of algorithms considered + if normalized_results is None: + sizes = [] + normalized_results = init_results(result_table) + unnormalized_results = init_results(result_table) + + #print alg_results + #dummy = input('') + + #in general (including the first time) - record the error rates of all algorithms + errs = [] + for idx, row in result_table.iterrows(): + errs.append(row['avg_error']) + normalized_errs = normalized_score(errs) + + i = 0 + for idx, row in result_table.iterrows(): + if i == 0: + sizes.append(row['total_size']) + alg_name = (row['choices_lambda'], row['no_supervised'], row['no_bandit']) + unnormalized_results[alg_name].append(errs[i]) + normalized_results[alg_name].append(normalized_errs[i]) + i += 1 + + plot_all_pair_comp(unnormalized_results, sizes, mod) + plot_all_cdfs(normalized_results, mod) + + + if __name__ == '__main__': parser = argparse.ArgumentParser(description='result summary') parser.add_argument('--results_dir', default='../../../figs/') + parser.add_argument('--filter', default='1') + parser.add_argument('--plot_subdir', default='expt1/') args = parser.parse_args() - results_dir = args.results_dir + + mod = model() + + mod.results_dir = args.results_dir + mod.filter = args.filter + mod.plot_subdir = args.plot_subdir + + mod.fulldir = mod.results_dir + mod.plot_subdir + if not os.path.exists(mod.fulldir): + os.makedirs(mod.fulldir) #results_dir = '../../../lambdas/' #results_dir = '../../../warm_start_frac=0.1/' @@ -97,7 +235,7 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): #results_dir = '../../../type2_0.65/' #results_dir = '../../../type2_0.3/' - dss = sum_files(results_dir) + dss = sum_files(mod.results_dir) #print dss[168] @@ -105,7 +243,7 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): for i in range(len(dss)): print 'result file name: ', dss[i] - result = parse_sum_file(results_dir + dss[i]) + result = parse_sum_file(mod.results_dir + dss[i]) if (i == 0): all_results = result @@ -114,6 +252,24 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): print all_results + #first group by corruption mode, then corruption prob + #then group by warm start - bandit ratio + #these constitutes all the problem settings we are looking at (corresponding + #to each cdf graph) + + if mod.filter == '1': + pass + elif mod.filter == '2': + #print all_results['warm_start_size'] >= 100 + #raw_input(' ') + all_results = all_results[all_results['warm_start_size'] >= 100] + elif mod.filter == '3': + all_results = all_results[all_results['num_classes'] >= 3] + elif mod.filter == '4': + all_results = all_results[all_results['num_classes'] <= 2] + + plot_all(mod, all_results) + #if i >= 331 and i <= 340: # print 'result:', result # print 'all_results:', all_results @@ -128,6 +284,7 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): #for cl, results_lambda in grouped: #results_lambda = all_results[all_results['choices_lambda'] == cl] # compare combined w/ supervised + ''' alg1 = all_results.columns[1] alg2 = all_results.columns[2] bandit_only = all_results.columns[3] @@ -139,12 +296,12 @@ def plot_comparison(errors_1, errors_2, sizes, title, filename): results_bandit = 
all_results[bandit_only].tolist() results_supervised = all_results[supervised_only].tolist() dataset_sizes = all_results[sizes].tolist() - + ''' #print alg1 #print results_alg1 # compare combined w/ bandit - plot_comparison(results_alg1, results_bandit, dataset_sizes, alg1 + ' vs ' + 'bandit only', results_dir + alg1 + ' vs ' + 'bandit only' + '.png') + #plot_comparison(results_alg1, results_bandit, dataset_sizes, alg1 + ' vs ' + 'bandit only', results_dir + alg1 + ' vs ' + 'bandit only' + '.png') #plot_comparison(results_alg1, results_supervised, dataset_sizes, alg1 + ' vs ' + 'supervised only', results_dir + alg1 + ' vs ' + 'supervised only' + '.png') #plot_comparison(results_alg2, results_bandit, dataset_sizes, alg2 + ' vs ' + 'bandit only', results_dir + alg2 + ' vs ' + 'bandit only' + '.png') #plot_comparison(results_alg2, results_supervised, dataset_sizes, alg2 + ' vs ' + 'supervised only', results_dir + alg2 + ' vs ' + 'supervised only' + '.png') diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 764ed4855b0..e3ce7f7212e 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -49,11 +49,11 @@ def collect_stats(mod): wt.append(float(weight_str)) mod.avg_loss = float(avg_loss_str) - mod.bandit = float(weight_str) + mod.bandit_effective = int(float(weight_str)) for mod.ratio in mod.critical_size_ratios: - if mod.bandit >= 0.99 * mod.warm_start * mod.ratio and \ - mod.bandit <= 1.01 * mod.warm_start * mod.ratio: + if mod.bandit_effective >= 0.99 * mod.warm_start * mod.ratio and \ + mod.bandit_effective <= 1.01 * mod.warm_start * mod.ratio: record_result(mod) @@ -68,7 +68,7 @@ def collect_stats(mod): #return avg_loss, last_loss, wt def record_result(mod): - problem_params_trailer = [mod.bandit, mod.ratio] + problem_params_trailer = [mod.bandit_effective, mod.ratio] config_name = disperse(mod.problem_params + problem_params_trailer + mod.alg_params, ' ') list_results = [mod.avg_loss, mod.actual_var, mod.ideal_var] @@ -188,14 +188,22 @@ def get_num_classes(ds): def ds_per_task(mod): # put dataset name to the last coordinate so that the task workloads tend to be # allocated equally - config_baselines_raw = list(product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised, mod.choices_cb_types, mod.dss, mod.warm_start_multipliers, [1], [False, True], [False, True])) - config_baselines = filter(lambda (x1, x2, x3, x4, x5, x6, x7, x8): x7 == True or x8 == True, config_baselines_raw) + # put dataset name to the first coordinate so that the result production order is + # in accordance with dataset order + config_corrupt_sup_raw = product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised) + config_corrupt_sup = filter(lambda (type, prob): type == 1 or abs(prob) > 1e-4, config_corrupt_sup_raw) - config_algs = list(product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised, mod.choices_cb_types, mod.dss, mod.warm_start_multipliers, mod.choices_choices_lambda, [False], [False])) + config_common = product(mod.dss, config_corrupt_sup, mod.choices_cb_types, mod.warm_start_multipliers) + + config_baselines_raw = list(product([1], [True, False], [True, False])) + config_baselines = filter(lambda (x1, x2, x3): x2 == True or x3 == True, config_baselines_raw) + config_algs = list(product(mod.choices_choices_lambda, [False], [False])) + config_all_spec = config_baselines + config_algs + + config_all = list(product(config_common, config_all_spec)) - config_all = config_baselines + config_algs 
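+    # Each config_all entry pairs the shared settings with an algorithm
+    # triple (choices_lambda, no_supervised, no_bandit). For illustration,
+    # with a hypothetical dataset file 'ds1.vw.gz', a bandit-only baseline
+    # entry would look like
+    #   (('ds1.vw.gz', (1, 0.0), 'mtr', 2), (1, True, False))
+    # and a combined-method entry (choices_lambda = 4) like
+    #   (('ds1.vw.gz', (1, 0.0), 'mtr', 2), (4, False, False))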
config_task = [] print len(config_all) for i in range(len(config_all)): @@ -256,10 +264,10 @@ def main_loop(mod): summary_file.write(summary_header+'\n') summary_file.close() - for mod.corrupt_type_supervised, mod.corrupt_prob_supervised, \ - mod.cb_type, mod.dataset, mod.warm_start_multiplier, \ - mod.choices_lambda, \ - mod.no_supervised, mod.no_bandit in mod.config_task: + for ((mod.dataset, (mod.corrupt_type_supervised, mod.corrupt_prob_supervised), \ + mod.cb_type, mod.warm_start_multiplier), \ + (mod.choices_lambda, \ + mod.no_supervised, mod.no_bandit)) in mod.config_task: gen_comparison_graph(mod) @@ -303,7 +311,7 @@ def main_loop(mod): mod.adf_on = True # use fractions instead of absolute numbers - mod.warm_start_multipliers = [pow(2, i) for i in range(6)] + mod.warm_start_multipliers = [2*pow(4, i) for i in range(3)] #mod.choices_warm_start_frac = [0.01 * pow(2, i) for i in range(1)] #mod.choices_warm_start_frac = [0.01, 0.03, 0.1, 0.3] #mod.choices_warm_start_frac = [0.03] @@ -319,9 +327,9 @@ def main_loop(mod): #choices_cb_types = ['mtr', 'ips'] #mod.choices_cb_types = ['mtr', 'ips'] mod.choices_cb_types = ['mtr'] - mod.choices_no_supervised = [False, True] - mod.choices_no_bandit = [False, True] - mod.choices_choices_lambda = [2*i for i in range(1,5)] + #mod.choices_no_supervised = [False, True] + #mod.choices_no_bandit = [False, True] + mod.choices_choices_lambda = [2, 4, 8] #mod.choices_choices_lambda = [i for i in range(1,3)] #mod.choices_choices_lambda = [i for i in range(1,2)] #mod.choices_choices_lambda = [1, 3, 5, 7] @@ -335,7 +343,7 @@ def main_loop(mod): #mod.choices_corrupt_prob_supervised = [0.3] mod.corrupt_type_bandit = 1 - mod.corrupt_prob_bandit = 0 + mod.corrupt_prob_bandit = 0.0 mod.validation_method = 1 mod.epsilon = 0.05 diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 036afc6355e..5fec5f3f233 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -131,6 +131,25 @@ void setup_lambdas(cbify& data, example& ec) // The lambdas are in fact arranged in ascending order (the middle lambda is 0.5) v_array& lambdas = data.lambdas; + //bandit only + if (!data.ind_supervised && data.ind_bandit) + { + for (uint32_t i = 0; i Date: Sat, 5 May 2018 18:06:33 -0400 Subject: [PATCH 065/127] added type 3 noise --- scripts/alg_comparison.py | 27 ++++++-- scripts/plot_warm_start.py | 133 +++++++++++++++++++++++++------------ vowpalwabbit/cbify.cc | 8 ++- 3 files changed, 120 insertions(+), 48 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index 27057514b5c..3826b66b2b8 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -81,9 +81,9 @@ def plot_comparison(errors_1, errors_2, sizes): plt.scatter(results_insigni_1, results_insigni_2, s=2, c='k') -def normalized_score(lst): +def normalized_score(lst, l): #print lst - l = min(lst) + #l = min(lst) u = max(lst) return [ (item - l) / (u - l + 1e-4) for item in lst ] @@ -120,7 +120,7 @@ def plot_all_cdfs(alg_results, mod): plot_cdf(alg_name, errs) plt.legend() - plt.xlim(0,1) + plt.xlim(-1,1) plt.ylim(0,1) plt.savefig(mod.fulldir+problem_str(mod.name_problem)+'.png') plt.clf() @@ -151,6 +151,20 @@ def init_results(result_table): alg_results[alg_name] = [] return alg_results +def get_best_error(best_error_table, name_dataset): + name = name_dataset[0] + best_error_oneline = best_error_table[best_error_table['dataset'] == name] + best_error = best_error_oneline.loc[best_error_oneline.index[0], 'avg_error'] + #print name + #raw_input("...") + #print 
best_error_oneline + #raw_input("...") + #print best_error + #raw_input("...") + return best_error + + + def plot_all(mod, all_results): grouped_by_problem = all_results.groupby(['corrupt_type_supervised', 'corrupt_prob_supervised','bandit_supervised_size_ratio']) @@ -180,10 +194,12 @@ def plot_all(mod, all_results): #dummy = input('') #in general (including the first time) - record the error rates of all algorithms + + err_best = get_best_error(mod.best_error_table, name_dataset) errs = [] for idx, row in result_table.iterrows(): errs.append(row['avg_error']) - normalized_errs = normalized_score(errs) + normalized_errs = normalized_score(errs, err_best) i = 0 for idx, row in result_table.iterrows(): @@ -257,6 +273,9 @@ def plot_all(mod, all_results): #these constitutes all the problem settings we are looking at (corresponding #to each cdf graph) + mod.best_error_table = all_results[all_results['choices_lambda'] == 0] + all_results = all_results[all_results['choices_lambda'] != 0] + if mod.filter == '1': pass elif mod.filter == '2': diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index e3ce7f7212e..1b247c142e2 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -24,29 +24,39 @@ def collect_stats(mod): # num_rows = mod.bandit / mod.progress #print vw_output_filename - #avg_error_value = avg_error(mod) + avg_error_value = avg_error(mod) mod.actual_var = actual_var(mod) mod.ideal_var = ideal_var(mod) - avg_loss = [] - last_loss = [] - wt = [] - end_table = False + #avg_loss = [] + #last_loss = [] + #wt = [] + #end_table = False + + if mod.choices_lambda == 0: + mod.avg_loss = avg_error_value + mod.bandit_effective = 0 + mod.ratio = 0 + record_result(mod) + return f = open(vw_output_filename, 'r') #linenumber = 0 i = 0 for line in f: - vw_progress_pattern = '\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+[a-zA-Z0-9]+\s+[a-zA-Z0-9]+\s+\d+' + vw_progress_pattern = '\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+[a-zA-Z0-9]+\s+[a-zA-Z0-9]+\s+\d+.*' matchobj = re.match(vw_progress_pattern, line) if matchobj: + s = line.split() + if len(s) >= 8: + s = s[:7] avg_loss_str, last_loss_str, counter_str, weight_str, curr_label_str, \ - curr_pred_str, curr_feat_str = line.split() + curr_pred_str, curr_feat_str = s - avg_loss.append(float(avg_loss_str)) - last_loss.append(float(last_loss_str)) - wt.append(float(weight_str)) + #avg_loss.append(float(avg_loss_str)) + #last_loss.append(float(last_loss_str)) + #wt.append(float(weight_str)) mod.avg_loss = float(avg_loss_str) mod.bandit_effective = int(float(weight_str)) @@ -75,7 +85,7 @@ def record_result(mod): result = disperse(list_results, ' ') summary_file = open(mod.summary_file_name, 'a') - summary_file.write(config_name + ' ' + result + '\n') + summary_file.write(config_name + result + '\n') summary_file.close() @@ -101,15 +111,23 @@ def execute_vw(mod): #if mod.cb_type == 'mtr': # mod.adf_on = True; - cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option + ' --progress ' + str(mod.progress) \ - + ' -d ' + mod.ds_path + mod.dataset \ - + ' --corrupt_type_supervised ' + str(mod.corrupt_type_supervised) \ - + ' --corrupt_prob_supervised ' + str(mod.corrupt_prob_supervised) \ - + ' --corrupt_type_bandit ' + str(mod.corrupt_type_bandit) \ - + ' --corrupt_prob_bandit ' + str(mod.corrupt_prob_bandit) \ - + ' --validation_method ' + str(mod.validation_method) \ 
- + ' --weighting_scheme ' + str(mod.weighting_scheme) \ - + ' --lambda_scheme ' + str(mod.lambda_scheme) + if mod.choices_lambda == 0: + cmd_vw = mod.vw_path + ' --oaa ' + str(mod.num_classes) + ' --passes 5 ' \ + + ' --progress ' + str(mod.progress) + ' -d ' \ + + mod.ds_path + mod.dataset \ + + ' --cache_file ' + mod.results_path + mod.dataset + '.cache' + else: + cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option + ' --progress ' + str(mod.progress) \ + + ' -d ' + mod.ds_path + mod.dataset \ + + ' --corrupt_type_supervised ' + str(mod.corrupt_type_supervised) \ + + ' --corrupt_prob_supervised ' + str(mod.corrupt_prob_supervised) \ + + ' --corrupt_type_bandit ' + str(mod.corrupt_type_bandit) \ + + ' --corrupt_prob_bandit ' + str(mod.corrupt_prob_bandit) \ + + ' --validation_method ' + str(mod.validation_method) \ + + ' --weighting_scheme ' + str(mod.weighting_scheme) \ + + ' --lambda_scheme ' + str(mod.lambda_scheme) \ + + ' --learning_rate ' + str(mod.learning_rate) \ + + ' --overwrite_label ' + str(mod.majority_class) cmd = cmd_vw print cmd @@ -148,6 +166,7 @@ def disperse(l, ch): def gen_comparison_graph(mod): mod.num_lines = get_num_lines(mod.ds_path+mod.dataset) + mod.majority_class = get_majority_class(mod.ds_path+mod.dataset) mod.progress = int(math.ceil(float(mod.num_lines) / float(mod.num_checkpoints))) mod.warm_start = mod.warm_start_multiplier * mod.progress mod.bandit = mod.num_lines - mod.warm_start @@ -195,14 +214,31 @@ def ds_per_task(mod): config_corrupt_sup_raw = product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised) config_corrupt_sup = filter(lambda (type, prob): type == 1 or abs(prob) > 1e-4, config_corrupt_sup_raw) - config_common = product(mod.dss, config_corrupt_sup, mod.choices_cb_types, mod.warm_start_multipliers) + config_problem = product(mod.dss, config_corrupt_sup, mod.choices_cb_types, mod.warm_start_multipliers, mod.learning_rates) + - config_baselines_raw = list(product([1], [True, False], [True, False])) - config_baselines = filter(lambda (x1, x2, x3): x2 == True or x3 == True, config_baselines_raw) - config_algs = list(product(mod.choices_choices_lambda, [False], [False])) - config_all_spec = config_baselines + config_algs - config_all = list(product(config_common, config_all_spec)) + if mod.baselines_on: + config_baselines_raw = list(product([1], [True, False], [True, False])) + config_baselines_solution = filter(lambda (x1, x2, x3): x2 == True or x3 == True, config_baselines_raw) + config_baselines = list(product(config_problem, config_baselines_solution)) + else: + config_baselines = [] + + if mod.algs_on: + config_algs_solution = list(product(mod.choices_choices_lambda, [False], [False])) + config_algs = list(product(config_problem, config_algs_solution)) + else: + config_algs = [] + + if mod.optimal_on: + config_optimal_problem = product(mod.dss, [(1, 0)], [1], [1], [0.5]) + config_optimal_solution = [(0, False, False)] + config_optimal = list(product(config_optimal_problem, config_optimal_solution)) + else: + config_optimal = [] + + config_all = config_baselines + config_algs + config_optimal config_task = [] print len(config_all) @@ -219,6 +255,10 @@ def get_num_lines(dataset_name): ps.wait() return int(output) +def get_majority_class(dataset_name): + maj_class = subprocess.check_output(('zcat '+ dataset_name +' | cut -d \' \' -f 1 | sort | uniq 
-c | sort -r | head -1 | xargs | cut -d \' \' -f 2 '), shell=True) + return int(maj_class) + def avg_error(mod): return vw_output_extract(mod, 'average loss') @@ -233,13 +273,17 @@ def vw_output_extract(mod, pattern): vw_output = open(mod.vw_output_filename, 'r') vw_output_text = vw_output.read() #print vw_output_text - rgx = re.compile('^'+pattern+' = (.*)$', flags=re.M) + #rgx_pattern = '^'+pattern+' = (.*)(|\sh)\n.*$' + #print rgx_pattern + rgx_pattern = '.*'+pattern+' = ([\d]*.[\d]*)( h|)\n.*' + rgx = re.compile(rgx_pattern, flags=re.M) errs = rgx.findall(vw_output_text) if not errs: avge = 0 else: - avge = float(errs[0]) + print errs + avge = float(errs[0][0]) vw_output.close() return avge @@ -265,7 +309,7 @@ def main_loop(mod): summary_file.close() for ((mod.dataset, (mod.corrupt_type_supervised, mod.corrupt_prob_supervised), \ - mod.cb_type, mod.warm_start_multiplier), \ + mod.cb_type, mod.warm_start_multiplier, mod.learning_rate), \ (mod.choices_lambda, \ mod.no_supervised, mod.no_bandit)) in mod.config_task: gen_comparison_graph(mod) @@ -292,6 +336,10 @@ def main_loop(mod): time.sleep(1) mod = model() + mod.baselines_on = False + mod.algs_on = False + mod.optimal_on = True + mod.num_tasks = args.num_tasks mod.task_id = args.task_id @@ -301,7 +349,7 @@ def main_loop(mod): #DIR_PATTERN = '../results/cbresults_{}/' - mod.num_checkpoints = 100 + mod.num_checkpoints = 200 #mod.warm_start = 50 #mod.bandit = 4096 #mod.num_classes = 10 @@ -311,11 +359,8 @@ def main_loop(mod): mod.adf_on = True # use fractions instead of absolute numbers - mod.warm_start_multipliers = [2*pow(4, i) for i in range(3)] - #mod.choices_warm_start_frac = [0.01 * pow(2, i) for i in range(1)] - #mod.choices_warm_start_frac = [0.01, 0.03, 0.1, 0.3] - #mod.choices_warm_start_frac = [0.03] - #mod.choices_warm_start_frac = [0.01, 0.02, 0.04, 0.08, 0.16, 0.32] + mod.warm_start_multipliers = [pow(2,i) for i in range(5)] + #mod.warm_start_multipliers = [2*pow(4, i) for i in range(3)]] #mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] @@ -329,17 +374,18 @@ def main_loop(mod): mod.choices_cb_types = ['mtr'] #mod.choices_no_supervised = [False, True] #mod.choices_no_bandit = [False, True] - mod.choices_choices_lambda = [2, 4, 8] + #mod.choices_choices_lambda = [2, 4, 8] + mod.choices_choices_lambda = [] #mod.choices_choices_lambda = [i for i in range(1,3)] #mod.choices_choices_lambda = [i for i in range(1,2)] #mod.choices_choices_lambda = [1, 3, 5, 7] #[i for i in range(10,11)] #mod.corrupt_type_supervised = 2 #mod.corrupt_prob_supervised = 0.3 - mod.choices_corrupt_type_supervised = [1,2] + mod.choices_corrupt_type_supervised = [1,2,3] #mod.choices_corrupt_type_supervised = [2] #mod.corrupt_prob_supervised = 0.3 - mod.choices_corrupt_prob_supervised = [0.0,0.3] + mod.choices_corrupt_prob_supervised = [0.0,0.3,0.6,0.9,1] #mod.choices_corrupt_prob_supervised = [0.3] mod.corrupt_type_bandit = 1 @@ -351,15 +397,16 @@ def main_loop(mod): mod.choices_lambda = 2 mod.weighting_scheme = 1 mod.lambda_scheme = 3 - mod.no_bandit = False - mod.no_supervised = False + mod.no_exploration = False mod.cover_on = False mod.epsilon_on = True - mod.plot_color = 'r' - mod.plot_flat = False - mod.critical_size_ratios = [pow(2,i) for i in range(-5, 7)] + #mod.plot_color = 'r' + #mod.plot_flat = False + mod.critical_size_ratios = [184 * pow(2, -i) for i in range(8) ] + mod.learning_rates = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0] + #pow(2,i) for i in range(-5, 7) 
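# Rough sanity check for the checkpoint arithmetic above (toy numbers only --
# total_size and the warm start multiplier are assumptions, not script values):
import math

total_size, num_checkpoints = 10000, 200
progress = int(math.ceil(float(total_size) / float(num_checkpoints)))  # VW --progress
warm_start = 4 * progress                  # warm_start = multiplier * progress

# a VW progress row is recorded when the effective bandit count falls within
# (1 +/- 1e-7) of warm_start * ratio, for some critical size ratio
for ratio in [184 * pow(2, -i) for i in range(7)]:
    target = warm_start * ratio
    if target <= total_size - warm_start:
        print('checkpoint near bandit size %d (ratio %g)' % (int(target), ratio))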
#for correctness test #mod.choices_warm_start = [20] #choices_fprob1 = [0.1] diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 5fec5f3f233..bec78e99e70 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -11,6 +11,7 @@ #define UAR 1 #define CIRCULAR 2 +#define OVERWRITE 3 #define BANDIT_VALI 1 #define SUPERVISED_VALI 2 @@ -101,6 +102,7 @@ struct cbify size_t lambda_scheme; float epsilon; float cumulative_variance; + size_t overwrite_label; }; @@ -219,6 +221,8 @@ size_t corrupt_action(size_t action, cbify& data, size_t data_type) { if (corrupt_type == UAR) return generate_uar_action(data); + else if (corrupt_type == OVERWRITE) + return data.overwrite_label; else return (action % data.num_actions) + 1; } @@ -1005,7 +1009,8 @@ base_learner* cbify_setup(vw& all) ("corrupt_type_bandit", po::value(), "probability of label corruption in the bandit part (1 is uar, 2 is circular)") ("validation_method", po::value(), "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised and amortizing)") ("weighting_scheme", po::value(), "weighting scheme (1 is per instance weighting, 2 is per dataset weighting (where we use a diminishing weighting scheme) )") - ("lambda_scheme", po::value(), "Lambda set scheme (1 is expanding based on center 0.5, 2 is expanding based on center=minimax lambda, 3 is expanding based on center=minimax lambda along with forcing 0,1 in Lambda )"); + ("lambda_scheme", po::value(), "Lambda set scheme (1 is expanding based on center 0.5, 2 is expanding based on center=minimax lambda, 3 is expanding based on center=minimax lambda along with forcing 0,1 in Lambda )") + ("overwrite_label", po::value(), "the label type 3 corruptions (overwriting) turn to"); add_options(all); po::variables_map& vm = all.vm; @@ -1042,6 +1047,7 @@ base_learner* cbify_setup(vw& all) data.weighting_scheme = vm.count("weighting_scheme") ? vm["weighting_scheme"].as() : INSTANCE_WT; // 1 is the default value data.lambda_scheme = vm.count("lambda_scheme") ? vm["lambda_scheme"].as() : ABS_CENTRAL; data.epsilon = vm.count("epsilon") ? vm["epsilon"].as() : 0.05; + data.overwrite_label = vm.count("overwrite_label") ? 
vm["overwrite_label"].as() : 1; //cout<<"does epsilon exist?"<= 0.5) return (1 - data.lambdas[i]) / data.lambdas[i]; @@ -522,12 +528,12 @@ size_t predict_cs(cbify& data, example& ec) } -void learn_cs(cbify& data, example& ec) +void learn_cs(cbify& data, example& ec, size_t ec_type) { float old_weight = ec.weight; for (uint32_t i = 0; i < data.choices_lambda; i++) { - float weight_multiplier = compute_weight_multiplier(data, i, SUPERVISED); + float weight_multiplier = compute_weight_multiplier(data, i, ec_type); ec.weight = old_weight * weight_multiplier; data.all->cost_sensitive->learn(ec, i); } @@ -544,10 +550,8 @@ void multiclass_to_cs(cbify& data, COST_SENSITIVE::label& csl, size_t corrupted_ } } -void generate_corrupted_cs(cbify& data, example& ec, MULTICLASS::label_t ld) +void generate_corrupted_cs(cbify& data, example& ec, MULTICLASS::label_t ld, size_t corrupted_label) { - size_t corrupted_label = corrupt_action(ld.label, data, SUPERVISED); - //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) COST_SENSITIVE::label& csl = *data.csls; @@ -575,7 +579,7 @@ void add_to_sup_validation(cbify& data, example& ec) // cout< void predict_or_learn(cbify& data, base_learner& base, example& ec) { //Store the multiclass input label - MULTICLASS::label_t ld = ec.l.multi; + //cout< -void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) +void add_to_sup_validation_adf(cbify& data, example& ec) { + example& ec_copy = data.supervised_validation[data.warm_start_iter]; + VW::copy_example_data(false, &ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); +} - //Store the multiclass input label - MULTICLASS::label_t ld = ec.l.multi; - - if (data.warm_start_iter == 0 && data.bandit_iter == 0) - setup_lambdas(data, ec); +void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) +{ + //Store the multiclass input label + MULTICLASS::label_t ld = ec.l.multi; - copy_example_to_adf(data, ec); + copy_example_to_adf(data, ec); for (size_t a = 0; a < data.adf_data.num_actions; ++a) data.cbls[a].costs = data.adf_data.ecs[a].l.cb.costs; data.cbl_empty->costs = data.adf_data.empty_example->l.cb.costs; - if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly - { + //best_action = predict_sublearner(data, base, argmin); + uint32_t best_action = predict_cs_adf(data, base); - //best_action = predict_sublearner(data, base, argmin); - uint32_t best_action = predict_cs_adf(data, base); + //data.all->cost_sensitive->predict(ec,argmin); - //data.all->cost_sensitive->predict(ec,argmin); + //generate cost-sensitive label + // ecs[a].weight *= 1; + // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; + size_t corrupted_label = corrupt_action(ld.label, data, ec_type); + generate_corrupted_cs_adf(data, ld, corrupted_label); - //generate cost-sensitive label - // ecs[a].weight *= 1; - // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; + if (is_update) + learn_cs_adf(data, ec_type); - generate_corrupted_cs_adf(data, ld); + ec.pred.multiclass = best_action; + ec.l.multi = ld; - if (data.ind_supervised) - learn_cs_adf(data); + //a hack here - allocated memories not deleted + //to be corrected + if (data.validation_method == SUPERVISED_VALI) + add_to_sup_validation_adf(data, ec); +} - ec.pred.multiclass = best_action; - ec.l.multi = ld; - ec.weight = 0; - //a hack here - allocated memories not deleted - //to be corrected - if (data.validation_method == SUPERVISED_VALI) - 
VW::copy_example_data(false, &data.supervised_validation[data.warm_start_iter], &ec, 0, MULTICLASS::mc_label.copy_label); +void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) +{ + //Store the multiclass input label + MULTICLASS::label_t ld = ec.l.multi; - data.warm_start_iter++; + copy_example_to_adf(data, ec); - } - else if (data.bandit_iter < data.bandit_period) // call the bandit learner - { - //size_t pred_pi = predict_cs_adf(data, base, ec); - uint32_t idx = predict_bandit_adf(data, base); + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.cbls[a].costs = data.adf_data.ecs[a].l.cb.costs; + data.cbl_empty->costs = data.adf_data.empty_example->l.cb.costs; - CB::cb_class cl; + //size_t pred_pi = predict_cs_adf(data, base, ec); + uint32_t idx = predict_bandit_adf(data, base); - generate_corrupt_cb_adf(data, cl, ld, idx); + CB::cb_class cl; - // accumulate the cumulative costs of lambdas - accumulate_costs_ips_adf(data, ec, cl, base); + size_t corrupted_label = corrupt_action(ld.label, data, ec_type); + generate_corrupted_cb_adf(data, cl, ld, idx, corrupted_label); - // add cb label to chosen action - auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; - lab.costs.push_back(cl); + // accumulate the cumulative costs of lambdas + accumulate_costs_ips_adf(data, ec, cl, base); + // add cb label to chosen action + auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; + lab.costs.push_back(cl); - if (data.ind_bandit) - learn_bandit_adf(data, base); - accumulate_variance_adf(data, base); + if (is_update) + learn_bandit_adf(data, base, ec_type); - ec.pred.multiclass = cl.action; + accumulate_variance_adf(data, base); - data.bandit_iter++; + ec.pred.multiclass = cl.action; +} + +template +void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) +{ + if (data.warm_start_iter == 0 && data.bandit_iter == 0) + setup_lambdas(data, ec); + if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly + { + if (data.warm_start_type == SUPERVISED_WS) + predict_or_learn_cs_adf(data, base, ec, data.ind_supervised, SUPERVISED); + else + predict_or_learn_bandit_adf(data, base, ec, data.ind_supervised, SUPERVISED); + ec.weight = 0; + data.warm_start_iter++; + } + else if (data.bandit_iter < data.bandit_period) // call the bandit learner + { + predict_or_learn_bandit_adf(data, base, ec, data.ind_bandit, BANDIT); + data.bandit_iter++; if (data.bandit_iter == data.bandit_period) { cout<<"Ideal average variance = "<(), "loss for correct label") ("loss1", po::value(), "loss for incorrect label") - ("warm_start", po::value(), "number of training examples for fully-supervised warm start") + ("warm_start", po::value(), "number of training examples for warm start") ("bandit", po::value(), "number of training examples for bandit processing") ("choices_lambda", po::value(), "numbers of lambdas importance weights to aggregate") ("no_supervised", "indicator of using supervised only") @@ -1010,7 +1047,8 @@ base_learner* cbify_setup(vw& all) ("validation_method", po::value(), "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised and amortizing)") ("weighting_scheme", po::value(), "weighting scheme (1 is per instance weighting, 2 is per dataset weighting (where we use a diminishing weighting scheme) )") ("lambda_scheme", po::value(), "Lambda set scheme (1 is expanding based on center 0.5, 2 is expanding based on center=minimax lambda, 3 is expanding based on 
center=minimax lambda along with forcing 0,1 in Lambda )") - ("overwrite_label", po::value(), "the label type 3 corruptions (overwriting) turn to"); + ("overwrite_label", po::value(), "the label type 3 corruptions (overwriting) turn to") + ("warm_start_type", po::value(), "the type of warm start approach (1 is supervised warm start, 2 is contextual bandit warm start)"); add_options(all); po::variables_map& vm = all.vm; @@ -1048,7 +1086,7 @@ base_learner* cbify_setup(vw& all) data.lambda_scheme = vm.count("lambda_scheme") ? vm["lambda_scheme"].as() : ABS_CENTRAL; data.epsilon = vm.count("epsilon") ? vm["epsilon"].as() : 0.05; data.overwrite_label = vm.count("overwrite_label") ? vm["overwrite_label"].as() : 1; - + data.warm_start_type = vm.count("warm_start_type") ? vm["warm_start_type"].as() : SUPERVISED_WS; //cout<<"does epsilon exist?"<= 0.5) return (1 - data.lambdas[i]) / data.lambdas[i]; @@ -528,12 +526,12 @@ size_t predict_cs(cbify& data, example& ec) } -void learn_cs(cbify& data, example& ec, size_t ec_type) +void learn_cs(cbify& data, example& ec) { float old_weight = ec.weight; for (uint32_t i = 0; i < data.choices_lambda; i++) { - float weight_multiplier = compute_weight_multiplier(data, i, ec_type); + float weight_multiplier = compute_weight_multiplier(data, i, SUPERVISED); ec.weight = old_weight * weight_multiplier; data.all->cost_sensitive->learn(ec, i); } @@ -606,12 +604,12 @@ size_t predict_bandit(cbify& data, base_learner& base, example& ec) } -void learn_bandit(cbify& data, base_learner& base, example& ec, size_t ec_type) +void learn_bandit(cbify& data, base_learner& base, example& ec) { float old_weight = ec.weight; for (uint32_t i = 0; i < data.choices_lambda; i++) { - float weight_multiplier = compute_weight_multiplier(data, i, ec_type); + float weight_multiplier = compute_weight_multiplier(data, i, BANDIT); if (data.weighting_scheme == INSTANCE_WT) ec.weight = old_weight * weight_multiplier; @@ -633,7 +631,7 @@ void accumulate_variance(cbify& data, example& ec) } -void predict_or_learn_cs(cbify& data, example& ec, bool is_update, size_t ec_type) +void predict_or_learn_cs(cbify& data, example& ec, bool is_update, size_t data_type) { MULTICLASS::label_t ld = ec.l.multi; //predict @@ -641,11 +639,11 @@ void predict_or_learn_cs(cbify& data, example& ec, bool is_update, size_t ec_typ //learn //first, corrupt fully supervised example ec's label here - size_t corrupted_label = corrupt_action(ld.label, data, ec_type); + size_t corrupted_label = corrupt_action(ld.label, data, data_type); generate_corrupted_cs(data, ec, ld, corrupted_label); if (is_update) - learn_cs(data, ec, ec_type); + learn_cs(data, ec); if (data.validation_method == SUPERVISED_VALI) add_to_sup_validation(data, ec); @@ -654,14 +652,14 @@ void predict_or_learn_cs(cbify& data, example& ec, bool is_update, size_t ec_typ ec.l.multi = ld; } -void predict_or_learn_bandit(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) +void predict_or_learn_bandit(cbify& data, base_learner& base, example& ec, bool is_update, size_t data_type) { MULTICLASS::label_t ld = ec.l.multi; size_t action = predict_bandit(data, base, ec); CB::cb_class cl; - size_t corrupted_label = corrupt_action(ld.label, data, ec_type); + size_t corrupted_label = corrupt_action(ld.label, data, data_type); generate_corrupted_cb(data, ec, cl, ld, action, corrupted_label); // accumulate the cumulative costs of lambdas accumulate_costs_ips(data, ec, cl); @@ -673,7 +671,7 @@ void predict_or_learn_bandit(cbify& data, base_learner& 
base, example& ec, bool ec.pred = data.pred; if (is_update) - learn_bandit(data, base, ec, ec_type); + learn_bandit(data, base, ec); data.a_s.erase(); data.a_s = ec.pred.a_s; @@ -787,7 +785,7 @@ void generate_corrupted_cs_adf(cbify& data, MULTICLASS::label_t ld, size_t corru } -void learn_cs_adf(cbify& data, size_t ec_type) +void learn_cs_adf(cbify& data) { example* ecs = data.adf_data.ecs; example* empty_example = data.adf_data.empty_example; @@ -797,7 +795,7 @@ void learn_cs_adf(cbify& data, size_t ec_type) for (uint32_t i = 0; i < data.choices_lambda; i++) { - float weight_multiplier = compute_weight_multiplier(data, i, ec_type); + float weight_multiplier = compute_weight_multiplier(data, i, SUPERVISED); for (size_t a = 0; a < data.adf_data.num_actions; ++a) { ecs[a].weight = data.old_weights[a] * weight_multiplier; @@ -825,7 +823,7 @@ void generate_corrupted_cb_adf(cbify& data, CB::cb_class& cl, MULTICLASS::label_ } -void learn_bandit_adf(cbify& data, base_learner& base, size_t ec_type) +void learn_bandit_adf(cbify& data, base_learner& base) { example* ecs = data.adf_data.ecs; example* empty_example = data.adf_data.empty_example; @@ -835,7 +833,7 @@ void learn_bandit_adf(cbify& data, base_learner& base, size_t ec_type) for (uint32_t i = 0; i < data.choices_lambda; i++) { - float weight_multiplier = compute_weight_multiplier(data, i, ec_type); + float weight_multiplier = compute_weight_multiplier(data, i, BANDIT); for (size_t a = 0; a < data.adf_data.num_actions; ++a) { @@ -880,7 +878,7 @@ void add_to_sup_validation_adf(cbify& data, example& ec) VW::copy_example_data(false, &ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); } -void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) +void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t data_type) { //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; @@ -899,11 +897,11 @@ void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool //generate cost-sensitive label // ecs[a].weight *= 1; // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; - size_t corrupted_label = corrupt_action(ld.label, data, ec_type); + size_t corrupted_label = corrupt_action(ld.label, data, SUPERVISED); generate_corrupted_cs_adf(data, ld, corrupted_label); if (is_update) - learn_cs_adf(data, ec_type); + learn_cs_adf(data); ec.pred.multiclass = best_action; ec.l.multi = ld; @@ -915,7 +913,7 @@ void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool } -void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) +void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t data_type) { //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; @@ -931,7 +929,7 @@ void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, b CB::cb_class cl; - size_t corrupted_label = corrupt_action(ld.label, data, ec_type); + size_t corrupted_label = corrupt_action(ld.label, data, data_type); generate_corrupted_cb_adf(data, cl, ld, idx, corrupted_label); // accumulate the cumulative costs of lambdas @@ -943,7 +941,7 @@ void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, b if (is_update) - learn_bandit_adf(data, base, ec_type); + learn_bandit_adf(data, base); accumulate_variance_adf(data, base); @@ -969,6 +967,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, 
example& ec) { predict_or_learn_bandit_adf(data, base, ec, data.ind_bandit, BANDIT); data.bandit_iter++; + if (data.bandit_iter == data.bandit_period) { cout<<"Ideal average variance = "< Date: Mon, 7 May 2018 14:36:39 -0400 Subject: [PATCH 068/127] fixed the place of weight multiplier calculation --- vowpalwabbit/cbify.cc | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 9f634ebe73b..bab211f3da2 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -494,20 +494,26 @@ void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_l float compute_weight_multiplier(cbify& data, size_t i, size_t ec_type) { + float weight_multiplier; + if (ec_type == SUPERVISED) { if (data.lambdas[i] >= 0.5) - return (1 - data.lambdas[i]) / data.lambdas[i]; + weight_multiplier = (1 - data.lambdas[i]) / data.lambdas[i]; else - return 1; + weight_multiplier = 1; } else { if (data.lambdas[i] >= 0.5) - return 1; + weight_multiplier = 1; else - return data.lambdas[i] / (1-data.lambdas[i]); + weight_multiplier = data.lambdas[i] / (1-data.lambdas[i]); + + if (data.weighting_scheme == DATASET_WT) + weight_multiplier = weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); } + return weight_multiplier; } @@ -612,12 +618,7 @@ void learn_bandit(cbify& data, base_learner& base, example& ec, size_t ec_type) for (uint32_t i = 0; i < data.choices_lambda; i++) { float weight_multiplier = compute_weight_multiplier(data, i, ec_type); - - if (data.weighting_scheme == INSTANCE_WT) - ec.weight = old_weight * weight_multiplier; - else - ec.weight = old_weight * weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); - + ec.weight = old_weight * weight_multiplier; base.learn(ec, i); } ec.weight = old_weight; @@ -836,14 +837,9 @@ void learn_bandit_adf(cbify& data, base_learner& base, size_t ec_type) for (uint32_t i = 0; i < data.choices_lambda; i++) { float weight_multiplier = compute_weight_multiplier(data, i, ec_type); - for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - if (data.weighting_scheme == INSTANCE_WT) - ecs[a].weight = data.old_weights[a] * weight_multiplier; - else - ecs[a].weight = data.old_weights[a] * weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); - + ecs[a].weight = data.old_weights[a] * weight_multiplier; base.learn(ecs[a], i); } base.learn(*empty_example, i); From bc94f6cb19ba280ec7e63f37aaec964c8a6fd13d Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 7 May 2018 14:52:46 -0400 Subject: [PATCH 069/127] force the changes --- vowpalwabbit/cbify.cc | 45 ++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 2050f3b0ded..bab211f3da2 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -6,6 +6,8 @@ #include "../explore/cpp/MWTExplorer.h" #include "vw.h" +//In the future, the above two's names should be changed to +//WARM_START and INTERACTIVE #define SUPERVISED 1 #define BANDIT 2 @@ -204,12 +206,12 @@ size_t generate_uar_action(cbify& data) } -size_t corrupt_action(size_t action, cbify& data, size_t data_type) +size_t corrupt_action(size_t action, cbify& data, size_t ec_type) { float corrupt_prob; size_t corrupt_type; - if (data_type == SUPERVISED) + if (ec_type == SUPERVISED) { corrupt_prob = data.corrupt_prob_supervised; corrupt_type = 
data.corrupt_type_supervised; @@ -490,7 +492,7 @@ void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_l } -float compute_weight_multiplier(cbify& data, size_t i, size_t data_type) +float compute_weight_multiplier(cbify& data, size_t i, size_t ec_type) { float weight_multiplier; @@ -532,12 +534,12 @@ size_t predict_cs(cbify& data, example& ec) } -void learn_cs(cbify& data, example& ec) +void learn_cs(cbify& data, example& ec, size_t ec_type) { float old_weight = ec.weight; for (uint32_t i = 0; i < data.choices_lambda; i++) { - float weight_multiplier = compute_weight_multiplier(data, i, SUPERVISED); + float weight_multiplier = compute_weight_multiplier(data, i, ec_type); ec.weight = old_weight * weight_multiplier; data.all->cost_sensitive->learn(ec, i); } @@ -610,7 +612,7 @@ size_t predict_bandit(cbify& data, base_learner& base, example& ec) } -void learn_bandit(cbify& data, base_learner& base, example& ec) +void learn_bandit(cbify& data, base_learner& base, example& ec, size_t ec_type) { float old_weight = ec.weight; for (uint32_t i = 0; i < data.choices_lambda; i++) @@ -632,7 +634,7 @@ void accumulate_variance(cbify& data, example& ec) } -void predict_or_learn_cs(cbify& data, example& ec, bool is_update, size_t data_type) +void predict_or_learn_cs(cbify& data, example& ec, bool is_update, size_t ec_type) { MULTICLASS::label_t ld = ec.l.multi; //predict @@ -640,11 +642,11 @@ void predict_or_learn_cs(cbify& data, example& ec, bool is_update, size_t data_t //learn //first, corrupt fully supervised example ec's label here - size_t corrupted_label = corrupt_action(ld.label, data, data_type); + size_t corrupted_label = corrupt_action(ld.label, data, ec_type); generate_corrupted_cs(data, ec, ld, corrupted_label); if (is_update) - learn_cs(data, ec); + learn_cs(data, ec, ec_type); if (data.validation_method == SUPERVISED_VALI) add_to_sup_validation(data, ec); @@ -653,14 +655,14 @@ void predict_or_learn_cs(cbify& data, example& ec, bool is_update, size_t data_t ec.l.multi = ld; } -void predict_or_learn_bandit(cbify& data, base_learner& base, example& ec, bool is_update, size_t data_type) +void predict_or_learn_bandit(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) { MULTICLASS::label_t ld = ec.l.multi; size_t action = predict_bandit(data, base, ec); CB::cb_class cl; - size_t corrupted_label = corrupt_action(ld.label, data, data_type); + size_t corrupted_label = corrupt_action(ld.label, data, ec_type); generate_corrupted_cb(data, ec, cl, ld, action, corrupted_label); // accumulate the cumulative costs of lambdas accumulate_costs_ips(data, ec, cl); @@ -672,7 +674,7 @@ void predict_or_learn_bandit(cbify& data, base_learner& base, example& ec, bool ec.pred = data.pred; if (is_update) - learn_bandit(data, base, ec); + learn_bandit(data, base, ec, ec_type); data.a_s.erase(); data.a_s = ec.pred.a_s; @@ -786,7 +788,7 @@ void generate_corrupted_cs_adf(cbify& data, MULTICLASS::label_t ld, size_t corru } -void learn_cs_adf(cbify& data) +void learn_cs_adf(cbify& data, size_t ec_type) { example* ecs = data.adf_data.ecs; example* empty_example = data.adf_data.empty_example; @@ -796,7 +798,7 @@ void learn_cs_adf(cbify& data) for (uint32_t i = 0; i < data.choices_lambda; i++) { - float weight_multiplier = compute_weight_multiplier(data, i, SUPERVISED); + float weight_multiplier = compute_weight_multiplier(data, i, ec_type); for (size_t a = 0; a < data.adf_data.num_actions; ++a) { ecs[a].weight = data.old_weights[a] * weight_multiplier; @@ -824,7 +826,7 @@ 
void generate_corrupted_cb_adf(cbify& data, CB::cb_class& cl, MULTICLASS::label_ } -void learn_bandit_adf(cbify& data, base_learner& base) +void learn_bandit_adf(cbify& data, base_learner& base, size_t ec_type) { example* ecs = data.adf_data.ecs; example* empty_example = data.adf_data.empty_example; @@ -874,7 +876,7 @@ void add_to_sup_validation_adf(cbify& data, example& ec) VW::copy_example_data(false, &ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); } -void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t data_type) +void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) { //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; @@ -893,11 +895,11 @@ void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool //generate cost-sensitive label // ecs[a].weight *= 1; // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; - size_t corrupted_label = corrupt_action(ld.label, data, SUPERVISED); + size_t corrupted_label = corrupt_action(ld.label, data, ec_type); generate_corrupted_cs_adf(data, ld, corrupted_label); if (is_update) - learn_cs_adf(data); + learn_cs_adf(data, ec_type); ec.pred.multiclass = best_action; ec.l.multi = ld; @@ -909,7 +911,7 @@ void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool } -void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t data_type) +void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) { //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; @@ -925,7 +927,7 @@ void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, b CB::cb_class cl; - size_t corrupted_label = corrupt_action(ld.label, data, data_type); + size_t corrupted_label = corrupt_action(ld.label, data, ec_type); generate_corrupted_cb_adf(data, cl, ld, idx, corrupted_label); // accumulate the cumulative costs of lambdas @@ -937,7 +939,7 @@ void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, b if (is_update) - learn_bandit_adf(data, base); + learn_bandit_adf(data, base, ec_type); accumulate_variance_adf(data, base); @@ -963,7 +965,6 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { predict_or_learn_bandit_adf(data, base, ec, data.ind_bandit, BANDIT); data.bandit_iter++; - if (data.bandit_iter == data.bandit_period) { cout<<"Ideal average variance = "< Date: Tue, 8 May 2018 11:11:36 -0400 Subject: [PATCH 070/127] before modifying the baseline of no update --- scripts/alg_comparison.py | 52 ++++++++++++----- scripts/plot_warm_start.py | 115 ++++++++++++++++++++++++++----------- 2 files changed, 118 insertions(+), 49 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index 3826b66b2b8..3d810373075 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -88,14 +88,16 @@ def normalized_score(lst, l): return [ (item - l) / (u - l + 1e-4) for item in lst ] def alg_str(alg_name): - if (alg_name[1] == True and alg_name[2] == True): + if (alg_name[0] == 2): + return 'supervised_underutil_as_bandit' + if (alg_name[2] == True and alg_name[3] == True): return 'no_update' - if (alg_name[1] == True and alg_name[2] == False): + if (alg_name[2] == True and alg_name[3] == False): return 'bandit_only' - if (alg_name[1] == False and alg_name[2] == True): + if (alg_name[2] == False and alg_name[3] == True): return 
'supervised_only' - if (alg_name[1] == False and alg_name[2] == False): - return 'combined_choices_lambda='+str(alg_name[0]) + if (alg_name[2] == False and alg_name[3] == False): + return 'combined_choices_lambda='+str(alg_name[1]) def problem_str(name_problem): return 'supervised_corrupt_type='+str(name_problem[0]) \ @@ -117,12 +119,15 @@ def plot_all_cdfs(alg_results, mod): #plot all cdfs: i = 0 for alg_name, errs in alg_results.iteritems(): - plot_cdf(alg_name, errs) + if (alg_name[2] == False and alg_name[3] == False and alg_name[1] != 8): + pass + else: + plot_cdf(alg_name, errs) plt.legend() - plt.xlim(-1,1) + plt.xlim(-0.2,1) plt.ylim(0,1) - plt.savefig(mod.fulldir+problem_str(mod.name_problem)+'.png') + plt.savefig(mod.problemdir+'/cdf.png') plt.clf() @@ -141,13 +146,13 @@ def plot_all_pair_comp(alg_results, sizes, mod): plot_comparison(errs_1, errs_2, sizes) plt.title(alg_str(alg_names[i])+' vs '+alg_str(alg_names[j])) - plt.savefig(mod.fulldir+problem_str(mod.name_problem)+'_'+alg_str(alg_names[i])+'_vs_'+alg_str(alg_names[j])+'.png') + plt.savefig(mod.problemdir+'/'+alg_str(alg_names[i])+'_vs_'+alg_str(alg_names[j])+'.png') plt.clf() def init_results(result_table): alg_results = {} for idx, row in result_table.iterrows(): - alg_name = (row['choices_lambda'], row['no_supervised'], row['no_bandit']) + alg_name = (row['warm_start_type'], row['choices_lambda'], row['no_supervised'], row['no_bandit']) alg_results[alg_name] = [] return alg_results @@ -201,16 +206,30 @@ def plot_all(mod, all_results): errs.append(row['avg_error']) normalized_errs = normalized_score(errs, err_best) + #print result_table + i = 0 for idx, row in result_table.iterrows(): if i == 0: - sizes.append(row['total_size']) - alg_name = (row['choices_lambda'], row['no_supervised'], row['no_bandit']) - unnormalized_results[alg_name].append(errs[i]) - normalized_results[alg_name].append(normalized_errs[i]) + temp_size = row['bandit_size'] + sizes.append(row['bandit_size']) + + if row['bandit_size'] == temp_size: + alg_name = (row['warm_start_type'], row['choices_lambda'], row['no_supervised'], row['no_bandit']) + unnormalized_results[alg_name].append(errs[i]) + normalized_results[alg_name].append(normalized_errs[i]) i += 1 - plot_all_pair_comp(unnormalized_results, sizes, mod) + #print 'sizes:' + #print len(sizes) + #for k, v in unnormalized_results.iteritems(): + # print len(v) + + mod.problemdir = mod.fulldir+problem_str(mod.name_problem)+'/' + if not os.path.exists(mod.problemdir): + os.makedirs(mod.problemdir) + + #plot_all_pair_comp(unnormalized_results, sizes, mod) plot_all_cdfs(normalized_results, mod) @@ -251,12 +270,15 @@ def plot_all(mod, all_results): #results_dir = '../../../type2_0.65/' #results_dir = '../../../type2_0.3/' + print 'reading directory..' dss = sum_files(mod.results_dir) + print len(dss) #print dss[168] all_results = None + print 'reading sum tables..' 
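# Sketch of the scoring applied downstream of these tables: per-problem errors
# are rescaled against the best (choices_lambda == 0) error before the CDFs are
# drawn. plot_cdf's body is not shown in this patch, so the empirical CDF below
# is an illustrative guess at its intent; the numbers are toy values.
def normalized_score(lst, l):
    u = max(lst)
    return [(item - l) / (u - l + 1e-4) for item in lst]

errs = [0.12, 0.20, 0.31]          # hypothetical avg_error values for one problem
best = 0.10                        # from the choices_lambda == 0 (optimal) rows
scores = sorted(normalized_score(errs, best))
ecdf = [(s, float(i + 1) / len(scores)) for i, s in enumerate(scores)]
print(ecdf)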
for i in range(len(dss)): print 'result file name: ', dss[i] result = parse_sum_file(mod.results_dir + dss[i]) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 1b247c142e2..5f44d5312ad 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -62,8 +62,8 @@ def collect_stats(mod): mod.bandit_effective = int(float(weight_str)) for mod.ratio in mod.critical_size_ratios: - if mod.bandit_effective >= 0.99 * mod.warm_start * mod.ratio and \ - mod.bandit_effective <= 1.01 * mod.warm_start * mod.ratio: + if mod.bandit_effective >= (1 - 1e-7) * mod.warm_start * mod.ratio and \ + mod.bandit_effective <= (1 + 1e-7) * mod.warm_start * mod.ratio: record_result(mod) @@ -79,13 +79,14 @@ def collect_stats(mod): def record_result(mod): problem_params_trailer = [mod.bandit_effective, mod.ratio] - config_name = disperse(mod.problem_params + problem_params_trailer + mod.alg_params, ' ') + config_name = mod.problem_params + problem_params_trailer + mod.alg_params list_results = [mod.avg_loss, mod.actual_var, mod.ideal_var] - result = disperse(list_results, ' ') + + row = config_name + list_results summary_file = open(mod.summary_file_name, 'a') - summary_file.write(config_name + result + '\n') + summary_file.write( disperse(row, '\t') + '\n') summary_file.close() @@ -127,7 +128,8 @@ def execute_vw(mod): + ' --weighting_scheme ' + str(mod.weighting_scheme) \ + ' --lambda_scheme ' + str(mod.lambda_scheme) \ + ' --learning_rate ' + str(mod.learning_rate) \ - + ' --overwrite_label ' + str(mod.majority_class) + + ' --overwrite_label ' + str(mod.majority_class) \ + + ' --warm_start_type ' + str(mod.warm_start_type) cmd = cmd_vw print cmd @@ -180,7 +182,7 @@ def gen_comparison_graph(mod): mod.alg_params = [ mod.cb_type, \ mod.validation_method, mod.weighting_scheme, \ mod.lambda_scheme, mod.choices_lambda, \ - mod.no_supervised, mod.no_bandit] + mod.no_supervised, mod.no_bandit, mod.warm_start_type, mod.learning_rate] mod.vw_output_filename = mod.results_path + disperse(mod.problem_params+mod.alg_params, '_') + '.txt' @@ -203,6 +205,24 @@ def get_num_classes(ds): did, n_actions = int(did), int(n_actions) return n_actions +def flatten(l): + out = [] + for item in l: + if isinstance(item, (list, tuple)): + out.extend(flatten(item)) + else: + out.append(item) + return tuple(out) + +def flatten_all(l): + out = [] + for item in l: + flattened = flatten(item) + if len(flattened) != 11: + print flattened + out.append(flattened) + return out + def ds_per_task(mod): # put dataset name to the last coordinate so that the task workloads tend to be @@ -211,41 +231,53 @@ def ds_per_task(mod): # put dataset name to the first coordinate so that the result production order is # in accordance with dataset order - config_corrupt_sup_raw = product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised) + config_corrupt_sup_raw = list(product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised)) config_corrupt_sup = filter(lambda (type, prob): type == 1 or abs(prob) > 1e-4, config_corrupt_sup_raw) - config_problem = product(mod.dss, config_corrupt_sup, mod.choices_cb_types, mod.warm_start_multipliers, mod.learning_rates) - - + config_problem = list(product(mod.dss, config_corrupt_sup, mod.choices_cb_types, mod.warm_start_multipliers)) if mod.baselines_on: - config_baselines_raw = list(product([1], [True, False], [True, False])) - config_baselines_solution = filter(lambda (x1, x2, x3): x2 == True or x3 == True, config_baselines_raw) - config_baselines = 
list(product(config_problem, config_baselines_solution)) + #config_baselines_raw = list(product([1], [True, False], [True, False], [1], [3])) + #config_baselines_solution = filter(lambda (x1, x2, x3, x4): x2 == True or x3 == True, config_baselines_raw) + config_baselines_solution = [(1, True, True, 1, 3), (1, True, False, 1, 3), (1, False, True, 1, 3)] + config_baselines = list(product(*[config_problem, config_baselines_solution, mod.learning_rates])) + config_baselines = flatten_all(config_baselines) else: config_baselines = [] if mod.algs_on: - config_algs_solution = list(product(mod.choices_choices_lambda, [False], [False])) - config_algs = list(product(config_problem, config_algs_solution)) + config_algs_solution_1 = list(product(mod.choices_choices_lambda, [False], [False], [1], [3])) + config_algs_solution_2 = [(1, False, False, 2, 1)] + config_algs_solution = config_algs_solution_1 + config_algs_solution_2 + config_algs = list(product(*[config_problem, config_algs_solution, mod.learning_rates])) + config_algs = flatten_all(config_algs) else: config_algs = [] if mod.optimal_on: - config_optimal_problem = product(mod.dss, [(1, 0)], [1], [1], [0.5]) - config_optimal_solution = [(0, False, False)] - config_optimal = list(product(config_optimal_problem, config_optimal_solution)) + config_optimal_problem = product(mod.dss, [(1, 0)], ['mtr'], [1]) + config_optimal_solution = [(0, False, False, 1, 1)] + config_optimal = list(product(*[config_optimal_problem, config_optimal_solution, [0.5]])) + config_optimal = flatten_all(config_optimal) else: config_optimal = [] + #print len(config_problem) + #print len(config_baselines) + #print len(config_algs) + #print len(config_optimal) + #raw_input(' ') + config_all = config_baselines + config_algs + config_optimal + config_all = sorted(config_all) + #config_all = sorted(config_all, key=lambda a: str(a)) config_task = [] print len(config_all) for i in range(len(config_all)): if (i % mod.num_tasks == mod.task_id): config_task.append(config_all[i]) - print config_all[i] + #print config_all[i] return config_task @@ -300,18 +332,19 @@ def main_loop(mod): 'cb_type', 'validation_method', 'weighting_scheme', \ 'lambda_scheme', 'choices_lambda', \ 'no_supervised', 'no_bandit', \ + 'warm_start_type', 'learning_rate', \ 'avg_error', 'actual_variance', \ 'ideal_variance'] - summary_header = disperse(list_header, ' ') + summary_header = disperse(list_header, '\t') summary_file.write(summary_header+'\n') summary_file.close() - for ((mod.dataset, (mod.corrupt_type_supervised, mod.corrupt_prob_supervised), \ - mod.cb_type, mod.warm_start_multiplier, mod.learning_rate), \ - (mod.choices_lambda, \ - mod.no_supervised, mod.no_bandit)) in mod.config_task: + for (mod.dataset, mod.corrupt_type_supervised, mod.corrupt_prob_supervised, \ + mod.cb_type, mod.warm_start_multiplier, \ + mod.choices_lambda, \ + mod.no_supervised, mod.no_bandit, mod.warm_start_type, mod.lambda_scheme, mod.learning_rate) in mod.config_task: gen_comparison_graph(mod) @@ -320,6 +353,8 @@ def main_loop(mod): parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') parser.add_argument('num_tasks', type=int) parser.add_argument('--results_dir', default='../../../figs/') + parser.add_argument('--ds_dir', default='../../../vwshuffled/') + parser.add_argument('--num_learning_rates', type=int) parser.add_argument('--warm_start_fraction', type=float) parser.add_argument('--corrupt_prob_supervised', type=float) parser.add_argument('--corrupt_prob_bandit',type=float) @@ -336,15 
+371,15 @@ def main_loop(mod): time.sleep(1) mod = model() - mod.baselines_on = False - mod.algs_on = False + mod.baselines_on = True + mod.algs_on = True mod.optimal_on = True mod.num_tasks = args.num_tasks mod.task_id = args.task_id - mod.ds_path = '../../../vwshuffled/' mod.vw_path = '../vowpalwabbit/vw' + mod.ds_path = args.ds_dir mod.results_path = args.results_dir #DIR_PATTERN = '../results/cbresults_{}/' @@ -359,7 +394,7 @@ def main_loop(mod): mod.adf_on = True # use fractions instead of absolute numbers - mod.warm_start_multipliers = [pow(2,i) for i in range(5)] + mod.warm_start_multipliers = [pow(2,i) for i in range(4)] #mod.warm_start_multipliers = [2*pow(4, i) for i in range(3)]] #mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] @@ -375,7 +410,7 @@ def main_loop(mod): #mod.choices_no_supervised = [False, True] #mod.choices_no_bandit = [False, True] #mod.choices_choices_lambda = [2, 4, 8] - mod.choices_choices_lambda = [] + mod.choices_choices_lambda = [2,4,8] #mod.choices_choices_lambda = [i for i in range(1,3)] #mod.choices_choices_lambda = [i for i in range(1,2)] #mod.choices_choices_lambda = [1, 3, 5, 7] @@ -385,9 +420,10 @@ def main_loop(mod): mod.choices_corrupt_type_supervised = [1,2,3] #mod.choices_corrupt_type_supervised = [2] #mod.corrupt_prob_supervised = 0.3 - mod.choices_corrupt_prob_supervised = [0.0,0.3,0.6,0.9,1] + mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] #mod.choices_corrupt_prob_supervised = [0.3] + mod.corrupt_type_bandit = 1 mod.corrupt_prob_bandit = 0.0 @@ -396,15 +432,22 @@ def main_loop(mod): mod.choices_lambda = 2 mod.weighting_scheme = 1 - mod.lambda_scheme = 3 + #mod.lambda_scheme = 3 + #mod.warm_start_type = 1 mod.no_exploration = False mod.cover_on = False mod.epsilon_on = True #mod.plot_color = 'r' #mod.plot_flat = False - mod.critical_size_ratios = [184 * pow(2, -i) for i in range(8) ] - mod.learning_rates = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0] + mod.critical_size_ratios = [184 * pow(2, -i) for i in range(7) ] + + if args.num_learning_rates == 1: + mod.learning_rates = [0.5] + elif args.num_learning_rates == 3: + mod.learning_rates = [0.1, 0.3, 1.0] + else: + mod.learning_rates = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0] #pow(2,i) for i in range(-5, 7) #for correctness test @@ -412,14 +455,18 @@ def main_loop(mod): #choices_fprob1 = [0.1] #choices_fprob2 = [0.1] + print 'reading dataset files..' mod.dss = ds_files(mod.ds_path) + print len(mod.dss) #mod.dss = ["ds_223_63.vw.gz"] #mod.dss = mod.dss[:5] + print 'generating tasks..' 
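# The task assignment that follows is a simple round-robin: config i is handled
# by task (i % num_tasks). A self-contained sketch with stand-in configs:
def configs_for_task(config_all, task_id, num_tasks):
    return [c for i, c in enumerate(config_all) if i % num_tasks == task_id]

print(configs_for_task(range(10), 1, 4))   # -> [1, 5, 9]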
# here, we are generating the task specific parameter settings # by first generate all parameter setting and pick every num_tasks of them mod.config_task = ds_per_task(mod) print 'task ' + str(mod.task_id) + ' of ' + str(mod.num_tasks) + ':' + print len(mod.config_task) #print mod.ds_task From 0f6e8dbda76e3dc8b9a13ed9194803fca3e2a4f7 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Wed, 9 May 2018 02:01:53 -0400 Subject: [PATCH 071/127] a new parameter enumeration scheme --- scripts/plot_warm_start.py | 560 ++++++++++++++++++------------------- 1 file changed, 278 insertions(+), 282 deletions(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 5f44d5312ad..5eeecb6acbb 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -18,30 +18,33 @@ def __init__(self): self.no_supervised = False def collect_stats(mod): + avg_error_value = avg_error(mod) + actual_var_value = actual_var(mod) + ideal_var_value = ideal_var(mod) - vw_output_filename = mod.vw_output_filename - # using progress parameter - # num_rows = mod.bandit / mod.progress - #print vw_output_filename + vw_run_results = [] + vw_result_template = { + 'bandit_size': 0, + 'bandit_supervised_size_ratio': 0, + 'avg_error': 0.0, + 'actual_variance': 0.0, + 'ideal_variance': 0.0 + } + + if mod.compute_optimal is True: + vw_result = vw_result_template.copy() + if 'optimal_approx' in mod.param: + # this condition is for computing the optimal error + vw_result['avg_error'] = avg_error_value + else: + # this condition is for computing the majority error + err = 1 - float(mod.result['majority_size']) / mod.result['total_size'] + vw_result['avg_error'] = float('%0.5f' % err) + vw_run_results.append(vw_result) + return vw_run_results + + f = open(mod.vw_output_filename, 'r') - avg_error_value = avg_error(mod) - mod.actual_var = actual_var(mod) - mod.ideal_var = ideal_var(mod) - - #avg_loss = [] - #last_loss = [] - #wt = [] - #end_table = False - - if mod.choices_lambda == 0: - mod.avg_loss = avg_error_value - mod.bandit_effective = 0 - mod.ratio = 0 - record_result(mod) - return - - f = open(vw_output_filename, 'r') - #linenumber = 0 i = 0 for line in f: vw_progress_pattern = '\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+[a-zA-Z0-9]+\s+[a-zA-Z0-9]+\s+\d+.*' @@ -54,84 +57,81 @@ def collect_stats(mod): avg_loss_str, last_loss_str, counter_str, weight_str, curr_label_str, \ curr_pred_str, curr_feat_str = s - #avg_loss.append(float(avg_loss_str)) - #last_loss.append(float(last_loss_str)) - #wt.append(float(weight_str)) - - mod.avg_loss = float(avg_loss_str) - mod.bandit_effective = int(float(weight_str)) - - for mod.ratio in mod.critical_size_ratios: - if mod.bandit_effective >= (1 - 1e-7) * mod.warm_start * mod.ratio and \ - mod.bandit_effective <= (1 + 1e-7) * mod.warm_start * mod.ratio: - record_result(mod) - - - #linenumber += 1 - + avg_loss = float(avg_loss_str) + bandit_effective = int(float(weight_str)) + + for ratio in mod.critical_size_ratios: + if bandit_effective >= (1 - 1e-7) * mod.result['warm_start'] * ratio and \ + bandit_effective <= (1 + 1e-7) * mod.result['warm_start'] * ratio: + vw_result = vw_result_template.copy() + vw_result['bandit_size'] = bandit_effective + vw_result['bandit_supervised_size_ratio'] = ratio + vw_result['avg_error'] = avg_loss + vw_result['actual_variance'] = actual_var_value + vw_result['ideal_variance'] = ideal_var_value + vw_run_results.append(vw_result) f.close() - - #if len(avg_loss) == 0: - # avg_loss = [0] - # last_loss = [0] - # wt = [0] - #return avg_loss, 
last_loss, wt - -def record_result(mod): - problem_params_trailer = [mod.bandit_effective, mod.ratio] - config_name = mod.problem_params + problem_params_trailer + mod.alg_params - - list_results = [mod.avg_loss, mod.actual_var, mod.ideal_var] - - row = config_name + list_results - - summary_file = open(mod.summary_file_name, 'a') - summary_file.write( disperse(row, '\t') + '\n') - summary_file.close() - + return vw_run_results + + +def gen_vw_options_list(vw_options): + vw_options_list = [] + for k, v in vw_options.iteritems(): + vw_options_list.append('--'+str(k)) + vw_options_list.append(str(v)) + return vw_options_list + +def gen_vw_options(mod): + vw_options = {} + vw_options['data'] = mod.data_full_path + vw_options['progress'] = mod.result['progress'] + + if 'optimal_approx' in mod.param: + vw_options['passes'] = 5 + vw_options['oaa'] = mod.result['num_classes'] + vw_options['cache_file'] = mod.param['data'] + '.cache' + elif 'majority_approx' in mod.param: + pass + else: + vw_options['corrupt_type_bandit'] = mod.corrupt_type_bandit + vw_options['corrupt_prob_bandit'] = mod.corrupt_prob_bandit + vw_options['validation_method'] = mod.validation_method + vw_options['weighting_scheme'] = mod.weighting_scheme + vw_options['bandit'] = mod.bandit + + if mod.adf_on is True: + vw_options['cb_explore_adf'] = ' ' + else: + vw_options['cb_explore'] = mod.num_classes + + if mod.epsilon_on is True: + vw_options['epsilon'] = mod.epsilon + + vw_options['cb_type'] = mod.param['cb_type'] + vw_options['choices_lambda'] = mod.param['choices_lambda'] + vw_options['corrupt_type_supervised'] = mod.param['corrupt_type_supervised'] + vw_options['corrupt_prob_supervised'] = mod.param['corrupt_prob_supervised'] + vw_options['lambda_scheme'] = mod.param['lambda_scheme'] + if mod.param['no_supervised'] is True: + vw_options['no_supervised'] = ' ' + if mod.param['no_bandit'] is True: + vw_options['no_bandit'] = ' ' + vw_options['learning_rate'] = mod.param['learning_rate'] + vw_options['warm_start_type'] = mod.param['warm_start_type'] + + vw_options['cbify'] = mod.result['num_classes'] + vw_options['warm_start'] = mod.result['warm_start'] + vw_options['overwrite_label'] = mod.result['majority_class'] + + #if mod.cover_on: + # alg_option += ' --cover 5 --psi 0.01 --nounif ' + #mod.cb_type = 'dr' + return vw_options def execute_vw(mod): - - alg_option = ' ' - if mod.adf_on: - alg_option += ' --cb_explore_adf ' - else: - alg_option += ' --cb_explore ' + str(mod.num_classes) + ' ' - - if mod.cover_on: - alg_option += ' --cover 5 --psi 0.01 --nounif ' - #mod.cb_type = 'dr' - if mod.epsilon_on: - alg_option += ' --epsilon ' + str(mod.epsilon) + ' ' - if mod.no_bandit: - alg_option += ' --no_bandit ' - if mod.no_supervised: - alg_option += ' --no_supervised ' - #if mod.no_exploration: - # alg_option += ' --epsilon 0.0 ' - #if mod.cb_type == 'mtr': - # mod.adf_on = True; - - if mod.choices_lambda == 0: - cmd_vw = mod.vw_path + ' --oaa ' + str(mod.num_classes) + ' --passes 5 ' \ - + ' --progress ' + str(mod.progress) + ' -d ' \ - + mod.ds_path + mod.dataset \ - + ' --cache_file ' + mod.results_path + mod.dataset + '.cache' - else: - cmd_vw = mod.vw_path + ' --cbify ' + str(mod.num_classes) + ' --cb_type ' + str(mod.cb_type) + ' --warm_start ' + str(mod.warm_start) + ' --bandit ' + str(mod.bandit) + ' --choices_lambda ' + str(mod.choices_lambda) + alg_option + ' --progress ' + str(mod.progress) \ - + ' -d ' + mod.ds_path + mod.dataset \ - + ' --corrupt_type_supervised ' + str(mod.corrupt_type_supervised) \ - + ' 
--corrupt_prob_supervised ' + str(mod.corrupt_prob_supervised) \ - + ' --corrupt_type_bandit ' + str(mod.corrupt_type_bandit) \ - + ' --corrupt_prob_bandit ' + str(mod.corrupt_prob_bandit) \ - + ' --validation_method ' + str(mod.validation_method) \ - + ' --weighting_scheme ' + str(mod.weighting_scheme) \ - + ' --lambda_scheme ' + str(mod.lambda_scheme) \ - + ' --learning_rate ' + str(mod.learning_rate) \ - + ' --overwrite_label ' + str(mod.majority_class) \ - + ' --warm_start_type ' + str(mod.warm_start_type) - - cmd = cmd_vw + vw_options = gen_vw_options(mod) + vw_options_list = gen_vw_options_list(vw_options) + cmd = disperse([mod.vw_path]+vw_options_list, ' ') print cmd f = open(mod.vw_output_filename, 'w') @@ -140,23 +140,6 @@ def execute_vw(mod): process.wait() f.close() -''' -def plot_errors(mod): - #avg_loss, last_loss, wt = - if mod.plot_flat: - # for supervised only, we simply plot a horizontal line using the last point - len_avg_loss = len(avg_loss) - avg_loss = avg_loss[len_avg_loss-1] - avg_loss = [avg_loss for i in range(len_avg_loss)] - - #line = plt.plot(wt, avg_loss, mod.plot_color, label=(mod.plot_label)) - avg_error_value = avg_error(mod) - actual_var_value = actual_var(mod) - ideal_var_value = ideal_var(mod) - - return avg_error_value, actual_var_value, ideal_var_value -''' - def disperse(l, ch): s = '' for item in l: @@ -164,132 +147,169 @@ def disperse(l, ch): s += ch return s +def param_to_str(param): + param_list = [str(k)+'='+str(v) for k,v in param.iteritems() ] + return disperse(param_list, '_') + +def param_to_result(param, result): + for k, v in param.iteritems(): + if k in result: + result[k] = v def gen_comparison_graph(mod): + mod.result = mod.result_template.copy() - mod.num_lines = get_num_lines(mod.ds_path+mod.dataset) - mod.majority_class = get_majority_class(mod.ds_path+mod.dataset) - mod.progress = int(math.ceil(float(mod.num_lines) / float(mod.num_checkpoints))) - mod.warm_start = mod.warm_start_multiplier * mod.progress - mod.bandit = mod.num_lines - mod.warm_start - mod.num_classes = get_num_classes(mod.dataset) + if 'majority_approx' in mod.param or 'optimal_approx' in mod.param: + mod.compute_optimal = True + else: + mod.compute_optimal = False - mod.problem_params = [mod.dataset, mod.num_classes, mod.num_lines, \ - mod.corrupt_type_supervised, mod.corrupt_prob_supervised, \ - mod.corrupt_type_bandit, mod.corrupt_prob_bandit, \ - mod.warm_start] + param_to_result(mod.param, mod.result) + mod.data_full_path = mod.ds_path + mod.param['data'] - mod.alg_params = [ mod.cb_type, \ - mod.validation_method, mod.weighting_scheme, \ - mod.lambda_scheme, mod.choices_lambda, \ - mod.no_supervised, mod.no_bandit, mod.warm_start_type, mod.learning_rate] + mod.result['total_size'] = get_num_lines(mod.data_full_path) + mod.result['num_classes'] = get_num_classes(mod.data_full_path) + mod.result['majority_size'], mod.result['majority_class'] = get_majority_class(mod.data_full_path) + mod.result['progress'] = int(math.ceil(float(mod.result['total_size']) / float(mod.num_checkpoints))) + mod.vw_output_filename = mod.results_path + param_to_str(mod.param) + '.txt' - mod.vw_output_filename = mod.results_path + disperse(mod.problem_params+mod.alg_params, '_') + '.txt' + if mod.compute_optimal is False: + mod.result['warm_start'] = mod.param['warm_start_multiplier'] * mod.result['progress'] + mod.bandit = mod.result['total_size'] - mod.result['warm_start'] #plot_errors(mod) execute_vw(mod) - collect_stats(mod) + vw_run_results = collect_stats(mod) + for vw_result in 
vw_run_results: + result_combined = merge_two_dicts(mod.result, vw_result) + result_formatted = format_result(mod.result_template, result_combined) + record_result(mod, result_formatted) print('') +def format_result(result_template, result): + result_formatted = result_template.copy() + for k, v in result.iteritems(): + result_formatted[k] = v + return result_formatted + +def record_result(mod, result): + result_row = [] + for k in mod.result_header_list: + result_row.append(result[k]) + + summary_file = open(mod.summary_file_name, 'a') + summary_file.write( disperse(result_row, '\t') + '\n') + summary_file.close() + def ds_files(ds_path): prevdir = os.getcwd() os.chdir(ds_path) dss = sorted(glob.glob('*.vw.gz')) + #dss = [ds_path+ds for ds in dss] os.chdir(prevdir) return dss - -def get_num_classes(ds): - did, n_actions = os.path.basename(ds).split('.')[0].split('_')[1:] - did, n_actions = int(did), int(n_actions) - return n_actions - -def flatten(l): - out = [] - for item in l: - if isinstance(item, (list, tuple)): - out.extend(flatten(item)) - else: - out.append(item) - return tuple(out) - -def flatten_all(l): - out = [] - for item in l: - flattened = flatten(item) - if len(flattened) != 11: - print flattened - out.append(flattened) - return out - - -def ds_per_task(mod): - # put dataset name to the last coordinate so that the task workloads tend to be - # allocated equally - - # put dataset name to the first coordinate so that the result production order is - # in accordance with dataset order - - config_corrupt_sup_raw = list(product(mod.choices_corrupt_type_supervised, mod.choices_corrupt_prob_supervised)) - config_corrupt_sup = filter(lambda (type, prob): type == 1 or abs(prob) > 1e-4, config_corrupt_sup_raw) - - config_problem = list(product(mod.dss, config_corrupt_sup, mod.choices_cb_types, mod.warm_start_multipliers)) - +def merge_two_dicts(x, y): + #print 'x = ', x + #print 'y = ', y + z = x.copy() # start with x's keys and values + z.update(y) # modifies z with y's keys and values & returns None + return z + +def param_cartesian(param_set_1, param_set_2): + prod = [] + for param_1 in param_set_1: + for param_2 in param_set_2: + prod.append(merge_two_dicts(param_1, param_2)) + return prod + +def param_cartesian_multi(param_sets): + #print param_sets + prod = [{}] + for param_set in param_sets: + prod = param_cartesian(prod, param_set) + return prod + +def dictify(param_name, param_choices): + result = [] + for param in param_choices: + dic = {} + dic[param_name] = param + result.append(dic) + return result + +def params_per_task(mod): + # Problem parameters + params_corrupt_type_sup = dictify('corrupt_type_supervised', mod.choices_corrupt_type_supervised) + params_corrupt_prob_sup = dictify('corrupt_prob_supervised', mod.choices_corrupt_prob_supervised) + params_warm_start_multiplier = dictify('warm_start_multiplier', mod.warm_start_multipliers) + params_learning_rate = dictify('learning_rate', mod.learning_rates) + + # Algorithm parameters + params_cb_type = dictify('cb_type', mod.choices_cb_type) + + # Common parameters + params_common = param_cartesian_multi([params_corrupt_type_sup, params_corrupt_prob_sup, params_warm_start_multiplier, params_learning_rate, params_cb_type]) + params_common = filter(lambda param: param['corrupt_type_supervised'] == 1 or abs(param['corrupt_prob_supervised']) > 1e-4, params_common) + + # Baseline parameters construction if mod.baselines_on: - #config_baselines_raw = list(product([1], [True, False], [True, False], [1], [3])) - 
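# A minimal sketch (toy values, assuming the dictify / merge_two_dicts /
# param_cartesian_multi helpers defined above) of how the new enumeration
# composes parameter grids:
#
#     lrs = dictify('learning_rate', [0.1, 0.3])
#     cbt = dictify('cb_type', ['mtr'])
#     param_cartesian_multi([lrs, cbt])
#     # -> [{'learning_rate': 0.1, 'cb_type': 'mtr'},
#     #     {'learning_rate': 0.3, 'cb_type': 'mtr'}]
#
# Each output dict is one complete configuration fragment, which is why the
# baseline and algorithm variants below can be expressed as small dict lists.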
#config_baselines_solution = filter(lambda (x1, x2, x3, x4): x2 == True or x3 == True, config_baselines_raw) - config_baselines_solution = [(1, True, True, 1, 3), (1, True, False, 1, 3), (1, False, True, 1, 3)] - config_baselines = list(product(*[config_problem, config_baselines_solution, mod.learning_rates])) - config_baselines = flatten_all(config_baselines) + params_baseline_basic = [ + [{'choices_lambda': 1, 'warm_start_type': 1, 'lambda_scheme': 3}], [{'no_supervised': True}, {'no_supervised': False}], [{'no_bandit': True}, {'no_bandit': False}] + ] + params_baseline = param_cartesian_multi([params_common] + params_baseline_basic) + params_baseline = filter(lambda param: param['no_supervised'] == True or param['no_bandit'] == True, params_baseline) else: - config_baselines = [] + params_baseline = [] + + # Algorithm parameters construction if mod.algs_on: - config_algs_solution_1 = list(product(mod.choices_choices_lambda, [False], [False], [1], [3])) - config_algs_solution_2 = [(1, False, False, 2, 1)] - config_algs_solution = config_algs_solution_1 + config_algs_solution_2 - config_algs = list(product(*[config_problem, config_algs_solution, mod.learning_rates])) - config_algs = flatten_all(config_algs) + params_choices_lambd = dictify('choices_lambda', mod.choices_choices_lambda) + params_algs_1 = param_cartesian(params_choices_lambd, [{'no_supervised': False, 'no_bandit': False, 'warm_start_type': 1, 'lambda_scheme': 3}] ) + params_algs_2 = [{'no_supervised': False, 'no_bandit': False, 'warm_start_type': 2, 'lambda_scheme': 1, 'choices_lambda':1}] + params_algs = param_cartesian( params_common, params_algs_1 + params_algs_2 ) else: - config_algs = [] + params_algs = [] + # Optimal baselines parameter construction if mod.optimal_on: - config_optimal_problem = product(mod.dss, [(1, 0)], ['mtr'], [1]) - config_optimal_solution = [(0, False, False, 1, 1)] - config_optimal = list(product(*[config_optimal_problem, config_optimal_solution, [0.5]])) - config_optimal = flatten_all(config_optimal) + params_optimal = [{ 'optimal_approx': True }, { 'majority_approx': True }] else: - config_optimal = [] + params_optimal = [] - #print len(config_problem) - #print len(config_baselines) - #print len(config_algs) - #print len(config_optimal) - #raw_input(' ') + # Common factor in all 3 groups: dataset + params_dataset = dictify('data', mod.dss) + params_all = param_cartesian( params_dataset, params_baseline + params_algs + params_optimal ) + params_all = sorted(params_all) + print len(params_all) + for row in params_all: + print row + return get_params_task(params_all) - config_all = config_baselines + config_algs + config_optimal - config_all = sorted(config_all) - #config_all = sorted(config_all, key=lambda a: str(a)) - config_task = [] - print len(config_all) - for i in range(len(config_all)): +def get_params_task(params_all): + params_task = [] + for i in range(len(params_all)): if (i % mod.num_tasks == mod.task_id): - config_task.append(config_all[i]) - #print config_all[i] - - return config_task + params_task.append(params_all[i]) + return params_task def get_num_lines(dataset_name): - ps = subprocess.Popen(('zcat', dataset_name), stdout=subprocess.PIPE) - output = subprocess.check_output(('wc', '-l'), stdin=ps.stdout) - ps.wait() - return int(output) + num_lines = subprocess.check_output(('zcat ' + dataset_name + ' | wc -l'), shell=True) + return int(num_lines) + +def get_num_classes(ds): + # could be a bug for including the prefix.. 
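# (For comparison with get_num_lines above: the same count can be taken
#  in-process with the gzip module, avoiding shell=True -- a sketch, assuming
#  the .vw.gz inputs are gzip-compressed:
#      import gzip
#      num_lines = sum(1 for line in gzip.open(dataset_name))
#  )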
+ did, n_actions = os.path.basename(ds).split('.')[0].split('_')[1:] + did, n_actions = int(did), int(n_actions) + return n_actions def get_majority_class(dataset_name): - maj_class = subprocess.check_output(('zcat '+ dataset_name +' | cut -d \' \' -f 1 | sort | uniq -c | sort -r | head -1 | xargs | cut -d \' \' -f 2 '), shell=True) - return int(maj_class) + maj_class_str = subprocess.check_output(('zcat '+ dataset_name +' | cut -d \' \' -f 1 | sort | uniq -c | sort -r | head -1 | xargs '), shell=True) + maj_size, maj_class = maj_class_str.split() + return int(maj_size), int(maj_class) def avg_error(mod): return vw_output_extract(mod, 'average loss') @@ -320,44 +340,56 @@ def vw_output_extract(mod, pattern): vw_output.close() return avge - -def main_loop(mod): - mod.summary_file_name = mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum' +def write_summary_header(mod): summary_file = open(mod.summary_file_name, 'w') - - list_header = ['dataset', 'num_classes', 'total_size', \ - 'corrupt_type_supervised', 'corrupt_prob_supervised', \ - 'corrupt_type_bandit', 'corrupt_prob_bandit', \ - 'warm_start_size', 'bandit_size', 'bandit_supervised_size_ratio', \ - 'cb_type', 'validation_method', 'weighting_scheme', \ - 'lambda_scheme', 'choices_lambda', \ - 'no_supervised', 'no_bandit', \ - 'warm_start_type', 'learning_rate', \ - 'avg_error', 'actual_variance', \ - 'ideal_variance'] - - summary_header = disperse(list_header, '\t') - + summary_header = disperse(mod.result_header_list, '\t') summary_file.write(summary_header+'\n') summary_file.close() - for (mod.dataset, mod.corrupt_type_supervised, mod.corrupt_prob_supervised, \ - mod.cb_type, mod.warm_start_multiplier, \ - mod.choices_lambda, \ - mod.no_supervised, mod.no_bandit, mod.warm_start_type, mod.lambda_scheme, mod.learning_rate) in mod.config_task: +def main_loop(mod): + mod.summary_file_name = mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum' + mod.result_template_list = [ + 'data', 'ds', + 'num_classes', 0, + 'total_size' , 0, + 'majority_size', 0, + 'corrupt_type_supervised', 0, + 'corrupt_prob_supervised', 0.0, + 'corrupt_type_bandit', 0, + 'corrupt_prob_bandit', 0.0, + 'warm_start', 0, + 'bandit_size', 0, + 'bandit_supervised_size_ratio', 0, + 'cb_type', 'mtr', + 'validation_method', 0, + 'weighting_scheme', 0, + 'lambda_scheme', 0, + 'choices_lambda', 0, + 'no_supervised', False, + 'no_bandit', False, + 'warm_start_type', 0, + 'learning_rate', 0.0, + 'optimal_approx', False, + 'majority_approx', False, + 'avg_error', 0.0, + 'actual_variance', 0.0, + 'ideal_variance', 0.0 ] + + num_cols = len(mod.result_template_list)/2 + mod.result_header_list = [ mod.result_template_list[2*i] for i in range(num_cols) ] + mod.result_template = dict([ (mod.result_template_list[2*i], mod.result_template_list[2*i+1]) for i in range(num_cols) ]) + + write_summary_header(mod) + for mod.param in mod.config_task: gen_comparison_graph(mod) - if __name__ == '__main__': parser = argparse.ArgumentParser(description='vw job') parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') parser.add_argument('num_tasks', type=int) parser.add_argument('--results_dir', default='../../../figs/') parser.add_argument('--ds_dir', default='../../../vwshuffled/') - parser.add_argument('--num_learning_rates', type=int) - parser.add_argument('--warm_start_fraction', type=float) - parser.add_argument('--corrupt_prob_supervised', type=float) - parser.add_argument('--corrupt_prob_bandit',type=float) + 
parser.add_argument('--num_learning_rates', type=int, default=1) args = parser.parse_args() @@ -371,9 +403,9 @@ def main_loop(mod): time.sleep(1) mod = model() - mod.baselines_on = True + mod.baselines_on = False mod.algs_on = True - mod.optimal_on = True + mod.optimal_on = False mod.num_tasks = args.num_tasks mod.task_id = args.task_id @@ -382,47 +414,22 @@ def main_loop(mod): mod.ds_path = args.ds_dir mod.results_path = args.results_dir - #DIR_PATTERN = '../results/cbresults_{}/' - mod.num_checkpoints = 200 - #mod.warm_start = 50 - #mod.bandit = 4096 - #mod.num_classes = 10 - #mod.cb_type = 'mtr' #'ips' - #mod.choices_lambda = 10 - #mod.progress = 25 + mod.adf_on = True # use fractions instead of absolute numbers - mod.warm_start_multipliers = [pow(2,i) for i in range(4)] - #mod.warm_start_multipliers = [2*pow(4, i) for i in range(3)]] - - #mod.choices_warm_start = [0.01 * pow(2, i) for i in range(5)] - #mod.choices_bandit = [0.01 * pow(2, i) for i in range(5)] - - #mod.choices_warm_start = [pow(2,i) for i in range(11)] #put it here in order to plot 2d mesh - # we are implicitly iterating over the bandit sample sizes - #choices_fprob1 = [0.1, 0.2, 0.3] - #choices_fprob2 = [0.1, 0.2, 0.3] - #choices_cb_types = ['mtr', 'ips'] - #mod.choices_cb_types = ['mtr', 'ips'] - mod.choices_cb_types = ['mtr'] - #mod.choices_no_supervised = [False, True] - #mod.choices_no_bandit = [False, True] - #mod.choices_choices_lambda = [2, 4, 8] - mod.choices_choices_lambda = [2,4,8] - #mod.choices_choices_lambda = [i for i in range(1,3)] - #mod.choices_choices_lambda = [i for i in range(1,2)] - #mod.choices_choices_lambda = [1, 3, 5, 7] - #[i for i in range(10,11)] - #mod.corrupt_type_supervised = 2 - #mod.corrupt_prob_supervised = 0.3 - mod.choices_corrupt_type_supervised = [1,2,3] - #mod.choices_corrupt_type_supervised = [2] - #mod.corrupt_prob_supervised = 0.3 - mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] - #mod.choices_corrupt_prob_supervised = [0.3] + #mod.warm_start_multipliers = [pow(2,i) for i in range(4)] + mod.warm_start_multipliers = [pow(2,i) for i in range(1)] + + mod.choices_cb_type = ['mtr'] + #mod.choices_choices_lambda = [2,4,8] + mod.choices_choices_lambda = [2] + #mod.choices_corrupt_type_supervised = [1,2,3] + #mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] + mod.choices_corrupt_type_supervised = [1,2] + mod.choices_corrupt_prob_supervised = [0.0,0.5] mod.corrupt_type_bandit = 1 mod.corrupt_prob_bandit = 0.0 @@ -432,14 +439,9 @@ def main_loop(mod): mod.choices_lambda = 2 mod.weighting_scheme = 1 - #mod.lambda_scheme = 3 - #mod.warm_start_type = 1 - mod.no_exploration = False - mod.cover_on = False mod.epsilon_on = True - #mod.plot_color = 'r' - #mod.plot_flat = False + mod.critical_size_ratios = [184 * pow(2, -i) for i in range(7) ] if args.num_learning_rates == 1: @@ -449,12 +451,6 @@ def main_loop(mod): else: mod.learning_rates = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0] - #pow(2,i) for i in range(-5, 7) - #for correctness test - #mod.choices_warm_start = [20] - #choices_fprob1 = [0.1] - #choices_fprob2 = [0.1] - print 'reading dataset files..' mod.dss = ds_files(mod.ds_path) print len(mod.dss) @@ -464,7 +460,7 @@ def main_loop(mod): print 'generating tasks..' 
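# Note on the checkpointing math: collect_stats keeps a VW progress row only
# when its cumulative example count hits warm_start * ratio for one of the
# critical_size_ratios, tested with a relative tolerance, e.g.
#     target = warm_start * ratio
#     (1 - 1e-7) * target <= bandit_effective <= (1 + 1e-7) * target
# The 1e-7 band is just a float-safe equality check on the progress weights.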
# here, we are generating the task specific parameter settings # by first generate all parameter setting and pick every num_tasks of them - mod.config_task = ds_per_task(mod) + mod.config_task = params_per_task(mod) print 'task ' + str(mod.task_id) + ' of ' + str(mod.num_tasks) + ':' print len(mod.config_task) From a32c2e7c0925df76ede6addded3630804d64b4f4 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Thu, 10 May 2018 21:55:26 -0400 Subject: [PATCH 072/127] . --- scripts/alg_comparison.py | 251 +++++++++++++++++++++---------------- scripts/plot_warm_start.py | 105 +++++++++++----- 2 files changed, 215 insertions(+), 141 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index 3d810373075..ed6504fbbea 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -16,14 +16,6 @@ class model: def __init__(self): pass -# this part is changable -#alg1 = 'epsilon' -#alg2 = 'cover' -#alg1 = 'choices_lambda_1' -#alg2 = 'choices_lambda_5' -#alg1 = 'instance weighting' -#alg2 = 'dataset weighting' - def sum_files(result_path): prevdir = os.getcwd() os.chdir(result_path) @@ -81,13 +73,9 @@ def plot_comparison(errors_1, errors_2, sizes): plt.scatter(results_insigni_1, results_insigni_2, s=2, c='k') -def normalized_score(lst, l): - #print lst - #l = min(lst) - u = max(lst) - return [ (item - l) / (u - l + 1e-4) for item in lst ] - def alg_str(alg_name): + if (alg_name[0] == 0): + return 'majority_class' if (alg_name[0] == 2): return 'supervised_underutil_as_bandit' if (alg_name[2] == True and alg_name[3] == True): @@ -108,24 +96,27 @@ def problem_str(name_problem): def plot_cdf(alg_name, errs): - plt.step(np.sort(errs), np.linspace(0, 1, len(errs), endpoint=False), label=alg_str(alg_name)) - print alg_name print errs print len(errs) + + plt.step(np.sort(errs), np.linspace(0, 1, len(errs), endpoint=False), label=alg_str(alg_name)) + + #raw_input("Press Enter to continue...") def plot_all_cdfs(alg_results, mod): #plot all cdfs: + print 'printing cdfs..' 
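# The plt.step call above draws an empirical CDF: sorting the per-dataset
# errors and pairing them with np.linspace(0, 1, n, endpoint=False) plots,
# at each error value x, the fraction of datasets with error below x, so
# curves closer to the upper left are better. A self-contained toy sketch:
#     import numpy as np
#     import matplotlib.pyplot as plt
#     errs = [0.40, 0.05, 0.25, 0.12]
#     plt.step(np.sort(errs), np.linspace(0, 1, len(errs), endpoint=False))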
i = 0 for alg_name, errs in alg_results.iteritems(): - if (alg_name[2] == False and alg_name[3] == False and alg_name[1] != 8): - pass - else: - plot_cdf(alg_name, errs) + plot_cdf(alg_name, errs) plt.legend() - plt.xlim(-0.2,1) + if mod.normalize_type == 1: + plt.xlim(-0.2,1) + elif mod.normalize_type == 2: + plt.xlim(-1,1) plt.ylim(0,1) plt.savefig(mod.problemdir+'/cdf.png') plt.clf() @@ -154,8 +145,19 @@ def init_results(result_table): for idx, row in result_table.iterrows(): alg_name = (row['warm_start_type'], row['choices_lambda'], row['no_supervised'], row['no_bandit']) alg_results[alg_name] = [] + + alg_results[(0, 0, False, False)] = [] return alg_results +def normalize_score(unnormalized_result, mod): + if mod.normalize_type == 1: + l = get_best_error(mod.best_error_table, mod.name_dataset) + u = max(unnormalized_result.values()) + return { k : ((v - l) / (u - l + 1e-4)) for k, v in unnormalized_result.iteritems() } + elif mod.normalize_type == 2: + l = unnormalized_result[(1, 1, True, False)] + return { k : ((v - l) / (l + 1e-4)) for k, v in unnormalized_result.iteritems() } + def get_best_error(best_error_table, name_dataset): name = name_dataset[0] best_error_oneline = best_error_table[best_error_table['dataset'] == name] @@ -168,6 +170,36 @@ def get_best_error(best_error_table, name_dataset): #raw_input("...") return best_error +def get_maj_error(maj_error_table, name_dataset): + name = name_dataset[0] + maj_error_oneline = maj_error_table[maj_error_table['data'] == name] + maj_error = maj_error_oneline.loc[maj_error_oneline.index[0], 'avg_error'] + return maj_error + +#normalized_results[alg_name].append(normalized_errs[i]) +#errs = [] +#for idx, row in result_table.iterrows(): +# errs.append(row['avg_error']) + +def get_unnormalized_results(result_table): + new_unnormalized_results = {} + new_size = 0 + + i = 0 + for idx, row in result_table.iterrows(): + if i == 0: + new_size = row['bandit_size'] + + if row['bandit_size'] == new_size: + alg_name = (row['warm_start_type'], row['choices_lambda'], row['no_supervised'], row['no_bandit']) + new_unnormalized_results[alg_name] = row['avg_error'] + i += 1 + + return new_size, new_unnormalized_results + +def update_result_dict(results_dict, new_result): + for k, v in new_result.iteritems(): + results_dict[k].append(v) def plot_all(mod, all_results): @@ -187,7 +219,20 @@ def plot_all(mod, all_results): #(False, False, 8), and compute a normalized score for name_dataset, group_dataset in grouped_by_dataset: - result_table = group_dataset #group_dataset.groupby(['choices_lambda','no_supervised', 'no_bandit']) + result_table = group_dataset + + grouped_by_algorithm = group_dataset.groupby(['warm_start_type', 'choices_lambda', 'no_supervised', 'no_bandit']) + + mod.name_dataset = name_dataset + #The 'learning_rate' would be the only free degree here now. Taking the + #min aggregation will give us the 7 algorithms we are evaluating. 
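# A toy sketch of the groupby-min below (pandas assumed): grouping on the
# algorithm key and taking .min() picks, per algorithm, the smallest value
# of each remaining column -- in particular the best avg_error over the
# learning-rate sweep:
#     df = pd.DataFrame({'alg': ['a', 'a', 'b'],
#                        'learning_rate': [0.1, 0.3, 0.1],
#                        'avg_error': [0.30, 0.20, 0.25]})
#     df.groupby('alg').min().reset_index()   # 'a' keeps avg_error 0.20
# Note that .min() is taken column-wise, so the reported learning_rate is the
# minimum rate, not necessarily the rate that achieved the minimum error.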
+ result_table = grouped_by_algorithm.min() + result_table = result_table.reset_index() + + #print result_table + + + #group_dataset.groupby(['choices_lambda','no_supervised', 'no_bandit']) #first time - generate names of algorithms considered if normalized_results is None: @@ -199,26 +244,16 @@ def plot_all(mod, all_results): #dummy = input('') #in general (including the first time) - record the error rates of all algorithms - - err_best = get_best_error(mod.best_error_table, name_dataset) - errs = [] - for idx, row in result_table.iterrows(): - errs.append(row['avg_error']) - normalized_errs = normalized_score(errs, err_best) - #print result_table - i = 0 - for idx, row in result_table.iterrows(): - if i == 0: - temp_size = row['bandit_size'] - sizes.append(row['bandit_size']) + new_size, new_unnormalized_result = get_unnormalized_results(result_table) + new_unnormalized_result[(0, 0, False, False)] = get_maj_error(mod.maj_error_table, mod.name_dataset) + + new_normalized_result = normalize_score(new_unnormalized_result, mod) - if row['bandit_size'] == temp_size: - alg_name = (row['warm_start_type'], row['choices_lambda'], row['no_supervised'], row['no_bandit']) - unnormalized_results[alg_name].append(errs[i]) - normalized_results[alg_name].append(normalized_errs[i]) - i += 1 + update_result_dict(unnormalized_results, new_unnormalized_result) + update_result_dict(normalized_results, new_normalized_result) + sizes.append(new_size) #print 'sizes:' #print len(sizes) @@ -229,47 +264,28 @@ def plot_all(mod, all_results): if not os.path.exists(mod.problemdir): os.makedirs(mod.problemdir) - #plot_all_pair_comp(unnormalized_results, sizes, mod) - plot_all_cdfs(normalized_results, mod) - - - - - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='result summary') - parser.add_argument('--results_dir', default='../../../figs/') - parser.add_argument('--filter', default='1') - parser.add_argument('--plot_subdir', default='expt1/') - args = parser.parse_args() - - mod = model() + print 'best_errors', mod.best_error_table + print 'unnormalized_results', unnormalized_results + print 'normalized_results', normalized_results - mod.results_dir = args.results_dir - mod.filter = args.filter - mod.plot_subdir = args.plot_subdir + if mod.pair_comp_on is True: + plot_all_pair_comp(unnormalized_results, sizes, mod) + if mod.cdf_on is True: + plot_all_cdfs(normalized_results, mod) - mod.fulldir = mod.results_dir + mod.plot_subdir - if not os.path.exists(mod.fulldir): - os.makedirs(mod.fulldir) +def save_to_hdf(mod): + print 'saving to hdf..' + store = pd.HDFStore('store.h5') + store['result_table'] = mod.all_results + store.close() - #results_dir = '../../../lambdas/' - #results_dir = '../../../warm_start_frac=0.1/' - #results_dir = '../../../cover_vs_epsilon/' - #results_dir = '../../../corrupt_supervised_type1_0.3/' - #results_dir = '../../../expt_0403/corrupt_supervised_type2_0.3/' - #results_dir = '../../../expt_0403/supervised_validation/' - #results_dir = '../../../weighting_schemes/' - #results_dir = '../../../central_lambda/' - #results_dir = '../../../central_lambda_naive/' - #results_dir = '../../../central_lambda_zeroone/' - #results_dir = '../../../type2_0.3/' - #results_dir = '../../../type1_0.3/' - #results_dir = '../../../type2_1/' - #results_dir = '../../../type2_0.65/' - #results_dir = '../../../type2_0.3/' +def load_from_hdf(mod): + print 'reading from hdf..' 
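# pd.HDFStore (backed by PyTables) serves as an on-disk cache of the parsed
# result table, so later plotting runs can skip re-reading every .sum file;
# the whole round trip is dict-style, e.g.
#     store = pd.HDFStore('store.h5')
#     store['result_table'] = all_results   # save
#     cached = store['result_table']        # load
#     store.close()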
+ store = pd.HDFStore('store.h5') + mod.all_results = store['result_table'] + store.close() +def load_from_sum(mod): print 'reading directory..' dss = sum_files(mod.results_dir) print len(dss) @@ -289,15 +305,65 @@ def plot_all(mod, all_results): all_results = all_results.append(result) print all_results + mod.all_results = all_results + + +# This is a hack - need to do this systematically in the future +def load_maj_error(mod): + maj_error_table = parse_sum_file(mod.maj_error_dir) + return maj_error_table + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='result summary') + parser.add_argument('--results_dir', default='../../../figs/') + parser.add_argument('--filter', default='1') + parser.add_argument('--plot_subdir', default='expt1/') + parser.add_argument('--from_hdf', action='store_true') + parser.add_argument('--normalize_type', type=int) + args = parser.parse_args() + + mod = model() + + mod.results_dir = args.results_dir + mod.filter = args.filter + mod.plot_subdir = args.plot_subdir + mod.normalize_type = args.normalize_type + mod.pair_comp_on = False + mod.cdf_on = True + mod.maj_error_dir = '../../../figs_maj_errors/0of1.sum' + + mod.fulldir = mod.results_dir + mod.plot_subdir + if not os.path.exists(mod.fulldir): + os.makedirs(mod.fulldir) + + #print args.from_hdf + #raw_input(' ') + if args.from_hdf is True: + load_from_hdf(mod) + else: + load_from_sum(mod) + save_to_hdf(mod) #first group by corruption mode, then corruption prob #then group by warm start - bandit ratio #these constitutes all the problem settings we are looking at (corresponding #to each cdf graph) + all_results = mod.all_results mod.best_error_table = all_results[all_results['choices_lambda'] == 0] all_results = all_results[all_results['choices_lambda'] != 0] + #ignore the no update row: + all_results = all_results[(all_results['no_supervised'] == False) | (all_results['no_bandit'] == False)] + + #filter choices_lambdas = 2,4,8? 
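# Typical invocations (paths illustrative): a first run parses the .sum files
# and caches them in store.h5, later runs reuse the cache via --from_hdf:
#     python alg_comparison.py --results_dir ../../../figs/ --normalize_type 1
#     python alg_comparison.py --from_hdf --normalize_type 1 --filter 3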
+ #if (alg_name[2] == False and alg_name[3] == False and alg_name[1] != 8): + # pass + #else: + + mod.maj_error_table = load_maj_error(mod) + if mod.filter == '1': pass elif mod.filter == '2': @@ -314,36 +380,3 @@ def plot_all(mod, all_results): #if i >= 331 and i <= 340: # print 'result:', result # print 'all_results:', all_results - - - #result = parse_sum_file(results_dir + '400of600.sum') - #print result - - #choices_choices_lambda = sorted(all_results['choices_lambda'].unique()) - #grouped = all_results.groupby('choices_lambda') - - #for cl, results_lambda in grouped: - #results_lambda = all_results[all_results['choices_lambda'] == cl] - # compare combined w/ supervised - ''' - alg1 = all_results.columns[1] - alg2 = all_results.columns[2] - bandit_only = all_results.columns[3] - supervised_only = all_results.columns[4] - sizes = all_results.columns[5] - - results_alg1 = all_results[alg1].tolist() - results_alg2 = all_results[alg2].tolist() - results_bandit = all_results[bandit_only].tolist() - results_supervised = all_results[supervised_only].tolist() - dataset_sizes = all_results[sizes].tolist() - ''' - #print alg1 - #print results_alg1 - - # compare combined w/ bandit - #plot_comparison(results_alg1, results_bandit, dataset_sizes, alg1 + ' vs ' + 'bandit only', results_dir + alg1 + ' vs ' + 'bandit only' + '.png') - #plot_comparison(results_alg1, results_supervised, dataset_sizes, alg1 + ' vs ' + 'supervised only', results_dir + alg1 + ' vs ' + 'supervised only' + '.png') - #plot_comparison(results_alg2, results_bandit, dataset_sizes, alg2 + ' vs ' + 'bandit only', results_dir + alg2 + ' vs ' + 'bandit only' + '.png') - #plot_comparison(results_alg2, results_supervised, dataset_sizes, alg2 + ' vs ' + 'supervised only', results_dir + alg2 + ' vs ' + 'supervised only' + '.png') - #plot_comparison(results_alg1, results_alg2, dataset_sizes, alg1 + ' vs ' + alg2, results_dir+alg1 + ' vs ' + alg2 + '.png') diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 5eeecb6acbb..0f2a19edd83 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -89,14 +89,15 @@ def gen_vw_options(mod): if 'optimal_approx' in mod.param: vw_options['passes'] = 5 vw_options['oaa'] = mod.result['num_classes'] - vw_options['cache_file'] = mod.param['data'] + '.cache' + vw_options['cache_file'] = mod.data_full_path + '.cache' elif 'majority_approx' in mod.param: - pass + # basically we would like to skip vw running as fast as possible + vw_options['cbify'] = mod.result['num_classes'] + vw_options['warm_start'] = 0 + vw_options['bandit'] = 0 else: vw_options['corrupt_type_bandit'] = mod.corrupt_type_bandit vw_options['corrupt_prob_bandit'] = mod.corrupt_prob_bandit - vw_options['validation_method'] = mod.validation_method - vw_options['weighting_scheme'] = mod.weighting_scheme vw_options['bandit'] = mod.bandit if mod.adf_on is True: @@ -122,6 +123,8 @@ def gen_vw_options(mod): vw_options['cbify'] = mod.result['num_classes'] vw_options['warm_start'] = mod.result['warm_start'] vw_options['overwrite_label'] = mod.result['majority_class'] + vw_options['validation_method'] = mod.result['validation_method'] + vw_options['weighting_scheme'] = mod.result['weighting_scheme'] #if mod.cover_on: # alg_option += ' --cover 5 --psi 0.01 --nounif ' @@ -148,7 +151,7 @@ def disperse(l, ch): return s def param_to_str(param): - param_list = [str(k)+'='+str(v) for k,v in param.iteritems() ] + param_list = [ str(k)+'='+str(v) for k,v in param.iteritems() ] return disperse(param_list, '_') def 
param_to_result(param, result): @@ -165,17 +168,24 @@ def gen_comparison_graph(mod): mod.compute_optimal = False param_to_result(mod.param, mod.result) - mod.data_full_path = mod.ds_path + mod.param['data'] + mod.data_full_path = mod.ds_path + str(mod.param['fold']) + '/' + mod.param['data'] + mod.result['fold'] = mod.param['fold'] mod.result['total_size'] = get_num_lines(mod.data_full_path) mod.result['num_classes'] = get_num_classes(mod.data_full_path) mod.result['majority_size'], mod.result['majority_class'] = get_majority_class(mod.data_full_path) mod.result['progress'] = int(math.ceil(float(mod.result['total_size']) / float(mod.num_checkpoints))) - mod.vw_output_filename = mod.results_path + param_to_str(mod.param) + '.txt' + mod.vw_output_dir = mod.results_path + remove_suffix(mod.param['data']) + '/' + mod.vw_output_filename = mod.vw_output_dir + param_to_str(mod.param) + '.txt' if mod.compute_optimal is False: mod.result['warm_start'] = mod.param['warm_start_multiplier'] * mod.result['progress'] mod.bandit = mod.result['total_size'] - mod.result['warm_start'] + mod.result['validation_method'] = mod.validation_method + mod.result['weighting_scheme'] = mod.weighting_scheme + mod.result['corrupt_type_bandit'] = mod.corrupt_type_bandit + mod.result['corrupt_prob_bandit'] = mod.corrupt_prob_bandit + mod.result['fold'] = mod.param['fold'] #plot_errors(mod) execute_vw(mod) @@ -246,11 +256,14 @@ def params_per_task(mod): params_warm_start_multiplier = dictify('warm_start_multiplier', mod.warm_start_multipliers) params_learning_rate = dictify('learning_rate', mod.learning_rates) + # could potentially induce a bug if the maj and best does not have this parameter + params_fold = dictify('fold', mod.folds) + # Algorithm parameters params_cb_type = dictify('cb_type', mod.choices_cb_type) # Common parameters - params_common = param_cartesian_multi([params_corrupt_type_sup, params_corrupt_prob_sup, params_warm_start_multiplier, params_learning_rate, params_cb_type]) + params_common = param_cartesian_multi([params_corrupt_type_sup, params_corrupt_prob_sup, params_warm_start_multiplier, params_learning_rate, params_cb_type, params_fold]) params_common = filter(lambda param: param['corrupt_type_supervised'] == 1 or abs(param['corrupt_prob_supervised']) > 1e-4, params_common) # Baseline parameters construction @@ -275,13 +288,19 @@ def params_per_task(mod): # Optimal baselines parameter construction if mod.optimal_on: - params_optimal = [{ 'optimal_approx': True }, { 'majority_approx': True }] + params_optimal = [{ 'optimal_approx': True }] else: params_optimal = [] + if mod.majority_on: + params_majority = [{ 'majority_approx': True }] + else: + params_majority = [] + + # Common factor in all 3 groups: dataset params_dataset = dictify('data', mod.dss) - params_all = param_cartesian( params_dataset, params_baseline + params_algs + params_optimal ) + params_all = param_cartesian( params_dataset, params_baseline + params_algs + params_optimal + params_majority ) params_all = sorted(params_all) print len(params_all) for row in params_all: @@ -307,7 +326,7 @@ def get_num_classes(ds): return n_actions def get_majority_class(dataset_name): - maj_class_str = subprocess.check_output(('zcat '+ dataset_name +' | cut -d \' \' -f 1 | sort | uniq -c | sort -r | head -1 | xargs '), shell=True) + maj_class_str = subprocess.check_output(('zcat '+ dataset_name +' | cut -d \' \' -f 1 | sort | uniq -c | sort -r -n | head -1 | xargs '), shell=True) maj_size, maj_class = maj_class_str.split() return int(maj_size), 
int(maj_class) @@ -349,6 +368,7 @@ def write_summary_header(mod): def main_loop(mod): mod.summary_file_name = mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum' mod.result_template_list = [ + 'fold', 0, 'data', 'ds', 'num_classes', 0, 'total_size' , 0, @@ -383,6 +403,15 @@ def main_loop(mod): for mod.param in mod.config_task: gen_comparison_graph(mod) +def create_dir(dir): + if not os.path.exists(dir): + os.makedirs(dir) + import stat + os.chmod(dir, os.stat(dir).st_mode | stat.S_IWOTH) + +def remove_suffix(filename): + return os.path.basename(filename).split('.')[0] + if __name__ == '__main__': parser = argparse.ArgumentParser(description='vw job') parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') @@ -394,18 +423,27 @@ def main_loop(mod): args = parser.parse_args() if args.task_id == 0: - if not os.path.exists(args.results_dir): - os.makedirs(args.results_dir) - import stat - os.chmod(args.results_dir, os.stat(args.results_dir).st_mode | stat.S_IWOTH) + # To avoid race condition of writing to the same file at the same time + create_dir(args.results_dir) + + # This is specifically designed for teamscratch, as accessing a folder + # with a huge number of files can be super slow. Hence, we create a subfolder + # for each dataset to alleviate this. + dss = ds_files(args.ds_dir + '1/') + for ds in dss: + ds_no_suffix = remove_suffix(ds) + create_dir(args.results_dir + ds_no_suffix + '/') else: + # may still have the potential of race condition on those subfolders (if + # we have a lot of datasets to run and the datasets are small) while not os.path.exists(args.results_dir): time.sleep(1) mod = model() - mod.baselines_on = False + mod.baselines_on = True mod.algs_on = True mod.optimal_on = False + mod.majority_on = False mod.num_tasks = args.num_tasks mod.task_id = args.task_id @@ -416,43 +454,46 @@ def main_loop(mod): mod.num_checkpoints = 200 - mod.adf_on = True - # use fractions instead of absolute numbers #mod.warm_start_multipliers = [pow(2,i) for i in range(4)] - mod.warm_start_multipliers = [pow(2,i) for i in range(1)] + mod.warm_start_multipliers = [pow(2,i) for i in range(4)] mod.choices_cb_type = ['mtr'] #mod.choices_choices_lambda = [2,4,8] - mod.choices_choices_lambda = [2] + mod.choices_choices_lambda = [2, 4, 8] #mod.choices_corrupt_type_supervised = [1,2,3] #mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] - mod.choices_corrupt_type_supervised = [1,2] - mod.choices_corrupt_prob_supervised = [0.0,0.5] + mod.choices_corrupt_type_supervised = [1,2,3] + mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] + + if args.num_learning_rates == 1: + mod.learning_rates = [0.5] + elif args.num_learning_rates == 3: + mod.learning_rates = [0.1, 0.3, 1.0] + else: + mod.learning_rates = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0] + + mod.adf_on = True mod.corrupt_type_bandit = 1 mod.corrupt_prob_bandit = 0.0 mod.validation_method = 1 - mod.epsilon = 0.05 - - mod.choices_lambda = 2 mod.weighting_scheme = 1 + mod.epsilon = 0.05 mod.epsilon_on = True mod.critical_size_ratios = [184 * pow(2, -i) for i in range(7) ] - if args.num_learning_rates == 1: - mod.learning_rates = [0.5] - elif args.num_learning_rates == 3: - mod.learning_rates = [0.1, 0.3, 1.0] - else: - mod.learning_rates = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0] + #mod.folds = range(1,11) + mod.folds = range(1,6) print 'reading dataset files..' 
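# For reference, result_template_list in main_loop above interleaves column
# names with their default values; the header list and template dict are
# recovered by striding over the pairs, e.g.
#     tpl = ['data', 'ds', 'avg_error', 0.0]
#     n = len(tpl) / 2
#     header = [tpl[2*i] for i in range(n)]                       # names
#     template = dict((tpl[2*i], tpl[2*i+1]) for i in range(n))   # defaults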
- mod.dss = ds_files(mod.ds_path) + #TODO: this line specifically for multiple folds + #Need a systematic way to detect subfolder names + mod.dss = ds_files(mod.ds_path + '1/') print len(mod.dss) #mod.dss = ["ds_223_63.vw.gz"] #mod.dss = mod.dss[:5] From 4ab1d8cd487124ace0b089480afd68c22dc64a35 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Fri, 11 May 2018 11:44:01 -0400 Subject: [PATCH 073/127] . --- scripts/alg_comparison.py | 2 ++ scripts/plot_warm_start.py | 21 ++++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index ed6504fbbea..da5c4de9a19 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -87,6 +87,8 @@ def alg_str(alg_name): if (alg_name[2] == False and alg_name[3] == False): return 'combined_choices_lambda='+str(alg_name[1]) + return 'unknown algorithm' + def problem_str(name_problem): return 'supervised_corrupt_type='+str(name_problem[0]) \ +'_supervised_corrupt_prob='+str(name_problem[1]) \ diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 0f2a19edd83..e7607ed34d1 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -152,7 +152,7 @@ def disperse(l, ch): def param_to_str(param): param_list = [ str(k)+'='+str(v) for k,v in param.iteritems() ] - return disperse(param_list, '_') + return disperse(param_list, ',') def param_to_result(param, result): for k, v in param.iteritems(): @@ -297,6 +297,11 @@ def params_per_task(mod): else: params_majority = [] + #print len(params_baseline) + #print len(params_algs) + #print len(params_common) + #raw_input('..') + # Common factor in all 3 groups: dataset params_dataset = dictify('data', mod.dss) @@ -419,9 +424,12 @@ def remove_suffix(filename): parser.add_argument('--results_dir', default='../../../figs/') parser.add_argument('--ds_dir', default='../../../vwshuffled/') parser.add_argument('--num_learning_rates', type=int, default=1) + parser.add_argument('--num_datasets', type=int, default=-1) args = parser.parse_args() + flag_dir = args.results_dir + 'flag/' + if args.task_id == 0: # To avoid race condition of writing to the same file at the same time create_dir(args.results_dir) @@ -433,10 +441,12 @@ def remove_suffix(filename): for ds in dss: ds_no_suffix = remove_suffix(ds) create_dir(args.results_dir + ds_no_suffix + '/') + + create_dir(flag_dir) else: # may still have the potential of race condition on those subfolders (if # we have a lot of datasets to run and the datasets are small) - while not os.path.exists(args.results_dir): + while not os.path.exists(flag_dir): time.sleep(1) mod = model() @@ -492,9 +502,14 @@ def remove_suffix(filename): print 'reading dataset files..' 
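# The flag/ directory introduced above acts as a crude startup barrier:
# task 0 creates every per-dataset output directory first and creates flag/
# last, while every other task polls os.path.exists(flag_dir) before
# proceeding, so no task writes into a directory that is still being set up.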
#TODO: this line specifically for multiple folds - #Need a systematic way to detect subfolder names + #Need a systematic way to detect subfolder names mod.dss = ds_files(mod.ds_path + '1/') print len(mod.dss) + + if args.num_datasets == -1 or args.num_datasets > len(mod.dss): + pass + else: + mod.dss = mod.dss[:args.num_datasets] #mod.dss = ["ds_223_63.vw.gz"] #mod.dss = mod.dss[:5] From 5d7dc3197506e47fb70c307be6d4902729167e48 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 21 May 2018 11:53:09 -0400 Subject: [PATCH 074/127] updated scripts --- scripts/alg_comparison.py | 166 ++++++++++++++++++++++++++++++++----- scripts/plot_warm_start.py | 28 ++++--- 2 files changed, 161 insertions(+), 33 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index da5c4de9a19..5e0dc1136f8 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -10,6 +10,9 @@ from math import sqrt import argparse import numpy as np +import seaborn as sns +from matplotlib.colors import ListedColormap +from matplotlib.font_manager import FontProperties class model: @@ -72,28 +75,97 @@ def plot_comparison(errors_1, errors_2, sizes): plt.scatter(results_insigni_1, results_insigni_2, s=2, c='k') + len_errors = len(errors_1) + wins_1 = [z_scores[i] < 0 and significance[i] for i in range(len_errors) ] + wins_2 = [z_scores[i] > 0 and significance[i] for i in range(len_errors) ] + num_wins_1 = wins_1.count(True) + num_wins_2 = wins_2.count(True) -def alg_str(alg_name): + return num_wins_1, num_wins_2 + +def alg_info(alg_name, result_lst): if (alg_name[0] == 0): - return 'majority_class' + return result_lst[0] if (alg_name[0] == 2): - return 'supervised_underutil_as_bandit' + return result_lst[1] if (alg_name[2] == True and alg_name[3] == True): - return 'no_update' + return result_lst[2] if (alg_name[2] == True and alg_name[3] == False): - return 'bandit_only' + return result_lst[3] if (alg_name[2] == False and alg_name[3] == True): - return 'supervised_only' - if (alg_name[2] == False and alg_name[3] == False): - return 'combined_choices_lambda='+str(alg_name[1]) + return result_lst[4] + if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 2): + return result_lst[5] + if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 4): + return result_lst[6] + if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 8): + return result_lst[7] + if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 16): + return result_lst[8] + + return result_lst[9] + +def alg_str(alg_name): + return alg_info(alg_name, ['Most-Freq', 'Sim-Bandit', 'Class-1', 'Bandit-Only', 'Sup-Only', 'MinimaxBandits', 'AwesomeBandits with $|\Lambda|$=4', 'AwesomeBandits with $|\Lambda|$=8', 'AwesomeBandits with $|\Lambda|$=16', 'unknown']) + +def alg_str_compatible(alg_name): + return alg_info(alg_name, ['Most-Freq', 'Sim-Bandit', 'Class-1', 'Bandit-Only', 'Sup-Only', 'Choices_lambda=2', 'Choices_lambda=4', 'Choices_lambda=8', 'Choices_lambda=16', 'unknown']) + +def alg_color_style(alg_name): + palette = sns.color_palette('colorblind') + colors = palette.as_hex() + #colors = [colors[5], colors[4], 'black', colors[2], colors[1], colors[3], 'black', colors[0], 'black', 'black'] + colors = [colors[5], colors[3], 'black', colors[0], colors[1], colors[2], colors[2], colors[2], colors[2], 'black' ] + + styles = ['solid', 'solid', 'solid', 'solid', 'dashed', 'dotted', 'dashdot', 'solid', 'dashed', 'solid'] + + return alg_info(alg_name, zip(colors, styles)) + #['black', 
'magenta', 'lime', 'green', 'blue', 'darkorange','darksalmon', 'red', 'cyan'] - return 'unknown algorithm' +def alg_index(alg_name): + return alg_info(alg_name, [7.0, 6.0, 8.0, 5.0, 4.0, 2.0, 1.0, 1.2, 1.5, 9.0]) + + +def order_legends(indices): + ax = plt.gca() + handles, labels = ax.get_legend_handles_labels() + # sort both labels and handles by labels + labels, handles, indices = zip(*sorted(zip(labels, handles, indices), key=lambda t: t[2])) + ax.legend(handles, labels) + +def save_legend(mod, indices): + ax = plt.gca() + handles, labels = ax.get_legend_handles_labels() + labels, handles, indices = zip(*sorted(zip(labels, handles, indices), key=lambda t: t[2])) + #figlegend = pylab.figure(figsize=(26,1)) + #figlegend.legend(handles, labels, 'center', fontsize=26, ncol=8) + figlegend = pylab.figure(figsize=(17,1.5)) + figlegend.legend(handles, labels, 'center', fontsize=26, ncol=3) + figlegend.tight_layout(pad=0) + figlegend.savefig(mod.problemdir+'legend.pdf') def problem_str(name_problem): return 'supervised_corrupt_type='+str(name_problem[0]) \ +'_supervised_corrupt_prob='+str(name_problem[1]) \ +'_bandit_supervised_size_ratio='+str(name_problem[2]) +def noise_type_str(noise_type): + if noise_type == 1: + return 'UAR' + elif noise_type == 2: + return 'CYC' + elif noise_type == 3: + return 'MAJ' + +def problem_text(name_problem): + s='' + s += 'Ratio = ' + str(name_problem[2]) + ', ' + if abs(name_problem[1]) < 1e-6: + s += 'noiseless' + else: + s += noise_type_str(name_problem[0]) + ', ' + s += 'p = ' + str(name_problem[1]) + return s def plot_cdf(alg_name, errs): @@ -102,25 +174,52 @@ def plot_cdf(alg_name, errs): print errs print len(errs) - plt.step(np.sort(errs), np.linspace(0, 1, len(errs), endpoint=False), label=alg_str(alg_name)) + col, sty = alg_color_style(alg_name) + plt.step(np.sort(errs), np.linspace(0, 1, len(errs), endpoint=False), label=alg_str(alg_name), color=col, linestyle=sty, linewidth=2.0) + + # #raw_input("Press Enter to continue...") def plot_all_cdfs(alg_results, mod): #plot all cdfs: print 'printing cdfs..' 
- i = 0 + + indices = [] + + pylab.figure(figsize=(8,6)) + for alg_name, errs in alg_results.iteritems(): + indices.append(alg_index(alg_name)) plot_cdf(alg_name, errs) - plt.legend() if mod.normalize_type == 1: - plt.xlim(-0.2,1) + plt.xlim(0,1) elif mod.normalize_type == 2: plt.xlim(-1,1) + elif mod.normalize_type == 3: + plt.xlim(0, 1) + plt.ylim(0,1) - plt.savefig(mod.problemdir+'/cdf.png') + #params={'legend.fontsize':26, + #'axes.labelsize': 24, 'axes.titlesize':26, 'xtick.labelsize':20, + #'ytick.labelsize':20 } + #plt.rcParams.update(params) + #plt.xlabel('Normalized error',fontsize=34) + #plt.ylabel('Cumulative frequency', fontsize=34) + #plt.title(problem_text(mod.name_problem), fontsize=36) + plt.xticks(fontsize=30) + plt.yticks(fontsize=30) + plt.tight_layout(pad=0) + + ax = plt.gca() + order_legends(indices) + ax.legend_.set_zorder(-1) + plt.savefig(mod.problemdir+'cdf.pdf') + ax.legend_.remove() + plt.savefig(mod.problemdir+'cdf_nolegend.pdf') + save_legend(mod, indices) plt.clf() @@ -136,10 +235,11 @@ def plot_all_pair_comp(alg_results, sizes, mod): print len(errs_1), len(errs_2), len(sizes) #raw_input('Press any key to continue..') - plot_comparison(errs_1, errs_2, sizes) + num_wins_1, num_wins_2 = plot_comparison(errs_1, errs_2, sizes) - plt.title(alg_str(alg_names[i])+' vs '+alg_str(alg_names[j])) - plt.savefig(mod.problemdir+'/'+alg_str(alg_names[i])+'_vs_'+alg_str(alg_names[j])+'.png') + plt.title( 'total number of comparisons = ' + str(len(errs_1)) + '\n'+ + alg_str(alg_names[i]) + ' wins ' + str(num_wins_1) + ' times, \n' + alg_str(alg_names[j]) + ' wins ' + str(num_wins_2) + ' times') + plt.savefig(mod.problemdir+alg_str_compatible(alg_names[i])+'_vs_'+alg_str_compatible(alg_names[j])+'.pdf') plt.clf() def init_results(result_table): @@ -159,6 +259,8 @@ def normalize_score(unnormalized_result, mod): elif mod.normalize_type == 2: l = unnormalized_result[(1, 1, True, False)] return { k : ((v - l) / (l + 1e-4)) for k, v in unnormalized_result.iteritems() } + elif mod.normalize_type == 3: + return unnormalized_result def get_best_error(best_error_table, name_dataset): name = name_dataset[0] @@ -205,6 +307,9 @@ def update_result_dict(results_dict, new_result): def plot_all(mod, all_results): + + #all_results = all_results[all_results['corrupt_prob_supervised']!=0.0] + grouped_by_problem = all_results.groupby(['corrupt_type_supervised', 'corrupt_prob_supervised','bandit_supervised_size_ratio']) @@ -226,8 +331,12 @@ def plot_all(mod, all_results): grouped_by_algorithm = group_dataset.groupby(['warm_start_type', 'choices_lambda', 'no_supervised', 'no_bandit']) mod.name_dataset = name_dataset + #The 'learning_rate' would be the only free degree here now. Taking the #min aggregation will give us the 7 algorithms we are evaluating. 
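# A sketch of the fold-averaging mentioned in the comment below (hypothetical,
# not implemented here): take the mean over folds first, then the
# per-algorithm min over learning rates --
#     by_lr = df.groupby(alg_keys + ['learning_rate'], as_index=False).mean()
#     best = by_lr.groupby(alg_keys, as_index=False)['avg_error'].min()
# where alg_keys stands for the ['warm_start_type', 'choices_lambda',
# 'no_supervised', 'no_bandit'] key used by grouped_by_algorithm.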
+ + #In the future this should be changed now if we run multiple folds: we + #should average among folds before choosing the min result_table = grouped_by_algorithm.min() result_table = result_table.reset_index() @@ -322,7 +431,7 @@ def load_maj_error(mod): parser.add_argument('--filter', default='1') parser.add_argument('--plot_subdir', default='expt1/') parser.add_argument('--from_hdf', action='store_true') - parser.add_argument('--normalize_type', type=int) + parser.add_argument('--normalize_type', type=int, default=1) args = parser.parse_args() mod = model() @@ -330,10 +439,10 @@ def load_maj_error(mod): mod.results_dir = args.results_dir mod.filter = args.filter mod.plot_subdir = args.plot_subdir - mod.normalize_type = args.normalize_type + mod.normalize_type = args.normalize_type #1: normalized score; 2: bandit only centered score; 3: raw score mod.pair_comp_on = False mod.cdf_on = True - mod.maj_error_dir = '../../../figs_maj_errors/0of1.sum' + mod.maj_error_dir = '../../../figs_all/expt_0509/figs_maj_errors/0of1.sum' mod.fulldir = mod.results_dir + mod.plot_subdir if not os.path.exists(mod.fulldir): @@ -354,10 +463,18 @@ def load_maj_error(mod): all_results = mod.all_results mod.best_error_table = all_results[all_results['choices_lambda'] == 0] + + #print mod.best_error_table[mod.best_error_table['dataset'] == 'ds_160_5.vw.gz'] + #raw_input(' ') + all_results = all_results[all_results['choices_lambda'] != 0] #ignore the no update row: all_results = all_results[(all_results['no_supervised'] == False) | (all_results['no_bandit'] == False)] + #ignore the choice_lambda = 4 row + all_results = all_results[(all_results['choices_lambda'] != 4)] + + #filter choices_lambdas = 2,4,8? #if (alg_name[2] == False and alg_name[3] == False and alg_name[1] != 8): @@ -376,6 +493,15 @@ def load_maj_error(mod): all_results = all_results[all_results['num_classes'] >= 3] elif mod.filter == '4': all_results = all_results[all_results['num_classes'] <= 2] + elif mod.filter == '5': + all_results = all_results[all_results['total_size'] >= 10000] + all_results = all_results[all_results['num_classes'] >= 3] + elif mod.filter == '6': + all_results = all_results[all_results['warm_start_size'] >= 100] + all_results = all_results[all_results['learning_rate'] == 0.3] + elif mod.filter == '7': + all_results = all_results[all_results['warm_start_size'] >= 100] + all_results = all_results[all_results['num_classes'] >= 3] plot_all(mod, all_results) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index e7607ed34d1..0a33376114e 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -264,7 +264,7 @@ def params_per_task(mod): # Common parameters params_common = param_cartesian_multi([params_corrupt_type_sup, params_corrupt_prob_sup, params_warm_start_multiplier, params_learning_rate, params_cb_type, params_fold]) - params_common = filter(lambda param: param['corrupt_type_supervised'] == 1 or abs(param['corrupt_prob_supervised']) > 1e-4, params_common) + params_common = filter(lambda param: param['corrupt_type_supervised'] == 3 or abs(param['corrupt_prob_supervised']) > 1e-4, params_common) # Baseline parameters construction if mod.baselines_on: @@ -358,7 +358,7 @@ def vw_output_extract(mod, pattern): if not errs: avge = 0 else: - print errs + #print errs avge = float(errs[0][0]) vw_output.close() @@ -425,7 +425,7 @@ def remove_suffix(filename): parser.add_argument('--ds_dir', default='../../../vwshuffled/') parser.add_argument('--num_learning_rates', type=int, default=1) 
parser.add_argument('--num_datasets', type=int, default=-1) - + parser.add_argument('--num_folds', type=int, default=1) args = parser.parse_args() flag_dir = args.results_dir + 'flag/' @@ -438,6 +438,7 @@ def remove_suffix(filename): # with a huge number of files can be super slow. Hence, we create a subfolder # for each dataset to alleviate this. dss = ds_files(args.ds_dir + '1/') + dss = dss[:args.num_datasets] for ds in dss: ds_no_suffix = remove_suffix(ds) create_dir(args.results_dir + ds_no_suffix + '/') @@ -466,23 +467,24 @@ def remove_suffix(filename): # use fractions instead of absolute numbers #mod.warm_start_multipliers = [pow(2,i) for i in range(4)] - mod.warm_start_multipliers = [pow(2,i) for i in range(4)] + mod.warm_start_multipliers = [pow(2,i) for i in range(1)] mod.choices_cb_type = ['mtr'] #mod.choices_choices_lambda = [2,4,8] - mod.choices_choices_lambda = [2, 4, 8] + mod.choices_choices_lambda = [2,8,16] #mod.choices_corrupt_type_supervised = [1,2,3] #mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] - mod.choices_corrupt_type_supervised = [1,2,3] - mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] + mod.choices_corrupt_type_supervised = [3] + mod.choices_corrupt_prob_supervised = [0,0.25,0.5] + + mod.learning_rates_template = [0.1, 0.03, 0.3, 0.01, 1.0, 0.003, 3.0, 0.001, 10.0] - if args.num_learning_rates == 1: - mod.learning_rates = [0.5] - elif args.num_learning_rates == 3: - mod.learning_rates = [0.1, 0.3, 1.0] + if args.num_learning_rates <= 0 or args.num_learning_rates >= 10: + mod.learning_rates = mod.learning_rates_template else: - mod.learning_rates = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0] + mod.learning_rates = mod.learning_rates_template[:args.num_learning_rates] + mod.adf_on = True @@ -498,7 +500,7 @@ def remove_suffix(filename): mod.critical_size_ratios = [184 * pow(2, -i) for i in range(7) ] #mod.folds = range(1,11) - mod.folds = range(1,6) + mod.folds = range(1, args.num_folds+1) print 'reading dataset files..' 
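# With multiple folds, each dataset is read from a per-fold subdirectory,
# e.g. (path illustrative) ../../../vwshuffled/3/ds_223_63.vw.gz for fold 3;
# the listing above only scans the fold-1 subdirectory and assumes every
# other fold contains the same file names.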
#TODO: this line specifically for multiple folds From a4fb02fcd14928c82f067ca254f9ec046eb3abf4 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 21 May 2018 17:52:04 -0400 Subject: [PATCH 075/127] cleaned up the run vw script; need more tests on more choices of param settings --- scripts/plot_warm_start.py | 360 +++++++++++++++++++------------------ 1 file changed, 187 insertions(+), 173 deletions(-) diff --git a/scripts/plot_warm_start.py b/scripts/plot_warm_start.py index 0a33376114e..c2f2faec726 100644 --- a/scripts/plot_warm_start.py +++ b/scripts/plot_warm_start.py @@ -14,8 +14,42 @@ class model: def __init__(self): - self.no_bandit = False - self.no_supervised = False + # Setting up argument-independent learning parameters in the constructor + self.baselines_on = True + self.algs_on = True + self.optimal_on = True + self.majority_on = True + + self.num_checkpoints = 200 + + # use fractions instead of absolute numbers + #mod.warm_start_multipliers = [pow(2,i) for i in range(4)] + self.warm_start_multipliers = [pow(2,i) for i in range(1)] + + self.choices_cb_type = ['mtr'] + #mod.choices_choices_lambda = [2,4,8] + self.choices_choices_lambda = [2,8,16] + + #mod.choices_corrupt_type_supervised = [1,2,3] + #mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] + self.choices_corrupt_type_supervised = [3] + self.choices_corrupt_prob_supervised = [0,0.25,0.5] + + self.learning_rates_template = [0.1, 0.03, 0.3, 0.01, 1.0, 0.003, 3.0, 0.001, 10.0] + + self.adf_on = True + + self.corrupt_type_bandit = 1 + self.corrupt_prob_bandit = 0.0 + + self.validation_method = 1 + self.weighting_scheme = 1 + + #self.epsilon = 0.05 + #self.epsilon_on = True + + self.critical_size_ratios = [184 * pow(2, -i) for i in range(7) ] + def collect_stats(mod): avg_error_value = avg_error(mod) @@ -31,14 +65,14 @@ def collect_stats(mod): 'ideal_variance': 0.0 } - if mod.compute_optimal is True: + if 'majority_approx' in mod.param or 'optimal_approx' in mod.param: vw_result = vw_result_template.copy() if 'optimal_approx' in mod.param: # this condition is for computing the optimal error vw_result['avg_error'] = avg_error_value else: # this condition is for computing the majority error - err = 1 - float(mod.result['majority_size']) / mod.result['total_size'] + err = 1 - float(mod.param['majority_size']) / mod.param['total_size'] vw_result['avg_error'] = float('%0.5f' % err) vw_run_results.append(vw_result) return vw_run_results @@ -61,8 +95,8 @@ def collect_stats(mod): bandit_effective = int(float(weight_str)) for ratio in mod.critical_size_ratios: - if bandit_effective >= (1 - 1e-7) * mod.result['warm_start'] * ratio and \ - bandit_effective <= (1 + 1e-7) * mod.result['warm_start'] * ratio: + if bandit_effective >= (1 - 1e-7) * mod.param['warm_start'] * ratio and \ + bandit_effective <= (1 + 1e-7) * mod.param['warm_start'] * ratio: vw_result = vw_result_template.copy() vw_result['bandit_size'] = bandit_effective vw_result['bandit_supervised_size_ratio'] = ratio @@ -74,67 +108,55 @@ def collect_stats(mod): return vw_run_results -def gen_vw_options_list(vw_options): +def gen_vw_options_list(mod): + mod.vw_options = format_setting(mod.vw_template, mod.param) vw_options_list = [] - for k, v in vw_options.iteritems(): + for k, v in mod.vw_options.iteritems(): vw_options_list.append('--'+str(k)) vw_options_list.append(str(v)) return vw_options_list def gen_vw_options(mod): - vw_options = {} - vw_options['data'] = mod.data_full_path - vw_options['progress'] = mod.result['progress'] - if 'optimal_approx' in mod.param: - 
vw_options['passes'] = 5 - vw_options['oaa'] = mod.result['num_classes'] - vw_options['cache_file'] = mod.data_full_path + '.cache' + # Fully supervised on full dataset + mod.vw_template = {'data':'', 'progress':2.0, 'passes':0, 'oaa':0, 'cache_file':''} + mod.param['passes'] = 5 + mod.param['oaa'] = mod.param['num_classes'] + mod.param['cache_file'] = mod.param['data'] + '.cache' elif 'majority_approx' in mod.param: - # basically we would like to skip vw running as fast as possible - vw_options['cbify'] = mod.result['num_classes'] - vw_options['warm_start'] = 0 - vw_options['bandit'] = 0 + # Compute majority error; basically we would like to skip vw running as fast as possible + mod.vw_template = {'data':'', 'progress':2.0, 'cbify':0, 'warm_start':0, 'bandit':0} + mod.param['cbify'] = mod.param['num_classes'] + mod.param['warm_start'] = 0 + mod.param['bandit'] = 0 else: - vw_options['corrupt_type_bandit'] = mod.corrupt_type_bandit - vw_options['corrupt_prob_bandit'] = mod.corrupt_prob_bandit - vw_options['bandit'] = mod.bandit - - if mod.adf_on is True: - vw_options['cb_explore_adf'] = ' ' + # General CB + mod.vw_template = {'data':'', 'corrupt_type_bandit':0, 'corrupt_prob_bandit':0.0, 'bandit':0, 'cb_type':'mtr', + 'choices_lambda':0, 'corrupt_type_supervised':0, 'corrupt_prob_supervised':0.0, 'lambda_scheme':1, 'learning_rate':0.5, 'warm_start_type':1, 'cbify':0, 'warm_start':0, 'overwrite_label':1, 'validation_method':1, 'weighting_scheme':1} + + mod.param['warm_start'] = mod.param['warm_start_multiplier'] * mod.param['progress'] + mod.param['bandit'] = mod.param['total_size'] - mod.param['warm_start'] + mod.param['cbify'] = mod.param['num_classes'] + mod.param['overwrite_label'] = mod.param['majority_class'] + + if mod.param['adf_on'] is True: + mod.param['cb_explore_adf'] = ' ' + mod.vw_template['cb_explore_adf'] = ' ' else: - vw_options['cb_explore'] = mod.num_classes - - if mod.epsilon_on is True: - vw_options['epsilon'] = mod.epsilon - - vw_options['cb_type'] = mod.param['cb_type'] - vw_options['choices_lambda'] = mod.param['choices_lambda'] - vw_options['corrupt_type_supervised'] = mod.param['corrupt_type_supervised'] - vw_options['corrupt_prob_supervised'] = mod.param['corrupt_prob_supervised'] - vw_options['lambda_scheme'] = mod.param['lambda_scheme'] - if mod.param['no_supervised'] is True: - vw_options['no_supervised'] = ' ' - if mod.param['no_bandit'] is True: - vw_options['no_bandit'] = ' ' - vw_options['learning_rate'] = mod.param['learning_rate'] - vw_options['warm_start_type'] = mod.param['warm_start_type'] - - vw_options['cbify'] = mod.result['num_classes'] - vw_options['warm_start'] = mod.result['warm_start'] - vw_options['overwrite_label'] = mod.result['majority_class'] - vw_options['validation_method'] = mod.result['validation_method'] - vw_options['weighting_scheme'] = mod.result['weighting_scheme'] - - #if mod.cover_on: - # alg_option += ' --cover 5 --psi 0.01 --nounif ' - #mod.cb_type = 'dr' - return vw_options + mod.param['cb_explore'] = mod.param['num_classes'] + mod.vw_template['cb_explore'] = 0 + + if mod.param['no_warm_start_update'] is True: + mod.param['no_supervised'] = ' ' + mod.vw_template['no_supervised'] = ' ' + if mod.param['no_interaction_update'] is True: + mod.param['no_bandit'] = ' ' + mod.vw_template['no_bandit'] = ' ' def execute_vw(mod): - vw_options = gen_vw_options(mod) - vw_options_list = gen_vw_options_list(vw_options) - cmd = disperse([mod.vw_path]+vw_options_list, ' ') + gen_vw_options(mod) + vw_options_list = gen_vw_options_list(mod) + 
cmd = intersperse([mod.vw_path]+vw_options_list, ' ') print cmd f = open(mod.vw_output_filename, 'w') @@ -143,7 +165,7 @@ def execute_vw(mod): process.wait() f.close() -def disperse(l, ch): +def intersperse(l, ch): s = '' for item in l: s += str(item) @@ -152,56 +174,68 @@ def disperse(l, ch): def param_to_str(param): param_list = [ str(k)+'='+str(v) for k,v in param.iteritems() ] - return disperse(param_list, ',') - -def param_to_result(param, result): - for k, v in param.iteritems(): - if k in result: - result[k] = v + return intersperse(param_list, ',') + +def replace_if_in(dic, k, k_new): + if k in dic: + dic[k_new] = dic[k] + del dic[k] + +def replace_keys(dic, simplified_keymap): + dic_new = dic.copy() + for k, k_new in simplified_keymap.iteritems(): + replace_if_in(dic_new, k, k_new) + return dic_new + +def param_to_str_simplified(mod): + #print 'before replace' + #print param + vw_run_param_set = ['lambda_scheme','learning_rate','validation_method', + 'fold','no_warm_start_update','no_interaction_update', + 'corrupt_prob_bandit', 'corrupt_prob_supervised', + 'corrupt_type_bandit', 'corrupt_type_supervised', + 'warm_start_type','warm_start_multiplier','choices_lambda','weighting_scheme', + 'cb_type','optimal_approx','majority_approx','dataset', 'adf_on'] + + mod.template_red = dict([(k,mod.result_template[k]) for k in vw_run_param_set]) + mod.simplified_keymap_red = dict([(k,mod.simplified_keymap[k]) for k in vw_run_param_set]) + # step 1: use the above as a template to filter out irrelevant parameters + # in the vw output file title + param_formatted = format_setting(mod.template_red, mod.param) + # step 2: replace the key names with the simplified names + param_simplified = replace_keys(param_formatted, mod.simplified_keymap_red) + #print 'after replace' + #print param + return param_to_str(param_simplified) def gen_comparison_graph(mod): - mod.result = mod.result_template.copy() + mod.param['data'] = mod.ds_path + str(mod.param['fold']) + '/' + mod.param['dataset'] - if 'majority_approx' in mod.param or 'optimal_approx' in mod.param: - mod.compute_optimal = True - else: - mod.compute_optimal = False - - param_to_result(mod.param, mod.result) - mod.data_full_path = mod.ds_path + str(mod.param['fold']) + '/' + mod.param['data'] - - mod.result['fold'] = mod.param['fold'] - mod.result['total_size'] = get_num_lines(mod.data_full_path) - mod.result['num_classes'] = get_num_classes(mod.data_full_path) - mod.result['majority_size'], mod.result['majority_class'] = get_majority_class(mod.data_full_path) - mod.result['progress'] = int(math.ceil(float(mod.result['total_size']) / float(mod.num_checkpoints))) + mod.param['total_size'] = get_num_lines(mod.param['data']) + mod.param['num_classes'] = get_num_classes(mod.param['data']) + mod.param['majority_size'], mod.param['majority_class'] = get_majority_class(mod.param['data']) + mod.param['progress'] = int(math.ceil(float(mod.param['total_size']) / float(mod.num_checkpoints))) mod.vw_output_dir = mod.results_path + remove_suffix(mod.param['data']) + '/' - mod.vw_output_filename = mod.vw_output_dir + param_to_str(mod.param) + '.txt' - - if mod.compute_optimal is False: - mod.result['warm_start'] = mod.param['warm_start_multiplier'] * mod.result['progress'] - mod.bandit = mod.result['total_size'] - mod.result['warm_start'] - mod.result['validation_method'] = mod.validation_method - mod.result['weighting_scheme'] = mod.weighting_scheme - mod.result['corrupt_type_bandit'] = mod.corrupt_type_bandit - mod.result['corrupt_prob_bandit'] = 
mod.corrupt_prob_bandit - mod.result['fold'] = mod.param['fold'] + mod.vw_output_filename = mod.vw_output_dir + param_to_str_simplified(mod) + '.txt' #plot_errors(mod) execute_vw(mod) vw_run_results = collect_stats(mod) for vw_result in vw_run_results: - result_combined = merge_two_dicts(mod.result, vw_result) - result_formatted = format_result(mod.result_template, result_combined) + result_combined = merge_two_dicts(mod.param, vw_result) + result_formatted = format_setting(mod.result_template, result_combined) record_result(mod, result_formatted) print('') -def format_result(result_template, result): - result_formatted = result_template.copy() - for k, v in result.iteritems(): - result_formatted[k] = v - return result_formatted +# The following function is a "template filling" function +# Given a template, we use the setting dict to fill it as much as possible +def format_setting(template, setting): + formatted = template.copy() + for k, v in setting.iteritems(): + if k in template.keys(): + formatted[k] = v + return formatted def record_result(mod, result): result_row = [] @@ -209,7 +243,7 @@ def record_result(mod, result): result_row.append(result[k]) summary_file = open(mod.summary_file_name, 'a') - summary_file.write( disperse(result_row, '\t') + '\n') + summary_file.write( intersperse(result_row, '\t') + '\n') summary_file.close() def ds_files(ds_path): @@ -269,10 +303,10 @@ def params_per_task(mod): # Baseline parameters construction if mod.baselines_on: params_baseline_basic = [ - [{'choices_lambda': 1, 'warm_start_type': 1, 'lambda_scheme': 3}], [{'no_supervised': True}, {'no_supervised': False}], [{'no_bandit': True}, {'no_bandit': False}] + [{'choices_lambda': 1, 'warm_start_type': 1, 'lambda_scheme': 3}], [{'no_warm_start_update': True}, {'no_warm_start_update': False}], [{'no_interaction_update': True}, {'no_interaction_update': False}] ] params_baseline = param_cartesian_multi([params_common] + params_baseline_basic) - params_baseline = filter(lambda param: param['no_supervised'] == True or param['no_bandit'] == True, params_baseline) + params_baseline = filter(lambda param: param['no_warm_start_update'] == True or param['no_interaction_update'] == True, params_baseline) else: params_baseline = [] @@ -280,34 +314,45 @@ def params_per_task(mod): # Algorithm parameters construction if mod.algs_on: params_choices_lambd = dictify('choices_lambda', mod.choices_choices_lambda) - params_algs_1 = param_cartesian(params_choices_lambd, [{'no_supervised': False, 'no_bandit': False, 'warm_start_type': 1, 'lambda_scheme': 3}] ) - params_algs_2 = [{'no_supervised': False, 'no_bandit': False, 'warm_start_type': 2, 'lambda_scheme': 1, 'choices_lambda':1}] + params_algs_1 = param_cartesian(params_choices_lambd, [{'no_warm_start_update': False, 'no_interaction_update': False, 'warm_start_type': 1, 'lambda_scheme': 3}] ) + params_algs_2 = [{'no_warm_start_update': False, 'no_interaction_update': False, 'warm_start_type': 2, 'lambda_scheme': 1, 'choices_lambda':1}] params_algs = param_cartesian( params_common, params_algs_1 + params_algs_2 ) else: params_algs = [] + + params_constant = [{'validation_method':mod.validation_method, + 'weighting_scheme':mod.weighting_scheme, + 'corrupt_type_bandit':mod.corrupt_type_bandit, + 'corrupt_prob_bandit':mod.corrupt_prob_bandit, + 'adf_on':True}] + + params_baseline_and_algs = param_cartesian_multi([params_constant, params_baseline + params_algs]) + + # Optimal baselines parameter construction if mod.optimal_on: - params_optimal = [{ 'optimal_approx': True 
}] + params_optimal = [{ 'optimal_approx': True, 'fold': 1 }] else: params_optimal = [] if mod.majority_on: - params_majority = [{ 'majority_approx': True }] + params_majority = [{ 'majority_approx': True, 'fold': 1 }] else: params_majority = [] + #print len(params_baseline) #print len(params_algs) #print len(params_common) #raw_input('..') - # Common factor in all 3 groups: dataset - params_dataset = dictify('data', mod.dss) - params_all = param_cartesian( params_dataset, params_baseline + params_algs + params_optimal + params_majority ) + params_dataset = dictify('dataset', mod.dss) + params_all = param_cartesian_multi( [params_dataset, params_baseline_and_algs + params_optimal + params_majority] ) + params_all = sorted(params_all) - print len(params_all) + print 'The total number of VW commands to run is: ', len(params_all) for row in params_all: print row return get_params_task(params_all) @@ -366,43 +411,50 @@ def vw_output_extract(mod, pattern): def write_summary_header(mod): summary_file = open(mod.summary_file_name, 'w') - summary_header = disperse(mod.result_header_list, '\t') + summary_header = intersperse(mod.result_header_list, '\t') summary_file.write(summary_header+'\n') summary_file.close() def main_loop(mod): mod.summary_file_name = mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum' + + # The reason for using a list is that, we would like to keep the order of the + #columns in this way. Maybe use ordered dictionary in the future? mod.result_template_list = [ - 'fold', 0, - 'data', 'ds', - 'num_classes', 0, - 'total_size' , 0, - 'majority_size', 0, - 'corrupt_type_supervised', 0, - 'corrupt_prob_supervised', 0.0, - 'corrupt_type_bandit', 0, - 'corrupt_prob_bandit', 0.0, - 'warm_start', 0, - 'bandit_size', 0, - 'bandit_supervised_size_ratio', 0, - 'cb_type', 'mtr', - 'validation_method', 0, - 'weighting_scheme', 0, - 'lambda_scheme', 0, - 'choices_lambda', 0, - 'no_supervised', False, - 'no_bandit', False, - 'warm_start_type', 0, - 'learning_rate', 0.0, - 'optimal_approx', False, - 'majority_approx', False, - 'avg_error', 0.0, - 'actual_variance', 0.0, - 'ideal_variance', 0.0 ] - - num_cols = len(mod.result_template_list)/2 - mod.result_header_list = [ mod.result_template_list[2*i] for i in range(num_cols) ] - mod.result_template = dict([ (mod.result_template_list[2*i], mod.result_template_list[2*i+1]) for i in range(num_cols) ]) + ('fold', 'fd', 0), + ('data', 'dt', ''), + ('dataset', 'ds', ''), + ('num_classes','nc', 0), + ('total_size', 'ts', 0), + ('majority_size','ms', 0), + ('corrupt_type_supervised', 'cts', 0), + ('corrupt_prob_supervised', 'cps', 0.0), + ('corrupt_type_bandit', 'ctb', 0), + ('corrupt_prob_bandit', 'cpb', 0.0), + ('adf_on', 'ao', True), + ('warm_start_multiplier','wsm',1), + ('warm_start', 'ws', 0), + ('warm_start_type', 'wst', 0), + ('bandit_size', 'bs', 0), + ('bandit_supervised_size_ratio', 'bssr', 0), + ('cb_type', 'cbt', 'mtr'), + ('validation_method', 'vm', 0), + ('weighting_scheme', 'wts', 0), + ('lambda_scheme','ls', 0), + ('choices_lambda', 'cl', 0), + ('no_warm_start_update', 'nwsu', False), + ('no_interaction_update', 'niu', False), + ('learning_rate', 'lr', 0.0), + ('optimal_approx', 'oa', False), + ('majority_approx', 'ma', False), + ('avg_error', 'ae', 0.0), + ('actual_variance', 'av', 0.0), + ('ideal_variance', 'iv', 0.0)] + + num_cols = len(mod.result_template_list) + mod.result_header_list = [ mod.result_template_list[i][0] for i in range(num_cols) ] + mod.result_template = dict([ (mod.result_template_list[i][0], 
mod.result_template_list[i][2]) for i in range(num_cols) ]) + mod.simplified_keymap = dict([ (mod.result_template_list[i][0], mod.result_template_list[i][1]) for i in range(num_cols) ]) write_summary_header(mod) for mod.param in mod.config_task: @@ -451,54 +503,17 @@ def remove_suffix(filename): time.sleep(1) mod = model() - mod.baselines_on = True - mod.algs_on = True - mod.optimal_on = False - mod.majority_on = False mod.num_tasks = args.num_tasks mod.task_id = args.task_id - mod.vw_path = '../vowpalwabbit/vw' mod.ds_path = args.ds_dir mod.results_path = args.results_dir - mod.num_checkpoints = 200 - - # use fractions instead of absolute numbers - #mod.warm_start_multipliers = [pow(2,i) for i in range(4)] - mod.warm_start_multipliers = [pow(2,i) for i in range(1)] - - mod.choices_cb_type = ['mtr'] - #mod.choices_choices_lambda = [2,4,8] - mod.choices_choices_lambda = [2,8,16] - - #mod.choices_corrupt_type_supervised = [1,2,3] - #mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] - mod.choices_corrupt_type_supervised = [3] - mod.choices_corrupt_prob_supervised = [0,0.25,0.5] - - mod.learning_rates_template = [0.1, 0.03, 0.3, 0.01, 1.0, 0.003, 3.0, 0.001, 10.0] - if args.num_learning_rates <= 0 or args.num_learning_rates >= 10: mod.learning_rates = mod.learning_rates_template else: mod.learning_rates = mod.learning_rates_template[:args.num_learning_rates] - - - mod.adf_on = True - - mod.corrupt_type_bandit = 1 - mod.corrupt_prob_bandit = 0.0 - - mod.validation_method = 1 - mod.weighting_scheme = 1 - - mod.epsilon = 0.05 - mod.epsilon_on = True - - mod.critical_size_ratios = [184 * pow(2, -i) for i in range(7) ] - #mod.folds = range(1,11) mod.folds = range(1, args.num_folds+1) @@ -523,7 +538,6 @@ def remove_suffix(filename): print len(mod.config_task) #print mod.ds_task - # we only need to vary the warm start fraction, and there is no need to vary the bandit fraction, # as each run of vw automatically accumulates the bandit dataset main_loop(mod) From f8d14ab016c82a1565c48b7881b66367f8e62cdd Mon Sep 17 00:00:00 2001 From: chicheng Date: Fri, 25 May 2018 15:47:43 -0400 Subject: [PATCH 076/127] fixed memory lost problems; still reachable problems still not resolved --- vowpalwabbit/cbify.cc | 63 ++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index bab211f3da2..d17d14f511b 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -269,7 +269,7 @@ void finish(cbify& data) for (size_t i = 0; i < data.warm_start_period; ++i) { - //VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, data.supervised_validation[i]); + VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, data.supervised_validation[i]); } free(data.supervised_validation); @@ -278,11 +278,11 @@ void finish(cbify& data) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - //VW::dealloc_example(CB::cb_label.delete_label, data.adf_data.ecs[a]); - data.adf_data.ecs[a].pred.a_s.delete_v(); + VW::dealloc_example(CB::cb_label.delete_label, data.adf_data.ecs[a]); + //data.adf_data.ecs[a].pred.a_s.delete_v(); } - //VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.empty_example); - data.adf_data.empty_example->pred.a_s.delete_v(); + VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.empty_example); + //data.adf_data.empty_example->pred.a_s.delete_v(); free(data.adf_data.ecs); free(data.adf_data.empty_example); @@ -292,21 +292,20 @@ void finish(cbify& data) 
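The subject of this patch refers to valgrind's report categories: replacing the ad-hoc pred.a_s.delete_v() calls with VW::dealloc_example(...), as the surrounding hunks do, also frees the label contents and feature space that were previously reported as "definitely lost", while allocations released only at shutdown still show up as "still reachable". A minimal self-contained toy of the ownership pattern being repaired; the struct and field names here are illustrative stand-ins, not VW's actual types:

#include <cstdlib>

// Each example owns two interior allocations; freeing only the outer array
// (or only one field) is what valgrind flags as "definitely lost".
struct toy_label   { float* costs; };
struct toy_example { toy_label l; float* pred; };

int main()
{
  const int n = 4;
  toy_example* ecs = (toy_example*)calloc(n, sizeof(toy_example));
  for (int a = 0; a < n; ++a)
  {
    ecs[a].l.costs = (float*)calloc(2, sizeof(float));
    ecs[a].pred    = (float*)calloc(2, sizeof(float));
  }
  for (int a = 0; a < n; ++a)
  {
    free(ecs[a].l.costs);  // analogue of the delete_label step in dealloc_example
    free(ecs[a].pred);     // analogue of pred.a_s.delete_v()
  }
  free(ecs);               // only now is the outer buffer safe to release
  return 0;
}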
data.csl_empty->costs.delete_v(); + free(data.csls); free(data.csl_empty); - free(data.cbl_empty); - free(data.old_weights); - free(data.cbls); + free(data.cbls); + free(data.cbl_empty); + free(data.old_weights); } else { data.csls->costs.delete_v(); + free(data.csls); } - free(data.csls); - - } void copy_example_to_adf(cbify& data, example& ec) @@ -387,8 +386,12 @@ uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) //} //best_action_dir = ecs[0].pred.a_s[0].action+1; //assert(best_action == best_action_dir); + uint32_t pred_action = ecs[0].pred.a_s[0].action+1; + + //Need to clear the prediction, otherwise there will be a memory leak + ecs[0].pred.a_s.delete_v(); - return ecs[0].pred.a_s[0].action+1; + return pred_action; } @@ -412,7 +415,7 @@ void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) else //validation using supervised data (their labels are already set to cost-sensitive labels) { //only update cumulative costs every warm_start_period iterations - if (data.bandit_iter % data.warm_start_period == 0) + if (abs(log2(data.bandit_iter) - floor(log2(data.bandit_iter))) < 1e-4) { for (uint32_t i = 0; i < data.choices_lambda; i++) data.cumulative_costs[i] = 0; @@ -467,7 +470,7 @@ void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_l else { //only update cumulative costs every warm_start_period iterations - if (data.bandit_iter % data.warm_start_period == 0) + if ( abs(log2(data.bandit_iter) - floor(log2(data.bandit_iter))) < 1e-4 ) { for (uint32_t i = 0; i < data.choices_lambda; i++) data.cumulative_costs[i] = 0; @@ -881,12 +884,6 @@ void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; - copy_example_to_adf(data, ec); - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - data.cbls[a].costs = data.adf_data.ecs[a].l.cb.costs; - data.cbl_empty->costs = data.adf_data.empty_example->l.cb.costs; - //best_action = predict_sublearner(data, base, argmin); uint32_t best_action = predict_cs_adf(data, base); @@ -916,11 +913,11 @@ void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, b //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; - copy_example_to_adf(data, ec); + //copy_example_to_adf(data, ec); - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - data.cbls[a].costs = data.adf_data.ecs[a].l.cb.costs; - data.cbl_empty->costs = data.adf_data.empty_example->l.cb.costs; + //for (size_t a = 0; a < data.adf_data.num_actions; ++a) + // data.cbls[a].costs = data.adf_data.ecs[a].l.cb.costs; + //data.cbl_empty->costs = data.adf_data.empty_example->l.cb.costs; //size_t pred_pi = predict_cs_adf(data, base, ec); uint32_t idx = predict_bandit_adf(data, base); @@ -937,12 +934,12 @@ void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, b auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; lab.costs.push_back(cl); - if (is_update) learn_bandit_adf(data, base, ec_type); accumulate_variance_adf(data, base); + lab.costs.delete_v(); ec.pred.multiclass = cl.action; } @@ -952,6 +949,15 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.warm_start_iter == 0 && data.bandit_iter == 0) setup_lambdas(data, ec); + copy_example_to_adf(data, ec); + + // As we will be processing the examples with cs or cb labels, + // we need to store the default cb label so that the next time we call copy_example_to_adf + // we can free it successfully (that is 
the whole purpose of data.cbls) + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.cbls[a].costs = data.adf_data.ecs[a].l.cb.costs; + data.cbl_empty->costs = data.adf_data.empty_example->l.cb.costs; + if (data.warm_start_iter < data.warm_start_period) // Call the cost-sensitive learner directly { if (data.warm_start_type == SUPERVISED_WS) @@ -964,12 +970,12 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) else if (data.bandit_iter < data.bandit_period) // call the bandit learner { predict_or_learn_bandit_adf(data, base, ec, data.ind_bandit, BANDIT); - data.bandit_iter++; if (data.bandit_iter == data.bandit_period) { cout<<"Ideal average variance = "<pred.a_s.delete_v(); free(data.adf_data.ecs); free(data.adf_data.empty_example); + //TODO: Use CB::cb_label.delete_label / CS here for (size_t a = 0; a < data.adf_data.num_actions; ++a) data.csls[a].costs.delete_v(); @@ -355,7 +356,6 @@ uint32_t find_min(v_array arr) argmin = i; } } - //cout<<"argmin = "<cost_sensitive->predict(*empty, argmin); - - //float best_score; - //for (size_t a = 0; a < data.adf_data.num_actions; ++a) - //{ - // if ( (a == 0) || (ecs[a].partial_prediction < best_score) ) - // { - // best_action = a + 1; - // best_score = ecs[a].partial_prediction; - // } - //} - //best_action_dir = ecs[0].pred.a_s[0].action+1; - //assert(best_action == best_action_dir); uint32_t pred_action = ecs[0].pred.a_s[0].action+1; //Need to clear the prediction, otherwise there will be a memory leak @@ -396,17 +384,51 @@ uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) } +void convert_mc_to_cs(cbify& data, example& ec) +{ + //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) + COST_SENSITIVE::label& csl = *data.csls; + size_t label = ec.l.multi.label; -void accumulate_costs_ips(cbify& data, example& ec, CB::cb_class& cl) + for (uint32_t j = 0; j < data.num_actions; j++) + { + csl.costs[j].class_index = j+1; + csl.costs[j].x = loss(data, label, j+1); + } + ec.l.cs = csl; +} + +size_t predict_sublearner_noadf(cbify& data, example& ec, uint32_t i) { + //For vw's internal reason, we need to first have a cs label before + //using csoaa to predict + MULTICLASS::label_t ld = ec.l.multi; + convert_mc_to_cs(data, ec); + data.all->cost_sensitive->predict(ec, i); + ec.l.multi = ld; + + return ec.pred.multiclass; +} + +size_t predict_cs(cbify& data, example& ec) +{ + uint32_t argmin = find_min(data.cumulative_costs); + //cout<cost_sensitive->predict(ec, i); - if (ec.pred.multiclass == cl.action) + uint32_t action = predict_sublearner_noadf(data, ec, i); + + if (action == cl.action) data.cumulative_costs[i] += cl.cost / cl.probability; //cout<cost_sensitive->predict(ec_valid, i); - - //cout<cost_sensitive->predict(ec, argmin); - - //cout<(), "probability of label corruption in the bandit part") ("corrupt_type_supervised", po::value(), "type of label corruption in the supervised part (1 is uar, 2 is circular)") ("corrupt_type_bandit", po::value(), "probability of label corruption in the bandit part (1 is uar, 2 is circular)") - ("validation_method", po::value(), "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised and amortizing)") + ("validation_method", po::value(), "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised)") ("weighting_scheme", po::value(), "weighting scheme (1 is per instance weighting, 2 is per dataset weighting (where we use a diminishing weighting 
scheme) )") ("lambda_scheme", po::value(), "Lambda set scheme (1 is expanding based on center 0.5, 2 is expanding based on center=minimax lambda, 3 is expanding based on center=minimax lambda along with forcing 0,1 in Lambda )") ("overwrite_label", po::value(), "the label type 3 corruptions (overwriting) turn to") From 7b6e2ba4ece9b8f7b05f9c051b236990fd08cb95 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 28 May 2018 16:58:03 -0400 Subject: [PATCH 078/127] begin changing the cb learning w/o adf part --- vowpalwabbit/cbify.cc | 190 +++++++++++++++++++++--------------------- 1 file changed, 94 insertions(+), 96 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 3da7f8c9212..8e9180dd955 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -383,7 +383,6 @@ uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) } - void convert_mc_to_cs(cbify& data, example& ec) { //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) @@ -410,59 +409,10 @@ size_t predict_sublearner_noadf(cbify& data, example& ec, uint32_t i) return ec.pred.multiclass; } -size_t predict_cs(cbify& data, example& ec) -{ - uint32_t argmin = find_min(data.cumulative_costs); - //cout<(); ec.pred.a_s = data.a_s; uint32_t argmin = find_min(data.cumulative_costs); base.predict(ec, argmin); - data.pred = ec.pred; + //data.pred = ec.pred; uint32_t action = data.mwt_explorer->Choose_Action(*data.generic_explorer, StringUtils::to_string(data.example_counter++), ec); + ec.l.cb.costs.delete_v(); return action; @@ -567,45 +575,34 @@ void predict_or_learn_bandit(cbify& data, base_learner& base, example& ec, size_ { MULTICLASS::label_t ld = ec.l.multi; uint32_t action = predict_bandit(data, base, ec); + data.mc_pred = action; - //CB::cb_class cl; - generate_corrupted_cb(data, ec, cl, ld, action, data.corrupted_label); - //convert_mc_to_cb(data, ec, action); + convert_mc_to_cb(data, ec, action); - //Create a new cb label - data.cb_label.costs.push_back(cl); - ec.l.cb = data.cb_label; //make sure the prediction here is a cb prediction - ec.pred = data.pred; + //ec.pred = data.pred; - bool is_update; - if (ec_type == SUPERVISED) - is_update = data.ind_supervised; - else - is_update = data.ind_bandit; - - if (is_update) + if (ind_update(data, ec_type)) learn_bandit(data, base, ec, ec_type); - data.a_s.erase(); + //data.a_s.erase(); data.a_s = ec.pred.a_s; ec.l.multi = ld; ec.pred.multiclass = action; - data.mc_pred = ec.pred.multiclass; } void add_to_sup_validation(cbify& data, example& ec) { - // NOTE WELL: for convenience in supervised validation, we intentionally use a cost-sensitive label as opposed to - // a multiclass label. This is because the csoaa learner needs a cost-sensitive label to predict (for vw's internal reasons). 
- MULTICLASS::label_t ld = ec.l.multi; ec.l.multi.label = data.corrupted_label; - example& ec_copy = data.supervised_validation[data.warm_start_iter]; - VW::copy_example_data(false, &ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); + example* ec_copy = calloc_or_throw(1); + VW::copy_example_data(false, ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); ec.l.multi = ld; - + // I believe we cannot directly do push_back(ec), as the label won't be deeply copied and that space will be + // reallocated when the example fall out of the predict_or_learn scope + data.supervised_validation.push_back(*ec_copy); + free(ec_copy); } void accumulate_costs_ips(cbify& data, example& ec) @@ -1079,7 +1076,8 @@ base_learner* cbify_setup(vw& all) if (data.validation_method == SUPERVISED_VALI) { - data.supervised_validation = calloc_or_throw(data.warm_start_period); + data.supervised_validation = v_init(); + //calloc_or_throw(data.warm_start_period); } @@ -1107,14 +1105,16 @@ base_learner* cbify_setup(vw& all) } else { - data.csls = calloc_or_throw(1); - auto& csl = data.csls[0]; + //data.csls = calloc_or_throw(1); + //auto& csl = data.csls[0]; - csl.costs = v_init(); + data.cs_label.costs = v_init(); //Note: these two lines are important, otherwise the cost sensitive vector seems to be unbounded. for (size_t a = 0; a < num_actions; ++a) - csl.costs.push_back({0, a+1, 0, 0}); + data.cs_label.costs.push_back({0, a+1, 0, 0}); + + data.cb_label.costs.push_back({0, 1, 0, 0}); } From 0f3b946bb5e3db95b1c7b2b404cd27edaca429ac Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 28 May 2018 20:01:14 -0400 Subject: [PATCH 080/127] before cleaning up adf --- vowpalwabbit/cbify.cc | 286 +++++++++++++++++++++--------------------- 1 file changed, 145 insertions(+), 141 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 0a40cc32918..ac1e4101805 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -267,6 +267,48 @@ bool ind_update(cbify& data, size_t ec_type) return data.ind_bandit; } +float compute_weight_multiplier(cbify& data, size_t i, size_t ec_type) +{ + float weight_multiplier; + + if (ec_type == SUPERVISED) + { + if (data.lambdas[i] >= 0.5) + weight_multiplier = (1 - data.lambdas[i]) / data.lambdas[i]; + else + weight_multiplier = 1; + } + else + { + if (data.lambdas[i] >= 0.5) + weight_multiplier = 1; + else + weight_multiplier = data.lambdas[i] / (1-data.lambdas[i]); + + if (data.weighting_scheme == DATASET_WT) + weight_multiplier = weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); + } + return weight_multiplier; +} + +uint32_t find_min(v_array arr) +{ + float min_val = FLT_MAX; + uint32_t argmin = 0; + + for (uint32_t i = 0; i < arr.size(); i++) + { + //cout< arr) -{ - float min_val = FLT_MAX; - uint32_t argmin = 0; - - for (uint32_t i = 0; i < arr.size(); i++) - { - //cout<cost_sensitive->predict(ecs[a], argmin); - } - base.predict(*empty, i); - //data.all->cost_sensitive->predict(*empty, argmin); - - uint32_t pred_action = ecs[0].pred.a_s[0].action+1; - - //Need to clear the prediction, otherwise there will be a memory leak - ecs[0].pred.a_s.delete_v(); - - return pred_action; - -} - void convert_mc_to_cs(cbify& data, example& ec) { //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) @@ -419,76 +420,6 @@ size_t predict_sublearner_noadf(cbify& data, example& ec, uint32_t i) return ec.pred.multiclass; } -void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, 
base_learner& base) -{ - - if (data.validation_method == BANDIT_VALI) - { - uint32_t best_action; - - //IPS for approximating the cumulative costs for all lambdas - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - best_action = predict_sublearner(data, base, i); - - if (best_action == cl.action) - data.cumulative_costs[i] += cl.cost / cl.probability; - - //cout<= 0.5) - weight_multiplier = (1 - data.lambdas[i]) / data.lambdas[i]; - else - weight_multiplier = 1; - } - else - { - if (data.lambdas[i] >= 0.5) - weight_multiplier = 1; - else - weight_multiplier = data.lambdas[i] / (1-data.lambdas[i]); - - if (data.weighting_scheme == DATASET_WT) - weight_multiplier = weight_multiplier * data.warm_start_period / ( (data.bandit_iter+1) * (data.bandit_iter+2) ); - } - return weight_multiplier; -} size_t predict_cs(cbify& data, example& ec) { @@ -710,6 +641,29 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) } } + +uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) +{ + example* ecs = data.adf_data.ecs; + example* empty = data.adf_data.empty_example; + + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + base.predict(ecs[a], i); + //data.all->cost_sensitive->predict(ecs[a], argmin); + } + base.predict(*empty, i); + //data.all->cost_sensitive->predict(*empty, argmin); + + uint32_t pred_action = ecs[0].pred.a_s[0].action+1; + + //Need to clear the prediction, otherwise there will be a memory leak + ecs[0].pred.a_s.delete_v(); + + return pred_action; +} + size_t predict_cs_adf(cbify& data, base_learner& base) { uint32_t argmin = find_min(data.cumulative_costs); @@ -719,6 +673,83 @@ size_t predict_cs_adf(cbify& data, base_learner& base) return best_action; } + +void add_to_sup_validation_adf(cbify& data, example& ec) +{ + //cout<(1); + VW::copy_example_data(false, ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); + data.supervised_validation.push_back(*ec_copy); + free(ec_copy); +} + + +void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_learner& base) +{ + + if (data.validation_method == BANDIT_VALI) + { + uint32_t best_action; + + //IPS for approximating the cumulative costs for all lambdas + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + best_action = predict_sublearner(data, base, i); + + if (best_action == cl.action) + data.cumulative_costs[i] += cl.cost / cl.probability; + + //cout<l.cs = *csl_empty; @@ -835,34 +866,6 @@ void learn_bandit_adf(cbify& data, base_learner& base, size_t ec_type) ecs[a].weight = data.old_weights[a]; } -void accumulate_variance_adf(cbify& data, base_learner& base) -{ - auto& out_ec = data.adf_data.ecs[0]; - - data.a_s.erase(); - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - data.a_s.push_back({out_ec.pred.a_s[a].action, out_ec.pred.a_s[a].score}); - - size_t pred_best_approx = predict_cs_adf(data, base); - float temp_variance; - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - if (pred_best_approx == data.a_s[a].action + 1) - temp_variance = 1.0 / data.a_s[a].score; - - data.cumulative_variance += temp_variance; - - //cout<<"variance at bandit round "<< data.bandit_iter << " = " << temp_variance << endl; - //cout< void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) { @@ -1005,7 +1009,7 @@ void init_adf_data(cbify& data, const size_t num_actions) { data.csls[a].costs = v_init(); data.csls[a].costs.push_back({0, a+1, 0, 0}); - cout< Date: Mon, 28 May 2018 22:13:57 -0400 Subject: [PATCH 081/127] mwt explorer kept outputting action 0 
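One plausible way the explorer can keep returning action 0 -- sketched below with a toy sampler rather than the actual MWT explorer internals -- is a degenerate probability vector: if the scores are cleared before sampling, every cumulative-threshold test fails and the sampler falls through to index 0, which the caller then misreads as a real action. This is the failure mode that the if (!cl.action) THROW("No action with non-zero probability found!") guard in a later patch defends against.

#include <cstdio>
#include <vector>

// Toy pdf sampler: walk the cumulative distribution and return the first
// index whose cumulative mass reaches r. With an all-zero score vector the
// threshold is never reached and control falls through to index 0.
int sample(const std::vector<float>& scores, float r)
{
  float cum = 0.f;
  for (int i = 0; i < (int)scores.size(); ++i)
  {
    cum += scores[i];
    if (r <= cum) return i;
  }
  return 0;  // degenerate pdf: indistinguishable from legitimately drawing 0
}

int main()
{
  std::vector<float> degenerate(4, 0.f);  // no probability mass anywhere
  int idx = sample(degenerate, 0.5f);
  if (degenerate[idx] == 0.f)
    printf("no action with non-zero probability found\n");
  return 0;
}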
--- vowpalwabbit/cbify.cc | 195 +++++++++++++++++++++++------------------- 1 file changed, 109 insertions(+), 86 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index ac1e4101805..67a0113a3b8 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -116,6 +116,14 @@ struct cbify }; +template +void deep_copy_array(v_array& dst, v_array& src) +{ + dst.erase(); + for (size_t i = 0; i < src.size(); ++i) + dst.push_back(src[i]); +} + float minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period, size_t bandit_period, size_t dim) { /* @@ -352,6 +360,8 @@ void finish(cbify& data) free(data.cbl_empty); free(data.old_weights); + + CB::cb_label.delete_label(&data.cb_label); } else { @@ -477,6 +487,7 @@ uint32_t predict_bandit(cbify& data, base_learner& base, example& ec) { // we need the cb cost array to be an empty array to make cb prediction ec.l.cb.costs = v_init(); + // TODO: not sure why we need the following sentence ec.pred.a_s = data.a_s; uint32_t argmin = find_min(data.cumulative_costs); @@ -641,9 +652,10 @@ void predict_or_learn(cbify& data, base_learner& base, example& ec) } } - -uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) +uint32_t predict_sublearner_adf(cbify& data, base_learner& base, example& ec, uint32_t i) { + //copy_example_to_adf(data, ec); + example* ecs = data.adf_data.ecs; example* empty = data.adf_data.empty_example; @@ -664,13 +676,10 @@ uint32_t predict_sublearner(cbify& data, base_learner& base, uint32_t i) return pred_action; } -size_t predict_cs_adf(cbify& data, base_learner& base) +size_t predict_cs_adf(cbify& data, base_learner& base, example& ec) { uint32_t argmin = find_min(data.cumulative_costs); - - size_t best_action = predict_sublearner(data, base, argmin); - - return best_action; + return predict_sublearner_adf(data, base, ec, argmin); } @@ -684,21 +693,18 @@ void add_to_sup_validation_adf(cbify& data, example& ec) } -void accumulate_costs_ips_adf(cbify& data, example& ec, CB::cb_class& cl, base_learner& base) +void accumulate_costs_ips_adf(cbify& data, base_learner& base, example& ec) { - + CB::cb_class& cl = data.cb_label.costs[0]; if (data.validation_method == BANDIT_VALI) { - uint32_t best_action; - //IPS for approximating the cumulative costs for all lambdas for (uint32_t i = 0; i < data.choices_lambda; i++) { - best_action = predict_sublearner(data, base, i); + uint32_t action = predict_sublearner_adf(data, base, ec, i); - if (best_action == cl.action) + if (action == cl.action) data.cumulative_costs[i] += cl.cost / cl.probability; - //cout<Choose_Action( - *data.generic_explorer, - StringUtils::to_string(data.example_counter++), out_ec) - 1; - - return idx; - -} - void multiclass_to_cs_adf(cbify& data, COST_SENSITIVE::label* csls, size_t corrupted_label) { for (size_t a = 0; a < data.adf_data.num_actions; ++a) @@ -830,6 +810,61 @@ void learn_cs_adf(cbify& data, size_t ec_type) ecs[a].weight = data.old_weights[a]; } +void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, size_t ec_type) +{ + //Store the multiclass input label + MULTICLASS::label_t ld = ec.l.multi; + + uint32_t best_action = predict_cs_adf(data, base, ec); + data.mc_pred = best_action; + + //data.all->cost_sensitive->predict(ec,argmin); + + //generate cost-sensitive label + // ecs[a].weight *= 1; + // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; + size_t corrupted_label = corrupt_action(ld.label, data, ec_type); + generate_corrupted_cs_adf(data, ld, corrupted_label); + + if 
(ind_update(data, ec_type)) + learn_cs_adf(data, ec_type); + + ec.pred.multiclass = best_action; + ec.l.multi = ld; + + //a hack here - allocated memories not deleted + //to be corrected + //if (data.validation_method == SUPERVISED_VALI) + // add_to_sup_validation_adf(data, ec); +} + +size_t predict_bandit_adf(cbify& data, base_learner& base, example& ec) +{ + //copy_example_to_adf(data, ec); + + example* ecs = data.adf_data.ecs; + example* empty_example = data.adf_data.empty_example; + + uint32_t argmin = find_min(data.cumulative_costs); + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + base.predict(ecs[a], argmin); + } + base.predict(*empty_example, argmin); + + // get output scores + auto& out_ec = data.adf_data.ecs[0]; + uint32_t idx = data.mwt_explorer->Choose_Action( + *data.generic_explorer, + StringUtils::to_string(data.example_counter++), out_ec) - 1; + + deep_copy_array(data.a_s, out_ec.pred.a_s); + + return idx; + +} + void generate_corrupted_cb_adf(cbify& data, CB::cb_class& cl, MULTICLASS::label_t& ld, size_t idx, size_t corrupted_label) { auto& out_ec = data.adf_data.ecs[0]; @@ -866,36 +901,7 @@ void learn_bandit_adf(cbify& data, base_learner& base, size_t ec_type) ecs[a].weight = data.old_weights[a]; } -void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) -{ - //Store the multiclass input label - MULTICLASS::label_t ld = ec.l.multi; - - //best_action = predict_sublearner(data, base, argmin); - uint32_t best_action = predict_cs_adf(data, base); - - //data.all->cost_sensitive->predict(ec,argmin); - - //generate cost-sensitive label - // ecs[a].weight *= 1; - // cout << "size cbify = " << ecs[a].l.cs.costs.size() << endl; - size_t corrupted_label = corrupt_action(ld.label, data, ec_type); - generate_corrupted_cs_adf(data, ld, corrupted_label); - - if (is_update) - learn_cs_adf(data, ec_type); - - ec.pred.multiclass = best_action; - ec.l.multi = ld; - - //a hack here - allocated memories not deleted - //to be corrected - if (data.validation_method == SUPERVISED_VALI) - add_to_sup_validation_adf(data, ec); -} - - -void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, bool is_update, size_t ec_type) +void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, size_t ec_type) { //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; @@ -907,24 +913,25 @@ void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, b //data.cbl_empty->costs = data.adf_data.empty_example->l.cb.costs; //size_t pred_pi = predict_cs_adf(data, base, ec); - uint32_t idx = predict_bandit_adf(data, base); + uint32_t idx = predict_bandit_adf(data, base, ec); + data.mc_pred = idx; - CB::cb_class cl; + CB::cb_class& cl = data.cb_label.costs[0]; size_t corrupted_label = corrupt_action(ld.label, data, ec_type); generate_corrupted_cb_adf(data, cl, ld, idx, corrupted_label); // accumulate the cumulative costs of lambdas - accumulate_costs_ips_adf(data, ec, cl, base); + //accumulate_costs_ips_adf(data, ec, cl, base); // add cb label to chosen action auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; lab.costs.push_back(cl); - if (is_update) + if (ind_update(data, ec_type)) learn_bandit_adf(data, base, ec_type); - accumulate_variance_adf(data, base); + //accumulate_variance_adf(data, base, ec); lab.costs.delete_v(); ec.pred.multiclass = cl.action; @@ -948,22 +955,33 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) if (data.warm_start_iter < 
data.warm_start_period) // Call the cost-sensitive learner directly { + data.corrupted_label = corrupt_action(ec.l.multi.label, data, SUPERVISED); + if (data.warm_start_type == SUPERVISED_WS) - predict_or_learn_cs_adf(data, base, ec, data.ind_supervised, SUPERVISED); + predict_or_learn_cs_adf(data, base, ec, SUPERVISED); else - predict_or_learn_bandit_adf(data, base, ec, data.ind_supervised, SUPERVISED); + predict_or_learn_bandit_adf(data, base, ec, SUPERVISED); + + if (data.validation_method == SUPERVISED_VALI) + add_to_sup_validation_adf(data, ec); + ec.weight = 0; + ec.pred.multiclass = data.mc_pred; data.warm_start_iter++; } else if (data.bandit_iter < data.bandit_period) // call the bandit learner { - predict_or_learn_bandit_adf(data, base, ec, data.ind_bandit, BANDIT); + data.corrupted_label = corrupt_action(ec.l.multi.label, data, BANDIT); + predict_or_learn_bandit_adf(data, base, ec, BANDIT); + data.bandit_iter++; if (data.bandit_iter == data.bandit_period) { cout<<"Ideal average variance = "<= 1 && abs( log2(data.bandit_iter+1) - floor(log2(data.bandit_iter+1)) ) < 1e-4 ) { + uint32_t total_epoch_num = ceil(log2(data.bandit_period)); + uint32_t epoch_num = log2(data.bandit_iter+1) - 1; + uint32_t sup_train_size = data.warm_start_period / 2; + uint32_t sup_vali_size = data.warm_start_period - sup_train_size; + float batch_vali_size = ((float) sup_vali_size) / total_epoch_num; + uint32_t lb, ub; + for (uint32_t i = 0; i < data.choices_lambda; i++) data.cumulative_costs[i] = 0; + if (data.validation_method == SUPERVISED_VALI_SPLIT) + { + lb = sup_train_size + ceil(batch_vali_size * epoch_num); + ub = sup_train_size + ceil(batch_vali_size * (epoch_num + 1)); + } + else + { + lb = sup_train_size; + ub = sup_train_size + sup_vali_size; + } + + //cout<<"updating validation error on supervised data: " << data.bandit_iter / data.warm_start_period << endl; for (uint32_t i = 0; i < data.choices_lambda; i++) { - for (uint32_t j = 0; j < data.warm_start_period; j++) + for (uint32_t j = lb; j < ub; j++) { example& ec_valid = data.supervised_validation[j]; uint32_t pred_label = predict_sublearner_adf(data, base, ec_valid, i); @@ -726,12 +744,6 @@ void accumulate_costs_ips_adf(cbify& data, example& ec, base_learner& base) void accumulate_variance_adf(cbify& data, base_learner& base, example& ec) { - auto& out_ec = data.adf_data.ecs[0]; - - data.a_s.erase(); - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - data.a_s.push_back({out_ec.pred.a_s[a].action, out_ec.pred.a_s[a].score}); - size_t pred_best_approx = predict_cs_adf(data, base, ec); float temp_variance; @@ -825,22 +837,14 @@ void predict_or_learn_cs_adf(cbify& data, base_learner& base, example& ec, bool //a hack here - allocated memories not deleted //to be corrected - if (data.validation_method == SUPERVISED_VALI) + if (data.validation_method != BANDIT_VALI) add_to_sup_validation_adf(data, ec); } -size_t predict_bandit_adf(cbify& data, base_learner& base) +size_t predict_bandit_adf(cbify& data, base_learner& base, example& ec) { - example* ecs = data.adf_data.ecs; - example* empty_example = data.adf_data.empty_example; - uint32_t argmin = find_min(data.cumulative_costs); - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - base.predict(ecs[a], argmin); - } - base.predict(*empty_example, argmin); + predict_sublearner_adf(data, base, ec, argmin); // get output scores auto& out_ec = data.adf_data.ecs[0]; @@ -848,6 +852,10 @@ size_t predict_bandit_adf(cbify& data, base_learner& base) *data.generic_explorer, 
StringUtils::to_string(data.example_counter++), out_ec) - 1; + data.a_s.erase(); + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.a_s.push_back({out_ec.pred.a_s[a].action, out_ec.pred.a_s[a].score}); + return idx; } @@ -900,7 +908,7 @@ void predict_or_learn_bandit_adf(cbify& data, base_learner& base, example& ec, b //data.cbl_empty->costs = data.adf_data.empty_example->l.cb.costs; //size_t pred_pi = predict_cs_adf(data, base, ec); - uint32_t idx = predict_bandit_adf(data, base); + uint32_t idx = predict_bandit_adf(data, base, ec); CB::cb_class cl; @@ -966,7 +974,7 @@ void predict_or_learn_adf(cbify& data, base_learner& base, example& ec) ec.pred.multiclass = 0; ec.weight = 0; } - + //data.adf_data.ecs[0].pred.a_s.erase(); for (size_t a = 0; a < data.adf_data.num_actions; ++a) data.adf_data.ecs[a].l.cb.costs = data.cbls[a].costs; data.adf_data.empty_example->l.cb.costs = data.cbl_empty->costs; @@ -1076,7 +1084,7 @@ base_learner* cbify_setup(vw& all) //cout<<"does epsilon exist?"<rank_all, "rank_all", "Return actions sorted by score order") (ld->no_predict, "no_predict", "Do not do a prediction when training") - .keep("cb_type", type_string, (string)"ips", "contextual bandit method to use in {ips,dm,dr, mtr}").missing()) + .keep("cb_type", type_string, (string)"ips", "contextual bandit method to use in {ips,dm,dr, mtr}") + ("cbify", ld->gen_cs.num_actions, 1U, "number of actions") + .missing()) return nullptr; ld->all = arg.all; - cb_to_cs_adf& c = ld.gen_cs; - c.num_actions = (uint32_t)(all.vm["cbify"].as()); + //cb_to_cs_adf& c = ld.gen_cs; + //c.num_actions = (uint32_t)(all.vm["cbify"].as()); // number of weight vectors needed size_t problem_multiplier = 1;//default for IPS diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 6aada60f6a4..5bfd6759c84 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -4,7 +4,7 @@ #include "rand48.h" #include "bs.h" #include "vw.h" -#include "hash.h" +#include "../explore/hash.h" #include "explore.h" #include @@ -59,20 +59,21 @@ struct cbify float loss0; float loss1; + //warm start parameters uint32_t ws_period; uint32_t inter_period; uint32_t choices_lambda; bool upd_ws; bool upd_inter; - uint32_t cor_type_ws; + int cor_type_ws; float cor_prob_ws; - uint32_t cor_type_inter; + int cor_type_inter; float cor_prob_inter; - uint32_t vali_method; - uint32_t wt_scheme; - uint32_t lambda_scheme; + int vali_method; + int wt_scheme; + int lambda_scheme; uint32_t overwrite_label; - uint32_t ws_type; + int ws_type; //auxiliary variables uint32_t num_actions; @@ -81,6 +82,15 @@ struct cbify action_scores a_s_adf; vector cumulative_costs; CB::cb_class cl_adf; + uint32_t ws_train_size; + uint32_t ws_vali_size; + vector ws_vali; + float cumu_var; + uint32_t ws_iter; + uint32_t inter_iter; + MULTICLASS::label_t mc_label; + COST_SENSITIVE::label* csls; + COST_SENSITIVE::label* csl_empty; }; @@ -94,12 +104,35 @@ float loss(cbify& data, uint32_t label, uint32_t final_prediction) template inline void delete_it(T* p) { if (p != nullptr) delete p; } +template +uint32_t find_min(vector arr) +{ + T min_val = FLT_MAX; + uint32_t argmin = 0; + + for (uint32_t i = 0; i < arr.size(); i++) + { + //cout<random_state); - if (randf < corrupt_prob) + if (randf < cor_prob) { - if (corrupt_type == UAR) - return generate_uar_action(data); - else if (corrupt_type == OVERWRITE) - return data.overwrite_label; + if (cor_type == UAR) + cor_action = generate_uar_action(data); + else if (cor_type == OVERWRITE) + cor_action = data.overwrite_label; else - 
return (action % data.num_actions) + 1; + cor_action = (action % data.num_actions) + 1; } else - return action; + cor_action = action; + return cor_action; } -bool ind_update(cbify& data, size_t ec_type) +bool ind_update(cbify& data, int ec_type) { if (ec_type == WARM_START) return data.upd_ws; else - return data.upd_bandit; + return data.upd_inter; } -float compute_weight_multiplier(cbify& data, size_t i, size_t ec_type) +float compute_weight_multiplier(cbify& data, size_t i, int ec_type) { float weight_multiplier; - float ws_train_size = data.warm_start_train_size; - float intr_train_size = data.bandit_period; + float ws_train_size = data.ws_train_size; + float inter_train_size = data.inter_period; - if (data.vali_method != BANDIT_VALI) + if (data.vali_method != INTER_VALI) { - if (ec_type == SUPERVISED && data.warm_start_iter >= ws_train_size) + if (ec_type == WARM_START && data.ws_iter >= ws_train_size) return 0.0; } - float total_size = ws_train_size + intr_train_size; - if (data.weighting_scheme == INSTANCE_WT) + float total_train_size = ws_train_size + inter_train_size; + if (data.wt_scheme == INSTANCE_WT) { - if (ec_type == SUPERVISED) - weight_multiplier = (1-data.lambdas[i]) * total_size / ws_train_size; + if (ec_type == WARM_START) + weight_multiplier = (1-data.lambdas[i]) * total_train_size / ws_train_size; else - weight_multiplier = data.lambdas[i] * total_size / intr_train_size; + weight_multiplier = data.lambdas[i] * total_train_size / inter_train_size; } else { - float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * intr_train_size; + float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * inter_train_size; - if (ec_type == SUPERVISED) - weight_multiplier = (1-data.lambdas[i]) * total_size / total_weight; + if (ec_type == WARM_START) + weight_multiplier = (1-data.lambdas[i]) * total_train_size / total_weight; else - weight_multiplier = data.lambdas[i] * total_size / total_weight; + weight_multiplier = data.lambdas[i] * total_train_size / total_weight; } return weight_multiplier; } -template -uint32_t find_min(vector arr) -{ - T min_val = FLT_MAX; - uint32_t argmin = 0; - - for (uint32_t i = 0; i < arr.size(); i++) - { - //cout< void predict_or_learn(cbify& data, single_learner& base, example& ec) @@ -328,7 +345,7 @@ void predict_or_learn(cbify& data, single_learner& base, example& ec) ec.pred.multiclass = chosen_action + 1; } -uint32_t predict_sublearner_adf(cbify& data, base_learner& base, example& ec, uint32_t i) +uint32_t predict_sublearner_adf(cbify& data, single_learner& base, example& ec, uint32_t i) { copy_example_to_adf(data, ec); @@ -342,7 +359,7 @@ uint32_t predict_sublearner_adf(cbify& data, base_learner& base, example& ec, ui return ecs[0].pred.a_s[0].action+1; } -void accumu_costs_iv_adf(cbify& data, base_learner& base, example& ec) +void accumu_costs_iv_adf(cbify& data, single_learner& base, example& ec) { CB::cb_class& cl = data.cl_adf; //IPS for approximating the cumulative costs for all lambdas @@ -359,6 +376,7 @@ void accumu_costs_iv_adf(cbify& data, base_learner& base, example& ec) void accumu_costs_wsv_adf(cbify& data, single_learner& base) { + uint32_t ws_vali_size = data.ws_vali_size; //only update cumulative costs every warm_start_period iterations if ( data.inter_iter >= 1 && abs( log2(data.inter_iter+1) - floor(log2(data.inter_iter+1)) ) < 1e-4 ) { @@ -367,11 +385,10 @@ void accumu_costs_wsv_adf(cbify& data, single_learner& base) uint32_t num_epochs = ceil(log2(data.inter_period)); uint32_t epoch = 
log2(data.inter_iter+1) - 1; - //uint32_t ws_vali_size = data.warm_start_period - data.warm_start_train_size; float batch_vali_size = ((float) ws_vali_size) / num_epochs; uint32_t lb, ub; - if (data.vali_method == SUPERVISED_VALI_SPLIT) + if (data.vali_method == WS_VALI_SPLIT) { lb = ceil(batch_vali_size * epoch); ub = ceil(batch_vali_size * (epoch + 1)); @@ -396,11 +413,78 @@ void accumu_costs_wsv_adf(cbify& data, single_learner& base) } } +void add_to_vali(cbify& data, example& ec) +{ + //if this does not work, we can try declare ws_vali as an array + example ec_copy; + VW::copy_example_data(false, &ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); + data.ws_vali.push_back(ec_copy); +} + +uint32_t predict_cs_adf(cbify& data, single_learner& base, example& ec) +{ + uint32_t argmin = find_min(data.cumulative_costs); + return predict_sublearner_adf(data, base, ec, argmin); +} + +void learn_cs_adf(cbify& data, single_learner& base, example& ec, int ec_type) +{ + //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) + auto& csls = data.csls; + auto& csl_empty = data.csl_empty; + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + csls[a].costs[0].class_index = a+1; + csls[a].costs[0].x = loss(data, ec.l.multi.label, a+1); + } + + copy_example_to_adf(data, ec); + example* ecs = data.adf_data.ecs; + example* empty_example = data.adf_data.empty_example; + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + ecs[a].l.cs = csls[a]; + //cout<l.cs = *csl_empty; + + vector old_weights; + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + old_weights[a] = ecs[a].weight; + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + float weight_multiplier = compute_weight_multiplier(data, i, ec_type); + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + ecs[a].weight = old_weights[a] * weight_multiplier; + data.all->cost_sensitive->learn(ecs[a],i); + } + data.all->cost_sensitive->learn(*empty_example,i); + } + //Seems like we don't need to set the weights back as this example will be + //discarded anyway + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + ecs[a].weight = old_weights[a]; +} + +void predict_or_learn_cs_adf(cbify& data, single_learner& base, example& ec, int ec_type) +{ + uint32_t action = predict_cs_adf(data, base, ec); + + if (ind_update(data, ec_type)) + learn_cs_adf(data, base, ec, ec_type); + + ec.pred.multiclass = action; +} + + uint32_t predict_bandit_adf(cbify& data, single_learner& base, example& ec) { - copy_example_to_adf(data, ec); uint32_t argmin = find_min(data.cumulative_costs); + copy_example_to_adf(data, ec); for (size_t a = 0; a < data.adf_data.num_actions; ++a) { base.predict(data.adf_data.ecs[a], argmin); @@ -413,28 +497,33 @@ uint32_t predict_bandit_adf(cbify& data, single_learner& base, example& ec) if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(out_ec.pred.a_s), end_scores(out_ec.pred.a_s), chosen_action)) THROW("Failed to sample from pdf"); - copy_array(data.a_s_adf, out_ec.pred.a_s); + auto& a_s = data.a_s_adf; + copy_array(a_s, out_ec.pred.a_s); - CB::cb_class cl; + auto& cl = data.cl_adf; cl.action = a_s[chosen_action].action + 1; cl.probability = a_s[chosen_action].score; if(!cl.action) THROW("No action with non-zero probability found!"); - cl.cost = loss(data, ld.label, cl.action); + cl.cost = loss(data, ec.l.multi.label, cl.action); ec.pred.multiclass = cl.action; return chosen_action; } -uint32_t learn_bandit_adf(cbify& data, single_learner& 
base, example& ec, uint32_t ec_type, uint32_t chosen_action, action_scores& a_s) +void learn_bandit_adf(cbify& data, single_learner& base, example& ec, int ec_type, uint32_t chosen_action, action_scores& a_s) { //Store the multiclass input label MULTICLASS::label_t ld = ec.l.multi; + copy_example_to_adf(data, ec); + example* ecs = data.adf_data.ecs; + example* empty_example = data.adf_data.empty_example; // add cb label to chosen action + auto& cl = data.cl_adf; auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; lab.costs.push_back(cl); @@ -447,10 +536,10 @@ uint32_t learn_bandit_adf(cbify& data, single_learner& base, example& ec, uint32 float weight_multiplier = compute_weight_multiplier(data, i, ec_type); for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - ecs[a].weight = data.old_weights[a] * weight_multiplier; - base.learn(data.adf_data.ecs[a]); + ecs[a].weight = old_weights[a] * weight_multiplier; + base.learn(ecs[a]); } - base.learn(*data.adf_data.empty_example); + base.learn(*empty_example); } for (size_t a = 0; a < data.adf_data.num_actions; ++a) @@ -459,60 +548,71 @@ uint32_t learn_bandit_adf(cbify& data, single_learner& base, example& ec, uint32 //ec.pred.multiclass = cl.action; } -void predict_or_learn_bandit_adf(cbify& data, single_learner& base, example& ec, uint32_t ec_type) +void predict_or_learn_bandit_adf(cbify& data, single_learner& base, example& ec, int ec_type) { uint32_t action = predict_bandit_adf(data, base, ec); - if (ec_type == INTER && data.vali_method == INTER_VALI) + if (ec_type == INTERACTION && data.vali_method == INTER_VALI) accumu_costs_iv_adf(data, base, ec); if (ind_update(data, ec_type)) learn_bandit_adf(data, base, ec, ec_type, action, data.a_s_adf); - if (ec_type == INTER && (data.vali_method == WS_VALI_SPLIT || data.vali_method == WS_VALI_NOSPLIT)) + if (ec_type == INTERACTION && (data.vali_method == WS_VALI_SPLIT || data.vali_method == WS_VALI_NOSPLIT)) accumu_costs_wsv_adf(data, base); ec.pred.multiclass = action; } +void accumu_var_adf(cbify& data, single_learner& base, example& ec) +{ + size_t pred_best_approx = predict_cs_adf(data, base, ec); + float temp_var; + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + if (pred_best_approx == data.a_s_adf[a].action + 1) + temp_var = 1.0 / data.a_s_adf[a].score; + + data.cumu_var += temp_var; + + //cout<<"variance at bandit round "<< data.bandit_iter << " = " << temp_variance << endl; + //cout< void predict_or_learn_adf(cbify& data, single_learner& base, example& ec) { if (data.ws_iter < data.ws_period) { - data.mc_label = ec.l.multiclass - ec.l.multiclass = corrupt_label(WARM_START) + data.mc_label = ec.l.multi; + ec.l.multi.label = corrupt_action(data, data.mc_label.label, WARM_START); if (data.ws_iter < data.ws_train_size) { if (data.ws_type == SUPERVISED_WS) - predict_or_learn_supervised_adf(WARM_START) + predict_or_learn_cs_adf(data, base, ec, WARM_START); else if (data.ws_type == BANDIT_WS) - predict_or_learn_bandit_adf(WARM_START) + predict_or_learn_bandit_adf(data, base, ec, WARM_START); } else - { - add_to_vali(ec) - } + add_to_vali(data, ec); - ec.l.multiclass = data.mc_label + ec.l.multi = data.mc_label; ec.weight = 0; data.ws_iter++; } else if (data.inter_iter < data.inter_period) { - data.mc_label = ec.l.multiclass - ec.l.multiclass = corrupt_label(INTERACTION) - predict_or_learn_bandit_adf(INTERACTION); - accumulate_variance(); - ec.l.multiclass = data.mc_label + data.mc_label = ec.l.multi; + ec.l.multi.label = corrupt_action(data, data.mc_label.label, INTERACTION); + 
predict_or_learn_bandit_adf(data, base, ec, INTERACTION); + accumu_var_adf(data, base, ec); + ec.l.multi = data.mc_label; data.inter_iter++; } else { ec.weight = 0; } - - } @@ -530,6 +630,15 @@ void init_adf_data(cbify& data, const size_t num_actions) } CB::cb_label.default_label(&adf_data.empty_example->l.cb); adf_data.empty_example->in_use = true; + + data.csls = calloc_or_throw(num_actions); + data.csl_empty = calloc_or_throw(1); + for (uint32_t a=0; a < num_actions; ++a) + { + COST_SENSITIVE::cs_label.default_label(&data.csls[a]); + data.csls[a].costs.push_back({0, a+1, 0, 0}); + } + COST_SENSITIVE::cs_label.default_label(data.csl_empty); } base_learner* cbify_setup(arguments& arg) @@ -542,11 +651,11 @@ base_learner* cbify_setup(arguments& arg) ("loss0", data->loss0, 0.f, "loss for correct label") ("loss1", data->loss1, 1.f, "loss for incorrect label") ("epsilon", data->epsilon, 0.05f, "greedy probability") - ("warm_start", data->ws_period, 0, "number of training examples for warm start") - ("interaction", data->inter_period, 0, "number of training examples for bandit processing") - ("choices_lambda", data->choices_lambda, 1, "numbers of lambdas importance weights to aggregate") - ("warm_start_update", data->ind_ws, true, "indicator of warm start updates") - ("interaction_update", data->ind_inter, true, "indicator of interaction updates") + ("warm_start", data->ws_period, 0U, "number of training examples for warm start") + ("interaction", data->inter_period, 0U, "number of training examples for bandit processing") + ("choices_lambda", data->choices_lambda, 1U, "numbers of lambdas importance weights to aggregate") + ("warm_start_update", data->upd_ws, true, "indicator of warm start updates") + ("interaction_update", data->upd_inter, true, "indicator of interaction updates") ("corrupt_type_warm_start", data->cor_type_ws, UAR, "type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") ("corrupt_prob_warm_start", data->cor_prob_ws, 0.f, "probability of label corruption in the warm start phase") ("corrupt_type_bandit", data->cor_type_inter, UAR, "type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") @@ -554,7 +663,7 @@ base_learner* cbify_setup(arguments& arg) ("validation_method", data->vali_method, INTER_VALI, "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised)") ("weighting_scheme", data->wt_scheme, INSTANCE_WT, "weighting scheme (1 is per instance weighting, 2 is per dataset weighting (where we use a diminishing weighting scheme) )") ("lambda_scheme", data->lambda_scheme, ABS_CENTRAL, "Lambda set scheme (1 is expanding based on center=0.5, 2 is expanding based on center=0.5 and enforcing 0,1 in Lambda, 3 is expanding based on center=minimax lambda, 4 is expanding based on center=minimax lambda and enforcing 0,1 in Lambda )") - ("overwrite_label", data->overwrite_label, 1, "the label type 3 corruptions (overwriting) turn to") + ("overwrite_label", data->overwrite_label, 1U, "the label type 3 corruptions (overwriting) turn to") ("warm_start_type", data->ws_type, SUPERVISED_WS, "the way of utilizing warm start data (1 is using supervised updates, 2 is using contextual bandit updates)").missing()) return nullptr; @@ -568,6 +677,17 @@ base_learner* cbify_setup(arguments& arg) if (data->use_adf) init_adf_data(*data.get(), num_actions); + if (data->vali_method == WS_VALI_SPLIT || data->vali_method == 
WS_VALI_NOSPLIT) + { + data->ws_train_size = ceil(data->ws_period / 2.0); + data->ws_vali_size = data->ws_period - data->ws_train_size; + } + else + { + data->ws_train_size = data->ws_period; + data->ws_vali_size = 0; + } + if (count(arg.args.begin(), arg.args.end(),"--cb_explore") == 0 && !data->use_adf) { arg.args.push_back("--cb_explore"); diff --git a/vowpalwabbit/global_data.h b/vowpalwabbit/global_data.h index d9e2d328cab..8043a3fcfcd 100644 --- a/vowpalwabbit/global_data.h +++ b/vowpalwabbit/global_data.h @@ -26,7 +26,7 @@ namespace po = boost::program_options; #include "learner.h" #include "v_hashmap.h" #include -#include "hash.h" +#include "../explore/hash.h" #include "crossplat_compat.h" #include "error_reporting.h" #include "parser_helper.h" diff --git a/vowpalwabbit/io_buf.h b/vowpalwabbit/io_buf.h index 94d4902da7c..f8553daa2ec 100644 --- a/vowpalwabbit/io_buf.h +++ b/vowpalwabbit/io_buf.h @@ -16,7 +16,7 @@ license as described in the file LICENSE. #include #include #include -#include "hash.h" +#include "../explore/hash.h" #include "vw_exception.h" #include "vw_validate.h" diff --git a/vowpalwabbit/parse_example.cc b/vowpalwabbit/parse_example.cc index d9d64cf71dd..bfb06839119 100644 --- a/vowpalwabbit/parse_example.cc +++ b/vowpalwabbit/parse_example.cc @@ -7,7 +7,7 @@ license as described in the file LICENSE. #include #include #include "parse_example.h" -#include "hash.h" +#include "../explore/hash.h" #include "unique_sort.h" #include "global_data.h" #include "constant.h" diff --git a/vowpalwabbit/parse_primitives.cc b/vowpalwabbit/parse_primitives.cc index a9eaebc781b..c0d1d541f77 100644 --- a/vowpalwabbit/parse_primitives.cc +++ b/vowpalwabbit/parse_primitives.cc @@ -13,7 +13,7 @@ license as described in the file LICENSE. #include #include "parse_primitives.h" -#include "hash.h" +#include "../explore/hash.h" #include "vw_exception.h" bool substring_equal(substring&a, substring&b) diff --git a/vowpalwabbit/vw.h b/vowpalwabbit/vw.h index 7744f9b5768..e764e72b23e 100644 --- a/vowpalwabbit/vw.h +++ b/vowpalwabbit/vw.h @@ -14,7 +14,7 @@ license as described in the file LICENSE. 
#include "global_data.h" #include "example.h" -#include "hash.h" +#include "../explore/hash.h" #include "simple_label.h" #include "parser.h" #include "parse_example.h" From 4c3eed34638a53df33353e0513de45c7c75a0478 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 9 Jul 2018 01:17:16 -0400 Subject: [PATCH 093/127] fixed offset bugs in cb_explore and multiline_predict_or_learn --- scripts/data_gen.py | 6 +- vowpalwabbit/cb_adf.cc | 8 +- vowpalwabbit/cb_explore_adf.cc | 5 + vowpalwabbit/cbify.cc | 209 ++++++++++++++++++++------------- vowpalwabbit/learner.h | 28 ++++- 5 files changed, 158 insertions(+), 98 deletions(-) diff --git a/scripts/data_gen.py b/scripts/data_gen.py index 41bdee73c8f..aa30cb061c2 100644 --- a/scripts/data_gen.py +++ b/scripts/data_gen.py @@ -1,9 +1,9 @@ import random import numpy as np -classes = 10 -m = 100 -kwperclass = 20 +classes = 2 +m = 10 +kwperclass = 2 def gen_keyword(): keyword = np.zeros((classes, m)) diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index 5baacab31e1..bdfc9157d1c 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc @@ -350,15 +350,11 @@ base_learner* cb_adf_setup(arguments& arg) .critical("cb_adf", "Do Contextual Bandit learning with multiline action dependent features.") .keep(ld->rank_all, "rank_all", "Return actions sorted by score order") (ld->no_predict, "no_predict", "Do not do a prediction when training") - .keep("cb_type", type_string, (string)"ips", "contextual bandit method to use in {ips,dm,dr,mtr}") - ("cbify", ld->gen_cs.num_actions, 1U, "number of actions") - .missing()) + .keep("cb_type", type_string, (string)"ips", "contextual bandit method to use in {ips,dm,dr, mtr}").missing()) return nullptr; ld->all = arg.all; - - //cb_to_cs_adf& c = ld.gen_cs; - //c.num_actions = (uint32_t)(all.vm["cbify"].as()); + ld->gen_cs.num_actions = arg.vm["cbify"].as(); // number of weight vectors needed size_t problem_multiplier = 1;//default for IPS diff --git a/vowpalwabbit/cb_explore_adf.cc b/vowpalwabbit/cb_explore_adf.cc index b4ee60fb796..a93ad9043f3 100644 --- a/vowpalwabbit/cb_explore_adf.cc +++ b/vowpalwabbit/cb_explore_adf.cc @@ -263,6 +263,9 @@ void predict_or_learn_first(cb_explore_adf& data, multi_learner& base, multi_ex& template void predict_or_learn_greedy(cb_explore_adf& data, multi_learner& base, multi_ex& examples) { + //cout<<"data offset = "<(base, examples, data.offset); + //cout<<"example feature offset after = "<ft_offset<pred.a_s; uint32_t num_actions = (uint32_t)preds.size(); diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 05b6333e999..860b5220d5d 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -16,6 +16,7 @@ using namespace std; #define WARM_START 1 #define INTERACTION 2 +#define SKIP 3 #define SUPERVISED_WS 1 #define BANDIT_WS 2 @@ -88,6 +89,7 @@ struct cbify uint32_t ws_iter; uint32_t inter_iter; MULTICLASS::label_t mc_label; + COST_SENSITIVE::label cs_label; COST_SENSITIVE::label* csls; COST_SENSITIVE::label* csl_empty; @@ -139,10 +141,10 @@ void finish(cbify& data) data.a_s.delete_v(); if (data.use_adf) { - cout<<"The average variance estimate is:"<& lambdas = data.lambdas; + for (uint32_t i = 0; i= ws_train_size) - return 0.0; - } + //if (data.vali_method != INTER_VALI) + //{ + // if (ec_type == WARM_START && data.ws_iter >= ws_train_size) + // return 0.0; + //} float total_train_size = ws_train_size + inter_train_size; if (data.wt_scheme == INSTANCE_WT) { + float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * 
inter_train_size; + if (ec_type == WARM_START) - weight_multiplier = (1-data.lambdas[i]) * total_train_size / ws_train_size; + weight_multiplier = (1-data.lambdas[i]) * total_train_size / total_weight; else - weight_multiplier = data.lambdas[i] * total_train_size / inter_train_size; + weight_multiplier = data.lambdas[i] * total_train_size / total_weight; } else { - float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * inter_train_size; - if (ec_type == WARM_START) - weight_multiplier = (1-data.lambdas[i]) * total_train_size / total_weight; + weight_multiplier = (1-data.lambdas[i]) * total_train_size / ws_train_size; else - weight_multiplier = data.lambdas[i] * total_train_size / total_weight; + weight_multiplier = data.lambdas[i] * total_train_size / inter_train_size; } return weight_multiplier; } @@ -368,14 +372,17 @@ void predict_or_learn(cbify& data, single_learner& base, example& ec) ec.pred.multiclass = cl.action; } -uint32_t predict_sublearner_adf(cbify& data, single_learner& base, example& ec, uint32_t i) +uint32_t predict_sublearner_adf(cbify& data, multi_learner& base, example& ec, uint32_t i) { + //cout<<"predict using sublearner "<< i <ft_offset; + //multiline_learn_or_predict(base, data.adf_data.ecs, offset, i); + base.predict(data.adf_data.ecs, i); + return data.adf_data.ecs[0]->pred.a_s[0].action+1; } -void accumu_costs_iv_adf(cbify& data, single_learner& base, example& ec) +void accumu_costs_iv_adf(cbify& data, multi_learner& base, example& ec) { CB::cb_class& cl = data.cl_adf; //IPS for approximating the cumulative costs for all lambdas @@ -391,7 +398,7 @@ void accumu_costs_iv_adf(cbify& data, single_learner& base, example& ec) } template -void accumu_costs_wsv_adf(cbify& data, single_learner& base) +void accumu_costs_wsv_adf(cbify& data, multi_learner& base) { uint32_t ws_vali_size = data.ws_vali_size; //only update cumulative costs every warm_start_period iterations @@ -449,13 +456,14 @@ void add_to_vali(cbify& data, example& ec) data.ws_vali.push_back(ec_copy); } -uint32_t predict_cs_adf(cbify& data, single_learner& base, example& ec) +uint32_t predict_sup_adf(cbify& data, multi_learner& base, example& ec) { uint32_t argmin = find_min(data.cumulative_costs); return predict_sublearner_adf(data, base, ec, argmin); } -void learn_cs_adf(cbify& data, single_learner& base, example& ec, int ec_type) +template +void learn_sup_adf(cbify& data, multi_learner& base, example& ec, int ec_type) { copy_example_to_adf(data, ec); //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) @@ -470,42 +478,47 @@ void learn_cs_adf(cbify& data, single_learner& base, example& ec, int ec_type) } for (size_t a = 0; a < data.adf_data.num_actions; ++a) { - data.adf_data.ecs[a].l.cs = csls[a]; + data.adf_data.ecs[a]->l.cs = csls[a]; //cout< old_weights; for (size_t a = 0; a < data.adf_data.num_actions; ++a) - old_weights[a] = ecs[a].weight; + old_weights.push_back(data.adf_data.ecs[a]->weight); for (uint32_t i = 0; i < data.choices_lambda; i++) { float weight_multiplier = compute_weight_multiplier(data, i, ec_type); + //cout<<"weight multiplier in sup = "<weight = old_weights[a] * weight_multiplier; multi_learner* cs_learner = as_multiline(data.all->cost_sensitive); cs_learner->learn(data.adf_data.ecs, i); + + //cout<<"cost-sensitive increment = "<increment<weight = old_weights[a]; } -void predict_or_learn_cs_adf(cbify& data, single_learner& base, example& ec, int ec_type) +template +void predict_or_learn_sup_adf(cbify& data, multi_learner& 
base, example& ec, int ec_type) { - uint32_t action = predict_cs_adf(data, base, ec); + uint32_t action = predict_sup_adf(data, base, ec); if (ind_update(data, ec_type)) - learn_cs_adf(data, base, ec, ec_type); + learn_sup_adf(data, base, ec, ec_type); ec.pred.multiclass = action; } - -uint32_t predict_bandit_adf(cbify& data, single_learner& base, example& ec) +uint32_t predict_bandit_adf(cbify& data, multi_learner& base, example& ec) { uint32_t argmin = find_min(data.cumulative_costs); + //cout<(a_s, out_ec.pred.a_s); + return chosen_action; } - -void learn_bandit_adf(cbify& data, single_learner& base, example& ec, int ec_type, uint32_t chosen_action, action_scores& a_s) +void learn_bandit_adf(cbify& data, multi_learner& base, example& ec, int ec_type) { //Store the multiclass input label //MULTICLASS::label_t ld = ec.l.multi; @@ -527,46 +542,68 @@ void learn_bandit_adf(cbify& data, single_learner& base, example& ec, int ec_typ // add cb label to chosen action auto& cl = data.cl_adf; - auto& lab = data.adf_data.ecs[cl.action - 1].l.cb; + auto& lab = data.adf_data.ecs[cl.action - 1]->l.cb; lab.costs.push_back(cl); vector old_weights; for (size_t a = 0; a < data.adf_data.num_actions; ++a) - old_weights[a] = data.adf_data.ecs[a].weight; + old_weights.push_back(data.adf_data.ecs[a]->weight); for (uint32_t i = 0; i < data.choices_lambda; i++) { float weight_multiplier = compute_weight_multiplier(data, i, ec_type); + + //cout<<"learn in sublearner "<< i <<" with weight multiplier "<weight = old_weights[a] * weight_multiplier; + base.learn(data.adf_data.ecs, i); + + //cout<<"cb-explore increment = "<ft_offset; + //multiline_learn_or_predict(base, data.adf_data.ecs, offset, i); } for (size_t a = 0; a < data.adf_data.num_actions; ++a) - data.adf_data.ecs[a].weight = old_weights[a]; - - //ec.pred.multiclass = cl.action; + data.adf_data.ecs[a]->weight = old_weights[a]; } -void predict_or_learn_bandit_adf(cbify& data, single_learner& base, example& ec, int ec_type) +template +void predict_or_learn_bandit_adf(cbify& data, multi_learner& base, example& ec, int ec_type) { - uint32_t action = predict_bandit_adf(data, base, ec); + uint32_t chosen_action = predict_bandit_adf(data, base, ec); + + auto& cl = data.cl_adf; + auto& a_s = data.a_s_adf; + cl.action = a_s[chosen_action].action + 1; + cl.probability = a_s[chosen_action].score; + + //cout<(data, base); - ec.pred.multiclass = action; + ec.pred.multiclass = cl.action; } -void accumu_var_adf(cbify& data, single_learner& base, example& ec) +void accumu_var_adf(cbify& data, multi_learner& base, example& ec) { - size_t pred_best_approx = predict_cs_adf(data, base, ec); + size_t pred_best_approx = predict_sup_adf(data, base, ec); float temp_var; for (size_t a = 0; a < data.adf_data.num_actions; ++a) @@ -580,52 +617,51 @@ void accumu_var_adf(cbify& data, single_learner& base, example& ec) } template -void predict_or_learn_adf(cbify& data, single_learner& base, example& ec) +void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) { - if (data.ws_iter < data.ws_period) + if (use_cs) { - if (!use_cs) - { - data.mc_label = ec.l.multi; + data.cs_label = ec.l.cs; + } + else + { + data.mc_label = ec.l.multi; + if (data.ws_iter < data.ws_period) ec.l.multi.label = corrupt_action(data, data.mc_label.label, WARM_START); - } + else if (data.inter_iter < data.inter_period) + ec.l.multi.label = corrupt_action(data, data.mc_label.label, INTERACTION); + } + + if (data.ws_iter < data.ws_period) + { if (data.ws_iter < data.ws_train_size) { if (data.ws_type 
== SUPERVISED_WS) - predict_or_learn_cs_adf(data, base, ec, WARM_START); + predict_or_learn_sup_adf(data, base, ec, WARM_START); else if (data.ws_type == BANDIT_WS) - predict_or_learn_bandit_adf(data, base, ec, WARM_START); + predict_or_learn_bandit_adf(data, base, ec, WARM_START); } else - add_to_vali(data, ec); - - if (!use_cs) - { - ec.l.multi = data.mc_label; - } + add_to_vali(data, ec); ec.weight = 0; data.ws_iter++; } else if (data.inter_iter < data.inter_period) { - if (!use_cs) - { - data.mc_label = ec.l.multi; - ec.l.multi.label = corrupt_action(data, data.mc_label.label, INTERACTION); - } - predict_or_learn_bandit_adf(data, base, ec, INTERACTION); + predict_or_learn_bandit_adf(data, base, ec, INTERACTION); accumu_var_adf(data, base, ec); - - if (!use_cs) - { - ec.l.multi = data.mc_label; - } data.inter_iter++; } else { ec.weight = 0; } + + if (use_cs) + ec.l.cs = data.cs_label; + else + ec.l.multi = data.mc_label; + } @@ -650,6 +686,20 @@ void init_adf_data(cbify& data, const size_t num_actions) data.csls[a].costs.push_back({0, a+1, 0, 0}); } COST_SENSITIVE::cs_label.default_label(data.csl_empty); + + if (data.vali_method == WS_VALI_SPLIT || data.vali_method == WS_VALI_NOSPLIT) + { + data.ws_train_size = ceil(data.ws_period / 2.0); + data.ws_vali_size = data.ws_period - data.ws_train_size; + } + else + { + data.ws_train_size = data.ws_period; + data.ws_vali_size = 0; + } + setup_lambdas(data); + for (uint32_t i = 0; i < data.choices_lambda; i++) + data.cumulative_costs.push_back(0.f); } base_learner* cbify_setup(arguments& arg) @@ -663,7 +713,6 @@ base_learner* cbify_setup(arguments& arg) (use_cs, "cbify_cs", "consume cost-sensitive classification examples instead of multiclass") ("loss0", data->loss0, 0.f, "loss for correct label") ("loss1", data->loss1, 1.f, "loss for incorrect label") - ("epsilon", data->epsilon, 0.05f, "greedy probability") ("warm_start", data->ws_period, 0U, "number of training examples for warm start") ("interaction", data->inter_period, 0U, "number of training examples for bandit processing") ("choices_lambda", data->choices_lambda, 1U, "numbers of lambdas importance weights to aggregate") @@ -690,17 +739,6 @@ base_learner* cbify_setup(arguments& arg) if (data->use_adf) init_adf_data(*data.get(), num_actions); - if (data->vali_method == WS_VALI_SPLIT || data->vali_method == WS_VALI_NOSPLIT) - { - data->ws_train_size = ceil(data->ws_period / 2.0); - data->ws_vali_size = data->ws_period - data->ws_train_size; - } - else - { - data->ws_train_size = data->ws_period; - data->ws_vali_size = 0; - } - if (count(arg.args.begin(), arg.args.end(),"--cb_explore") == 0 && !data->use_adf) { arg.args.push_back("--cb_explore"); @@ -728,10 +766,15 @@ base_learner* cbify_setup(arguments& arg) if (data->use_adf) { multi_learner* base = as_multiline(setup_base(arg)); + // Not sure why we can only put this line here to pass the value of epsilon + data->epsilon = arg.vm["epsilon"].as(); + if (use_cs) l = &init_cost_sensitive_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); else l = &init_multiclass_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); + + //cout<<"cbify increment = "<increment<(increment * i); +{ + //std::cout<<"in increment_offset singleex: increment = "<ft_offset = "<ft_offset<<" i = "<ft_offset += static_cast(increment * i); + } } inline void decrement_offset(example& ex, const size_t increment, const size_t i) -{ assert(ex.ft_offset >= increment * i); +{ + 
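+  // Offset bookkeeping note: each sublearner id `i` owns a disjoint stride of
+  // the weight vector, so increment_offset above shifts ft_offset forward by
+  // increment * i before sublearner i runs, and this function undoes exactly
+  // that shift (hence the assert). Illustrative numbers, not from the source:
+  // with increment = 4 and i = 3, ft_offset grows by 12 on entry to the
+  // sublearner and shrinks by 12 again here.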
//std::cout<<"in decrement_offset singleex: increment = "<ft_offset = "<ft_offset<<" i = "<ft_offset >= increment * i); ec->ft_offset -= static_cast(increment * i); } } @@ -440,9 +451,11 @@ template struct learner template void multiline_learn_or_predict(multi_learner& base, multi_ex& examples, const uint64_t offset, const uint32_t id = 0) - { std::vector saved_offsets(examples.size()); + { std::vector saved_offsets; for (auto ec : examples) - { saved_offsets.push_back(ec->ft_offset); + { + //std::cout<<"saved offsets before = "<ft_offset<ft_offset); ec->ft_offset = offset; } @@ -452,6 +465,9 @@ template struct learner base.predict(examples, id); for (size_t i = 0; i < examples.size(); i++) + { examples[i]->ft_offset = saved_offsets[i]; + //std::cout<<"saved offsets after = "< Date: Tue, 10 Jul 2018 16:11:46 -0400 Subject: [PATCH 094/127] fixed error on split/nosplit swapping --- scripts/alg_comparison.py | 28 +++++++++++++++------------- vowpalwabbit/cbify.cc | 4 ++-- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index 24dc9c87b8a..b2988ef182e 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -29,7 +29,7 @@ def sum_files(result_path): def parse_sum_file(sum_filename): f = open(sum_filename, 'r') #f.seek(0, 0) - table = pd.read_table(f, sep='\s+',lineterminator='\n') + table = pd.read_table(f, sep='\s+',lineterminator='\n',error_bad_lines=False) return table @@ -120,14 +120,14 @@ def alg_str(alg_name): 'Class-1', 'Bandit-Only', 'Sup-Only', - 'MinimaxBandits, one validation', - 'AwesomeBandits with $|\Lambda|$=4, one validation', - 'AwesomeBandits with $|\Lambda|$=8, one validation', - 'AwesomeBandits with $|\Lambda|$=16, one validation', - 'MinimaxBandits, multiple validation', - 'AwesomeBandits with $|\Lambda|$=4, multiple validation', - 'AwesomeBandits with $|\Lambda|$=8, multiple validation', - 'AwesomeBandits with $|\Lambda|$=16, multiple validation', + 'MinimaxBandits, split validation', + 'AwesomeBandits with $|\Lambda|$=4, split validation', + 'AwesomeBandits with $|\Lambda|$=8, split validation', + 'AwesomeBandits with $|\Lambda|$=16, split validation', + 'MinimaxBandits, no-split validation', + 'AwesomeBandits with $|\Lambda|$=4, no-split validation', + 'AwesomeBandits with $|\Lambda|$=8, no-split validation', + 'AwesomeBandits with $|\Lambda|$=16, no-split validation', 'unknown']) def alg_str_compatible(alg_name): @@ -382,7 +382,9 @@ def get_unnormalized_results(result_table): return new_size, new_unnormalized_results def update_result_dict(results_dict, new_result): + print results_dict for k, v in new_result.iteritems(): + print k results_dict[k].append(v) @@ -524,7 +526,7 @@ def load_from_sum(mod): mod.pair_comp_on = False mod.cdf_on = True mod.maj_error_dir = '../../../figs_all/expt_0509/figs_maj_errors/0of1.sum' - mod.best_error_dir = '../../../figs_all/expt_0606/best_errors/0of1.sum' + mod.best_error_dir = '../../../figs_all/expt_0606/0of1.sum' mod.fulldir = mod.results_dir + mod.plot_subdir if not os.path.exists(mod.fulldir): @@ -571,7 +573,7 @@ def load_from_sum(mod): elif mod.filter == '2': #print all_results['warm_start_size'] >= 100 #raw_input(' ') - all_results = all_results[all_results['warm_start_size'] >= 100] + all_results = all_results[all_results['warm_start'] >= 200] elif mod.filter == '3': all_results = all_results[all_results['num_classes'] >= 3] elif mod.filter == '4': @@ -580,10 +582,10 @@ def load_from_sum(mod): all_results = 
all_results[all_results['total_size'] >= 10000] all_results = all_results[all_results['num_classes'] >= 3] elif mod.filter == '6': - all_results = all_results[all_results['warm_start_size'] >= 100] + all_results = all_results[all_results['warm_start'] >= 100] all_results = all_results[all_results['learning_rate'] == 0.3] elif mod.filter == '7': - all_results = all_results[all_results['warm_start_size'] >= 100] + all_results = all_results[all_results['warm_start'] >= 100] all_results = all_results[all_results['num_classes'] >= 3] plot_all(mod, all_results) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 860b5220d5d..564a8455e6f 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -720,8 +720,8 @@ base_learner* cbify_setup(arguments& arg) ("interaction_update", data->upd_inter, true, "indicator of interaction updates") ("corrupt_type_warm_start", data->cor_type_ws, UAR, "type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") ("corrupt_prob_warm_start", data->cor_prob_ws, 0.f, "probability of label corruption in the warm start phase") - ("corrupt_type_bandit", data->cor_type_inter, UAR, "type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") - ("corrupt_prob_bandit", data->cor_prob_inter, 0.f, "probability of label corruption in the interaction phase") + ("corrupt_type_interaction", data->cor_type_inter, UAR, "type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") + ("corrupt_prob_interaction", data->cor_prob_inter, 0.f, "probability of label corruption in the interaction phase") ("validation_method", data->vali_method, INTER_VALI, "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised)") ("weighting_scheme", data->wt_scheme, INSTANCE_WT, "weighting scheme (1 is per instance weighting, 2 is per dataset weighting (where we use a diminishing weighting scheme) )") ("lambda_scheme", data->lambda_scheme, ABS_CENTRAL, "Lambda set scheme (1 is expanding based on center=0.5, 2 is expanding based on center=0.5 and enforcing 0,1 in Lambda, 3 is expanding based on center=minimax lambda, 4 is expanding based on center=minimax lambda and enforcing 0,1 in Lambda )") From e12a8dab4da310017bf35312dd6ac15151c4dfdd Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Fri, 13 Jul 2018 21:55:35 -0400 Subject: [PATCH 095/127] fixed all memory leaks in warm start ground truth --- vowpalwabbit/cbify.cc | 70 ++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 21 deletions(-) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 564a8455e6f..40559b2197f 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -91,7 +91,8 @@ struct cbify MULTICLASS::label_t mc_label; COST_SENSITIVE::label cs_label; COST_SENSITIVE::label* csls; - COST_SENSITIVE::label* csl_empty; + CB::label* cbls; + //COST_SENSITIVE::label* csl_empty; }; @@ -141,10 +142,17 @@ void finish(cbify& data) data.a_s.delete_v(); if (data.use_adf) { - cout<<"The average variance estimate is: "<(); } + + data.lambdas.~vector(); + data.cumulative_costs.~vector(); + + data.a_s_adf.delete_v(); } void copy_example_to_adf(cbify& data, example& ec) @@ -297,6 +310,12 @@ float compute_weight_multiplier(cbify& data, size_t i, int ec_type) float weight_multiplier; float ws_train_size = data.ws_train_size; float inter_train_size = 
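   /* Worked example of the weighting computed below (illustrative numbers,
      not from the source): with lambda = 0.5, ws_train_size = 100 and
      inter_train_size = 400, total_train_size = 500 and
      total_weight = 0.5*100 + 0.5*400 = 250, so under INSTANCE_WT a warm
      start example gets multiplier 0.5*500/250 = 1 and an interaction example
      also gets 1; in general their per-example ratio is lambda/(1-lambda),
      matching the --weighting_scheme help text. */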
data.inter_period; + float total_train_size = ws_train_size + inter_train_size; + float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * inter_train_size; + + //cout<<"weight multiplier:"<ft_offset; //multiline_learn_or_predict(base, data.adf_data.ecs, offset, i); base.predict(data.adf_data.ecs, i); + //cout<<"greedy label = " << data.adf_data.ecs[0]->pred.a_s[0].action+1 << endl; return data.adf_data.ecs[0]->pred.a_s[0].action+1; } @@ -468,6 +485,7 @@ void learn_sup_adf(cbify& data, multi_learner& base, example& ec, int ec_type) copy_example_to_adf(data, ec); //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) auto& csls = data.csls; + auto& cbls = data.cbls; for (size_t a = 0; a < data.adf_data.num_actions; ++a) { csls[a].costs[0].class_index = a+1; @@ -478,6 +496,7 @@ void learn_sup_adf(cbify& data, multi_learner& base, example& ec, int ec_type) } for (size_t a = 0; a < data.adf_data.num_actions; ++a) { + cbls[a] = data.adf_data.ecs[a]->l.cb; data.adf_data.ecs[a]->l.cs = csls[a]; //cout<weight = old_weights[a]; + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.adf_data.ecs[a]->l.cb = cbls[a]; } template @@ -528,6 +550,10 @@ uint32_t predict_bandit_adf(cbify& data, multi_learner& base, example& ec) if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(out_ec.pred.a_s), end_scores(out_ec.pred.a_s), chosen_action)) THROW("Failed to sample from pdf"); + //cout<<"predict using sublearner "<< argmin < @@ -650,6 +676,7 @@ void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) { predict_or_learn_bandit_adf(data, base, ec, INTERACTION); accumu_var_adf(data, base, ec); + data.a_s_adf.clear(); data.inter_iter++; } else @@ -679,13 +706,14 @@ void init_adf_data(cbify& data, const size_t num_actions) } data.csls = calloc_or_throw(num_actions); - data.csl_empty = calloc_or_throw(1); + //data.csl_empty = calloc_or_throw(1); for (uint32_t a=0; a < num_actions; ++a) { COST_SENSITIVE::cs_label.default_label(&data.csls[a]); data.csls[a].costs.push_back({0, a+1, 0, 0}); } - COST_SENSITIVE::cs_label.default_label(data.csl_empty); + //COST_SENSITIVE::cs_label.default_label(data.csl_empty); + data.cbls = calloc_or_throw(num_actions); if (data.vali_method == WS_VALI_SPLIT || data.vali_method == WS_VALI_NOSPLIT) { @@ -713,20 +741,20 @@ base_learner* cbify_setup(arguments& arg) (use_cs, "cbify_cs", "consume cost-sensitive classification examples instead of multiclass") ("loss0", data->loss0, 0.f, "loss for correct label") ("loss1", data->loss1, 1.f, "loss for incorrect label") - ("warm_start", data->ws_period, 0U, "number of training examples for warm start") - ("interaction", data->inter_period, 0U, "number of training examples for bandit processing") - ("choices_lambda", data->choices_lambda, 1U, "numbers of lambdas importance weights to aggregate") + ("warm_start", data->ws_period, 0U, "number of training examples for warm start phase") + ("interaction", data->inter_period, UINT32_MAX, "number of examples for the interactive contextual bandit learning phase") ("warm_start_update", data->upd_ws, true, "indicator of warm start updates") ("interaction_update", data->upd_inter, true, "indicator of interaction updates") ("corrupt_type_warm_start", data->cor_type_ws, UAR, "type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") ("corrupt_prob_warm_start", data->cor_prob_ws, 0.f, "probability of label corruption in the warm 
start phase") ("corrupt_type_interaction", data->cor_type_inter, UAR, "type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") ("corrupt_prob_interaction", data->cor_prob_inter, 0.f, "probability of label corruption in the interaction phase") - ("validation_method", data->vali_method, INTER_VALI, "lambda selection criterion (1 is using bandit with progressive validation, 2 is using supervised)") - ("weighting_scheme", data->wt_scheme, INSTANCE_WT, "weighting scheme (1 is per instance weighting, 2 is per dataset weighting (where we use a diminishing weighting scheme) )") - ("lambda_scheme", data->lambda_scheme, ABS_CENTRAL, "Lambda set scheme (1 is expanding based on center=0.5, 2 is expanding based on center=0.5 and enforcing 0,1 in Lambda, 3 is expanding based on center=minimax lambda, 4 is expanding based on center=minimax lambda and enforcing 0,1 in Lambda )") - ("overwrite_label", data->overwrite_label, 1U, "the label type 3 corruptions (overwriting) turn to") - ("warm_start_type", data->ws_type, SUPERVISED_WS, "the way of utilizing warm start data (1 is using supervised updates, 2 is using contextual bandit updates)").missing()) + ("choices_lambda", data->choices_lambda, 1U, "the number of candidate lambdas to aggregate (lambda is the importance weight parameter between the two sources) ") + ("lambda_scheme", data->lambda_scheme, ABS_CENTRAL, "The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min lambda=0, max lambda=1, 3: center lambda=epsilon/(#actions+epsilon), 4: center lambda=epsilon/(#actions+epsilon), min lambda=0, max lambda=1); the rest of candidate lambda values are generated using a doubling scheme") + ("weighting_scheme", data->wt_scheme, INSTANCE_WT, "weighting scheme (1: per instance weighting, where for every lambda, each contextual bandit example have weight lambda/(1-lambda) times that of each warm start example, 2: per dataset weighting, where for every lambda, the contextual bandit dataset has total weight lambda/(1-lambda) times that of the warm start dataset)") + ("validation_method", data->vali_method, INTER_VALI, "lambda selection criterion (1: using contextual bandit examples with progressive validation, 2: using warm start examples)") + ("overwrite_label", data->overwrite_label, 1U, "the label used by type 3 corruptions (overwriting)") + ("warm_start_type", data->ws_type, SUPERVISED_WS, "update method of utilizing warm start examples (1: using supervised updates, 2: using contextual bandit updates)").missing()) return nullptr; data->use_adf = count(arg.args.begin(), arg.args.end(),"--cb_explore_adf") > 0; From 4e639bcfaa97fbd235cad253dd3b65b8e93de530 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Sat, 14 Jul 2018 01:45:39 -0400 Subject: [PATCH 096/127] fixed memory leaks in supervised ground truth --- vowpalwabbit/cb_explore_adf.cc | 7 +++++- vowpalwabbit/cbify.cc | 40 +++++++++++++++++++++++----------- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/vowpalwabbit/cb_explore_adf.cc b/vowpalwabbit/cb_explore_adf.cc index a93ad9043f3..2196092600e 100644 --- a/vowpalwabbit/cb_explore_adf.cc +++ b/vowpalwabbit/cb_explore_adf.cc @@ -795,7 +795,12 @@ base_learner* cb_explore_adf_setup(arguments& arg) data->explore_type = REGCB; else { - if (!arg.vm.count("epsilon")) data->epsilon = 0.05f; + if (!arg.vm.count("epsilon")) + { + data->epsilon = 0.05f; + //a hacky way of passing the implicit epsilon value to cbify + 
arg.vm.insert(std::make_pair("epsilon", boost::program_options::variable_value(data->epsilon, false))); + } data->explore_type = EPS_GREEDY; } diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 40559b2197f..b7e7eb5f323 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -84,7 +84,7 @@ struct cbify CB::cb_class cl_adf; uint32_t ws_train_size; uint32_t ws_vali_size; - vector ws_vali; + vector ws_vali; float cumu_var; uint32_t ws_iter; uint32_t inter_iter; @@ -92,6 +92,7 @@ struct cbify COST_SENSITIVE::label cs_label; COST_SENSITIVE::label* csls; CB::label* cbls; + bool use_cs; //COST_SENSITIVE::label* csl_empty; }; @@ -161,12 +162,21 @@ void finish(cbify& data) free(data.adf_data.ecs[a]); } data.adf_data.ecs.~vector(); - } - data.lambdas.~vector(); - data.cumulative_costs.~vector(); + data.lambdas.~vector(); + data.cumulative_costs.~vector(); - data.a_s_adf.delete_v(); + data.a_s_adf.delete_v(); + for (size_t i = 0; i < data.ws_vali.size(); ++i) + { + if (data.use_cs) + VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, *data.ws_vali[i]); + else + VW::dealloc_example(MULTICLASS::mc_label.delete_label, *data.ws_vali[i]); + free(data.ws_vali[i]); + } + data.ws_vali.~vector(); + } } void copy_example_to_adf(cbify& data, example& ec) @@ -439,18 +449,20 @@ void accumu_costs_wsv_adf(cbify& data, multi_learner& base) lb = 0; ub = ws_vali_size; } + //cout<<"validation at iteration "<l.cs.costs, pred_label); else - data.cumulative_costs[i] += loss(data, ec_vali.l.multi.label, pred_label); + data.cumulative_costs[i] += loss(data, ec_vali->l.multi.label, pred_label); //cout< void add_to_vali(cbify& data, example& ec) { //if this does not work, we can try declare ws_vali as an array - example ec_copy; + example* ec_copy = VW::alloc_examples(sizeof(polylabel), 1); if (use_cs) - VW::copy_example_data(false, &ec_copy, &ec, 0, COST_SENSITIVE::cs_label.copy_label); + VW::copy_example_data(false, ec_copy, &ec, 0, COST_SENSITIVE::cs_label.copy_label); else - VW::copy_example_data(false, &ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); + VW::copy_example_data(false, ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); data.ws_vali.push_back(ec_copy); } @@ -752,7 +764,7 @@ base_learner* cbify_setup(arguments& arg) ("choices_lambda", data->choices_lambda, 1U, "the number of candidate lambdas to aggregate (lambda is the importance weight parameter between the two sources) ") ("lambda_scheme", data->lambda_scheme, ABS_CENTRAL, "The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min lambda=0, max lambda=1, 3: center lambda=epsilon/(#actions+epsilon), 4: center lambda=epsilon/(#actions+epsilon), min lambda=0, max lambda=1); the rest of candidate lambda values are generated using a doubling scheme") ("weighting_scheme", data->wt_scheme, INSTANCE_WT, "weighting scheme (1: per instance weighting, where for every lambda, each contextual bandit example have weight lambda/(1-lambda) times that of each warm start example, 2: per dataset weighting, where for every lambda, the contextual bandit dataset has total weight lambda/(1-lambda) times that of the warm start dataset)") - ("validation_method", data->vali_method, INTER_VALI, "lambda selection criterion (1: using contextual bandit examples with progressive validation, 2: using warm start examples)") + ("validation_method", data->vali_method, INTER_VALI, "lambda selection criterion (1: using contextual bandit examples with progressive validation, 2: using warm start examples, with fresh 
validation examples at each epoch, 3: using warm start examples, with a single validation set throughout)") ("overwrite_label", data->overwrite_label, 1U, "the label used by type 3 corruptions (overwriting)") ("warm_start_type", data->ws_type, SUPERVISED_WS, "update method of utilizing warm start examples (1: using supervised updates, 2: using contextual bandit updates)").missing()) return nullptr; @@ -763,6 +775,7 @@ base_learner* cbify_setup(arguments& arg) data->all = arg.all; data->num_actions = num_actions; + data->use_cs = use_cs; if (data->use_adf) init_adf_data(*data.get(), num_actions); @@ -795,6 +808,7 @@ base_learner* cbify_setup(arguments& arg) { multi_learner* base = as_multiline(setup_base(arg)); // Not sure why we can only put this line here to pass the value of epsilon + cout<<"count: "<epsilon = arg.vm["epsilon"].as(); if (use_cs) From 6540308107dc1f68270b8dcb1cec30efb8deeb82 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 16 Jul 2018 11:26:10 -0400 Subject: [PATCH 097/127] added cbify warm start test cases --- test/RunTests | 36 +++++++++++++ test/train-sets/ref/cbify_ws.stderr | 19 +++++++ test/train-sets/ref/cbify_ws_cyc.stderr | 19 +++++++ .../ref/cbify_ws_lambda_zeroone.stderr | 19 +++++++ test/train-sets/ref/cbify_ws_maj.stderr | 19 +++++++ .../train-sets/ref/cbify_ws_no_int_upd.stderr | 19 +++++++ test/train-sets/ref/cbify_ws_no_ws_upd.stderr | 19 +++++++ test/train-sets/ref/cbify_ws_simbandit.stderr | 19 +++++++ test/train-sets/ref/cbify_ws_uar.stderr | 19 +++++++ test/train-sets/ref/cbify_ws_wsgt.stderr | 19 +++++++ vowpalwabbit/cb_adf.cc | 2 +- vowpalwabbit/cbify.cc | 53 +++++++++---------- 12 files changed, 234 insertions(+), 28 deletions(-) create mode 100644 test/train-sets/ref/cbify_ws.stderr create mode 100644 test/train-sets/ref/cbify_ws_cyc.stderr create mode 100644 test/train-sets/ref/cbify_ws_lambda_zeroone.stderr create mode 100644 test/train-sets/ref/cbify_ws_maj.stderr create mode 100644 test/train-sets/ref/cbify_ws_no_int_upd.stderr create mode 100644 test/train-sets/ref/cbify_ws_no_ws_upd.stderr create mode 100644 test/train-sets/ref/cbify_ws_simbandit.stderr create mode 100644 test/train-sets/ref/cbify_ws_uar.stderr create mode 100644 test/train-sets/ref/cbify_ws_wsgt.stderr diff --git a/test/RunTests b/test/RunTests index d7e87b85858..5f51bb68642 100755 --- a/test/RunTests +++ b/test/RunTests @@ -1632,3 +1632,39 @@ echo "1 | feature:1" | {VW} -a --initial_weight 0.1 --initial_t 0.3 # Test 174 cbify adf, regcbopt {VW} --cbify 10 --cb_explore_adf --cb_type mtr --regcbopt --mellowness 0.01 -d train-sets/multiclass train-sets/ref/cbify_regcbopt.stderr + +# Test 175 cbify warm start +{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 -d train-sets/multiclass + /train-sets/ref/cbify_ws.stderr + +# Test 176 cbify warm start with lambda set containing 0/1 +{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --lambda_scheme 2 -d train-sets/multiclass + /train-sets/ref/cbify_ws_lambda_zeroone.stderr + +# Test 177 cbify warm start with warm start update turned off +{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_update false -d train-sets/multiclass + /train-sets/ref/cbify_ws_no_ws_upd.stderr + +# Test 178 cbify warm start with interaction update turned off +{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 
--choices_lambda 8 --interaction_update false -d train-sets/multiclass + /train-sets/ref/cbify_ws_no_int_upd.stderr + +# Test 179 cbify warm start with bandit warm start type (Sim-Bandit) +{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_type 2 -d train-sets/multiclass + /train-sets/ref/cbify_ws_simbandit.stderr + +# Test 180 cbify warm start with UAR supervised corruption +{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 1 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass + /train-sets/ref/cbify_ws_uar.stderr + +# Test 181 cbify warm start with CYC supervised corruption +{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 2 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass + /train-sets/ref/cbify_ws_cyc.stderr + +# Test 182 cbify warm start with MAJ supervised corruption +{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 3 --corrupt_prob_warm_start 0.5 --overwrite_label 1 -d train-sets/multiclass + /train-sets/ref/cbify_ws_maj.stderr + +# Test 183 cbify warm start with warm start distribution being the ground truth +{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_interaction 2 --corrupt_prob_interaction 0.5 --weighting_scheme 2 --validation_method 2 -d train-sets/multiclass + /train-sets/ref/cbify_ws_wsgt.stderr diff --git a/test/train-sets/ref/cbify_ws.stderr b/test/train-sets/ref/cbify_ws.stderr new file mode 100644 index 00000000000..6d05ba5a0db --- /dev/null +++ b/test/train-sets/ref/cbify_ws.stderr @@ -0,0 +1,19 @@ +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = train-sets/multiclass +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 4 1.0 4 3 2 +1.000000 1.000000 5 2.0 5 3 2 +1.000000 1.000000 7 4.0 7 3 2 + +finished run +number of examples = 10 +weighted example sum = 7.000000 +weighted label sum = 0.000000 +average loss = 1.000000 +total feature number = 20 diff --git a/test/train-sets/ref/cbify_ws_cyc.stderr b/test/train-sets/ref/cbify_ws_cyc.stderr new file mode 100644 index 00000000000..6d05ba5a0db --- /dev/null +++ b/test/train-sets/ref/cbify_ws_cyc.stderr @@ -0,0 +1,19 @@ +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = train-sets/multiclass +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 4 1.0 4 3 2 +1.000000 1.000000 5 2.0 5 3 2 +1.000000 1.000000 7 4.0 7 3 2 + +finished run +number of examples = 10 +weighted example sum = 7.000000 +weighted label sum = 0.000000 +average loss = 1.000000 +total feature number = 20 diff --git a/test/train-sets/ref/cbify_ws_lambda_zeroone.stderr b/test/train-sets/ref/cbify_ws_lambda_zeroone.stderr new file mode 100644 index 00000000000..344c43a5335 --- /dev/null +++ b/test/train-sets/ref/cbify_ws_lambda_zeroone.stderr @@ -0,0 +1,19 @@ +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = train-sets/multiclass +num sources = 1 +average since example example current current 
current +loss last counter weight label predict features +1.000000 1.000000 4 1.0 4 3 2 +1.000000 1.000000 5 2.0 5 9 2 +0.750000 0.500000 7 4.0 7 7 2 + +finished run +number of examples = 10 +weighted example sum = 7.000000 +weighted label sum = 0.000000 +average loss = 0.857143 +total feature number = 20 diff --git a/test/train-sets/ref/cbify_ws_maj.stderr b/test/train-sets/ref/cbify_ws_maj.stderr new file mode 100644 index 00000000000..6d05ba5a0db --- /dev/null +++ b/test/train-sets/ref/cbify_ws_maj.stderr @@ -0,0 +1,19 @@ +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = train-sets/multiclass +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 4 1.0 4 3 2 +1.000000 1.000000 5 2.0 5 3 2 +1.000000 1.000000 7 4.0 7 3 2 + +finished run +number of examples = 10 +weighted example sum = 7.000000 +weighted label sum = 0.000000 +average loss = 1.000000 +total feature number = 20 diff --git a/test/train-sets/ref/cbify_ws_no_int_upd.stderr b/test/train-sets/ref/cbify_ws_no_int_upd.stderr new file mode 100644 index 00000000000..6d05ba5a0db --- /dev/null +++ b/test/train-sets/ref/cbify_ws_no_int_upd.stderr @@ -0,0 +1,19 @@ +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = train-sets/multiclass +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 4 1.0 4 3 2 +1.000000 1.000000 5 2.0 5 3 2 +1.000000 1.000000 7 4.0 7 3 2 + +finished run +number of examples = 10 +weighted example sum = 7.000000 +weighted label sum = 0.000000 +average loss = 1.000000 +total feature number = 20 diff --git a/test/train-sets/ref/cbify_ws_no_ws_upd.stderr b/test/train-sets/ref/cbify_ws_no_ws_upd.stderr new file mode 100644 index 00000000000..4b334d4e73b --- /dev/null +++ b/test/train-sets/ref/cbify_ws_no_ws_upd.stderr @@ -0,0 +1,19 @@ +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = train-sets/multiclass +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 4 1.0 4 5 2 +1.000000 1.000000 5 2.0 5 9 2 +0.750000 0.500000 7 4.0 7 7 2 + +finished run +number of examples = 10 +weighted example sum = 7.000000 +weighted label sum = 0.000000 +average loss = 0.714286 +total feature number = 20 diff --git a/test/train-sets/ref/cbify_ws_simbandit.stderr b/test/train-sets/ref/cbify_ws_simbandit.stderr new file mode 100644 index 00000000000..6d935a38a61 --- /dev/null +++ b/test/train-sets/ref/cbify_ws_simbandit.stderr @@ -0,0 +1,19 @@ +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = train-sets/multiclass +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 4 1.0 4 7 2 +1.000000 1.000000 5 2.0 5 1 2 +0.750000 0.500000 7 4.0 7 10 2 + +finished run +number of examples = 10 +weighted example sum = 7.000000 +weighted label sum = 0.000000 +average loss = 0.857143 +total feature number = 20 diff --git a/test/train-sets/ref/cbify_ws_uar.stderr b/test/train-sets/ref/cbify_ws_uar.stderr new file mode 100644 index 00000000000..6d05ba5a0db --- /dev/null +++ b/test/train-sets/ref/cbify_ws_uar.stderr @@ -0,0 +1,19 @@ +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 
+power_t = 0.5 +using no cache +Reading datafile = train-sets/multiclass +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 4 1.0 4 3 2 +1.000000 1.000000 5 2.0 5 3 2 +1.000000 1.000000 7 4.0 7 3 2 + +finished run +number of examples = 10 +weighted example sum = 7.000000 +weighted label sum = 0.000000 +average loss = 1.000000 +total feature number = 20 diff --git a/test/train-sets/ref/cbify_ws_wsgt.stderr b/test/train-sets/ref/cbify_ws_wsgt.stderr new file mode 100644 index 00000000000..d05436ac3a2 --- /dev/null +++ b/test/train-sets/ref/cbify_ws_wsgt.stderr @@ -0,0 +1,19 @@ +Num weight bits = 18 +learning rate = 0.5 +initial_t = 0 +power_t = 0.5 +using no cache +Reading datafile = train-sets/multiclass +num sources = 1 +average since example example current current current +loss last counter weight label predict features +1.000000 1.000000 4 1.0 4 2 2 +1.000000 1.000000 5 2.0 5 2 2 +1.000000 1.000000 7 4.0 7 2 2 + +finished run +number of examples = 10 +weighted example sum = 7.000000 +weighted label sum = 0.000000 +average loss = 1.000000 +total feature number = 20 diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index bdfc9157d1c..53a8bb5a4db 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc @@ -114,7 +114,7 @@ void learn_MTR(cb_adf& mydata, multi_learner& base, multi_ex& examples) uint32_t nf = (uint32_t)examples[mydata.gen_cs.mtr_example]->num_features; float old_weight = examples[mydata.gen_cs.mtr_example]->weight; - //adjust the importance weight to scale by a factor of 1/K (the last term) + //adjust the importance weight to scale by a factor of 1/num_actions (the last term) examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / mydata.gen_cs.num_actions); GEN_CS::call_cs_ldf(base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset); examples[mydata.gen_cs.mtr_example]->num_features = nf; diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index b7e7eb5f323..0de288757c8 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -93,7 +93,6 @@ struct cbify COST_SENSITIVE::label* csls; CB::label* cbls; bool use_cs; - //COST_SENSITIVE::label* csl_empty; }; @@ -219,7 +218,7 @@ float minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period void setup_lambdas(cbify& data) { - // The lambdas are in fact arranged in ascending order (the 'middle' lambda is 0.5) + // The lambdas are arranged in ascending order vector& lambdas = data.lambdas; for (uint32_t i = 0; i 0; i--) - lambdas[i-1] = lambdas[i] / 2; + lambdas[i-1] = lambdas[i] / 2.0; for (uint32_t i = mid+1; i < data.choices_lambda; i++) - lambdas[i] = 1 - (1-lambdas[i-1]) / 2; + lambdas[i] = 1 - (1-lambdas[i-1]) / 2.0; if (data.lambda_scheme == MINIMAX_CENTRAL_ZEROONE || data.lambda_scheme == ABS_CENTRAL_ZEROONE) { @@ -323,16 +322,9 @@ float compute_weight_multiplier(cbify& data, size_t i, int ec_type) float total_train_size = ws_train_size + inter_train_size; float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * inter_train_size; - //cout<<"weight multiplier:"<= ws_train_size) - // return 0.0; - //} - if (data.wt_scheme == INSTANCE_WT) { if (ec_type == WARM_START) @@ -347,6 +339,9 @@ float compute_weight_multiplier(cbify& data, size_t i, int ec_type) else weight_multiplier = 
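      /* Per-dataset weighting sketch (illustrative, not from the source): in
         this branch the multipliers divide by each source's own size, so summed
         over examples the warm start set carries total weight
         (1-lambda)*total_train_size and the interaction set
         lambda*total_train_size, i.e. a lambda/(1-lambda) ratio between the two
         datasets as wholes, per the --weighting_scheme help text. */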
data.lambdas[i] * total_train_size / inter_train_size; } + + //cout<<"weight multiplier: "< void add_to_vali(cbify& data, example& ec) { - //if this does not work, we can try declare ws_vali as an array + //TODO: set the first parameter properly example* ec_copy = VW::alloc_examples(sizeof(polylabel), 1); if (use_cs) @@ -495,7 +490,7 @@ template void learn_sup_adf(cbify& data, multi_learner& base, example& ec, int ec_type) { copy_example_to_adf(data, ec); - //generate cost-sensitive label (only for CSOAA's use - this will be retracted at the end) + //generate cost-sensitive label (for CSOAA's temporary use) auto& csls = data.csls; auto& cbls = data.cbls; for (size_t a = 0; a < data.adf_data.num_actions; ++a) @@ -552,7 +547,6 @@ void predict_or_learn_sup_adf(cbify& data, multi_learner& base, example& ec, int uint32_t predict_bandit_adf(cbify& data, multi_learner& base, example& ec) { uint32_t argmin = find_min(data.cumulative_costs); - //cout< void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) { + // Corrupt labels (only corrupting multiclass labels as of now) + if (use_cs) - { data.cs_label = ec.l.cs; - } else { data.mc_label = ec.l.multi; - if (data.ws_iter < data.ws_period) + /*if (data.ws_iter < data.ws_period) ec.l.multi.label = corrupt_action(data, data.mc_label.label, WARM_START); else if (data.inter_iter < data.inter_period) ec.l.multi.label = corrupt_action(data, data.mc_label.label, INTERACTION); + */ } + // Warm start phase if (data.ws_iter < data.ws_period) { if (data.ws_iter < data.ws_train_size) @@ -684,6 +678,7 @@ void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) ec.weight = 0; data.ws_iter++; } + // Interaction phase else if (data.inter_iter < data.inter_period) { predict_or_learn_bandit_adf(data, base, ec, INTERACTION); @@ -691,11 +686,11 @@ void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) data.a_s_adf.clear(); data.inter_iter++; } + // Skipping the rest of the examples else - { ec.weight = 0; - } + // Store the original labels back if (use_cs) ec.l.cs = data.cs_label; else @@ -703,7 +698,6 @@ void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) } - void init_adf_data(cbify& data, const size_t num_actions) { auto& adf_data = data.adf_data; @@ -717,14 +711,13 @@ void init_adf_data(cbify& data, const size_t num_actions) CB::cb_label.default_label(&lab); } + // The rest of the initialization is for warm start CB data.csls = calloc_or_throw(num_actions); - //data.csl_empty = calloc_or_throw(1); for (uint32_t a=0; a < num_actions; ++a) { COST_SENSITIVE::cs_label.default_label(&data.csls[a]); data.csls[a].costs.push_back({0, a+1, 0, 0}); } - //COST_SENSITIVE::cs_label.default_label(data.csl_empty); data.cbls = calloc_or_throw(num_actions); if (data.vali_method == WS_VALI_SPLIT || data.vali_method == WS_VALI_NOSPLIT) @@ -737,9 +730,13 @@ void init_adf_data(cbify& data, const size_t num_actions) data.ws_train_size = data.ws_period; data.ws_vali_size = 0; } + data.ws_iter = 0; + data.inter_iter = 0; + setup_lambdas(data); for (uint32_t i = 0; i < data.choices_lambda; i++) data.cumulative_costs.push_back(0.f); + data.cumu_var = 0.f; } base_learner* cbify_setup(arguments& arg) @@ -807,9 +804,11 @@ base_learner* cbify_setup(arguments& arg) if (data->use_adf) { multi_learner* base = as_multiline(setup_base(arg)); - // Not sure why we can only put this line here to pass the value of epsilon - cout<<"count: "<epsilon = arg.vm["epsilon"].as(); + // Note: the current version of warm start CB can only 
support epsilon greedy exploration
+    // algorithm - we need to wait for the default epsilon value to be passed from cb_explore
+    // if there is one
+    //cout<<"count: "<<arg.vm.count("epsilon")<<endl;
+    data->epsilon = arg.vm.count("epsilon") > 0 ? arg.vm["epsilon"].as<float>() : 0.0f;

From be93a25e41827b131327ed7f48bb309a7afa92e6 Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Mon, 16 Jul 2018 11:55:25 -0400
Subject: [PATCH 098/127] removed unnecessary include path prefix

---
 vowpalwabbit/global_data.h       | 2 +-
 vowpalwabbit/io_buf.h            | 2 +-
 vowpalwabbit/parse_example.cc    | 2 +-
 vowpalwabbit/parse_primitives.cc | 2 +-
 vowpalwabbit/vw.h                | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/vowpalwabbit/global_data.h b/vowpalwabbit/global_data.h
index a34491b48a0..be80d8d6fc1 100644
--- a/vowpalwabbit/global_data.h
+++ b/vowpalwabbit/global_data.h
@@ -26,7 +26,7 @@ namespace po = boost::program_options;
 #include "learner.h"
 #include "v_hashmap.h"
 #include 
-#include "../explore/hash.h"
+#include "hash.h"
 #include "crossplat_compat.h"
 #include "error_reporting.h"
 #include "parser_helper.h"
diff --git a/vowpalwabbit/io_buf.h b/vowpalwabbit/io_buf.h
index f8553daa2ec..94d4902da7c 100644
--- a/vowpalwabbit/io_buf.h
+++ b/vowpalwabbit/io_buf.h
@@ -16,7 +16,7 @@ license as described in the file LICENSE.
 #include 
 #include 
 #include 
-#include "../explore/hash.h"
+#include "hash.h"
 #include "vw_exception.h"
 #include "vw_validate.h"
diff --git a/vowpalwabbit/parse_example.cc b/vowpalwabbit/parse_example.cc
index bfb06839119..d9d64cf71dd 100644
--- a/vowpalwabbit/parse_example.cc
+++ b/vowpalwabbit/parse_example.cc
@@ -7,7 +7,7 @@ license as described in the file LICENSE.
 #include 
 #include 
 #include "parse_example.h"
-#include "../explore/hash.h"
+#include "hash.h"
 #include "unique_sort.h"
 #include "global_data.h"
 #include "constant.h"
diff --git a/vowpalwabbit/parse_primitives.cc b/vowpalwabbit/parse_primitives.cc
index 9e728ef9df3..3dbad443151 100644
--- a/vowpalwabbit/parse_primitives.cc
+++ b/vowpalwabbit/parse_primitives.cc
@@ -13,7 +13,7 @@ license as described in the file LICENSE.
 #include 
 #include "parse_primitives.h"
-#include "../explore/hash.h"
+#include "hash.h"
 #include "vw_exception.h"
 
 bool substring_equal(const substring& a, const substring& b)
diff --git a/vowpalwabbit/vw.h b/vowpalwabbit/vw.h
index e764e72b23e..7744f9b5768 100644
--- a/vowpalwabbit/vw.h
+++ b/vowpalwabbit/vw.h
@@ -14,7 +14,7 @@ license as described in the file LICENSE. 
#include "global_data.h" #include "example.h" -#include "../explore/hash.h" +#include "hash.h" #include "simple_label.h" #include "parser.h" #include "parse_example.h" From 8a51d165a826ecf0ea22a9e244d4f72a4a24e8c5 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 23 Jul 2018 01:53:47 -0400 Subject: [PATCH 099/127] cleaning up script --- scripts/run_vw_commands.py | 283 ++++++++++++++++++++++++++----------- 1 file changed, 204 insertions(+), 79 deletions(-) diff --git a/scripts/run_vw_commands.py b/scripts/run_vw_commands.py index 5b6d2553a08..e4ac5d1bf7a 100644 --- a/scripts/run_vw_commands.py +++ b/scripts/run_vw_commands.py @@ -115,23 +115,51 @@ def gen_vw_options_list(mod): def gen_vw_options(mod): if 'optimal_approx' in mod.param: # Fully supervised on full dataset - mod.vw_template = {'data':'', 'progress':2.0, 'passes':0, 'oaa':0, 'cache_file':''} + mod.vw_template = + {'data':'', + 'progress':2.0, + 'passes':0, + 'oaa':0, + 'cache_file':''} mod.param['passes'] = 5 mod.param['oaa'] = mod.param['num_classes'] mod.param['cache_file'] = mod.param['data'] + '.cache' elif 'majority_approx' in mod.param: # Compute majority error; basically we would like to skip vw running as fast as possible - mod.vw_template = {'data':'', 'progress':2.0, 'cbify':0, 'warm_start':0, 'bandit':0} + mod.vw_template = + {'data':'', + 'progress':2.0, + 'cbify':0, + 'warm_start':0, + 'bandit':0} mod.param['cbify'] = mod.param['num_classes'] mod.param['warm_start'] = 0 - mod.param['bandit'] = 0 + mod.param['interaction'] = 0 else: # General CB - mod.vw_template = {'data':'', 'progress':2.0, 'corrupt_type_bandit':0, 'corrupt_prob_bandit':0.0, 'bandit':0, 'cb_type':'mtr', - 'choices_lambda':0, 'corrupt_type_supervised':0, 'corrupt_prob_supervised':0.0, 'lambda_scheme':1, 'learning_rate':0.5, 'warm_start_type':1, 'cbify':0, 'warm_start':0, 'overwrite_label':1, 'validation_method':1, 'weighting_scheme':1} + mod.vw_template = + {'data':'', + 'progress':2.0, + 'cb_type':'mtr', + 'cbify':0, + 'warm_start':0, + 'interaction':0, + 'choices_lambda':0, + 'corrupt_type_interaction':0, + 'corrupt_prob_interaction':0.0, + 'corrupt_type_supervised':0, + 'corrupt_prob_supervised':0.0, + 'warm_start_update': True, + 'interaction_update': True, + 'lambda_scheme':1, + 'learning_rate':0.5, + 'warm_start_type':1, + 'overwrite_label':1, + 'validation_method':1, + 'weighting_scheme':1} mod.param['warm_start'] = mod.param['warm_start_multiplier'] * mod.param['progress'] - mod.param['bandit'] = mod.param['total_size'] - mod.param['warm_start'] + mod.param['interaction'] = mod.param['total_size'] - mod.param['warm_start'] mod.param['cbify'] = mod.param['num_classes'] mod.param['overwrite_label'] = mod.param['majority_class'] @@ -142,12 +170,6 @@ def gen_vw_options(mod): mod.param['cb_explore'] = mod.param['num_classes'] mod.vw_template['cb_explore'] = 0 - if mod.param['no_warm_start_update'] is True: - mod.param['no_supervised'] = ' ' - mod.vw_template['no_supervised'] = ' ' - if mod.param['no_interaction_update'] is True: - mod.param['no_bandit'] = ' ' - mod.vw_template['no_bandit'] = ' ' def execute_vw(mod): gen_vw_options(mod) @@ -186,12 +208,26 @@ def replace_keys(dic, simplified_keymap): def param_to_str_simplified(mod): #print 'before replace' #print param - vw_run_param_set = ['lambda_scheme','learning_rate','validation_method', - 'fold','no_warm_start_update','no_interaction_update', - 'corrupt_prob_bandit', 'corrupt_prob_supervised', - 'corrupt_type_bandit', 'corrupt_type_supervised', - 
'warm_start_type','warm_start_multiplier','choices_lambda','weighting_scheme', - 'cb_type','optimal_approx','majority_approx','dataset', 'adf_on'] + vw_run_param_set = + ['lambda_scheme', + 'learning_rate', + 'validation_method', + 'fold', + 'no_warm_start_update', + 'no_interaction_update', + 'corrupt_prob_interaction', + 'corrupt_prob_warm_start', + 'corrupt_type_interaction', + 'corrupt_type_warm_start', + 'warm_start_type', + 'warm_start_multiplier', + 'choices_lambda', + 'weighting_scheme', + 'cb_type', + 'optimal_approx', + 'majority_approx', + 'dataset', + 'adf_on'] mod.template_red = dict([(k,mod.result_template[k]) for k in vw_run_param_set]) mod.simplified_keymap_red = dict([(k,mod.simplified_keymap[k]) for k in vw_run_param_set]) @@ -291,86 +327,173 @@ def dictify(param_name, param_choices): def params_per_task(mod): # Problem parameters - params_corrupt_type_sup = dictify('corrupt_type_supervised', mod.choices_corrupt_type_supervised) - params_corrupt_prob_sup = dictify('corrupt_prob_supervised', mod.choices_corrupt_prob_supervised) - params_corrupt_type_band = dictify('corrupt_type_bandit', mod.choices_corrupt_type_bandit) - params_corrupt_prob_band = dictify('corrupt_prob_bandit', mod.choices_corrupt_prob_bandit) - params_warm_start_multiplier = dictify('warm_start_multiplier', mod.warm_start_multipliers) - params_learning_rate = dictify('learning_rate', mod.learning_rates) - + prm_cor_type_ws = dictify('corrupt_type_warm_start', mod.choices_cor_type_ws) + prm_cor_prob_ws = dictify('corrupt_prob_warm_start', mod.choices_cor_prob_ws) + prm_cor_type_inter = dictify('corrupt_type_interaction', mod.choices_cor_type_inter) + prm_cor_prob_inter = dictify('corrupt_prob_interaction', mod.choices_cor_prob_inter) + prm_ws_multiplier = dictify('warm_start_multiplier', mod.ws_multipliers) + prm_lrs = dictify('learning_rate', mod.learning_rates) # could potentially induce a bug if the maj and best does not have this parameter - params_fold = dictify('fold', mod.folds) - + prm_fold = dictify('fold', mod.folds) # Algorithm parameters - params_cb_type = dictify('cb_type', mod.choices_cb_type) + prm_cb_type = dictify('cb_type', mod.choices_cb_type) + prm_dataset = dictify('dataset', mod.dss) + prm_choices_lbd = dictify('choices_lambda', mod.choices_choices_lambda) + prm_adf_on = dictify('adf_on', [True]) # Common parameters - params_common = param_cartesian_multi([params_corrupt_type_sup, params_corrupt_prob_sup, - params_corrupt_type_band, params_corrupt_prob_band, - params_warm_start_multiplier, params_learning_rate, params_cb_type, params_fold]) - params_common = filter(lambda param: param['corrupt_type_bandit'] == 3 or abs(param['corrupt_prob_bandit']) > 1e-4, params_common) + prm_com = param_cartesian_multi( + [prm_cor_type_ws, + prm_cor_prob_ws, + prm_cor_type_inter, + prm_cor_prob_inter, + prm_ws_multiplier, + prm_lrs, + prm_cb_type, + prm_fold, + prm_adf_on]) + + prm_com_inter_gt = filter(lambda p: + ((p['corrupt_type_interaction'] == 1 #noiseless for interaction data + and abs(param['corrupt_prob_interaction']) < 1e-4) + and + (p['corrupt_type_warm_start'] == 1 #filter out repetitive warm start data + or abs(param['corrupt_prob_warm_start']) > 1e-4)), + prm_com) + + + prm_com_ws_gt = filter(lambda p: + ((p['corrupt_type_warm_start'] == 1 #noiseless for warm start data + and abs(param['corrupt_prob_warm_start']) < 1e-4) + and + (p['corrupt_type_interaction'] == 1 #filter out repetitive interaction data + or abs(param['corrupt_prob_interaction']) > 1e-4)), + prm_com) + + prm_com = 
prm_com_inter_gt + prm_com_ws_gt # Baseline parameters construction if mod.baselines_on: - params_baseline_basic = [ - [{'choices_lambda': 1, 'warm_start_type': 1, 'lambda_scheme': 3}], [{'no_warm_start_update': True, 'no_interaction_update': False}, {'no_warm_start_update': False, 'no_interaction_update': True}] + prm_baseline_basic = + [ + [ + #Sup-Only + {'warm_start_type': 1, + 'warm_start_update': True, + 'interaction_update': False}, + #Band-Only + {'warm_start_type': 1, + 'warm_start_update': False, + 'interaction_update': True}, + #Sim-Bandit + {'warm_start_type': 2, + 'warm_start_update': True, + 'interaction_update': True} + #Sim-Bandit with no warm-start update + {'warm_start_type': 2, + 'warm_start_update': True, + 'interaction_update': False} + ] + ] + + prm_baseline_const = + [ + [ + {'weighting_scheme':1, + 'adf_on':True, + 'lambda_scheme':3, + 'choices_lambda':1} + ] ] - params_baseline = param_cartesian_multi([params_common] + params_baseline_basic) - #params_baseline = filter(lambda param: param['no_warm_start_update'] == True or param['no_interaction_update'] == True, params_baseline) + prm_baseline = param_cartesian_multi([prm_common] + prm_baseline_const + prm_baseline_basic) else: - params_baseline = [] + prm_baseline = [] # Algorithm parameters construction if mod.algs_on: - params_choices_lambd = dictify('choices_lambda', mod.choices_choices_lambda) - params_algs_1 = param_cartesian_multi([params_choices_lambd, [{'no_warm_start_update': False, 'no_interaction_update': False, 'warm_start_type': 1, 'lambda_scheme': 3}], [{'validation_method':2}, {'validation_method':3}]] ) - params_algs_2 = [{'no_warm_start_update': False, 'no_interaction_update': False, 'warm_start_type': 2, 'lambda_scheme': 1, 'choices_lambda':1}] - params_algs = param_cartesian( params_common, params_algs_1 + params_algs_2 ) + # Algorithms for supervised validation + prm_ws_gt = + [ + [ + {'warm_start_update': True, + 'interaction_update': True, + 'warm_start_type': 1, + 'lambda_scheme': 2, + 'weighting_scheme': 2} + ], + [ + {'validation_method':2}, + {'validation_method':3} + ] + ] + + prm_inter_gt = + [ + [ + {'warm_start_update': True, + 'interaction_update': True, + 'warm_start_type': 1, + 'lambda_scheme': 4, + 'weighting_scheme': 1} + ], + ] + + prm_algs_ws_gt = param_cartesian_multi([prm_com_ws_gt] + [prm_choices_lbd] + prm_ws_gt) + prm_algs_inter_gt = param_cartesian_multi([prm_com_inter_gt] + [prm_choices_lbd] + prm_inter_gt) + prm_algs = prm_algs_ws_gt + prm_algs_inter_gt else: params_algs = [] - params_constant_baseline = [{'weighting_scheme':1, - 'adf_on':True}] - params_constant_algs = [{'weighting_scheme':mod.weighting_scheme, - 'adf_on':True}] - - params_baseline_and_algs = param_cartesian_multi([params_constant_baseline, params_baseline]) + param_cartesian_multi([params_constant_algs, params_algs]) - - #for p in params_common: - # print p - - #for p in params_baseline: - # print p - - print len(params_common) - print len(params_baseline) - print len(params_algs) - print len(params_baseline_and_algs) - # Optimal baselines parameter construction if mod.optimal_on: - params_optimal = [{ 'optimal_approx': True, 'fold': 1, 'corrupt_type_supervised':1, 'corrupt_prob_supervised':0.0, 'corrupt_type_bandit':1, 'corrupt_prob_bandit':0.0} ] + params_optimal = + [ + {'optimal_approx': True, + 'fold': 1, + 'corrupt_type_warm_start':1, + 'corrupt_prob_warm_start':0.0, + 'corrupt_type_interaction':1, + 'corrupt_prob_interaction':0.0} + ] else: params_optimal = [] if mod.majority_on: - 
params_majority = [{ 'majority_approx': True, 'fold': 1, - 'corrupt_type_supervised':1, 'corrupt_prob_supervised':0.0, 'corrupt_type_bandit':1, 'corrupt_prob_bandit':0.0} ] + params_majority = + [ + {'majority_approx': True, + 'fold': 1, + 'corrupt_type_warm_start':1, + 'corrupt_prob_warm_start':0.0, + 'corrupt_type_interaction':1, + 'corrupt_prob_interaction':0.0} + ] else: params_majority = [] - #print len(params_baseline) - #print len(params_algs) - #print len(params_common) + #for p in params_common: + # print p + #for p in params_baseline: + # print p + print len(params_common) + print len(params_baseline) + print len(params_algs) #raw_input('..') # Common factor in all 3 groups: dataset - params_dataset = dictify('dataset', mod.dss) - params_all = param_cartesian_multi( [params_dataset, params_baseline_and_algs + params_optimal + params_majority] ) - - params_all = sorted(params_all, key=lambda d: (d['dataset'], d['corrupt_type_supervised'], d['corrupt_prob_supervised'], d['corrupt_type_bandit'], d['corrupt_prob_bandit'])) + params_all = param_cartesian_multi( + [params_dataset, + params_baseline_and_algs + params_optimal + params_majority]) + + params_all = sorted(params_all, + key=lambda d: (d['dataset'], + d['corrupt_type_warm_start'], + d['corrupt_prob_warm_start'], + d['corrupt_type_interaction'], + d['corrupt_prob_interaction']) + ) print 'The total number of VW commands to run is: ', len(params_all) #for row in params_all: # print row @@ -446,29 +569,31 @@ def main_loop(mod): ('num_classes','nc', 0), ('total_size', 'ts', 0), ('majority_size','ms', 0), - ('corrupt_type_supervised', 'cts', 0), - ('corrupt_prob_supervised', 'cps', 0.0), - ('corrupt_type_bandit', 'ctb', 0), - ('corrupt_prob_bandit', 'cpb', 0.0), + ('corrupt_type_warm_start', 'ctws', 0), + ('corrupt_prob_warm_start', 'cpws', 0.0), + ('corrupt_type_interaction', 'cti', 0), + ('corrupt_prob_interaction', 'cpi', 0.0), ('adf_on', 'ao', True), ('warm_start_multiplier','wsm',1), ('warm_start', 'ws', 0), ('warm_start_type', 'wst', 0), - ('bandit_size', 'bs', 0), - ('bandit_supervised_size_ratio', 'bssr', 0), + ('interaction', 'bs', 0), + ('inter_ws_size_ratio', 'iwsr', 0), ('cb_type', 'cbt', 'mtr'), ('validation_method', 'vm', 0), ('weighting_scheme', 'wts', 0), - ('lambda_scheme','ls', 0), + ('lambda_scheme', 'ls', 0), ('choices_lambda', 'cl', 0), - ('no_warm_start_update', 'nwsu', False), - ('no_interaction_update', 'niu', False), + ('warm_start_update', 'wsu', True), + ('interaction_update', 'iu', True), ('learning_rate', 'lr', 0.0), ('optimal_approx', 'oa', False), ('majority_approx', 'ma', False), ('avg_error', 'ae', 0.0), ('actual_variance', 'av', 0.0), - ('ideal_variance', 'iv', 0.0)] + ('ideal_variance', 'iv', 0.0), + ('last_lambda', 'll', 0.0), + ] num_cols = len(mod.result_template_list) mod.result_header_list = [ mod.result_template_list[i][0] for i in range(num_cols) ] From f95d1541b1a17576fd4e97998ff2a11f65b747cf Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 23 Jul 2018 15:52:55 -0400 Subject: [PATCH 100/127] finished updating the running vw script --- scripts/run_vw_commands.py | 242 ++++++++++++++++++------------------- 1 file changed, 118 insertions(+), 124 deletions(-) diff --git a/scripts/run_vw_commands.py b/scripts/run_vw_commands.py index e4ac5d1bf7a..2b5a81e4be4 100644 --- a/scripts/run_vw_commands.py +++ b/scripts/run_vw_commands.py @@ -6,6 +6,7 @@ import time import glob import re +from collections import OrderedDict class model: @@ -13,37 +14,33 @@ def __init__(self): # Setting up 
argument-independent learning parameters in the constructor self.baselines_on = True self.algs_on = True - self.optimal_on = False - self.majority_on = False + self.optimal_on = True + self.majority_on = True self.num_checkpoints = 200 # use fractions instead of absolute numbers - #mod.warm_start_multipliers = [pow(2,i) for i in range(4)] - self.warm_start_multipliers = [pow(2,i) for i in range(4)] + self.ws_multipliers = [pow(2,i) for i in range(4)] self.choices_cb_type = ['mtr'] #mod.choices_choices_lambda = [2,4,8] self.choices_choices_lambda = [2,8,16] - #mod.choices_corrupt_type_supervised = [1,2,3] - #mod.choices_corrupt_prob_supervised = [0.0,0.5,1.0] - self.choices_corrupt_type_supervised = [1] - self.choices_corrupt_prob_supervised = [0.0] + #mod.choices_cor_type_ws = [1,2,3] + #mod.choices_cor_prob_ws = [0.0,0.5,1.0] + self.choices_cor_type_ws = [1] + self.choices_cor_prob_ws = [0.0] - self.learning_rates_template = [0.1, 0.03, 0.3, 0.01, 1.0, 0.003, 3.0, 0.001, 10.0] - - self.adf_on = True - - self.choices_corrupt_type_bandit = [1,2,3] - self.choices_corrupt_prob_bandit = [0.0,0.5,1.0] + self.choices_cor_type_inter = [1,2,3] + self.choices_cor_prob_inter = [0.0,0.5,1.0] self.validation_method = 1 self.weighting_scheme = 2 #self.epsilon = 0.05 #self.epsilon_on = True - + self.lr_template = [0.1, 0.03, 0.3, 0.01, 1.0, 0.003, 3.0, 0.001, 10.0] + self.adf_on = True self.critical_size_ratios = [184 * pow(2, -i) for i in range(7) ] @@ -54,8 +51,8 @@ def collect_stats(mod): vw_run_results = [] vw_result_template = { - 'bandit_size': 0, - 'bandit_supervised_size_ratio': 0, + 'interaction': 0, + 'inter_ws_size_ratio': 0, 'avg_error': 0.0, 'actual_variance': 0.0, 'ideal_variance': 0.0 @@ -88,19 +85,24 @@ def collect_stats(mod): curr_pred_str, curr_feat_str = s avg_loss = float(avg_loss_str) - bandit_effective = int(float(weight_str)) + inter_effective = int(float(weight_str)) for ratio in mod.critical_size_ratios: - if bandit_effective >= (1 - 1e-7) * mod.param['warm_start'] * ratio and \ - bandit_effective <= (1 + 1e-7) * mod.param['warm_start'] * ratio: + if inter_effective >= (1 - 1e-7) * mod.param['warm_start'] * ratio and \ + inter_effective <= (1 + 1e-7) * mod.param['warm_start'] * ratio: vw_result = vw_result_template.copy() - vw_result['bandit_size'] = bandit_effective - vw_result['bandit_supervised_size_ratio'] = ratio + vw_result['interaction'] = inter_effective + vw_result['inter_ws_size_ratio'] = ratio vw_result['avg_error'] = avg_loss vw_result['actual_variance'] = actual_var_value vw_result['ideal_variance'] = ideal_var_value vw_run_results.append(vw_result) f.close() + + #if len(vw_run_results) >= 1: + # print mod.param['warm_start'] + # print vw_run_results + #raw_input('..') return vw_run_results @@ -115,48 +117,45 @@ def gen_vw_options_list(mod): def gen_vw_options(mod): if 'optimal_approx' in mod.param: # Fully supervised on full dataset - mod.vw_template = - {'data':'', - 'progress':2.0, - 'passes':0, - 'oaa':0, - 'cache_file':''} + mod.vw_template = OrderedDict([('data',''), + ('progress',2.0), + ('passes',0), + ('oaa',0), + ('cache_file','')]) mod.param['passes'] = 5 mod.param['oaa'] = mod.param['num_classes'] mod.param['cache_file'] = mod.param['data'] + '.cache' elif 'majority_approx' in mod.param: # Compute majority error; basically we would like to skip vw running as fast as possible - mod.vw_template = - {'data':'', - 'progress':2.0, - 'cbify':0, - 'warm_start':0, - 'bandit':0} + mod.vw_template = OrderedDict([('data',''), + ('progress',2.0), + ('cbify',0), + 
('warm_start',0), + ('interaction',0)]) mod.param['cbify'] = mod.param['num_classes'] mod.param['warm_start'] = 0 mod.param['interaction'] = 0 else: # General CB - mod.vw_template = - {'data':'', - 'progress':2.0, - 'cb_type':'mtr', - 'cbify':0, - 'warm_start':0, - 'interaction':0, - 'choices_lambda':0, - 'corrupt_type_interaction':0, - 'corrupt_prob_interaction':0.0, - 'corrupt_type_supervised':0, - 'corrupt_prob_supervised':0.0, - 'warm_start_update': True, - 'interaction_update': True, - 'lambda_scheme':1, - 'learning_rate':0.5, - 'warm_start_type':1, - 'overwrite_label':1, - 'validation_method':1, - 'weighting_scheme':1} + mod.vw_template = OrderedDict([('data',''), + ('cbify',0), + ('cb_type','mtr'), + ('warm_start',0), + ('interaction',0), + ('corrupt_type_interaction',0), + ('corrupt_prob_interaction',0.0), + ('corrupt_type_warm_start',0), + ('corrupt_prob_warm_start',0.0), + ('warm_start_update',True), + ('interaction_update',True), + ('choices_lambda',0), + ('lambda_scheme',1), + ('warm_start_type',1), + ('overwrite_label',1), + ('validation_method',1), + ('weighting_scheme',1), + ('learning_rate',0.5), + ('progress',2.0),]) mod.param['warm_start'] = mod.param['warm_start_multiplier'] * mod.param['progress'] mod.param['interaction'] = mod.param['total_size'] - mod.param['warm_start'] @@ -194,55 +193,49 @@ def param_to_str(param): param_list = [ str(k)+'='+str(v) for k,v in param.iteritems() ] return intersperse(param_list, ',') -def replace_if_in(dic, k, k_new): - if k in dic: - dic[k_new] = dic[k] - del dic[k] - def replace_keys(dic, simplified_keymap): - dic_new = dic.copy() - for k, k_new in simplified_keymap.iteritems(): - replace_if_in(dic_new, k, k_new) + dic_new = OrderedDict() + for k, v in dic.iteritems(): + dic_new[simplified_keymap[k]] = v return dic_new def param_to_str_simplified(mod): #print 'before replace' #print param - vw_run_param_set = - ['lambda_scheme', - 'learning_rate', - 'validation_method', + vw_run_param_set = \ + ['dataset', 'fold', - 'no_warm_start_update', - 'no_interaction_update', + 'lambda_scheme', + 'validation_method', + 'warm_start_multiplier', 'corrupt_prob_interaction', 'corrupt_prob_warm_start', 'corrupt_type_interaction', 'corrupt_type_warm_start', + 'warm_start_update', + 'interaction_update', 'warm_start_type', - 'warm_start_multiplier', 'choices_lambda', 'weighting_scheme', 'cb_type', 'optimal_approx', 'majority_approx', - 'dataset', + 'learning_rate', 'adf_on'] - mod.template_red = dict([(k,mod.result_template[k]) for k in vw_run_param_set]) - mod.simplified_keymap_red = dict([(k,mod.simplified_keymap[k]) for k in vw_run_param_set]) + mod.template_red = OrderedDict([(k,mod.result_template[k]) for k in vw_run_param_set]) + #mod.simplified_keymap_red = dict([(k,mod.simplified_keymap[k]) for k in vw_run_param_set]) # step 1: use the above as a template to filter out irrelevant parameters # in the vw output file title param_formatted = format_setting(mod.template_red, mod.param) # step 2: replace the key names with the simplified names - param_simplified = replace_keys(param_formatted, mod.simplified_keymap_red) + param_simplified = replace_keys(param_formatted, mod.simplified_keymap) #print 'after replace' #print param return param_to_str(param_simplified) -def gen_comparison_graph(mod): +def run_single_expt(mod): mod.param['data'] = mod.ds_path + str(mod.param['fold']) + '/' + mod.param['dataset'] - mod.param['total_size'] = get_num_lines(mod.param['data']) mod.param['num_classes'] = get_num_classes(mod.param['data']) 
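[Editorial note: the OrderedDict templates above are ultimately flattened into one VW invocation per experiment. A minimal sketch of that step, assuming a hypothetical template_to_cmd helper; the script itself builds the string via gen_vw_options_list inside execute_vw, and flags such as --warm_start and --interaction belong to this experimental branch rather than mainline VW:]

from collections import OrderedDict

def template_to_cmd(vw_bin, template):
    # Every (key, value) pair becomes '--key value'; booleans become bare flags.
    args = []
    for k, v in template.iteritems():
        if isinstance(v, bool):
            if v:
                args.append('--' + k)
        else:
            args.append('--' + k + ' ' + str(v))
    return vw_bin + ' ' + ' '.join(args)

print template_to_cmd('vw', OrderedDict(
    [('data', 'ds.vw.gz'), ('cbify', 10), ('warm_start', 50), ('interaction', 4950)]))
# vw --data ds.vw.gz --cbify 10 --warm_start 50 --interaction 4950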
mod.param['majority_size'], mod.param['majority_class'] = get_majority_class(mod.param['data']) @@ -264,7 +257,6 @@ def gen_comparison_graph(mod): result_formatted = format_setting(mod.result_template, result_combined) record_result(mod, result_formatted) - print('') # The following function is a "template filling" function # Given a template, we use the setting dict to fill it as much as possible @@ -276,10 +268,9 @@ def format_setting(template, setting): return formatted def record_result(mod, result): - result_row = [] - for k in mod.result_header_list: - result_row.append(result[k]) - + result_row = result.values() + #for k in mod.result_header_list: + # result_row.append(result[k]) #print result['validation_method'] #print result_row @@ -353,28 +344,27 @@ def params_per_task(mod): prm_fold, prm_adf_on]) - prm_com_inter_gt = filter(lambda p: - ((p['corrupt_type_interaction'] == 1 #noiseless for interaction data - and abs(param['corrupt_prob_interaction']) < 1e-4) + fltr_inter_gt = lambda p: ((p['corrupt_type_interaction'] == 1 #noiseless for interaction data + and abs(p['corrupt_prob_interaction']) < 1e-4) and (p['corrupt_type_warm_start'] == 1 #filter out repetitive warm start data - or abs(param['corrupt_prob_warm_start']) > 1e-4)), - prm_com) + or abs(p['corrupt_prob_warm_start']) > 1e-4)) + prm_com_inter_gt = filter(fltr_inter_gt, prm_com) - prm_com_ws_gt = filter(lambda p: - ((p['corrupt_type_warm_start'] == 1 #noiseless for warm start data - and abs(param['corrupt_prob_warm_start']) < 1e-4) - and - (p['corrupt_type_interaction'] == 1 #filter out repetitive interaction data - or abs(param['corrupt_prob_interaction']) > 1e-4)), - prm_com) + fltr_ws_gt = lambda p: ((p['corrupt_type_warm_start'] == 1 #noiseless for warm start data + and abs(p['corrupt_prob_warm_start']) < 1e-4) + and + (p['corrupt_type_interaction'] == 1 #filter out repetitive interaction data + or abs(p['corrupt_prob_interaction']) > 1e-4)) + + prm_com_ws_gt = filter(fltr_ws_gt, prm_com) - prm_com = prm_com_inter_gt + prm_com_ws_gt + prm_com = filter(lambda p: (fltr_ws_gt(p) or fltr_inter_gt(p)), prm_com) # Baseline parameters construction if mod.baselines_on: - prm_baseline_basic = + prm_baseline_basic = \ [ [ #Sup-Only @@ -388,7 +378,7 @@ def params_per_task(mod): #Sim-Bandit {'warm_start_type': 2, 'warm_start_update': True, - 'interaction_update': True} + 'interaction_update': True}, #Sim-Bandit with no warm-start update {'warm_start_type': 2, 'warm_start_update': True, @@ -396,7 +386,7 @@ def params_per_task(mod): ] ] - prm_baseline_const = + prm_baseline_const = \ [ [ {'weighting_scheme':1, @@ -405,7 +395,7 @@ def params_per_task(mod): 'choices_lambda':1} ] ] - prm_baseline = param_cartesian_multi([prm_common] + prm_baseline_const + prm_baseline_basic) + prm_baseline = param_cartesian_multi([prm_com] + prm_baseline_const + prm_baseline_basic) else: prm_baseline = [] @@ -413,7 +403,7 @@ def params_per_task(mod): # Algorithm parameters construction if mod.algs_on: # Algorithms for supervised validation - prm_ws_gt = + prm_ws_gt = \ [ [ {'warm_start_update': True, @@ -428,7 +418,7 @@ def params_per_task(mod): ] ] - prm_inter_gt = + prm_inter_gt = \ [ [ {'warm_start_update': True, @@ -443,11 +433,11 @@ def params_per_task(mod): prm_algs_inter_gt = param_cartesian_multi([prm_com_inter_gt] + [prm_choices_lbd] + prm_inter_gt) prm_algs = prm_algs_ws_gt + prm_algs_inter_gt else: - params_algs = [] + prm_algs = [] # Optimal baselines parameter construction if mod.optimal_on: - params_optimal = + prm_optimal = \ [ 
{'optimal_approx': True, 'fold': 1, @@ -457,10 +447,10 @@ def params_per_task(mod): 'corrupt_prob_interaction':0.0} ] else: - params_optimal = [] + prm_optimal = [] if mod.majority_on: - params_majority = + prm_majority = \ [ {'majority_approx': True, 'fold': 1, @@ -470,34 +460,37 @@ def params_per_task(mod): 'corrupt_prob_interaction':0.0} ] else: - params_majority = [] + prm_majority = [] #for p in params_common: # print p #for p in params_baseline: # print p - print len(params_common) - print len(params_baseline) - print len(params_algs) + #print len(prm_com_ws_gt), len(prm_algs_ws_gt) + #print len(prm_com_inter_gt), len(prm_algs_inter_gt) + #print len(prm_com) + #print len(prm_baseline) + #print len(prm_algs) #raw_input('..') # Common factor in all 3 groups: dataset - params_all = param_cartesian_multi( - [params_dataset, - params_baseline_and_algs + params_optimal + params_majority]) + prm_all = param_cartesian_multi( + [prm_dataset, + + prm_baseline + prm_algs + prm_optimal + prm_majority]) - params_all = sorted(params_all, + prm_all = sorted(prm_all, key=lambda d: (d['dataset'], d['corrupt_type_warm_start'], d['corrupt_prob_warm_start'], d['corrupt_type_interaction'], d['corrupt_prob_interaction']) ) - print 'The total number of VW commands to run is: ', len(params_all) - #for row in params_all: + print 'The total number of VW commands to run is: ', len(prm_all) + #for row in prm_all: # print row - return get_params_task(params_all) + return get_params_task(prm_all) def get_params_task(params_all): @@ -553,7 +546,7 @@ def vw_output_extract(mod, pattern): def write_summary_header(mod): summary_file = open(mod.summary_file_name, 'w') - summary_header = intersperse(mod.result_header_list, '\t') + summary_header = intersperse(mod.result_template.keys(), '\t') summary_file.write(summary_header+'\n') summary_file.close() @@ -596,15 +589,15 @@ def main_loop(mod): ] num_cols = len(mod.result_template_list) - mod.result_header_list = [ mod.result_template_list[i][0] for i in range(num_cols) ] - mod.result_template = dict([ (mod.result_template_list[i][0], mod.result_template_list[i][2]) for i in range(num_cols) ]) - mod.simplified_keymap = dict([ (mod.result_template_list[i][0], mod.result_template_list[i][1]) for i in range(num_cols) ]) + #mod.result_header_list = [ mod.result_template_list[i][0] for i in range(num_cols) ] + mod.result_template = OrderedDict([ (mod.result_template_list[i][0], mod.result_template_list[i][2]) for i in range(num_cols) ]) + mod.simplified_keymap = OrderedDict([ (mod.result_template_list[i][0], mod.result_template_list[i][1]) for i in range(num_cols) ]) write_summary_header(mod) for mod.param in mod.config_task: #if (mod.param['no_interaction_update'] is True): # raw_input(' ') - gen_comparison_graph(mod) + run_single_expt(mod) def create_dir(dir): if not os.path.exists(dir): @@ -649,9 +642,10 @@ def remove_suffix(filename): #print mod.dss if args.task_id == 0: - #process = subprocess.Popen('make -C .. clean; make -C ..', shell=True, stdout=f, stderr=f) - #subprocess.check_call(cmd, shell=True) - #process.wait() + # Compile vw in one of the subfolders + process = subprocess.Popen('make -C .. 
clean; make -C ..', shell=True, stdout=f, stderr=f) + subprocess.check_call(cmd, shell=True) + process.wait() # To avoid race condition of writing to the same file at the same time create_dir(args.results_dir) @@ -671,9 +665,9 @@ def remove_suffix(filename): time.sleep(1) if args.num_learning_rates <= 0 or args.num_learning_rates >= 10: - mod.learning_rates = mod.learning_rates_template + mod.learning_rates = mod.lr_template else: - mod.learning_rates = mod.learning_rates_template[:args.num_learning_rates] + mod.learning_rates = mod.lr_template[:args.num_learning_rates] #mod.folds = range(1,11) mod.folds = range(1, args.num_folds+1) From 558f1a225014d9a817eb5bfffe7fb608d7b46aaa Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 10:34:07 -0400 Subject: [PATCH 101/127] . --- scripts/alg_comparison.py | 70 +++++++++++++++++----- scripts/run_vw_commands.py | 120 ++++++++++++++++++++++++------------- vowpalwabbit/cbify.cc | 3 +- 3 files changed, 136 insertions(+), 57 deletions(-) diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py index b2988ef182e..674a4e86c89 100644 --- a/scripts/alg_comparison.py +++ b/scripts/alg_comparison.py @@ -13,6 +13,7 @@ import seaborn as sns from matplotlib.colors import ListedColormap from matplotlib.font_manager import FontProperties +from collections import Counter class model: @@ -250,9 +251,9 @@ def problem_text(name_problem): def plot_cdf(alg_name, errs): - print alg_name - print errs - print len(errs) + #print alg_name + #print errs + #print len(errs) col, sty = alg_color_style(alg_name) @@ -302,6 +303,21 @@ def plot_all_cdfs(alg_results, mod): save_legend(mod, indices) plt.clf() +def plot_all_lrs(lrs, mod): + alg_names = lrs.keys() + + for i in range(len(alg_names)): + pylab.figure(figsize=(8,6)) + lrs_alg = lrs[alg_names[i]] + counts = Counter(lrs_alg) + names = list(counts.keys()) + names_sorted = sorted(names) + values = [counts[n] for n in names_sorted] + plt.barh(range(len(names_sorted)),values) + plt.yticks(range(len(names_sorted)),names_sorted) + plt.savefig(mod.problemdir+alg_str_compatible(alg_names[i])+'_lr.pdf') + plt.clf() + def plot_all_pair_comp(alg_results, sizes, mod): alg_names = alg_results.keys() @@ -367,6 +383,7 @@ def get_maj_error(maj_error_table, name_dataset): def get_unnormalized_results(result_table): new_unnormalized_results = {} + new_lr = {} new_size = 0 i = 0 @@ -377,9 +394,10 @@ def get_unnormalized_results(result_table): if row['bandit_size'] == new_size: alg_name = (row['warm_start_type'], row['choices_lambda'], row['no_warm_start_update'], row['no_interaction_update'], row['validation_method']) new_unnormalized_results[alg_name] = row['avg_error'] + new_lr[alg_name] = row['learning_rate'] i += 1 - return new_size, new_unnormalized_results + return new_size, new_unnormalized_results, new_lr def update_result_dict(results_dict, new_result): print results_dict @@ -393,10 +411,10 @@ def plot_all(mod, all_results): #all_results = all_results[all_results['corrupt_prob_supervised']!=0.0] grouped_by_problem = all_results.groupby(['corrupt_type_supervised', - 'corrupt_prob_supervised', - 'corrupt_type_bandit', - 'corrupt_prob_bandit', - 'bandit_supervised_size_ratio']) + 'corrupt_prob_supervised', + 'corrupt_type_bandit', + 'corrupt_prob_bandit', + 'bandit_supervised_size_ratio']) #then group by dataset and warm_start size (corresponding to each point in cdf) for name_problem, group_problem in grouped_by_problem: @@ -405,7 +423,8 @@ def plot_all(mod, all_results): sizes = None mod.name_problem = 
name_problem - grouped_by_dataset = group_problem.groupby(['dataset','warm_start']) + grouped_by_dataset = group_problem.groupby(['dataset', + 'warm_start']) #then select unique combinations of (no_supervised, no_bandit, choices_lambda) #e.g. (True, True, 1), (True, False, 1), (False, True, 1), (False, False, 2) #(False, False, 8), and compute a normalized score @@ -413,8 +432,13 @@ def plot_all(mod, all_results): for name_dataset, group_dataset in grouped_by_dataset: result_table = group_dataset - grouped_by_algorithm = group_dataset.groupby(['warm_start_type', 'choices_lambda', 'no_warm_start_update', 'no_interaction_update', - 'validation_method']) + group_dataset = group_dataset.reset_index(drop=True) + + grouped_by_algorithm = group_dataset.groupby(['warm_start_type', + 'choices_lambda', + 'no_warm_start_update', + 'no_interaction_update', + 'validation_method']) mod.name_dataset = name_dataset @@ -423,11 +447,18 @@ def plot_all(mod, all_results): #In the future this should be changed if we run multiple folds: we #should average among folds before choosing the min - result_table = grouped_by_algorithm.min() - result_table = result_table.reset_index() + #result_table = grouped_by_algorithm.min() + #result_table = result_table.reset_index() - #print result_table + #print grouped_by_algorithm + #grouped_by_algorithm.describe() + idx = grouped_by_algorithm.apply(lambda df:df["avg_error"].idxmin()) + result_table = group_dataset.ix[idx, :] + #print idx + #print result_table + #print group_dataset + #raw_input('..') #group_dataset.groupby(['choices_lambda','no_supervised', 'no_bandit']) #print alg_results @@ -436,8 +467,9 @@ def plot_all(mod, all_results): #in general (including the first time) - record the error rates of all algorithms #print result_table - new_size, new_unnormalized_result = get_unnormalized_results(result_table) + new_size, new_unnormalized_result, new_lr = get_unnormalized_results(result_table) new_unnormalized_result[(0, 0, False, False, 1)] = get_maj_error(mod.maj_error_table, mod.name_dataset) + new_lr[(0, 0, False, False, 1)] = 0.0 new_normalized_result = normalize_score(new_unnormalized_result, mod) #first time - generate names of algorithms considered @@ -445,9 +477,11 @@ def plot_all(mod, all_results): sizes = [] unnormalized_results = dict([(k,[]) for k in new_unnormalized_result.keys()]) normalized_results = dict([(k,[]) for k in new_unnormalized_result.keys()]) + lrs = dict([(k,[]) for k in new_unnormalized_result.keys()]) update_result_dict(unnormalized_results, new_unnormalized_result) update_result_dict(normalized_results, new_normalized_result) + update_result_dict(lrs, new_lr) sizes.append(new_size) #print 'sizes:' @@ -468,6 +502,8 @@ def plot_all(mod, all_results): if mod.cdf_on is True: plot_all_cdfs(normalized_results, mod) + plot_all_lrs(lrs, mod) + def save_to_hdf(mod): print 'saving to hdf..' store = pd.HDFStore('store.h5') @@ -549,6 +585,9 @@ def load_from_sum(mod): #print mod.best_error_table[mod.best_error_table['dataset'] == 'ds_160_5.vw.gz'] #raw_input(' ') + #print all_results + #raw_input('..') + all_results = all_results[all_results['choices_lambda'] != 0] #ignore the no update row: @@ -558,6 +597,7 @@ def load_from_sum(mod): + #filter choices_lambdas = 2,4,8? 
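[Editorial note: the idxmin-based selection introduced a few hunks above is worth spelling out. The earlier grouped min() takes column-wise minima and can mix values from different rows; idxmin keeps the whole winning row, so the learning rate stays paired with the error it produced. A small self-contained illustration with made-up numbers:]

import pandas as pd

df = pd.DataFrame({'alg':       ['a', 'a', 'b', 'b'],
                   'lr':        [0.1, 0.3, 0.1, 0.3],
                   'avg_error': [0.20, 0.15, 0.40, 0.55]})

# One row per algorithm: the row achieving the smallest avg_error,
# with its learning rate kept intact.
idx = df.groupby('alg').apply(lambda g: g['avg_error'].idxmin())
best = df.loc[idx, :]   # the script uses the older .ix indexer
# best: (a, lr=0.3, 0.15) and (b, lr=0.1, 0.40)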
#if (alg_name[2] == False and alg_name[3] == False and alg_name[1] != 8): # pass diff --git a/scripts/run_vw_commands.py b/scripts/run_vw_commands.py index 2b5a81e4be4..41c974196a7 100644 --- a/scripts/run_vw_commands.py +++ b/scripts/run_vw_commands.py @@ -13,36 +13,51 @@ class model: def __init__(self): # Setting up argument-independent learning parameters in the constructor self.baselines_on = True - self.algs_on = True - self.optimal_on = True - self.majority_on = True + self.algs_on = False + self.optimal_on = False + self.majority_on = False + + self.ws_gt_on = True + self.inter_gt_on = False self.num_checkpoints = 200 # use fractions instead of absolute numbers self.ws_multipliers = [pow(2,i) for i in range(4)] + #self.ws_multipliers = [pow(2,i) for i in range(2)] self.choices_cb_type = ['mtr'] #mod.choices_choices_lambda = [2,4,8] self.choices_choices_lambda = [2,8,16] - #mod.choices_cor_type_ws = [1,2,3] - #mod.choices_cor_prob_ws = [0.0,0.5,1.0] + #self.choices_cor_type_ws = [1,2,3] + #self.choices_cor_prob_ws = [0.0,0.5,1.0] self.choices_cor_type_ws = [1] self.choices_cor_prob_ws = [0.0] - self.choices_cor_type_inter = [1,2,3] - self.choices_cor_prob_inter = [0.0,0.5,1.0] + self.choices_cor_type_inter = [1] + self.choices_cor_prob_inter = [0.0, 0.125, 0.25, 0.5] - self.validation_method = 1 - self.weighting_scheme = 2 + self.choices_loss_enc = [(-1, 0)] + #self.choices_cor_type_inter = [1,2] + #self.choices_cor_prob_inter = [0.0,0.5] - #self.epsilon = 0.05 + self.choices_epsilon = [0.05, 0.1] #self.epsilon_on = True - self.lr_template = [0.1, 0.03, 0.3, 0.01, 1.0, 0.003, 3.0, 0.001, 10.0] - self.adf_on = True + #self.lr_template = [0.1, 0.03, 0.3, 0.01, 1.0, 0.003, 3.0, 0.001, 10.0, 0.0003, 30.0, 0.0001, 100.0] + self.choices_adf = [True] self.critical_size_ratios = [184 * pow(2, -i) for i in range(7) ] +def gen_lr(n): + m = math.floor(n / 4.0) + if n % 4 == 0: + return 0.1 * pow(10, m) + if n % 4 == 1: + return 0.03 * pow(10, -m) + if n % 4 == 2: + return 0.3 * pow(10, m) + if n % 4 == 3: + return 0.01 * pow(10, -m) def collect_stats(mod): avg_error_value = avg_error(mod) @@ -155,7 +170,10 @@ def gen_vw_options(mod): ('validation_method',1), ('weighting_scheme',1), ('learning_rate',0.5), - ('progress',2.0),]) + ('epsilon', 0.05), + ('loss0', 0), + ('loss1', 0), + ('progress',2.0)]) mod.param['warm_start'] = mod.param['warm_start_multiplier'] * mod.param['progress'] mod.param['interaction'] = mod.param['total_size'] - mod.param['warm_start'] @@ -221,7 +239,10 @@ def param_to_str_simplified(mod): 'optimal_approx', 'majority_approx', 'learning_rate', - 'adf_on'] + 'adf_on', + 'epsilon', + 'loss0', + 'loss1'] mod.template_red = OrderedDict([(k,mod.result_template[k]) for k in vw_run_param_set]) #mod.simplified_keymap_red = dict([(k,mod.simplified_keymap[k]) for k in vw_run_param_set]) @@ -311,11 +332,16 @@ def dictify(param_name, param_choices): result = [] for param in param_choices: dic = {} - dic[param_name] = param + if isinstance(param_name, tuple): + for i in range(len(param_name)): + dic[param_name[i]] = param[i] + else: + dic[param_name] = param result.append(dic) - print param_name, len(result) + print param_name, result return result + def params_per_task(mod): # Problem parameters prm_cor_type_ws = dictify('corrupt_type_warm_start', mod.choices_cor_type_ws) @@ -330,7 +356,9 @@ def params_per_task(mod): prm_cb_type = dictify('cb_type', mod.choices_cb_type) prm_dataset = dictify('dataset', mod.dss) prm_choices_lbd = dictify('choices_lambda', mod.choices_choices_lambda) - 
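[Editorial note: a quick check of the tuple-aware dictify added in this commit. A tuple of names fans a tuple-valued choice out into several keys of the same dict, which is how the (loss0, loss1) encoding pair travels as a single setting. A trimmed sketch (the script's version also prints the result):]

def dictify(param_name, param_choices):
    # Each choice becomes a one-setting dict; tuple names expand into
    # several keys of the same dict.
    result = []
    for param in param_choices:
        dic = {}
        if isinstance(param_name, tuple):
            for i in range(len(param_name)):
                dic[param_name[i]] = param[i]
        else:
            dic[param_name] = param
        result.append(dic)
    return result

print dictify(('loss0', 'loss1'), [(-1, 0), (0, 1)])
# [{'loss0': -1, 'loss1': 0}, {'loss0': 0, 'loss1': 1}]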
prm_adf_on = dictify('adf_on', [True]) + prm_choices_eps = dictify('epsilon', mod.choices_epsilon) + prm_adf_on = dictify('adf_on', mod.choices_adf) + prm_loss_enc = dictify(('loss0', 'loss1'), mod.choices_loss_enc) # Common parameters prm_com = param_cartesian_multi( @@ -342,21 +370,29 @@ def params_per_task(mod): prm_lrs, prm_cb_type, prm_fold, - prm_adf_on]) - - fltr_inter_gt = lambda p: ((p['corrupt_type_interaction'] == 1 #noiseless for interaction data - and abs(p['corrupt_prob_interaction']) < 1e-4) - and - (p['corrupt_type_warm_start'] == 1 #filter out repetitive warm start data - or abs(p['corrupt_prob_warm_start']) > 1e-4)) + prm_adf_on, + prm_choices_eps, + prm_loss_enc]) + + if mod.inter_gt_on: + fltr_inter_gt = lambda p: ((p['corrupt_type_interaction'] == 1 #noiseless for interaction data + and abs(p['corrupt_prob_interaction']) < 1e-4) + and + (p['corrupt_type_warm_start'] == 1 #filter out repetitive warm start data + or abs(p['corrupt_prob_warm_start']) > 1e-4)) + else: + fltr_inter_gt = lambda p: False prm_com_inter_gt = filter(fltr_inter_gt, prm_com) - fltr_ws_gt = lambda p: ((p['corrupt_type_warm_start'] == 1 #noiseless for warm start data - and abs(p['corrupt_prob_warm_start']) < 1e-4) - and - (p['corrupt_type_interaction'] == 1 #filter out repetitive interaction data - or abs(p['corrupt_prob_interaction']) > 1e-4)) + if mod.ws_gt_on: + fltr_ws_gt = lambda p: ((p['corrupt_type_warm_start'] == 1 #noiseless for warm start data + and abs(p['corrupt_prob_warm_start']) < 1e-4) + and + (p['corrupt_type_interaction'] == 1 #filter out repetitive interaction data + or abs(p['corrupt_prob_interaction']) > 1e-4)) + else: + fltr_ws_gt = lambda p: False prm_com_ws_gt = filter(fltr_ws_gt, prm_com) @@ -378,8 +414,9 @@ def params_per_task(mod): #Sim-Bandit {'warm_start_type': 2, 'warm_start_update': True, - 'interaction_update': True}, - #Sim-Bandit with no warm-start update + 'interaction_update': True, + 'lambda_scheme': 1}, + #Sim-Bandit with only warm-start update {'warm_start_type': 2, 'warm_start_update': True, 'interaction_update': False} @@ -486,10 +523,10 @@ def params_per_task(mod): d['corrupt_prob_warm_start'], d['corrupt_type_interaction'], d['corrupt_prob_interaction']) - ) + ) print 'The total number of VW commands to run is: ', len(prm_all) - #for row in prm_all: - # print row + for row in prm_all: + print row return get_params_task(prm_all) @@ -586,6 +623,9 @@ def main_loop(mod): ('actual_variance', 'av', 0.0), ('ideal_variance', 'iv', 0.0), ('last_lambda', 'll', 0.0), + ('epsilon', 'eps', 0.0), + ('loss0', 'l0', 0.0), + ('loss1', 'l1', 0.0), ] num_cols = len(mod.result_template_list) @@ -643,9 +683,9 @@ def remove_suffix(filename): if args.task_id == 0: # Compile vw in one of the subfolders - process = subprocess.Popen('make -C .. clean; make -C ..', shell=True, stdout=f, stderr=f) - subprocess.check_call(cmd, shell=True) - process.wait() + #process = subprocess.Popen('make -C .. 
clean; make -C ..', shell=True, stdout=f, stderr=f) + #subprocess.check_call(cmd, shell=True) + #process.wait() # To avoid race condition of writing to the same file at the same time create_dir(args.results_dir) @@ -664,10 +704,10 @@ def remove_suffix(filename): while not os.path.exists(flag_dir): time.sleep(1) - if args.num_learning_rates <= 0 or args.num_learning_rates >= 10: - mod.learning_rates = mod.lr_template + if args.num_learning_rates <= 0: + mod.learning_rates = [gen_lr(0)] else: - mod.learning_rates = mod.lr_template[:args.num_learning_rates] + mod.learning_rates = [gen_lr(i) for i in range(args.num_learning_rates)] #mod.folds = range(1,11) mod.folds = range(1, args.num_folds+1) diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 3bcab3abac6..1947a0734e6 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -656,11 +656,10 @@ void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) else { data.mc_label = ec.l.multi; - /*if (data.ws_iter < data.ws_period) + if (data.ws_iter < data.ws_period) ec.l.multi.label = corrupt_action(data, data.mc_label.label, WARM_START); else if (data.inter_iter < data.inter_period) ec.l.multi.label = corrupt_action(data, data.mc_label.label, INTERACTION); - */ } // Warm start phase From 648f0d979f29b97fc4723e81423f7928013e11c6 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 10:37:28 -0400 Subject: [PATCH 102/127] removed running scripts --- scripts/alg_comparison.py | 635 -------------------------------- scripts/data_gen.py | 88 ----- scripts/run_vw_commands.py | 727 ------------------------------------- scripts/run_vw_job.py | 205 ----------- scripts/shuffle.sh | 10 - 5 files changed, 1665 deletions(-) delete mode 100644 scripts/alg_comparison.py delete mode 100644 scripts/data_gen.py delete mode 100644 scripts/run_vw_commands.py delete mode 100644 scripts/run_vw_job.py delete mode 100644 scripts/shuffle.sh diff --git a/scripts/alg_comparison.py b/scripts/alg_comparison.py deleted file mode 100644 index 674a4e86c89..00000000000 --- a/scripts/alg_comparison.py +++ /dev/null @@ -1,635 +0,0 @@ -import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt -import pylab -import os -import glob -import pandas as pd -import scipy.stats as stats -from itertools import compress -from math import sqrt -import argparse -import numpy as np -import seaborn as sns -from matplotlib.colors import ListedColormap -from matplotlib.font_manager import FontProperties -from collections import Counter - - -class model: - def __init__(self): - pass - -def sum_files(result_path): - prevdir = os.getcwd() - os.chdir(result_path) - dss = sorted(glob.glob('*.sum')) - os.chdir(prevdir) - return dss - -def parse_sum_file(sum_filename): - f = open(sum_filename, 'r') - #f.seek(0, 0) - table = pd.read_table(f, sep='\s+',lineterminator='\n',error_bad_lines=False) - - return table - -def get_z_scores(errors_1, errors_2, sizes): - z_scores = [] - for i in range(len(errors_1)): - #print i - z_scores.append( z_score(errors_1[i], errors_2[i], sizes[i]) ) - return z_scores - -def z_score(err_1, err_2, size): - if (abs(err_1) < 1e-6 or abs(err_1) > 1-1e-6) and (abs(err_2) < 1e-6 or abs(err_2) > 1-1e-6): - return 0 - - #print err_1, err_2, size, sqrt( (err_1*(1 - err_1) + err_2*(1-err_2)) / size ) - - z = (err_1 - err_2) / sqrt( (err_1*(1 - err_1) + err_2*(1-err_2)) / size ) - return z - #print z - -def is_significant(z): - if (stats.norm.cdf(z) < 0.05) or (stats.norm.cdf(z) > 0.95): - return True - else: - return False 
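[Editorial note: the significance test deleted just above is a standard two-proportion z-test, flagged when 5% of probability mass sits in either tail. Stripped of the degenerate-error guard, it amounts to:]

from math import sqrt
import scipy.stats as stats

def z_score(err_1, err_2, size):
    # Pooled z statistic for two error rates measured on 'size' examples each.
    return (err_1 - err_2) / sqrt((err_1 * (1 - err_1) + err_2 * (1 - err_2)) / size)

def is_significant(z):
    # Two-sided test: 5% in each tail.
    return stats.norm.cdf(z) < 0.05 or stats.norm.cdf(z) > 0.95

print is_significant(z_score(0.20, 0.22, 500))  # False: gap too small
print is_significant(z_score(0.20, 0.30, 500))  # True: clear gap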
- -def plot_comparison(errors_1, errors_2, sizes): - #print title - plt.plot([0,1],[0,1]) - z_scores = get_z_scores(errors_1, errors_2, sizes) - sorted_z_scores = sorted(enumerate(z_scores), key=lambda x:x[1]) - #for s in sorted_z_scores: - # print s, is_significant(s[1]) - - significance = map(is_significant, z_scores) - results_signi_1 = list(compress(errors_1, significance)) - results_signi_2 = list(compress(errors_2, significance)) - plt.scatter(results_signi_1, results_signi_2, s=18, c='r') - - insignificance = [not b for b in significance] - results_insigni_1 = list(compress(errors_1, insignificance)) - results_insigni_2 = list(compress(errors_2, insignificance)) - - plt.scatter(results_insigni_1, results_insigni_2, s=2, c='k') - - len_errors = len(errors_1) - wins_1 = [z_scores[i] < 0 and significance[i] for i in range(len_errors) ] - wins_2 = [z_scores[i] > 0 and significance[i] for i in range(len_errors) ] - num_wins_1 = wins_1.count(True) - num_wins_2 = wins_2.count(True) - - return num_wins_1, num_wins_2 - -def alg_info(alg_name, result_lst): - if (alg_name[0] == 0): - return result_lst[0] - if (alg_name[0] == 2): - return result_lst[1] - if (alg_name[2] == True and alg_name[3] == True): - return result_lst[2] - if (alg_name[2] == True and alg_name[3] == False): - return result_lst[3] - if (alg_name[2] == False and alg_name[3] == True): - return result_lst[4] - if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 2 and alg_name[4] == 2): - return result_lst[5] - if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 4 and alg_name[4] == 2): - return result_lst[6] - if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 8 and alg_name[4] == 2): - return result_lst[7] - if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 16 and alg_name[4] == 2): - return result_lst[8] - if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 2 and alg_name[4] == 3): - return result_lst[9] - if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 4 and alg_name[4] == 3): - return result_lst[10] - if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 8 and alg_name[4] == 3): - return result_lst[11] - if (alg_name[2] == False and alg_name[3] == False and alg_name[1] == 16 and alg_name[4] == 3): - return result_lst[12] - - return result_lst[13] - -def alg_str(alg_name): - return alg_info(alg_name, - ['Most-Freq', - 'Sim-Bandit', - 'Class-1', - 'Bandit-Only', - 'Sup-Only', - 'MinimaxBandits, split validation', - 'AwesomeBandits with $|\Lambda|$=4, split validation', - 'AwesomeBandits with $|\Lambda|$=8, split validation', - 'AwesomeBandits with $|\Lambda|$=16, split validation', - 'MinimaxBandits, no-split validation', - 'AwesomeBandits with $|\Lambda|$=4, no-split validation', - 'AwesomeBandits with $|\Lambda|$=8, no-split validation', - 'AwesomeBandits with $|\Lambda|$=16, no-split validation', - 'unknown']) - -def alg_str_compatible(alg_name): - return alg_info(alg_name, - ['Most-Freq', - 'Sim-Bandit', - 'Class-1', - 'Bandit-Only', - 'Sup-Only', - 'Choices_lambda=2, validation_method=2', - 'Choices_lambda=4, validation_method=2', - 'Choices_lambda=8, validation_method=2', - 'Choices_lambda=16, validation_method=2', - 'Choices_lambda=2, validation_method=3', - 'Choices_lambda=4, validation_method=3', - 'Choices_lambda=8, validation_method=3', - 'Choices_lambda=16, validation_method=3', - 'unknown']) - -def alg_color_style(alg_name): - palette = sns.color_palette('colorblind') - colors = 
palette.as_hex() - #colors = [colors[5], colors[4], 'black', colors[2], colors[1], colors[3], 'black', colors[0], 'black', 'black'] - colors = [ - colors[5], - colors[3], - 'black', - colors[0], - colors[1], - colors[2], - colors[2], - colors[2], - colors[2], - colors[4], - colors[4], - colors[4], - colors[4], - 'black' ] - - styles = [ - 'solid', - 'solid', - 'solid', - 'solid', - 'dashed', - 'dotted', - 'dashdot', - 'solid', - 'dashed', - 'dotted', - 'dashdot', - 'solid', - 'dashed', - 'solid'] - - return alg_info(alg_name, zip(colors, styles)) - #['black', 'magenta', 'lime', 'green', 'blue', 'darkorange','darksalmon', 'red', 'cyan'] - -def alg_index(alg_name): - return alg_info(alg_name, - [7.0, - 6.0, - 8.0, - 5.0, - 4.0, - 2.0, - 1.0, - 1.2, - 1.5, - 3.0, - 2.0, - 2.2, - 2.5, - 9.0]) - - -def order_legends(indices): - ax = plt.gca() - handles, labels = ax.get_legend_handles_labels() - # sort both labels and handles by labels - labels, handles, indices = zip(*sorted(zip(labels, handles, indices), key=lambda t: t[2])) - ax.legend(handles, labels) - -def save_legend(mod, indices): - ax = plt.gca() - handles, labels = ax.get_legend_handles_labels() - labels, handles, indices = zip(*sorted(zip(labels, handles, indices), key=lambda t: t[2])) - #figlegend = pylab.figure(figsize=(26,1)) - #figlegend.legend(handles, labels, 'center', fontsize=26, ncol=8) - figlegend = pylab.figure(figsize=(17,1.5)) - figlegend.legend(handles, labels, 'center', fontsize=26, ncol=3) - figlegend.tight_layout(pad=0) - figlegend.savefig(mod.problemdir+'legend.pdf') - -def problem_str(name_problem): - return 'sct='+str(name_problem[0]) \ - +'_scp='+str(name_problem[1]) \ - +'_bct='+str(name_problem[2]) \ - +'_bcp='+str(name_problem[3]) \ - +'_ratio='+str(name_problem[4]) - -def noise_type_str(noise_type): - if noise_type == 1: - return 'UAR' - elif noise_type == 2: - return 'CYC' - elif noise_type == 3: - return 'MAJ' - -def problem_text(name_problem): - s='' - s += 'Ratio = ' + str(name_problem[2]) + ', ' - if abs(name_problem[1]) < 1e-6: - s += 'noiseless' - else: - s += noise_type_str(name_problem[0]) + ', ' - s += 'p = ' + str(name_problem[1]) - return s - - -def plot_cdf(alg_name, errs): - - #print alg_name - #print errs - #print len(errs) - - col, sty = alg_color_style(alg_name) - - plt.step(np.sort(errs), np.linspace(0, 1, len(errs), endpoint=False), label=alg_str(alg_name), color=col, linestyle=sty, linewidth=2.0) - - # - - #raw_input("Press Enter to continue...") - -def plot_all_cdfs(alg_results, mod): - #plot all cdfs: - print 'printing cdfs..' 
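[Editorial note: stripped of color, style, and legend handling, the per-algorithm CDF that plot_cdf (deleted just above) draws is simply a sorted-error step plot. A minimal sketch:]

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

def plot_cdf(errs, label):
    # Empirical CDF over datasets: x = normalized error,
    # y = fraction of datasets with error at or below x.
    xs = np.sort(errs)
    ys = np.linspace(0, 1, len(errs), endpoint=False)
    plt.step(xs, ys, label=label, linewidth=2.0)

plot_cdf([0.1, 0.4, 0.2, 0.9], 'Sup-Only')
plt.xlim(0, 1); plt.ylim(0, 1); plt.legend()
plt.savefig('cdf.pdf')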
- - indices = [] - - pylab.figure(figsize=(8,6)) - - for alg_name, errs in alg_results.iteritems(): - indices.append(alg_index(alg_name)) - plot_cdf(alg_name, errs) - - if mod.normalize_type == 1: - plt.xlim(0,1) - elif mod.normalize_type == 2: - plt.xlim(-1,1) - elif mod.normalize_type == 3: - plt.xlim(0, 1) - - plt.ylim(0,1) - #params={'legend.fontsize':26, - #'axes.labelsize': 24, 'axes.titlesize':26, 'xtick.labelsize':20, - #'ytick.labelsize':20 } - #plt.rcParams.update(params) - #plt.xlabel('Normalized error',fontsize=34) - #plt.ylabel('Cumulative frequency', fontsize=34) - #plt.title(problem_text(mod.name_problem), fontsize=36) - plt.xticks(fontsize=30) - plt.yticks(fontsize=30) - plt.tight_layout(pad=0) - - ax = plt.gca() - order_legends(indices) - ax.legend_.set_zorder(-1) - plt.savefig(mod.problemdir+'cdf.pdf') - ax.legend_.remove() - plt.savefig(mod.problemdir+'cdf_nolegend.pdf') - save_legend(mod, indices) - plt.clf() - -def plot_all_lrs(lrs, mod): - alg_names = lrs.keys() - - for i in range(len(alg_names)): - pylab.figure(figsize=(8,6)) - lrs_alg = lrs[alg_names[i]] - counts = Counter(lrs_alg) - names = list(counts.keys()) - names_sorted = sorted(names) - values = [counts[n] for n in names_sorted] - plt.barh(range(len(names_sorted)),values) - plt.yticks(range(len(names_sorted)),names_sorted) - plt.savefig(mod.problemdir+alg_str_compatible(alg_names[i])+'_lr.pdf') - plt.clf() - - -def plot_all_pair_comp(alg_results, sizes, mod): - alg_names = alg_results.keys() - - for i in range(len(alg_names)): - for j in range(len(alg_names)): - if i < j: - errs_1 = alg_results[alg_names[i]] - errs_2 = alg_results[alg_names[j]] - - print len(errs_1), len(errs_2), len(sizes) - #raw_input('Press any key to continue..') - - num_wins_1, num_wins_2 = plot_comparison(errs_1, errs_2, sizes) - - plt.title( 'total number of comparisons = ' + str(len(errs_1)) + '\n'+ - alg_str(alg_names[i]) + ' wins ' + str(num_wins_1) + ' times, \n' + alg_str(alg_names[j]) + ' wins ' + str(num_wins_2) + ' times') - plt.savefig(mod.problemdir+alg_str_compatible(alg_names[i])+'_vs_'+alg_str_compatible(alg_names[j])+'.pdf') - plt.clf() - -#def init_results(result_table): -# alg_results = {} -# for idx, row in result_table.iterrows(): -# alg_name = (row['warm_start_type'], row['choices_lambda'], row['no_warm_start_update'], row['no_interaction_update']) -# alg_results[alg_name] = [] -# alg_results[(0, 0, False, False)] = [] -# return alg_results - -def normalize_score(unnormalized_result, mod): - if mod.normalize_type == 1: - l = get_best_error(mod.best_error_table, mod.name_dataset) - u = max(unnormalized_result.values()) - return { k : ((v - l) / (u - l + 1e-4)) for k, v in unnormalized_result.iteritems() } - elif mod.normalize_type == 2: - l = unnormalized_result[(1, 1, True, False)] - return { k : ((v - l) / (l + 1e-4)) for k, v in unnormalized_result.iteritems() } - elif mod.normalize_type == 3: - return unnormalized_result - -def get_best_error(best_error_table, name_dataset): - name = name_dataset[0] - print name - print best_error_table - best_error_oneline = best_error_table[best_error_table['dataset'] == name] - best_error = best_error_oneline.loc[best_error_oneline.index[0], 'avg_error'] - #raw_input("...") - #print best_error_oneline - #raw_input("...") - #print best_error - #raw_input("...") - return best_error - -def get_maj_error(maj_error_table, name_dataset): - name = name_dataset[0] - maj_error_oneline = maj_error_table[maj_error_table['data'] == name] - maj_error = 
maj_error_oneline.loc[maj_error_oneline.index[0], 'avg_error'] - return maj_error - -#normalized_results[alg_name].append(normalized_errs[i]) -#errs = [] -#for idx, row in result_table.iterrows(): -# errs.append(row['avg_error']) - -def get_unnormalized_results(result_table): - new_unnormalized_results = {} - new_lr = {} - new_size = 0 - - i = 0 - for idx, row in result_table.iterrows(): - if i == 0: - new_size = row['bandit_size'] - - if row['bandit_size'] == new_size: - alg_name = (row['warm_start_type'], row['choices_lambda'], row['no_warm_start_update'], row['no_interaction_update'], row['validation_method']) - new_unnormalized_results[alg_name] = row['avg_error'] - new_lr[alg_name] = row['learning_rate'] - i += 1 - - return new_size, new_unnormalized_results, new_lr - -def update_result_dict(results_dict, new_result): - print results_dict - for k, v in new_result.iteritems(): - print k - results_dict[k].append(v) - - -def plot_all(mod, all_results): - - #all_results = all_results[all_results['corrupt_prob_supervised']!=0.0] - - grouped_by_problem = all_results.groupby(['corrupt_type_supervised', - 'corrupt_prob_supervised', - 'corrupt_type_bandit', - 'corrupt_prob_bandit', - 'bandit_supervised_size_ratio']) - - #then group by dataset and warm_start size (corresponding to each point in cdf) - for name_problem, group_problem in grouped_by_problem: - normalized_results = None - unnormalized_results = None - sizes = None - mod.name_problem = name_problem - - grouped_by_dataset = group_problem.groupby(['dataset', - 'warm_start']) - #then select unique combinations of (no_supervised, no_bandit, choices_lambda) - #e.g. (True, True, 1), (True, False, 1), (False, True, 1), (False, False, 2) - #(False, False, 8), and compute a normalized score - - for name_dataset, group_dataset in grouped_by_dataset: - result_table = group_dataset - - group_dataset = group_dataset.reset_index(drop=True) - - grouped_by_algorithm = group_dataset.groupby(['warm_start_type', - 'choices_lambda', - 'no_warm_start_update', - 'no_interaction_update', - 'validation_method']) - - mod.name_dataset = name_dataset - - #The 'learning_rate' would be the only free degree here now. Taking the - #min aggregation will give us the algorithms we are evaluating. 
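[Editorial note: the type-1 normalization removed a little earlier rescales each dataset's errors so the best tuned fully-supervised error maps near 0 and the worst algorithm considered maps near 1. In isolation, with the same 1e-4 zero-denominator guard as the script:]

def normalize_type1(errs, best_err):
    # errs: algorithm name -> raw error on one dataset;
    # best_err: error of the tuned fully-supervised baseline.
    u = max(errs.values())
    return dict((k, (v - best_err) / (u - best_err + 1e-4))
                for k, v in errs.iteritems())

print normalize_type1({'sup_only': 0.10, 'bandit_only': 0.30}, 0.08)
# {'sup_only': ~0.09, 'bandit_only': ~1.0}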
- - #In the future this should be changed if we run multiple folds: we - #should average among folds before choosing the min - #result_table = grouped_by_algorithm.min() - #result_table = result_table.reset_index() - - #print grouped_by_algorithm - #grouped_by_algorithm.describe() - - idx = grouped_by_algorithm.apply(lambda df:df["avg_error"].idxmin()) - result_table = group_dataset.ix[idx, :] - #print idx - #print result_table - #print group_dataset - #raw_input('..') - - #group_dataset.groupby(['choices_lambda','no_supervised', 'no_bandit']) - #print alg_results - #dummy = input('') - - #in general (including the first time) - record the error rates of all algorithms - #print result_table - - new_size, new_unnormalized_result, new_lr = get_unnormalized_results(result_table) - new_unnormalized_result[(0, 0, False, False, 1)] = get_maj_error(mod.maj_error_table, mod.name_dataset) - new_lr[(0, 0, False, False, 1)] = 0.0 - new_normalized_result = normalize_score(new_unnormalized_result, mod) - - #first time - generate names of algorithms considered - if normalized_results is None: - sizes = [] - unnormalized_results = dict([(k,[]) for k in new_unnormalized_result.keys()]) - normalized_results = dict([(k,[]) for k in new_unnormalized_result.keys()]) - lrs = dict([(k,[]) for k in new_unnormalized_result.keys()]) - - update_result_dict(unnormalized_results, new_unnormalized_result) - update_result_dict(normalized_results, new_normalized_result) - update_result_dict(lrs, new_lr) - sizes.append(new_size) - - #print 'sizes:' - #print len(sizes) - #for k, v in unnormalized_results.iteritems(): - # print len(v) - - mod.problemdir = mod.fulldir+problem_str(mod.name_problem)+'/' - if not os.path.exists(mod.problemdir): - os.makedirs(mod.problemdir) - - print 'best_errors', mod.best_error_table - print 'unnormalized_results', unnormalized_results - print 'normalized_results', normalized_results - - if mod.pair_comp_on is True: - plot_all_pair_comp(unnormalized_results, sizes, mod) - if mod.cdf_on is True: - plot_all_cdfs(normalized_results, mod) - - plot_all_lrs(lrs, mod) - -def save_to_hdf(mod): - print 'saving to hdf..' - store = pd.HDFStore('store.h5') - store['result_table'] = mod.all_results - store.close() - -def load_from_hdf(mod): - print 'reading from hdf..' - store = pd.HDFStore('store.h5') - mod.all_results = store['result_table'] - store.close() - -def load_from_sum(mod): - print 'reading directory..' - dss = sum_files(mod.results_dir) - print len(dss) - - #print dss[168] - - all_results = None - - print 'reading sum tables..' 
- for i in range(len(dss)): - print 'result file name: ', dss[i] - result = parse_sum_file(mod.results_dir + dss[i]) - - if (i == 0): - all_results = result - else: - all_results = all_results.append(result) - - print all_results - mod.all_results = all_results - - -# This is a hack - need to do this systematically in the future -#def load_maj_error(mod): -# return parse_sum_file(mod.maj_error_dir) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='result summary') - parser.add_argument('--results_dir', default='../../../figs/') - parser.add_argument('--filter', default='1') - parser.add_argument('--plot_subdir', default='expt1/') - parser.add_argument('--from_hdf', action='store_true') - parser.add_argument('--normalize_type', type=int, default=1) - args = parser.parse_args() - - mod = model() - - mod.results_dir = args.results_dir - mod.filter = args.filter - mod.plot_subdir = args.plot_subdir - mod.normalize_type = args.normalize_type #1: normalized score; 2: bandit only centered score; 3: raw score - mod.pair_comp_on = False - mod.cdf_on = True - mod.maj_error_dir = '../../../figs_all/expt_0509/figs_maj_errors/0of1.sum' - mod.best_error_dir = '../../../figs_all/expt_0606/0of1.sum' - - mod.fulldir = mod.results_dir + mod.plot_subdir - if not os.path.exists(mod.fulldir): - os.makedirs(mod.fulldir) - - #print args.from_hdf - #raw_input(' ') - if args.from_hdf is True: - load_from_hdf(mod) - else: - load_from_sum(mod) - save_to_hdf(mod) - - #first group by corruption mode, then corruption prob - #then group by warm start - bandit ratio - #these constitutes all the problem settings we are looking at (corresponding - #to each cdf graph) - all_results = mod.all_results - - #print mod.best_error_table[mod.best_error_table['dataset'] == 'ds_160_5.vw.gz'] - #raw_input(' ') - - #print all_results - #raw_input('..') - - all_results = all_results[all_results['choices_lambda'] != 0] - - #ignore the no update row: - all_results = all_results[(all_results['no_warm_start_update'] == False) | (all_results['no_interaction_update'] == False)] - #ignore the choice_lambda = 4 row - all_results = all_results[(all_results['choices_lambda'] != 4)] - - - - - #filter choices_lambdas = 2,4,8? 
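# A toy check of the row filters above, assuming standard pandas semantics
# (only the column names match the real summary table; values are made up):
import pandas as pd
toy = pd.DataFrame({'no_warm_start_update':  [False, True,  True],
                    'no_interaction_update': [False, False, True],
                    'choices_lambda':        [2,     4,     8]})
# the first filter drops only rows where BOTH updates are disabled (row 3);
# the second then drops the remaining choices_lambda == 4 row (row 2)
kept = toy[(toy['no_warm_start_update'] == False) |
           (toy['no_interaction_update'] == False)]
kept = kept[kept['choices_lambda'] != 4]
# -> only row 1 survives: both updates enabled, choices_lambda == 2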
- #if (alg_name[2] == False and alg_name[3] == False and alg_name[1] != 8): - # pass - #else: - - mod.maj_error_table = parse_sum_file(mod.maj_error_dir) - mod.maj_error_table = mod.maj_error_table[mod.maj_error_table['majority_approx']] - mod.best_error_table = parse_sum_file(mod.best_error_dir) - mod.best_error_table = mod.best_error_table[mod.best_error_table['optimal_approx']] - - if mod.filter == '1': - pass - elif mod.filter == '2': - #print all_results['warm_start_size'] >= 100 - #raw_input(' ') - all_results = all_results[all_results['warm_start'] >= 200] - elif mod.filter == '3': - all_results = all_results[all_results['num_classes'] >= 3] - elif mod.filter == '4': - all_results = all_results[all_results['num_classes'] <= 2] - elif mod.filter == '5': - all_results = all_results[all_results['total_size'] >= 10000] - all_results = all_results[all_results['num_classes'] >= 3] - elif mod.filter == '6': - all_results = all_results[all_results['warm_start'] >= 100] - all_results = all_results[all_results['learning_rate'] == 0.3] - elif mod.filter == '7': - all_results = all_results[all_results['warm_start'] >= 100] - all_results = all_results[all_results['num_classes'] >= 3] - - plot_all(mod, all_results) - - #if i >= 331 and i <= 340: - # print 'result:', result - # print 'all_results:', all_results diff --git a/scripts/data_gen.py b/scripts/data_gen.py deleted file mode 100644 index aa30cb061c2..00000000000 --- a/scripts/data_gen.py +++ /dev/null @@ -1,88 +0,0 @@ -import random -import numpy as np - -classes = 2 -m = 10 -kwperclass = 2 - -def gen_keyword(): - keyword = np.zeros((classes, m)) - - for i in range(classes): - shuffled = range(m) - random.shuffle(shuffled) - - for j in range(kwperclass): - keyword[i,shuffled[j]] = 1 - - return keyword - - -def classify(classifier, example): - result = classifier.dot(example) - return np.argmax(result) - -def gen_datasets(filename, keyword, num_samples, fprob): - - f = open(filename+".vw", "w") - g = open(filename+"_m.vw", "w") - - for i in range(num_samples): - c = random.randint(0, classes-1) - - #generate a pair of datasets (one is cost-sensitive, the other is multiclass) - for l in range(classes): - f.write(str(l+1)+':') - cost = 1 - if l == c: - cost = 0 - f.write(str(cost)+' ') - - g.write(str(c+1)) - - f.write(' | ') - g.write(' | ') - - vec = np.zeros(m) - - for j in range(m): - flip = np.random.choice([False,True],p=[1-fprob, fprob]) - if flip: - vec[j] = 2 * (1-keyword[c][j]) - 1 - else: - vec[j] = 2 * keyword[c][j] - 1 - - for j in range(m): - f.write('w'+str(j)+':') - f.write(str(vec[j])+' ') - g.write('w'+str(j)+':') - g.write(str(vec[j])+' ') - - #print 'Is the prediction equal to the class label? 
', classify(keyword, vec) == c - f.write('\n') - g.write('\n') - - f.close() - g.close() - - - -if __name__ == '__main__': - - keyword = gen_keyword() - # Remember to generate a pair of datasets at the same time - # so that the class-dependent feature is retained - - - num_samples = 10000 - fprob = 0.1 - filename = "source1"+'_'+str(fprob) - - gen_datasets(filename, keyword, num_samples, fprob) - - - num_samples = 10000 - fprob = 0.1 - filename = "source2"+'_'+str(fprob) - - gen_datasets(filename, keyword, num_samples, fprob) diff --git a/scripts/run_vw_commands.py b/scripts/run_vw_commands.py deleted file mode 100644 index 41c974196a7..00000000000 --- a/scripts/run_vw_commands.py +++ /dev/null @@ -1,727 +0,0 @@ -import subprocess -from itertools import product -import os -import math -import argparse -import time -import glob -import re -from collections import OrderedDict - - -class model: - def __init__(self): - # Setting up argument-independent learning parameters in the constructor - self.baselines_on = True - self.algs_on = False - self.optimal_on = False - self.majority_on = False - - self.ws_gt_on = True - self.inter_gt_on = False - - self.num_checkpoints = 200 - - # use fractions instead of absolute numbers - self.ws_multipliers = [pow(2,i) for i in range(4)] - #self.ws_multipliers = [pow(2,i) for i in range(2)] - - self.choices_cb_type = ['mtr'] - #mod.choices_choices_lambda = [2,4,8] - self.choices_choices_lambda = [2,8,16] - - #self.choices_cor_type_ws = [1,2,3] - #self.choices_cor_prob_ws = [0.0,0.5,1.0] - self.choices_cor_type_ws = [1] - self.choices_cor_prob_ws = [0.0] - - self.choices_cor_type_inter = [1] - self.choices_cor_prob_inter = [0.0, 0.125, 0.25, 0.5] - - self.choices_loss_enc = [(-1, 0)] - #self.choices_cor_type_inter = [1,2] - #self.choices_cor_prob_inter = [0.0,0.5] - - self.choices_epsilon = [0.05, 0.1] - #self.epsilon_on = True - #self.lr_template = [0.1, 0.03, 0.3, 0.01, 1.0, 0.003, 3.0, 0.001, 10.0, 0.0003, 30.0, 0.0001, 100.0] - self.choices_adf = [True] - self.critical_size_ratios = [184 * pow(2, -i) for i in range(7) ] - -def gen_lr(n): - m = math.floor(n / 4.0) - if n % 4 == 0: - return 0.1 * pow(10, m) - if n % 4 == 1: - return 0.03 * pow(10, -m) - if n % 4 == 2: - return 0.3 * pow(10, m) - if n % 4 == 3: - return 0.01 * pow(10, -m) - -def collect_stats(mod): - avg_error_value = avg_error(mod) - actual_var_value = actual_var(mod) - ideal_var_value = ideal_var(mod) - - vw_run_results = [] - vw_result_template = { - 'interaction': 0, - 'inter_ws_size_ratio': 0, - 'avg_error': 0.0, - 'actual_variance': 0.0, - 'ideal_variance': 0.0 - } - - if 'majority_approx' in mod.param or 'optimal_approx' in mod.param: - vw_result = vw_result_template.copy() - if 'optimal_approx' in mod.param: - # this condition is for computing the optimal error - vw_result['avg_error'] = avg_error_value - else: - # this condition is for computing the majority error - err = 1 - float(mod.param['majority_size']) / mod.param['total_size'] - vw_result['avg_error'] = float('%0.5f' % err) - vw_run_results.append(vw_result) - return vw_run_results - - f = open(mod.vw_output_filename, 'r') - - i = 0 - for line in f: - vw_progress_pattern = '\d+\.\d+\s+\d+\.\d+\s+\d+\s+\d+\.\d+\s+[a-zA-Z0-9]+\s+[a-zA-Z0-9]+\s+\d+.*' - matchobj = re.match(vw_progress_pattern, line) - - if matchobj: - s = line.split() - if len(s) >= 8: - s = s[:7] - avg_loss_str, last_loss_str, counter_str, weight_str, curr_label_str, \ - curr_pred_str, curr_feat_str = s - - avg_loss = float(avg_loss_str) - inter_effective = 
int(float(weight_str)) - - for ratio in mod.critical_size_ratios: - if inter_effective >= (1 - 1e-7) * mod.param['warm_start'] * ratio and \ - inter_effective <= (1 + 1e-7) * mod.param['warm_start'] * ratio: - vw_result = vw_result_template.copy() - vw_result['interaction'] = inter_effective - vw_result['inter_ws_size_ratio'] = ratio - vw_result['avg_error'] = avg_loss - vw_result['actual_variance'] = actual_var_value - vw_result['ideal_variance'] = ideal_var_value - vw_run_results.append(vw_result) - f.close() - - #if len(vw_run_results) >= 1: - # print mod.param['warm_start'] - # print vw_run_results - #raw_input('..') - return vw_run_results - - -def gen_vw_options_list(mod): - mod.vw_options = format_setting(mod.vw_template, mod.param) - vw_options_list = [] - for k, v in mod.vw_options.iteritems(): - vw_options_list.append('--'+str(k)) - vw_options_list.append(str(v)) - return vw_options_list - -def gen_vw_options(mod): - if 'optimal_approx' in mod.param: - # Fully supervised on full dataset - mod.vw_template = OrderedDict([('data',''), - ('progress',2.0), - ('passes',0), - ('oaa',0), - ('cache_file','')]) - mod.param['passes'] = 5 - mod.param['oaa'] = mod.param['num_classes'] - mod.param['cache_file'] = mod.param['data'] + '.cache' - elif 'majority_approx' in mod.param: - # Compute majority error; basically we would like to skip vw running as fast as possible - mod.vw_template = OrderedDict([('data',''), - ('progress',2.0), - ('cbify',0), - ('warm_start',0), - ('interaction',0)]) - mod.param['cbify'] = mod.param['num_classes'] - mod.param['warm_start'] = 0 - mod.param['interaction'] = 0 - else: - # General CB - mod.vw_template = OrderedDict([('data',''), - ('cbify',0), - ('cb_type','mtr'), - ('warm_start',0), - ('interaction',0), - ('corrupt_type_interaction',0), - ('corrupt_prob_interaction',0.0), - ('corrupt_type_warm_start',0), - ('corrupt_prob_warm_start',0.0), - ('warm_start_update',True), - ('interaction_update',True), - ('choices_lambda',0), - ('lambda_scheme',1), - ('warm_start_type',1), - ('overwrite_label',1), - ('validation_method',1), - ('weighting_scheme',1), - ('learning_rate',0.5), - ('epsilon', 0.05), - ('loss0', 0), - ('loss1', 0), - ('progress',2.0)]) - - mod.param['warm_start'] = mod.param['warm_start_multiplier'] * mod.param['progress'] - mod.param['interaction'] = mod.param['total_size'] - mod.param['warm_start'] - mod.param['cbify'] = mod.param['num_classes'] - mod.param['overwrite_label'] = mod.param['majority_class'] - - if mod.param['adf_on'] is True: - mod.param['cb_explore_adf'] = ' ' - mod.vw_template['cb_explore_adf'] = ' ' - else: - mod.param['cb_explore'] = mod.param['num_classes'] - mod.vw_template['cb_explore'] = 0 - - -def execute_vw(mod): - gen_vw_options(mod) - vw_options_list = gen_vw_options_list(mod) - cmd = intersperse([mod.vw_path]+vw_options_list, ' ') - print cmd - - f = open(mod.vw_output_filename, 'w') - process = subprocess.Popen(cmd, shell=True, stdout=f, stderr=f) - #subprocess.check_call(cmd, shell=True) - process.wait() - f.close() - -def intersperse(l, ch): - s = '' - for item in l: - s += str(item) - s += ch - return s - -def param_to_str(param): - param_list = [ str(k)+'='+str(v) for k,v in param.iteritems() ] - return intersperse(param_list, ',') - -def replace_keys(dic, simplified_keymap): - dic_new = OrderedDict() - for k, v in dic.iteritems(): - dic_new[simplified_keymap[k]] = v - return dic_new - -def param_to_str_simplified(mod): - #print 'before replace' - #print param - vw_run_param_set = \ - ['dataset', - 'fold', - 
'lambda_scheme', - 'validation_method', - 'warm_start_multiplier', - 'corrupt_prob_interaction', - 'corrupt_prob_warm_start', - 'corrupt_type_interaction', - 'corrupt_type_warm_start', - 'warm_start_update', - 'interaction_update', - 'warm_start_type', - 'choices_lambda', - 'weighting_scheme', - 'cb_type', - 'optimal_approx', - 'majority_approx', - 'learning_rate', - 'adf_on', - 'epsilon', - 'loss0', - 'loss1'] - - mod.template_red = OrderedDict([(k,mod.result_template[k]) for k in vw_run_param_set]) - #mod.simplified_keymap_red = dict([(k,mod.simplified_keymap[k]) for k in vw_run_param_set]) - # step 1: use the above as a template to filter out irrelevant parameters - # in the vw output file title - param_formatted = format_setting(mod.template_red, mod.param) - # step 2: replace the key names with the simplified names - param_simplified = replace_keys(param_formatted, mod.simplified_keymap) - #print 'after replace' - #print param - return param_to_str(param_simplified) - -def run_single_expt(mod): - mod.param['data'] = mod.ds_path + str(mod.param['fold']) + '/' + mod.param['dataset'] - mod.param['total_size'] = get_num_lines(mod.param['data']) - mod.param['num_classes'] = get_num_classes(mod.param['data']) - mod.param['majority_size'], mod.param['majority_class'] = get_majority_class(mod.param['data']) - mod.param['progress'] = int(math.ceil(float(mod.param['total_size']) / float(mod.num_checkpoints))) - mod.vw_output_dir = mod.results_path + remove_suffix(mod.param['data']) + '/' - mod.vw_output_filename = mod.vw_output_dir + param_to_str_simplified(mod) + '.txt' - - #plot_errors(mod) - #print mod.param['validation_method'] - - execute_vw(mod) - vw_run_results = collect_stats(mod) - for vw_result in vw_run_results: - result_combined = merge_two_dicts(mod.param, vw_result) - - #print mod.result_template['no_interaction_update'] - #print result_combined['no_interaction_update'] - - result_formatted = format_setting(mod.result_template, result_combined) - record_result(mod, result_formatted) - - -# The following function is a "template filling" function -# Given a template, we use the setting dict to fill it as much as possible -def format_setting(template, setting): - formatted = template.copy() - for k, v in setting.iteritems(): - if k in template.keys(): - formatted[k] = v - return formatted - -def record_result(mod, result): - result_row = result.values() - #for k in mod.result_header_list: - # result_row.append(result[k]) - #print result['validation_method'] - #print result_row - - summary_file = open(mod.summary_file_name, 'a') - summary_file.write( intersperse(result_row, '\t') + '\n') - summary_file.close() - -def ds_files(ds_path): - prevdir = os.getcwd() - os.chdir(ds_path) - dss = sorted(glob.glob('*.vw.gz')) - #dss = [ds_path+ds for ds in dss] - os.chdir(prevdir) - return dss - -def merge_two_dicts(x, y): - #print 'x = ', x - #print 'y = ', y - z = x.copy() # start with x's keys and values - z.update(y) # modifies z with y's keys and values & returns None - return z - -def param_cartesian(param_set_1, param_set_2): - prod = [] - for param_1 in param_set_1: - for param_2 in param_set_2: - prod.append(merge_two_dicts(param_1, param_2)) - return prod - -def param_cartesian_multi(param_sets): - #print param_sets - prod = [{}] - for param_set in param_sets: - prod = param_cartesian(prod, param_set) - return prod - -def dictify(param_name, param_choices): - result = [] - for param in param_choices: - dic = {} - if isinstance(param_name, tuple): - for i in range(len(param_name)): - 
dic[param_name[i]] = param[i] - else: - dic[param_name] = param - result.append(dic) - print param_name, result - return result - - -def params_per_task(mod): - # Problem parameters - prm_cor_type_ws = dictify('corrupt_type_warm_start', mod.choices_cor_type_ws) - prm_cor_prob_ws = dictify('corrupt_prob_warm_start', mod.choices_cor_prob_ws) - prm_cor_type_inter = dictify('corrupt_type_interaction', mod.choices_cor_type_inter) - prm_cor_prob_inter = dictify('corrupt_prob_interaction', mod.choices_cor_prob_inter) - prm_ws_multiplier = dictify('warm_start_multiplier', mod.ws_multipliers) - prm_lrs = dictify('learning_rate', mod.learning_rates) - # could potentially induce a bug if the maj and best does not have this parameter - prm_fold = dictify('fold', mod.folds) - # Algorithm parameters - prm_cb_type = dictify('cb_type', mod.choices_cb_type) - prm_dataset = dictify('dataset', mod.dss) - prm_choices_lbd = dictify('choices_lambda', mod.choices_choices_lambda) - prm_choices_eps = dictify('epsilon', mod.choices_epsilon) - prm_adf_on = dictify('adf_on', mod.choices_adf) - prm_loss_enc = dictify(('loss0', 'loss1'), mod.choices_loss_enc) - - # Common parameters - prm_com = param_cartesian_multi( - [prm_cor_type_ws, - prm_cor_prob_ws, - prm_cor_type_inter, - prm_cor_prob_inter, - prm_ws_multiplier, - prm_lrs, - prm_cb_type, - prm_fold, - prm_adf_on, - prm_choices_eps, - prm_loss_enc]) - - if mod.inter_gt_on: - fltr_inter_gt = lambda p: ((p['corrupt_type_interaction'] == 1 #noiseless for interaction data - and abs(p['corrupt_prob_interaction']) < 1e-4) - and - (p['corrupt_type_warm_start'] == 1 #filter out repetitive warm start data - or abs(p['corrupt_prob_warm_start']) > 1e-4)) - else: - fltr_inter_gt = lambda p: False - - prm_com_inter_gt = filter(fltr_inter_gt, prm_com) - - if mod.ws_gt_on: - fltr_ws_gt = lambda p: ((p['corrupt_type_warm_start'] == 1 #noiseless for warm start data - and abs(p['corrupt_prob_warm_start']) < 1e-4) - and - (p['corrupt_type_interaction'] == 1 #filter out repetitive interaction data - or abs(p['corrupt_prob_interaction']) > 1e-4)) - else: - fltr_ws_gt = lambda p: False - - prm_com_ws_gt = filter(fltr_ws_gt, prm_com) - - prm_com = filter(lambda p: (fltr_ws_gt(p) or fltr_inter_gt(p)), prm_com) - - # Baseline parameters construction - if mod.baselines_on: - prm_baseline_basic = \ - [ - [ - #Sup-Only - {'warm_start_type': 1, - 'warm_start_update': True, - 'interaction_update': False}, - #Band-Only - {'warm_start_type': 1, - 'warm_start_update': False, - 'interaction_update': True}, - #Sim-Bandit - {'warm_start_type': 2, - 'warm_start_update': True, - 'interaction_update': True, - 'lambda_scheme': 1}, - #Sim-Bandit with only warm-start update - {'warm_start_type': 2, - 'warm_start_update': True, - 'interaction_update': False} - ] - ] - - prm_baseline_const = \ - [ - [ - {'weighting_scheme':1, - 'adf_on':True, - 'lambda_scheme':3, - 'choices_lambda':1} - ] - ] - prm_baseline = param_cartesian_multi([prm_com] + prm_baseline_const + prm_baseline_basic) - else: - prm_baseline = [] - - - # Algorithm parameters construction - if mod.algs_on: - # Algorithms for supervised validation - prm_ws_gt = \ - [ - [ - {'warm_start_update': True, - 'interaction_update': True, - 'warm_start_type': 1, - 'lambda_scheme': 2, - 'weighting_scheme': 2} - ], - [ - {'validation_method':2}, - {'validation_method':3} - ] - ] - - prm_inter_gt = \ - [ - [ - {'warm_start_update': True, - 'interaction_update': True, - 'warm_start_type': 1, - 'lambda_scheme': 4, - 'weighting_scheme': 1} - ], - ] - - 
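# A minimal worked example of the grid helpers defined above (dictify and
# param_cartesian_multi); 'ips' is a toy second cb_type added for illustration:
prm_a = dictify('cb_type', ['mtr', 'ips'])      # [{'cb_type': 'mtr'}, {'cb_type': 'ips'}]
prm_b = dictify(('loss0', 'loss1'), [(-1, 0)])  # tuple keys spread: [{'loss0': -1, 'loss1': 0}]
grid = param_cartesian_multi([prm_a, prm_b])
# grid == [{'cb_type': 'mtr', 'loss0': -1, 'loss1': 0},
#          {'cb_type': 'ips', 'loss0': -1, 'loss1': 0}]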
prm_algs_ws_gt = param_cartesian_multi([prm_com_ws_gt] + [prm_choices_lbd] + prm_ws_gt) - prm_algs_inter_gt = param_cartesian_multi([prm_com_inter_gt] + [prm_choices_lbd] + prm_inter_gt) - prm_algs = prm_algs_ws_gt + prm_algs_inter_gt - else: - prm_algs = [] - - # Optimal baselines parameter construction - if mod.optimal_on: - prm_optimal = \ - [ - {'optimal_approx': True, - 'fold': 1, - 'corrupt_type_warm_start':1, - 'corrupt_prob_warm_start':0.0, - 'corrupt_type_interaction':1, - 'corrupt_prob_interaction':0.0} - ] - else: - prm_optimal = [] - - if mod.majority_on: - prm_majority = \ - [ - {'majority_approx': True, - 'fold': 1, - 'corrupt_type_warm_start':1, - 'corrupt_prob_warm_start':0.0, - 'corrupt_type_interaction':1, - 'corrupt_prob_interaction':0.0} - ] - else: - prm_majority = [] - - - #for p in params_common: - # print p - #for p in params_baseline: - # print p - #print len(prm_com_ws_gt), len(prm_algs_ws_gt) - #print len(prm_com_inter_gt), len(prm_algs_inter_gt) - #print len(prm_com) - #print len(prm_baseline) - #print len(prm_algs) - #raw_input('..') - - # Common factor in all 3 groups: dataset - prm_all = param_cartesian_multi( - [prm_dataset, - - prm_baseline + prm_algs + prm_optimal + prm_majority]) - - prm_all = sorted(prm_all, - key=lambda d: (d['dataset'], - d['corrupt_type_warm_start'], - d['corrupt_prob_warm_start'], - d['corrupt_type_interaction'], - d['corrupt_prob_interaction']) - ) - print 'The total number of VW commands to run is: ', len(prm_all) - for row in prm_all: - print row - return get_params_task(prm_all) - - -def get_params_task(params_all): - params_task = [] - for i in range(len(params_all)): - if (i % mod.num_tasks == mod.task_id): - params_task.append(params_all[i]) - return params_task - -def get_num_lines(dataset_name): - num_lines = subprocess.check_output(('zcat ' + dataset_name + ' | wc -l'), shell=True) - return int(num_lines) - -def get_num_classes(ds): - # could be a bug for including the prefix.. 
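# e.g. for 'ds_160_5.vw.gz' the basename splits to ['ds', '160', '5'], so
# did == 160 and n_actions == 5; the [1:] slice skips the 'ds' prefix.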
- did, n_actions = os.path.basename(ds).split('.')[0].split('_')[1:] - did, n_actions = int(did), int(n_actions) - return n_actions - -def get_majority_class(dataset_name): - maj_class_str = subprocess.check_output(('zcat '+ dataset_name +' | cut -d \' \' -f 1 | sort | uniq -c | sort -r -n | head -1 | xargs '), shell=True) - maj_size, maj_class = maj_class_str.split() - return int(maj_size), int(maj_class) - -def avg_error(mod): - return vw_output_extract(mod, 'average loss') - -def actual_var(mod): - return vw_output_extract(mod, 'Measured average variance') - -def ideal_var(mod): - return vw_output_extract(mod, 'Ideal average variance') - -def vw_output_extract(mod, pattern): - #print mod.vw_output_filename - vw_output = open(mod.vw_output_filename, 'r') - vw_output_text = vw_output.read() - #print vw_output_text - #rgx_pattern = '^'+pattern+' = (.*)(|\sh)\n.*$' - #print rgx_pattern - rgx_pattern = '.*'+pattern+' = ([\d]*.[\d]*)( h|)\n.*' - rgx = re.compile(rgx_pattern, flags=re.M) - - errs = rgx.findall(vw_output_text) - if not errs: - avge = 0 - else: - #print errs - avge = float(errs[0][0]) - - vw_output.close() - return avge - -def write_summary_header(mod): - summary_file = open(mod.summary_file_name, 'w') - summary_header = intersperse(mod.result_template.keys(), '\t') - summary_file.write(summary_header+'\n') - summary_file.close() - -def main_loop(mod): - mod.summary_file_name = mod.results_path+str(mod.task_id)+'of'+str(mod.num_tasks)+'.sum' - - # The reason for using a list is that, we would like to keep the order of the - #columns in this way. Maybe use ordered dictionary in the future? - mod.result_template_list = [ - ('fold', 'fd', 0), - ('data', 'dt', ''), - ('dataset', 'ds', ''), - ('num_classes','nc', 0), - ('total_size', 'ts', 0), - ('majority_size','ms', 0), - ('corrupt_type_warm_start', 'ctws', 0), - ('corrupt_prob_warm_start', 'cpws', 0.0), - ('corrupt_type_interaction', 'cti', 0), - ('corrupt_prob_interaction', 'cpi', 0.0), - ('adf_on', 'ao', True), - ('warm_start_multiplier','wsm',1), - ('warm_start', 'ws', 0), - ('warm_start_type', 'wst', 0), - ('interaction', 'bs', 0), - ('inter_ws_size_ratio', 'iwsr', 0), - ('cb_type', 'cbt', 'mtr'), - ('validation_method', 'vm', 0), - ('weighting_scheme', 'wts', 0), - ('lambda_scheme', 'ls', 0), - ('choices_lambda', 'cl', 0), - ('warm_start_update', 'wsu', True), - ('interaction_update', 'iu', True), - ('learning_rate', 'lr', 0.0), - ('optimal_approx', 'oa', False), - ('majority_approx', 'ma', False), - ('avg_error', 'ae', 0.0), - ('actual_variance', 'av', 0.0), - ('ideal_variance', 'iv', 0.0), - ('last_lambda', 'll', 0.0), - ('epsilon', 'eps', 0.0), - ('loss0', 'l0', 0.0), - ('loss1', 'l1', 0.0), - ] - - num_cols = len(mod.result_template_list) - #mod.result_header_list = [ mod.result_template_list[i][0] for i in range(num_cols) ] - mod.result_template = OrderedDict([ (mod.result_template_list[i][0], mod.result_template_list[i][2]) for i in range(num_cols) ]) - mod.simplified_keymap = OrderedDict([ (mod.result_template_list[i][0], mod.result_template_list[i][1]) for i in range(num_cols) ]) - - write_summary_header(mod) - for mod.param in mod.config_task: - #if (mod.param['no_interaction_update'] is True): - # raw_input(' ') - run_single_expt(mod) - -def create_dir(dir): - if not os.path.exists(dir): - os.makedirs(dir) - import stat - os.chmod(dir, os.stat(dir).st_mode | stat.S_IWOTH) - -def remove_suffix(filename): - return os.path.basename(filename).split('.')[0] - -if __name__ == '__main__': - parser = 
argparse.ArgumentParser(description='vw job') - parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') - parser.add_argument('num_tasks', type=int) - parser.add_argument('--results_dir', default='../../../figs/') - parser.add_argument('--ds_dir', default='../../../vwshuffled/') - parser.add_argument('--num_learning_rates', type=int, default=1) - parser.add_argument('--num_datasets', type=int, default=-1) - parser.add_argument('--num_folds', type=int, default=1) - - args = parser.parse_args() - flag_dir = args.results_dir + 'flag/' - - mod = model() - mod.num_tasks = args.num_tasks - mod.task_id = args.task_id - mod.vw_path = '../vowpalwabbit/vw' - mod.ds_path = args.ds_dir - mod.results_path = args.results_dir - print 'reading dataset files..' - #TODO: this line specifically for multiple folds - #Need a systematic way to detect subfolder names - mod.dss = ds_files(mod.ds_path + '1/') - - print len(mod.dss) - - if args.num_datasets == -1 or args.num_datasets > len(mod.dss): - pass - else: - mod.dss = mod.dss[:args.num_datasets] - - #print mod.dss - - if args.task_id == 0: - # Compile vw in one of the subfolders - #process = subprocess.Popen('make -C .. clean; make -C ..', shell=True, stdout=f, stderr=f) - #subprocess.check_call(cmd, shell=True) - #process.wait() - - # To avoid race condition of writing to the same file at the same time - create_dir(args.results_dir) - - # This is specifically designed for teamscratch, as accessing a folder - # with a huge number of result files can be super slow. Hence, we create a - # subfolder for each dataset to alleviate this. - for ds in mod.dss: - ds_no_suffix = remove_suffix(ds) - create_dir(args.results_dir + ds_no_suffix + '/') - - create_dir(flag_dir) - else: - # may still have the potential of race condition on those subfolders (if - # we have a lot of datasets to run and the datasets are small) - while not os.path.exists(flag_dir): - time.sleep(1) - - if args.num_learning_rates <= 0: - mod.learning_rates = [gen_lr(0)] - else: - mod.learning_rates = [gen_lr(i) for i in range(args.num_learning_rates)] - #mod.folds = range(1,11) - mod.folds = range(1, args.num_folds+1) - - #mod.dss = ["ds_223_63.vw.gz"] - #mod.dss = mod.dss[:5] - - print 'generating tasks..' 
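# For reference, the first nine rungs of the gen_lr ladder defined above
# (n = 0..8) expand outward from 0.1/0.3 by factors of 10:
#   0.1, 0.03, 0.3, 0.01, 1.0, 0.003, 3.0, 0.001, 10.0
# so --num_learning_rates 9 sweeps learning rates spanning [0.001, 10.0].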
- # here, we are generating the task specific parameter settings - # by first generate all parameter setting and pick every num_tasks of them - mod.config_task = params_per_task(mod) - print 'task ' + str(mod.task_id) + ' of ' + str(mod.num_tasks) + ':' - print len(mod.config_task) - - #print mod.ds_task - # we only need to vary the warm start fraction, and there is no need to vary the bandit fraction, - # as each run of vw automatically accumulates the bandit dataset - main_loop(mod) diff --git a/scripts/run_vw_job.py b/scripts/run_vw_job.py deleted file mode 100644 index d2551819f4e..00000000000 --- a/scripts/run_vw_job.py +++ /dev/null @@ -1,205 +0,0 @@ -import argparse -import os -import re -import subprocess -import sys -import time - -USE_ADF = True -USE_CS = False - -VW = '/scratch/clear/abietti/.local/bin/vw' -if USE_CS: - VW_DS_DIR = '/scratch/clear/abietti/cb_eval/vwshuffled_cs/' - DIR_PATTERN = '/scratch/clear/abietti/cb_eval/res_cs/cbresults_{}/' -else: - VW_DS_DIR = '/scratch/clear/abietti/cb_eval/vwshuffled/' - DIR_PATTERN = '/scratch/clear/abietti/cb_eval/res/cbresults_{}/' -# VW_DS_DIR = '/bscratch/b-albiet/vwshuffled/' -# DIR_PATTERN = '/bscratch/b-albiet/cbresults_{}/' - -rgx = re.compile('^average loss = (.*)$', flags=re.M) - - -def expand_cover(policies): - algs = [] - for psi in [0, 0.01, 0.1, 1.0]: - algs.append(('cover', policies, 'psi', psi)) - algs.append(('cover', policies, 'psi', psi, 'nounif', None)) - # algs.append(('cover', policies, 'psi', psi, 'nounifagree', None, 'agree_mellowness', 0.1)) - # algs.append(('cover', policies, 'psi', psi, 'nounifagree', None, 'agree_mellowness', 0.01)) - return algs - -params_old = { - 'alg': [ - ('supervised',), - ('epsilon', 0), - ('epsilon', 0.02), - ('epsilon', 0.05), - ('epsilon', 0.1), - ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1.0), - ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-2), - ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-4), - ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-6), - # agree - ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1.0), - ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-2), - ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-4), - ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-6), - ('bag', 2), - ('bag', 4), - ('bag', 8), - ('bag', 16), - ('bag', 2, 'greedify', None), - ('bag', 4, 'greedify', None), - ('bag', 8, 'greedify', None), - ('bag', 16, 'greedify', None), - ] + expand_cover(1) + expand_cover(4) + expand_cover(8) + expand_cover(16), - 'learning_rate': [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0], - 'cb_type': ['dr', 'ips', 'mtr'], - } - -params = { - 'alg': [ - ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1.0), - ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-2), - ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-4), - ('epsilon', 0.05, 'nounifagree', None, 'agree_mellowness', 1e-6), - ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1.0), - ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-2), - ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-4), - ('epsilon', 1, 'nounifagree', None, 'agree_mellowness', 1e-6), - ], - 'learning_rate': [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0], - 'cb_type': ['dr', 'ips', 'mtr'], - } - -extra_flags = None -# extra_flags = ['--loss0', '9', '--loss1', '10', '--baseline'] - -def param_grid(): - grid = [{}] - for k in params: - new_grid = [] - for g in grid: - for 
param in params[k]: - gg = g.copy() - gg[k] = param - new_grid.append(gg) - grid = new_grid - - return sorted(grid) - - -def ds_files(): - import glob - return sorted(glob.glob(os.path.join(VW_DS_DIR, '*.vw.gz'))) - - -def get_task_name(ds, params): - did, n_actions = os.path.basename(ds).split('.')[0].split('_')[1:] - did, n_actions = int(did), int(n_actions) - - task_name = 'ds:{}|na:{}'.format(did, n_actions) - if len(params) > 1: - task_name += '|' + '|'.join('{}:{}'.format(k, v) for k, v in sorted(params.items()) if k != 'alg') - task_name += '|' + ':'.join([str(p) for p in params['alg'] if p is not None]) - return task_name - - -def process(ds, params, results_dir): - print 'processing', ds, params - did, n_actions = os.path.basename(ds).split('.')[0].split('_')[1:] - did, n_actions = int(did), int(n_actions) - - cmd = [VW, ds, '-b', '24'] - for k, v in params.iteritems(): - if k == 'alg': - if v[0] == 'supervised': - cmd += ['--csoaa' if USE_CS else '--oaa', str(n_actions)] - else: - cmd += ['--cbify', str(n_actions)] - if USE_CS: - cmd += ['--cbify_cs'] - if extra_flags: - cmd += extra_flags - if USE_ADF: - cmd += ['--cb_explore_adf'] - assert len(v) % 2 == 0, 'params should be in pairs of (option, value)' - for i in range(len(v) / 2): - cmd += ['--{}'.format(v[2 * i])] - if v[2 * i + 1] is not None: - cmd += [str(v[2 * i + 1])] - else: - if params['alg'][0] == 'supervised' and k == 'cb_type': - pass - else: - cmd += ['--{}'.format(k), str(v)] - - print 'running', cmd - t = time.time() - output = subprocess.check_output(cmd, stderr=subprocess.STDOUT) - sys.stderr.write('\n\n{}, {}, time: {}, output:\n'.format(ds, params, time.time() - t)) - sys.stderr.write(output) - pv_loss = float(rgx.findall(output)[0]) - print 'elapsed time:', time.time() - t, 'pv loss:', pv_loss - - return pv_loss - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='vw job') - parser.add_argument('task_id', type=int, help='task ID, between 0 and num_tasks - 1') - parser.add_argument('num_tasks', type=int) - parser.add_argument('--task_offset', type=int, default=0, - help='offset for task_id in output filenames') - parser.add_argument('--results_dir', default=DIR_PATTERN.format('agree01')) - parser.add_argument('--name', default=None) - parser.add_argument('--test', action='store_true') - parser.add_argument('--flags', default=None, help='extra flags for cb algorithms') - args = parser.parse_args() - - if args.name is not None: - args.results_dir = DIR_PATTERN.format(args.name) - - if args.flags is not None: - extra_flags = args.flags.split() - grid = param_grid() - dss = ds_files() - tot_jobs = len(grid) * len(dss) - - if args.task_id == 0: - if not os.path.exists(args.results_dir): - os.makedirs(args.results_dir) - import stat - os.chmod(args.results_dir, os.stat(args.results_dir).st_mode | stat.S_IWOTH) - else: - while not os.path.exists(args.results_dir): - time.sleep(1) - if not args.test: - fname = os.path.join(args.results_dir, 'loss{}.txt'.format(args.task_offset + args.task_id)) - done_tasks = set() - if os.path.exists(fname): - done_tasks = set([line.split()[0] for line in open(fname).readlines()]) - loss_file = open(fname, 'a') - idx = args.task_id - while idx < tot_jobs: - ds = dss[idx / len(grid)] - params = grid[idx % len(grid)] - if args.test: - print ds, params - else: - task_name = get_task_name(ds, params) - if task_name not in done_tasks: - try: - pv_loss = process(ds, params, args.results_dir) - loss_file.write('{} {}\n'.format(task_name, pv_loss)) - 
loss_file.flush() - os.fsync(loss_file.fileno()) - except subprocess.CalledProcessError: - sys.stderr.write('\nERROR: TASK FAILED {} {}\n\n'.format(ds, params)) - print 'ERROR: TASK FAILED', ds, params - idx += args.num_tasks - - if not args.test: - loss_file.close() diff --git a/scripts/shuffle.sh b/scripts/shuffle.sh deleted file mode 100644 index 69aacfc3ee5..00000000000 --- a/scripts/shuffle.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -suffix=".gz" - -for filename in ./*.vw.gz; do - vw_name=$(echo "$filename" | sed -e "s/$suffix$//") - echo $vw_name - zcat $filename | shuf > ../vwshuffled/$vw_name - gzip ../vwshuffled/$vw_name -done From 5561a123a4dfd5a9e8a2735464b7f6a0b2a761ff Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 11:10:49 -0400 Subject: [PATCH 103/127] removed spurious changes --- Makefile | 2 +- vowpalwabbit/cb_adf.cc | 2 +- vowpalwabbit/cb_explore.cc | 14 ------------ vowpalwabbit/cb_explore_adf.cc | 12 +--------- vowpalwabbit/cost_sensitive.cc | 1 - vowpalwabbit/csoaa.cc | 8 +------ vowpalwabbit/example.h | 1 - vowpalwabbit/gd.cc | 1 - vowpalwabbit/gen_cs_example.cc | 42 +++++++++++++++------------------- vowpalwabbit/gen_cs_example.h | 6 ++--- vowpalwabbit/learner.h | 26 ++++----------------- 11 files changed, 30 insertions(+), 85 deletions(-) diff --git a/Makefile b/Makefile index 5f0d7c3c69a..fe59f2f34fb 100644 --- a/Makefile +++ b/Makefile @@ -86,7 +86,7 @@ FLAGS = -std=c++11 $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) $(OPTIM_FLAGS) -D_ #CXX = g++ # for valgrind / gdb debugging -FLAGS = -std=c++11 $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) $(JSON_INCLUDE) -g -O0 -fPIC +#FLAGS = -std=c++11 $(CFLAGS) $(LDFLAGS) $(ARCH) $(WARN_FLAGS) -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) $(JSON_INCLUDE) -g -O0 -fPIC # for valgrind profiling: run 'valgrind --tool=callgrind PROGRAM' then 'callgrind_annotate --tree=both --inclusive=yes' #FLAGS = -std=c++11 $(CFLAGS) $(LDFLAGS) -Wall $(ARCH) -ffast-math -D_FILE_OFFSET_BITS=64 $(BOOST_INCLUDE) $(JSON_INCLUDE) -g -fomit-frame-pointer -ffast-math -fno-strict-aliasing -fPIC diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index 53a8bb5a4db..72b8c0699b5 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc @@ -349,7 +349,7 @@ base_learner* cb_adf_setup(arguments& arg) if (arg.new_options("Contextual Bandit with Action Dependent Features") .critical("cb_adf", "Do Contextual Bandit learning with multiline action dependent features.") .keep(ld->rank_all, "rank_all", "Return actions sorted by score order") - (ld->no_predict, "no_predict", "Do not do a prediction when training") + (ld->no_predict, "no_predict", "Do not do a prediction when training") .keep("cb_type", type_string, (string)"ips", "contextual bandit method to use in {ips,dm,dr, mtr}").missing()) return nullptr; diff --git a/vowpalwabbit/cb_explore.cc b/vowpalwabbit/cb_explore.cc index cf04c811ecb..5cb58f303c4 100644 --- a/vowpalwabbit/cb_explore.cc +++ b/vowpalwabbit/cb_explore.cc @@ -22,8 +22,6 @@ struct cb_explore cb_to_cs cbcs; v_array preds; v_array cover_probs; - v_array cost_lambda; - v_array lambdas; CB::label cb_label; COST_SENSITIVE::label cs_label; @@ -36,8 +34,6 @@ struct cb_explore size_t bag_size; size_t cover_size; float psi; - size_t lambda_size; - float n_2; size_t counter; @@ -192,21 +188,11 @@ void predict_or_learn_cover(cb_explore& data, single_learner& base, example& ec) data.cs_label.costs.clear(); float norm = min_prob * num_actions; ec.l.cb = data.cb_label; - 
data.cbcs.known_cost = get_observed_cost(data.cb_label); - //cout<<"cbcs's cb type is "<(data.cbcs, ec, data.cb_label, data.cs_label); for(uint32_t i = 0; i < num_actions; i++) probabilities[i] = 0; - //for (size_t i = 0; i < data.cbcs.num_actions; i++) - // cout<<"action "< void predict_or_learn_greedy(cb_explore_adf& data, multi_learner& base, multi_ex& examples) { - //cout<<"data offset = "<(base, examples, data.offset); else multiline_learn_or_predict(base, examples, data.offset); - //cout<<"example feature offset after = "<ft_offset<pred.a_s; uint32_t num_actions = (uint32_t)preds.size(); @@ -789,12 +784,7 @@ base_learner* cb_explore_adf_setup(arguments& arg) data->explore_type = REGCB; else { - if (!arg.vm.count("epsilon")) - { - data->epsilon = 0.05f; - //a hacky way of passing the implicit epsilon value to cbify - arg.vm.insert(std::make_pair("epsilon", boost::program_options::variable_value(data->epsilon, false))); - } + if (!arg.vm.count("epsilon")) data->epsilon = 0.05f; data->explore_type = EPS_GREEDY; } diff --git a/vowpalwabbit/cost_sensitive.cc b/vowpalwabbit/cost_sensitive.cc index 03065517303..80e8e5c4438 100644 --- a/vowpalwabbit/cost_sensitive.cc +++ b/vowpalwabbit/cost_sensitive.cc @@ -312,7 +312,6 @@ void finish_example(vw& all, example& ec) bool example_is_test(example& ec) { v_array costs = ec.l.cs.costs; - //cout << "is_test " << costs.size() << endl; if (costs.size() == 0) return true; for (size_t j=0; jweight; - //cout << "weight = " << ec->weight << endl; if (!data.treat_as_classifier) // treat like regression simple_label.label = costs[0].x; else // treat like classification { - //cout << "here" << endl; if (costs[0].x <= min_cost) { simple_label.label = -1.; diff --git a/vowpalwabbit/example.h b/vowpalwabbit/example.h index 1a174cc2902..b9dd6388021 100644 --- a/vowpalwabbit/example.h +++ b/vowpalwabbit/example.h @@ -107,7 +107,6 @@ void free_flatten_example(flat_example* fec); inline int example_is_newline(example& ec) { // if only index is constant namespace or no index - // std::cout << "call e_i_n " << ec.indices.size() << " " << ec.tag.size() << std::endl; if (ec.tag.size() > 0) return false; return ((ec.indices.size() == 0) || ((ec.indices.size() == 1) && diff --git a/vowpalwabbit/gd.cc b/vowpalwabbit/gd.cc index 6bfba8e21e7..f77ae3be9e5 100644 --- a/vowpalwabbit/gd.cc +++ b/vowpalwabbit/gd.cc @@ -651,7 +651,6 @@ void learn(gd& g, base_learner& base, example& ec) assert(ec.l.simple.label != FLT_MAX); assert(ec.weight > 0.); g.predict(g,base,ec); - //cout << "iw = " << ec.weight << endl; update(g,base,ec); } diff --git a/vowpalwabbit/gen_cs_example.cc b/vowpalwabbit/gen_cs_example.cc index f5ade2627cc..8fe85b46696 100644 --- a/vowpalwabbit/gen_cs_example.cc +++ b/vowpalwabbit/gen_cs_example.cc @@ -49,7 +49,7 @@ void gen_cs_example_ips(multi_ex& examples, COST_SENSITIVE::label& cs_labels) for (uint32_t i = 0; i < examples.size(); i++) { CB::label ld = examples[i]->l.cb; - //std::cout << "example weight = " << examples[i]->weight << std::endl; + COST_SENSITIVE::wclass wc = {0.,i,0.,0.}; if (shared && i > 0) wc.class_index = (uint32_t)i-1; @@ -112,35 +112,31 @@ void gen_cs_test_example(multi_ex& examples, COST_SENSITIVE::label& cs_labels) //single line version void gen_cs_example_ips(cb_to_cs& c, CB::label& ld, COST_SENSITIVE::label& cs_ld) { - //std::cout<<"-------"<action<action) - { - wc.x = c.known_cost->cost / safe_probability(c.known_cost->probability); //use importance weighted cost for observed action, 0 otherwise - //ips can be thought as the doubly 
robust method with a fixed regressor that predicts 0 costs for everything - //update the loss of this regressor - c.nb_ex_regressors++; - c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost)*(c.known_cost->cost) - c.avg_loss_regressors); - c.last_pred_reg = 0; - c.last_correct_cost = c.known_cost->cost; - } - - cs_ld.costs.push_back(wc); - } + COST_SENSITIVE::wclass wc = {0.,i,0.,0.}; + if (c.known_cost != nullptr && i == c.known_cost->action) + { + wc.x = c.known_cost->cost / safe_probability(c.known_cost->probability); //use importance weighted cost for observed action, 0 otherwise + //ips can be thought as the doubly robust method with a fixed regressor that predicts 0 costs for everything + //update the loss of this regressor + c.nb_ex_regressors++; + c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost)*(c.known_cost->cost) - c.avg_loss_regressors); + c.last_pred_reg = 0; + c.last_correct_cost = c.known_cost->cost; + } + + cs_ld.costs.push_back(wc); } + } else //this is an example where we can only perform a subset of the actions { - //std::cout<<"---not typical----"<(c.scorer, c.known_cost, ec, action, c.num_actions); - //std::cout<<"wc.x = "<ft_offset = "<ft_offset<<" i = "<ft_offset += static_cast(increment * i); - } } inline void decrement_offset(example& ex, const size_t increment, const size_t i) -{ - //std::cout<<"in decrement_offset singleex: increment = "<ft_offset = "<ft_offset<<" i = "<ft_offset >= increment * i); + { assert(ec->ft_offset >= increment * i); ec->ft_offset -= static_cast(increment * i); } } @@ -453,9 +442,7 @@ template struct learner void multiline_learn_or_predict(multi_learner& base, multi_ex& examples, const uint64_t offset, const uint32_t id = 0) { std::vector saved_offsets; for (auto ec : examples) - { - //std::cout<<"saved offsets before = "<ft_offset<ft_offset); + { saved_offsets.push_back(ec->ft_offset); ec->ft_offset = offset; } @@ -465,9 +452,6 @@ template struct learner base.predict(examples, id); for (size_t i = 0; i < examples.size(); i++) - { examples[i]->ft_offset = saved_offsets[i]; - //std::cout<<"saved offsets after = "< Date: Mon, 6 Aug 2018 11:20:22 -0400 Subject: [PATCH 104/127] removed spurious changes --- vowpalwabbit/gen_cs_example.cc | 37 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/vowpalwabbit/gen_cs_example.cc b/vowpalwabbit/gen_cs_example.cc index 8fe85b46696..97eb548269c 100644 --- a/vowpalwabbit/gen_cs_example.cc +++ b/vowpalwabbit/gen_cs_example.cc @@ -115,26 +115,27 @@ void gen_cs_example_ips(cb_to_cs& c, CB::label& ld, COST_SENSITIVE::label& cs_ld //this implements the inverse propensity score method, where cost are importance weighted by the probability of the chosen action //generate cost-sensitive example cs_ld.costs.clear(); - if (ld.costs.size() == 1 || ld.costs.size() == 0) //this is a typical example where we can perform all actions - { - //in this case generate cost-sensitive example with all actions - for (uint32_t i = 1; i <= c.num_actions; i++) + if (ld.costs.size() == 0 || (ld.costs.size() == 1 && ld.costs[0].cost != FLT_MAX)) + //this is a typical example where we can perform all actions { - COST_SENSITIVE::wclass wc = {0.,i,0.,0.}; - if (c.known_cost != nullptr && i == c.known_cost->action) - { - wc.x = c.known_cost->cost / safe_probability(c.known_cost->probability); //use importance weighted cost for observed action, 0 otherwise - //ips can be thought as the doubly robust method with a fixed regressor that predicts 
0 costs for everything - //update the loss of this regressor - c.nb_ex_regressors++; - c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost)*(c.known_cost->cost) - c.avg_loss_regressors); - c.last_pred_reg = 0; - c.last_correct_cost = c.known_cost->cost; - } - - cs_ld.costs.push_back(wc); + //in this case generate cost-sensitive example with all actions + for (uint32_t i = 1; i <= c.num_actions; i++) + { + COST_SENSITIVE::wclass wc = {0.,i,0.,0.}; + if (c.known_cost != nullptr && i == c.known_cost->action) + { + wc.x = c.known_cost->cost / safe_probability(c.known_cost->probability); //use importance weighted cost for observed action, 0 otherwise + //ips can be thought as the doubly robust method with a fixed regressor that predicts 0 costs for everything + //update the loss of this regressor + c.nb_ex_regressors++; + c.avg_loss_regressors += (1.0f / c.nb_ex_regressors)*((c.known_cost->cost)*(c.known_cost->cost) - c.avg_loss_regressors); + c.last_pred_reg = 0; + c.last_correct_cost = c.known_cost->cost; + } + + cs_ld.costs.push_back(wc); + } } - } else //this is an example where we can only perform a subset of the actions { //in this case generate cost-sensitive example with only allowed actions From d9573e124cbac7aa5c7a2e27274b59ccb17fc4b2 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 11:45:56 -0400 Subject: [PATCH 105/127] undoing the weight scaling by 1/k in mtr --- vowpalwabbit/cb_adf.cc | 8 +++----- vowpalwabbit/gen_cs_example.h | 4 ---- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index 72b8c0699b5..a5291eb24a2 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc @@ -113,9 +113,7 @@ void learn_MTR(cb_adf& mydata, multi_learner& base, multi_ex& examples) gen_cs_example_mtr(mydata.gen_cs, examples, mydata.cs_labels); uint32_t nf = (uint32_t)examples[mydata.gen_cs.mtr_example]->num_features; float old_weight = examples[mydata.gen_cs.mtr_example]->weight; - - //adjust the importance weight to scale by a factor of 1/num_actions (the last term) - examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / mydata.gen_cs.num_actions); + examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum); GEN_CS::call_cs_ldf(base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset); examples[mydata.gen_cs.mtr_example]->num_features = nf; examples[mydata.gen_cs.mtr_example]->weight = old_weight; @@ -354,7 +352,6 @@ base_learner* cb_adf_setup(arguments& arg) return nullptr; ld->all = arg.all; - ld->gen_cs.num_actions = arg.vm["cbify"].as(); // number of weight vectors needed size_t problem_multiplier = 1;//default for IPS @@ -386,9 +383,10 @@ base_learner* cb_adf_setup(arguments& arg) || ld->rank_all || arg.vm.count("csoaa_rank") == 0) { if (count(arg.args.begin(), arg.args.end(), "--csoaa_ldf") == 0) + { arg.args.push_back("--csoaa_ldf"); - if (count(arg.args.begin(), arg.args.end(), "multiline") == 0) arg.args.push_back("multiline"); + } if (count(arg.args.begin(), arg.args.end(), "--csoaa_rank") == 0) arg.args.push_back("--csoaa_rank"); } diff --git a/vowpalwabbit/gen_cs_example.h b/vowpalwabbit/gen_cs_example.h index ac555a8c0f2..f60bf9821cf 100644 --- 
a/vowpalwabbit/gen_cs_example.h +++ b/vowpalwabbit/gen_cs_example.h @@ -40,10 +40,6 @@ struct cb_to_cs_adf COST_SENSITIVE::label pred_scores; CB::cb_class known_cost; LEARNER::single_learner* scorer; - - //for scaling the weights in MTR - uint32_t num_actions; - }; CB::cb_class* get_observed_cost(CB::label& ld); From 01bf93e744050d80790627af137813526f8e1ab5 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 12:01:23 -0400 Subject: [PATCH 106/127] updated tests --- test/RunTests | 18 +++++++++--------- test/train-sets/ref/cbify_ws_cyc.stderr | 8 ++++---- test/train-sets/ref/cbify_ws_maj.stderr | 6 +++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/test/RunTests b/test/RunTests index 5f51bb68642..fa4708f092c 100755 --- a/test/RunTests +++ b/test/RunTests @@ -1635,36 +1635,36 @@ echo "1 | feature:1" | {VW} -a --initial_weight 0.1 --initial_t 0.3 # Test 175 cbify warm start {VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 -d train-sets/multiclass - /train-sets/ref/cbify_ws.stderr + train-sets/ref/cbify_ws.stderr # Test 176 cbify warm start with lambda set containing 0/1 {VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --lambda_scheme 2 -d train-sets/multiclass - /train-sets/ref/cbify_ws_lambda_zeroone.stderr + train-sets/ref/cbify_ws_lambda_zeroone.stderr # Test 177 cbify warm start with warm start update turned off {VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_update false -d train-sets/multiclass - /train-sets/ref/cbify_ws_no_ws_upd.stderr + train-sets/ref/cbify_ws_no_ws_upd.stderr # Test 178 cbify warm start with interaction update turned off {VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --interaction_update false -d train-sets/multiclass - /train-sets/ref/cbify_ws_no_int_upd.stderr + train-sets/ref/cbify_ws_no_int_upd.stderr # Test 179 cbify warm start with bandit warm start type (Sim-Bandit) {VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_type 2 -d train-sets/multiclass - /train-sets/ref/cbify_ws_simbandit.stderr + train-sets/ref/cbify_ws_simbandit.stderr # Test 180 cbify warm start with UAR supervised corruption {VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 1 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass - /train-sets/ref/cbify_ws_uar.stderr + train-sets/ref/cbify_ws_uar.stderr # Test 181 cbify warm start with CYC supervised corruption {VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 2 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass - /train-sets/ref/cbify_ws_cyc.stderr + train-sets/ref/cbify_ws_cyc.stderr # Test 182 cbify warm start with MAJ supervised corruption {VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 3 --corrupt_prob_warm_start 0.5 --overwrite_label 1 -d train-sets/multiclass - /train-sets/ref/cbify_ws_maj.stderr + train-sets/ref/cbify_ws_maj.stderr # Test 183 cbify warm start with warm start distribution being the ground truth {VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 
7 --choices_lambda 8 --corrupt_type_interaction 2 --corrupt_prob_interaction 0.5 --weighting_scheme 2 --validation_method 2 -d train-sets/multiclass - /train-sets/ref/cbify_ws_wsgt.stderr + train-sets/ref/cbify_ws_wsgt.stderr diff --git a/test/train-sets/ref/cbify_ws_cyc.stderr b/test/train-sets/ref/cbify_ws_cyc.stderr index 6d05ba5a0db..a1affe4ec96 100644 --- a/test/train-sets/ref/cbify_ws_cyc.stderr +++ b/test/train-sets/ref/cbify_ws_cyc.stderr @@ -7,13 +7,13 @@ Reading datafile = train-sets/multiclass num sources = 1 average since example example current current current loss last counter weight label predict features -1.000000 1.000000 4 1.0 4 3 2 -1.000000 1.000000 5 2.0 5 3 2 -1.000000 1.000000 7 4.0 7 3 2 +0.000000 0.000000 4 1.0 4 4 2 +0.500000 1.000000 5 2.0 5 4 2 +0.750000 1.000000 7 4.0 7 3 2 finished run number of examples = 10 weighted example sum = 7.000000 weighted label sum = 0.000000 -average loss = 1.000000 +average loss = 0.857143 total feature number = 20 diff --git a/test/train-sets/ref/cbify_ws_maj.stderr b/test/train-sets/ref/cbify_ws_maj.stderr index 6d05ba5a0db..2a12135dfa0 100644 --- a/test/train-sets/ref/cbify_ws_maj.stderr +++ b/test/train-sets/ref/cbify_ws_maj.stderr @@ -7,9 +7,9 @@ Reading datafile = train-sets/multiclass num sources = 1 average since example example current current current loss last counter weight label predict features -1.000000 1.000000 4 1.0 4 3 2 -1.000000 1.000000 5 2.0 5 3 2 -1.000000 1.000000 7 4.0 7 3 2 +1.000000 1.000000 4 1.0 4 1 2 +1.000000 1.000000 5 2.0 5 1 2 +1.000000 1.000000 7 4.0 7 1 2 finished run number of examples = 10 From 83da642f7203bae563f7de4eb5a3d6aa02141022 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 14:25:24 -0400 Subject: [PATCH 107/127] added warm_cb as a separate file --- test/RunTests | 20 +- vowpalwabbit/Makefile.am | 2 +- vowpalwabbit/cbify.cc | 609 ++------------------------- vowpalwabbit/parse_args.cc | 2 + vowpalwabbit/warm_cb.cc | 831 +++++++++++++++++++++++++++++++++++++ vowpalwabbit/warm_cb.h | 1 + 6 files changed, 872 insertions(+), 593 deletions(-) create mode 100644 vowpalwabbit/warm_cb.cc create mode 100644 vowpalwabbit/warm_cb.h diff --git a/test/RunTests b/test/RunTests index fa4708f092c..833b295b9ab 100755 --- a/test/RunTests +++ b/test/RunTests @@ -1633,38 +1633,38 @@ echo "1 | feature:1" | {VW} -a --initial_weight 0.1 --initial_t 0.3 {VW} --cbify 10 --cb_explore_adf --cb_type mtr --regcbopt --mellowness 0.01 -d train-sets/multiclass train-sets/ref/cbify_regcbopt.stderr -# Test 175 cbify warm start -{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 -d train-sets/multiclass +# Test 175 warm_cb warm start +{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 -d train-sets/multiclass train-sets/ref/cbify_ws.stderr # Test 176 cbify warm start with lambda set containing 0/1 -{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --lambda_scheme 2 -d train-sets/multiclass +{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --lambda_scheme 2 -d train-sets/multiclass train-sets/ref/cbify_ws_lambda_zeroone.stderr # Test 177 cbify warm start with warm start update turned off -{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_update false -d train-sets/multiclass +{VW} 
--warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_update false -d train-sets/multiclass train-sets/ref/cbify_ws_no_ws_upd.stderr # Test 178 cbify warm start with interaction update turned off -{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --interaction_update false -d train-sets/multiclass +{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --interaction_update false -d train-sets/multiclass train-sets/ref/cbify_ws_no_int_upd.stderr # Test 179 cbify warm start with bandit warm start type (Sim-Bandit) -{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_type 2 -d train-sets/multiclass +{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_type 2 -d train-sets/multiclass train-sets/ref/cbify_ws_simbandit.stderr # Test 180 cbify warm start with UAR supervised corruption -{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 1 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass +{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 1 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass train-sets/ref/cbify_ws_uar.stderr # Test 181 cbify warm start with CYC supervised corruption -{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 2 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass +{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 2 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass train-sets/ref/cbify_ws_cyc.stderr # Test 182 cbify warm start with MAJ supervised corruption -{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 3 --corrupt_prob_warm_start 0.5 --overwrite_label 1 -d train-sets/multiclass +{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 3 --corrupt_prob_warm_start 0.5 --overwrite_label 1 -d train-sets/multiclass train-sets/ref/cbify_ws_maj.stderr # Test 183 cbify warm start with warm start distribution being the ground truth -{VW} --cbify 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_interaction 2 --corrupt_prob_interaction 0.5 --weighting_scheme 2 --validation_method 2 -d train-sets/multiclass +{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_interaction 2 --corrupt_prob_interaction 0.5 --weighting_scheme 2 --validation_method 2 -d train-sets/multiclass train-sets/ref/cbify_ws_wsgt.stderr diff --git a/vowpalwabbit/Makefile.am b/vowpalwabbit/Makefile.am index 127d68887fe..cfab1395555 100644 --- a/vowpalwabbit/Makefile.am +++ b/vowpalwabbit/Makefile.am @@ -4,7 +4,7 @@ liballreduce_la_SOURCES = allreduce_sockets.cc allreduce_threads.cc vw_exception bin_PROGRAMS = vw active_interactor -libvw_la_SOURCES = parser_helper.cc global_data.cc io_buf.cc parse_regressor.cc 
parse_primitives.cc unique_sort.cc cache.cc rand48.cc no_label.cc simple_label.cc multiclass.cc oaa.cc multilabel_oaa.cc boosting.cc ect.cc marginal.cc autolink.cc binary.cc lrq.cc cost_sensitive.cc multilabel.cc label_dictionary.cc csoaa.cc cb.cc cb_adf.cc cb_algs.cc search.cc search_meta.cc search_sequencetask.cc search_dep_parser.cc search_hooktask.cc search_multiclasstask.cc search_entityrelationtask.cc search_graph.cc parse_example.cc scorer.cc network.cc parse_args.cc accumulate.cc gd.cc learner.cc mwt.cc lda_core.cc gd_mf.cc mf.cc bfgs.cc noop.cc print.cc example.cc parser.cc loss_functions.cc sender.cc nn.cc confidence.cc bs.cc cbify.cc explore_eval.cc topk.cc stagewise_poly.cc log_multi.cc recall_tree.cc active.cc active_cover.cc cs_active.cc kernel_svm.cc best_constant.cc ftrl.cc svrg.cc lrqfa.cc interact.cc comp_io.cc interactions.cc vw_exception.cc vw_validate.cc audit_regressor.cc gen_cs_example.cc cb_explore.cc action_score.cc cb_explore_adf.cc OjaNewton.cc parse_example_json.cc baseline.cc classweight.cc +libvw_la_SOURCES = parser_helper.cc global_data.cc io_buf.cc parse_regressor.cc parse_primitives.cc unique_sort.cc cache.cc rand48.cc no_label.cc simple_label.cc multiclass.cc oaa.cc multilabel_oaa.cc boosting.cc ect.cc marginal.cc autolink.cc binary.cc lrq.cc cost_sensitive.cc multilabel.cc label_dictionary.cc csoaa.cc cb.cc cb_adf.cc cb_algs.cc search.cc search_meta.cc search_sequencetask.cc search_dep_parser.cc search_hooktask.cc search_multiclasstask.cc search_entityrelationtask.cc search_graph.cc parse_example.cc scorer.cc network.cc parse_args.cc accumulate.cc gd.cc learner.cc mwt.cc lda_core.cc gd_mf.cc mf.cc bfgs.cc noop.cc print.cc example.cc parser.cc loss_functions.cc sender.cc nn.cc confidence.cc bs.cc cbify.cc explore_eval.cc topk.cc stagewise_poly.cc log_multi.cc recall_tree.cc active.cc active_cover.cc cs_active.cc kernel_svm.cc best_constant.cc ftrl.cc svrg.cc lrqfa.cc interact.cc comp_io.cc interactions.cc vw_exception.cc vw_validate.cc audit_regressor.cc gen_cs_example.cc cb_explore.cc action_score.cc cb_explore_adf.cc OjaNewton.cc parse_example_json.cc baseline.cc classweight.cc warm_cb.cc libvw_c_wrapper_la_SOURCES = vwdll.cpp diff --git a/vowpalwabbit/cbify.cc b/vowpalwabbit/cbify.cc index 1947a0734e6..d3b2752d260 100644 --- a/vowpalwabbit/cbify.cc +++ b/vowpalwabbit/cbify.cc @@ -4,7 +4,7 @@ #include "rand48.h" #include "bs.h" #include "vw.h" -#include "../explore/hash.h" +#include "hash.h" #include "explore.h" #include @@ -14,30 +14,6 @@ using namespace exploration; using namespace ACTION_SCORE; using namespace std; -#define WARM_START 1 -#define INTERACTION 2 -#define SKIP 3 - -#define SUPERVISED_WS 1 -#define BANDIT_WS 2 - -#define UAR 1 -#define CIRCULAR 2 -#define OVERWRITE 3 - -#define INTER_VALI 1 -#define WS_VALI_SPLIT 2 -#define WS_VALI_NOSPLIT 3 - -#define INSTANCE_WT 1 -#define DATASET_WT 2 - -#define ABS_CENTRAL 1 -#define ABS_CENTRAL_ZEROONE 2 -#define MINIMAX_CENTRAL 3 -#define MINIMAX_CENTRAL_ZEROONE 4 - - struct cbify; struct cbify_adf_data @@ -58,42 +34,6 @@ struct cbify cbify_adf_data adf_data; float loss0; float loss1; - - //warm start parameters - uint32_t ws_period; - uint32_t inter_period; - uint32_t choices_lambda; - bool upd_ws; - bool upd_inter; - int cor_type_ws; - float cor_prob_ws; - int cor_type_inter; - float cor_prob_inter; - int vali_method; - int wt_scheme; - int lambda_scheme; - uint32_t overwrite_label; - int ws_type; - - //auxiliary variables - uint32_t num_actions; - float epsilon; - vector lambdas; - action_scores 
a_s_adf; - vector cumulative_costs; - CB::cb_class cl_adf; - uint32_t ws_train_size; - uint32_t ws_vali_size; - vector ws_vali; - float cumu_var; - uint32_t ws_iter; - uint32_t inter_iter; - MULTICLASS::label_t mc_label; - COST_SENSITIVE::label cs_label; - COST_SENSITIVE::label* csls; - CB::label* cbls; - bool use_cs; - }; float loss(cbify& data, uint32_t label, uint32_t final_prediction) @@ -118,42 +58,12 @@ float loss_cs(cbify& data, v_array& costs, uint32_t fina template inline void delete_it(T* p) { if (p != nullptr) delete p; } -template -uint32_t find_min(vector arr) -{ - T min_val = FLT_MAX; - uint32_t argmin = 0; - - for (uint32_t i = 0; i < arr.size(); i++) - { - //cout<pred.a_s.delete_v(); @@ -161,20 +71,6 @@ void finish(cbify& data) free_it(data.adf_data.ecs[a]); } data.adf_data.ecs.~vector(); - - data.lambdas.~vector(); - data.cumulative_costs.~vector(); - - data.a_s_adf.delete_v(); - for (size_t i = 0; i < data.ws_vali.size(); ++i) - { - if (data.use_cs) - VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, *data.ws_vali[i]); - else - VW::dealloc_example(MULTICLASS::mc_label.delete_label, *data.ws_vali[i]); - free(data.ws_vali[i]); - } - data.ws_vali.~vector(); } } @@ -211,141 +107,6 @@ void copy_example_to_adf(cbify& data, example& ec) } } -float minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period, size_t interaction_period) -{ - return epsilon / (num_actions + epsilon); -} - -void setup_lambdas(cbify& data) -{ - // The lambdas are arranged in ascending order - vector& lambdas = data.lambdas; - for (uint32_t i = 0; i 0; i--) - lambdas[i-1] = lambdas[i] / 2.0; - - for (uint32_t i = mid+1; i < data.choices_lambda; i++) - lambdas[i] = 1 - (1-lambdas[i-1]) / 2.0; - - if (data.lambda_scheme == MINIMAX_CENTRAL_ZEROONE || data.lambda_scheme == ABS_CENTRAL_ZEROONE) - { - lambdas[0] = 0.0; - lambdas[data.choices_lambda-1] = 1.0; - } -} - -uint32_t generate_uar_action(cbify& data) -{ - float randf = merand48(data.all->random_state); - - for (uint32_t i = 1; i <= data.num_actions; i++) - { - if (randf <= float(i) / data.num_actions) - return i; - } - return data.num_actions; -} - -uint32_t corrupt_action(cbify& data, uint32_t action, int ec_type) -{ - float cor_prob; - uint32_t cor_type; - uint32_t cor_action; - - if (ec_type == WARM_START) - { - cor_prob = data.cor_prob_ws; - cor_type = data.cor_type_ws; - } - else - { - cor_prob = data.cor_prob_inter; - cor_type = data.cor_type_inter; - } - - float randf = merand48(data.all->random_state); - if (randf < cor_prob) - { - if (cor_type == UAR) - cor_action = generate_uar_action(data); - else if (cor_type == OVERWRITE) - cor_action = data.overwrite_label; - else - cor_action = (action % data.num_actions) + 1; - } - else - cor_action = action; - return cor_action; -} - -bool ind_update(cbify& data, int ec_type) -{ - if (ec_type == WARM_START) - return data.upd_ws; - else - return data.upd_inter; -} - -float compute_weight_multiplier(cbify& data, size_t i, int ec_type) -{ - float weight_multiplier; - float ws_train_size = data.ws_train_size; - float inter_train_size = data.inter_period; - float total_train_size = ws_train_size + inter_train_size; - float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * inter_train_size; - - //cout< void predict_or_learn(cbify& data, single_learner& base, example& ec) { @@ -393,308 +154,44 @@ void predict_or_learn(cbify& data, single_learner& base, example& ec) ec.pred.multiclass = cl.action; } -uint32_t predict_sublearner_adf(cbify& data, multi_learner& 
base, example& ec, uint32_t i) -{ - //cout<<"predict using sublearner "<< i <ft_offset; - //multiline_learn_or_predict(base, data.adf_data.ecs, offset, i); - base.predict(data.adf_data.ecs, i); - //cout<<"greedy label = " << data.adf_data.ecs[0]->pred.a_s[0].action+1 << endl; - return data.adf_data.ecs[0]->pred.a_s[0].action+1; -} - -void accumu_costs_iv_adf(cbify& data, multi_learner& base, example& ec) -{ - CB::cb_class& cl = data.cl_adf; - //IPS for approximating the cumulative costs for all lambdas - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - uint32_t action = predict_sublearner_adf(data, base, ec, i); - - if (action == cl.action) - data.cumulative_costs[i] += cl.cost / cl.probability; - //cout< -void accumu_costs_wsv_adf(cbify& data, multi_learner& base) -{ - uint32_t ws_vali_size = data.ws_vali_size; - //only update cumulative costs every warm_start_period iterations - if ( data.inter_iter >= 1 && abs( log2(data.inter_iter+1) - floor(log2(data.inter_iter+1)) ) < 1e-4 ) - { - for (uint32_t i = 0; i < data.choices_lambda; i++) - data.cumulative_costs[i] = 0; - - uint32_t num_epochs = ceil(log2(data.inter_period)); - uint32_t epoch = log2(data.inter_iter+1) - 1; - float batch_vali_size = ((float) ws_vali_size) / num_epochs; - uint32_t lb, ub; - - if (data.vali_method == WS_VALI_SPLIT) - { - lb = ceil(batch_vali_size * epoch); - ub = ceil(batch_vali_size * (epoch + 1)); - } - else - { - lb = 0; - ub = ws_vali_size; - } - //cout<<"validation at iteration "<l.cs.costs, pred_label); - else - data.cumulative_costs[i] += loss(data, ec_vali->l.multi.label, pred_label); - - //cout< -void add_to_vali(cbify& data, example& ec) -{ - //TODO: set the first parameter properly - example* ec_copy = VW::alloc_examples(sizeof(polylabel), 1); - - if (use_cs) - VW::copy_example_data(false, ec_copy, &ec, 0, COST_SENSITIVE::cs_label.copy_label); - else - VW::copy_example_data(false, ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); - - data.ws_vali.push_back(ec_copy); -} - -uint32_t predict_sup_adf(cbify& data, multi_learner& base, example& ec) -{ - uint32_t argmin = find_min(data.cumulative_costs); - return predict_sublearner_adf(data, base, ec, argmin); -} - -template -void learn_sup_adf(cbify& data, multi_learner& base, example& ec, int ec_type) -{ - copy_example_to_adf(data, ec); - //generate cost-sensitive label (for CSOAA's temporary use) - auto& csls = data.csls; - auto& cbls = data.cbls; - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - csls[a].costs[0].class_index = a+1; - if (use_cs) - csls[a].costs[0].x = loss_cs(data, ec.l.cs.costs, a+1); - else - csls[a].costs[0].x = loss(data, ec.l.multi.label, a+1); - } - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - { - cbls[a] = data.adf_data.ecs[a]->l.cb; - data.adf_data.ecs[a]->l.cs = csls[a]; - //cout< old_weights; - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - old_weights.push_back(data.adf_data.ecs[a]->weight); - - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - float weight_multiplier = compute_weight_multiplier(data, i, ec_type); - //cout<<"weight multiplier in sup = "<weight = old_weights[a] * weight_multiplier; - multi_learner* cs_learner = as_multiline(data.all->cost_sensitive); - cs_learner->learn(data.adf_data.ecs, i); - - //cout<<"cost-sensitive increment = "<increment<weight = old_weights[a]; - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - data.adf_data.ecs[a]->l.cb = cbls[a]; -} - -template -void predict_or_learn_sup_adf(cbify& data, multi_learner& base, example& ec, int 
ec_type) -{ - uint32_t action = predict_sup_adf(data, base, ec); - - if (ind_update(data, ec_type)) - learn_sup_adf(data, base, ec, ec_type); - - ec.pred.multiclass = action; -} - -uint32_t predict_bandit_adf(cbify& data, multi_learner& base, example& ec) +template +void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) { - uint32_t argmin = find_min(data.cumulative_costs); + //Store the multiclass or cost-sensitive input label + MULTICLASS::label_t ld; + COST_SENSITIVE::label csl; + if (use_cs) + csl = ec.l.cs; + else + ld = ec.l.multi; copy_example_to_adf(data, ec); - base.predict(data.adf_data.ecs, argmin); + base.predict(data.adf_data.ecs); + + auto& out_ec = *data.adf_data.ecs[0]; - auto& out_ec = *data.adf_data.ecs[0]; uint32_t chosen_action; if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(out_ec.pred.a_s), end_scores(out_ec.pred.a_s), chosen_action)) THROW("Failed to sample from pdf"); - //cout<<"predict using sublearner "<< argmin <l.cb; lab.costs.push_back(cl); - vector old_weights; - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - old_weights.push_back(data.adf_data.ecs[a]->weight); - - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - float weight_multiplier = compute_weight_multiplier(data, i, ec_type); - - //cout<<"learn in sublearner "<< i <<" with weight multiplier "<weight = old_weights[a] * weight_multiplier; - base.learn(data.adf_data.ecs, i); - - //cout<<"cb-explore increment = "<ft_offset; - //multiline_learn_or_predict(base, data.adf_data.ecs, offset, i); - } - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - data.adf_data.ecs[a]->weight = old_weights[a]; -} - -template -void predict_or_learn_bandit_adf(cbify& data, multi_learner& base, example& ec, int ec_type) -{ - uint32_t chosen_action = predict_bandit_adf(data, base, ec); - - auto& cl = data.cl_adf; - auto& a_s = data.a_s_adf; - cl.action = a_s[chosen_action].action + 1; - cl.probability = a_s[chosen_action].score; - - //cout<(data, base); - - ec.pred.multiclass = cl.action; -} - -void accumu_var_adf(cbify& data, multi_learner& base, example& ec) -{ - size_t pred_best_approx = predict_sup_adf(data, base, ec); - float temp_var; - - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - if (pred_best_approx == data.a_s_adf[a].action + 1) - temp_var = 1.0 / data.a_s_adf[a].score; - - data.cumu_var += temp_var; - - //cout<<"variance at bandit round "<< data.inter_iter << " = " << temp_var << endl; - //cout< -void predict_or_learn_adf(cbify& data, multi_learner& base, example& ec) -{ - // Corrupt labels (only corrupting multiclass labels as of now) - - if (use_cs) - data.cs_label = ec.l.cs; - else - { - data.mc_label = ec.l.multi; - if (data.ws_iter < data.ws_period) - ec.l.multi.label = corrupt_action(data, data.mc_label.label, WARM_START); - else if (data.inter_iter < data.inter_period) - ec.l.multi.label = corrupt_action(data, data.mc_label.label, INTERACTION); - } - - // Warm start phase - if (data.ws_iter < data.ws_period) - { - if (data.ws_iter < data.ws_train_size) - { - if (data.ws_type == SUPERVISED_WS) - predict_or_learn_sup_adf(data, base, ec, WARM_START); - else if (data.ws_type == BANDIT_WS) - predict_or_learn_bandit_adf(data, base, ec, WARM_START); - } - else - add_to_vali(data, ec); - ec.weight = 0; - data.ws_iter++; - } - // Interaction phase - else if (data.inter_iter < data.inter_period) - { - predict_or_learn_bandit_adf(data, base, ec, INTERACTION); - accumu_var_adf(data, base, ec); - data.a_s_adf.clear(); - 
data.inter_iter++; - } - // Skipping the rest of the examples - else - ec.weight = 0; - - // Store the original labels back - if (use_cs) - ec.l.cs = data.cs_label; - else - ec.l.multi = data.mc_label; - + base.learn(data.adf_data.ecs); + ec.pred.multiclass = cl.action; } void init_adf_data(cbify& data, const size_t num_actions) @@ -709,33 +206,6 @@ void init_adf_data(cbify& data, const size_t num_actions) auto& lab = adf_data.ecs[a]->l.cb; CB::cb_label.default_label(&lab); } - - // The rest of the initialization is for warm start CB - data.csls = calloc_or_throw(num_actions); - for (uint32_t a=0; a < num_actions; ++a) - { - COST_SENSITIVE::cs_label.default_label(&data.csls[a]); - data.csls[a].costs.push_back({0, a+1, 0, 0}); - } - data.cbls = calloc_or_throw(num_actions); - - if (data.vali_method == WS_VALI_SPLIT || data.vali_method == WS_VALI_NOSPLIT) - { - data.ws_train_size = ceil(data.ws_period / 2.0); - data.ws_vali_size = data.ws_period - data.ws_train_size; - } - else - { - data.ws_train_size = data.ws_period; - data.ws_vali_size = 0; - } - data.ws_iter = 0; - data.inter_iter = 0; - - setup_lambdas(data); - for (uint32_t i = 0; i < data.choices_lambda; i++) - data.cumulative_costs.push_back(0.f); - data.cumu_var = 0.f; } base_learner* cbify_setup(arguments& arg) @@ -748,21 +218,7 @@ base_learner* cbify_setup(arguments& arg) .critical("cbify", num_actions, "Convert multiclass on classes into a contextual bandit problem") (use_cs, "cbify_cs", "consume cost-sensitive classification examples instead of multiclass") ("loss0", data->loss0, 0.f, "loss for correct label") - ("loss1", data->loss1, 1.f, "loss for incorrect label") - ("warm_start", data->ws_period, 0U, "number of training examples for warm start phase") - ("interaction", data->inter_period, UINT32_MAX, "number of examples for the interactive contextual bandit learning phase") - ("warm_start_update", data->upd_ws, true, "indicator of warm start updates") - ("interaction_update", data->upd_inter, true, "indicator of interaction updates") - ("corrupt_type_warm_start", data->cor_type_ws, UAR, "type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") - ("corrupt_prob_warm_start", data->cor_prob_ws, 0.f, "probability of label corruption in the warm start phase") - ("corrupt_type_interaction", data->cor_type_inter, UAR, "type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") - ("corrupt_prob_interaction", data->cor_prob_inter, 0.f, "probability of label corruption in the interaction phase") - ("choices_lambda", data->choices_lambda, 1U, "the number of candidate lambdas to aggregate (lambda is the importance weight parameter between the two sources) ") - ("lambda_scheme", data->lambda_scheme, ABS_CENTRAL, "The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min lambda=0, max lambda=1, 3: center lambda=epsilon/(#actions+epsilon), 4: center lambda=epsilon/(#actions+epsilon), min lambda=0, max lambda=1); the rest of candidate lambda values are generated using a doubling scheme") - ("weighting_scheme", data->wt_scheme, INSTANCE_WT, "weighting scheme (1: per instance weighting, where for every lambda, each contextual bandit example have weight lambda/(1-lambda) times that of each warm start example, 2: per dataset weighting, where for every lambda, the contextual bandit dataset has total weight lambda/(1-lambda) times that of the warm start dataset)") - 
("validation_method", data->vali_method, INTER_VALI, "lambda selection criterion (1: using contextual bandit examples with progressive validation, 2: using warm start examples, with fresh validation examples at each epoch, 3: using warm start examples, with a single validation set throughout)") - ("overwrite_label", data->overwrite_label, 1U, "the label used by type 3 corruptions (overwriting)") - ("warm_start_type", data->ws_type, SUPERVISED_WS, "update method of utilizing warm start examples (1: using supervised updates, 2: using contextual bandit updates)").missing()) + ("loss1", data->loss1, 1.f, "loss for incorrect label").missing()) return nullptr; data->use_adf = count(arg.args.begin(), arg.args.end(),"--cb_explore_adf") > 0; @@ -770,9 +226,6 @@ base_learner* cbify_setup(arguments& arg) data->a_s = v_init(); data->all = arg.all; - data->num_actions = num_actions; - data->use_cs = use_cs; - if (data->use_adf) init_adf_data(*data.get(), num_actions); @@ -803,18 +256,10 @@ base_learner* cbify_setup(arguments& arg) if (data->use_adf) { multi_learner* base = as_multiline(setup_base(arg)); - // Note: the current version of warm start CB can only support epsilon greedy exploration - // algorithm - we need to wait for the default epsilon value to be passed from cb_explore - // is there is one - //cout<<"count: "<epsilon = arg.vm.count("epsilon") > 0 ? arg.vm["epsilon"].as() : 0.0f; - if (use_cs) - l = &init_cost_sensitive_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); + l = &init_cost_sensitive_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, 1); else - l = &init_multiclass_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); - - //cout<<"cbify increment = "<increment<, predict_or_learn_adf, arg.all->p, 1); } else { diff --git a/vowpalwabbit/parse_args.cc b/vowpalwabbit/parse_args.cc index 5c27df2b621..fb97222de41 100644 --- a/vowpalwabbit/parse_args.cc +++ b/vowpalwabbit/parse_args.cc @@ -73,6 +73,7 @@ license as described in the file LICENSE. 
#include "explore_eval.h" #include "baseline.h" #include "classweight.h" +#include "warm_cb.h" // #include "cntk.h" using namespace std; @@ -1140,6 +1141,7 @@ void parse_reductions(arguments& arg) all.reduction_stack.push_back(mwt_setup); all.reduction_stack.push_back(cb_explore_setup); all.reduction_stack.push_back(cb_explore_adf_setup); + all.reduction_stack.push_back(warm_cb_setup); all.reduction_stack.push_back(cbify_setup); all.reduction_stack.push_back(explore_eval_setup); all.reduction_stack.push_back(ExpReplay::expreplay_setup<'c', COST_SENSITIVE::cs_label>); diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc new file mode 100644 index 00000000000..389359539e1 --- /dev/null +++ b/vowpalwabbit/warm_cb.cc @@ -0,0 +1,831 @@ +#include +#include "reductions.h" +#include "cb_algs.h" +#include "rand48.h" +#include "bs.h" +#include "vw.h" +#include "../explore/hash.h" +#include "explore.h" + +#include + +using namespace LEARNER; +using namespace exploration; +using namespace ACTION_SCORE; +using namespace std; + +#define WARM_START 1 +#define INTERACTION 2 +#define SKIP 3 + +#define SUPERVISED_WS 1 +#define BANDIT_WS 2 + +#define UAR 1 +#define CIRCULAR 2 +#define OVERWRITE 3 + +#define INTER_VALI 1 +#define WS_VALI_SPLIT 2 +#define WS_VALI_NOSPLIT 3 + +#define INSTANCE_WT 1 +#define DATASET_WT 2 + +#define ABS_CENTRAL 1 +#define ABS_CENTRAL_ZEROONE 2 +#define MINIMAX_CENTRAL 3 +#define MINIMAX_CENTRAL_ZEROONE 4 + + +struct warm_cb; + +struct warm_cb_adf_data +{ + multi_ex ecs; + size_t num_actions; +}; + +struct warm_cb +{ + CB::label cb_label; + uint64_t app_seed; + action_scores a_s; + // used as the seed + size_t example_counter; + vw* all; + bool use_adf; // if true, reduce to cb_explore_adf instead of cb_explore + warm_cb_adf_data adf_data; + float loss0; + float loss1; + + //warm start parameters + uint32_t ws_period; + uint32_t inter_period; + uint32_t choices_lambda; + bool upd_ws; + bool upd_inter; + int cor_type_ws; + float cor_prob_ws; + int cor_type_inter; + float cor_prob_inter; + int vali_method; + int wt_scheme; + int lambda_scheme; + uint32_t overwrite_label; + int ws_type; + + //auxiliary variables + uint32_t num_actions; + float epsilon; + vector lambdas; + action_scores a_s_adf; + vector cumulative_costs; + CB::cb_class cl_adf; + uint32_t ws_train_size; + uint32_t ws_vali_size; + vector ws_vali; + float cumu_var; + uint32_t ws_iter; + uint32_t inter_iter; + MULTICLASS::label_t mc_label; + COST_SENSITIVE::label cs_label; + COST_SENSITIVE::label* csls; + CB::label* cbls; + bool use_cs; + +}; + +float loss(warm_cb& data, uint32_t label, uint32_t final_prediction) +{ + if (label != final_prediction) + return data.loss1; + else + return data.loss0; +} + +float loss_cs(warm_cb& data, v_array& costs, uint32_t final_prediction) +{ + float cost = 0.; + for (auto wc : costs) + { if (wc.class_index == final_prediction) + { cost = wc.x; + break; + } + } + return data.loss0 + (data.loss1 - data.loss0) * cost; +} + +template inline void delete_it(T* p) { if (p != nullptr) delete p; } + +template +uint32_t find_min(vector arr) +{ + T min_val = FLT_MAX; + uint32_t argmin = 0; + + for (uint32_t i = 0; i < arr.size(); i++) + { + //cout<pred.a_s.delete_v(); + VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.ecs[a]); + free_it(data.adf_data.ecs[a]); + } + data.adf_data.ecs.~vector(); + + data.lambdas.~vector(); + data.cumulative_costs.~vector(); + + data.a_s_adf.delete_v(); + for (size_t i = 0; i < data.ws_vali.size(); ++i) + { + if (data.use_cs) + 
VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, *data.ws_vali[i]); + else + VW::dealloc_example(MULTICLASS::mc_label.delete_label, *data.ws_vali[i]); + free(data.ws_vali[i]); + } + data.ws_vali.~vector(); + } +} + +void copy_example_to_adf(warm_cb& data, example& ec) +{ + auto& adf_data = data.adf_data; + const uint64_t ss = data.all->weights.stride_shift(); + const uint64_t mask = data.all->weights.mask(); + + for (size_t a = 0; a < adf_data.num_actions; ++a) + { + auto& eca = *adf_data.ecs[a]; + // clear label + auto& lab = eca.l.cb; + CB::cb_label.default_label(&lab); + + // copy data + VW::copy_example_data(false, &eca, &ec); + + // offset indicies for given action + for (features& fs : eca) + { + for (feature_index& idx : fs.indicies) + { + idx = ((((idx >> ss) * 28904713) + 4832917 * (uint64_t)a) << ss) & mask; + } + } + + // avoid empty example by adding a tag (hacky) + if (CB_ALGS::example_is_newline_not_header(eca) && CB::cb_label.test_label(&eca.l)) + { + eca.tag.push_back('n'); + } + } +} + +float minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period, size_t interaction_period) +{ + return epsilon / (num_actions + epsilon); +} + +void setup_lambdas(warm_cb& data) +{ + // The lambdas are arranged in ascending order + vector& lambdas = data.lambdas; + for (uint32_t i = 0; i 0; i--) + lambdas[i-1] = lambdas[i] / 2.0; + + for (uint32_t i = mid+1; i < data.choices_lambda; i++) + lambdas[i] = 1 - (1-lambdas[i-1]) / 2.0; + + if (data.lambda_scheme == MINIMAX_CENTRAL_ZEROONE || data.lambda_scheme == ABS_CENTRAL_ZEROONE) + { + lambdas[0] = 0.0; + lambdas[data.choices_lambda-1] = 1.0; + } +} + +uint32_t generate_uar_action(warm_cb& data) +{ + float randf = merand48(data.all->random_state); + + for (uint32_t i = 1; i <= data.num_actions; i++) + { + if (randf <= float(i) / data.num_actions) + return i; + } + return data.num_actions; +} + +uint32_t corrupt_action(warm_cb& data, uint32_t action, int ec_type) +{ + float cor_prob; + uint32_t cor_type; + uint32_t cor_action; + + if (ec_type == WARM_START) + { + cor_prob = data.cor_prob_ws; + cor_type = data.cor_type_ws; + } + else + { + cor_prob = data.cor_prob_inter; + cor_type = data.cor_type_inter; + } + + float randf = merand48(data.all->random_state); + if (randf < cor_prob) + { + if (cor_type == UAR) + cor_action = generate_uar_action(data); + else if (cor_type == OVERWRITE) + cor_action = data.overwrite_label; + else + cor_action = (action % data.num_actions) + 1; + } + else + cor_action = action; + return cor_action; +} + +bool ind_update(warm_cb& data, int ec_type) +{ + if (ec_type == WARM_START) + return data.upd_ws; + else + return data.upd_inter; +} + +float compute_weight_multiplier(warm_cb& data, size_t i, int ec_type) +{ + float weight_multiplier; + float ws_train_size = data.ws_train_size; + float inter_train_size = data.inter_period; + float total_train_size = ws_train_size + inter_train_size; + float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * inter_train_size; + + //cout< +void predict_or_learn(warm_cb& data, single_learner& base, example& ec) +{ + //Store the multiclass or cost-sensitive input label + MULTICLASS::label_t ld; + COST_SENSITIVE::label csl; + if (use_cs) + csl = ec.l.cs; + else + ld = ec.l.multi; + + data.cb_label.costs.clear(); + ec.l.cb = data.cb_label; + ec.pred.a_s = data.a_s; + + //Call the cb_explore algorithm. 
It returns a vector of probabilities for each action + base.predict(ec); + //data.probs = ec.pred.scalars; + + uint32_t chosen_action; + if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(ec.pred.a_s), end_scores(ec.pred.a_s), chosen_action)) + THROW("Failed to sample from pdf"); + + CB::cb_class cl; + cl.action = chosen_action + 1; + cl.probability = ec.pred.a_s[chosen_action].score; + + if(!cl.action) + THROW("No action with non-zero probability found!"); + if (use_cs) + cl.cost = loss_cs(data, csl.costs, cl.action); + else + cl.cost = loss(data, ld.label, cl.action); + + //Create a new cb label + data.cb_label.costs.push_back(cl); + ec.l.cb = data.cb_label; + base.learn(ec); + data.a_s.clear(); + data.a_s = ec.pred.a_s; + if (use_cs) + ec.l.cs = csl; + else + ec.l.multi = ld; + ec.pred.multiclass = cl.action; +} + +uint32_t predict_sublearner_adf(warm_cb& data, multi_learner& base, example& ec, uint32_t i) +{ + //cout<<"predict using sublearner "<< i <ft_offset; + //multiline_learn_or_predict(base, data.adf_data.ecs, offset, i); + base.predict(data.adf_data.ecs, i); + //cout<<"greedy label = " << data.adf_data.ecs[0]->pred.a_s[0].action+1 << endl; + return data.adf_data.ecs[0]->pred.a_s[0].action+1; +} + +void accumu_costs_iv_adf(warm_cb& data, multi_learner& base, example& ec) +{ + CB::cb_class& cl = data.cl_adf; + //IPS for approximating the cumulative costs for all lambdas + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + uint32_t action = predict_sublearner_adf(data, base, ec, i); + + if (action == cl.action) + data.cumulative_costs[i] += cl.cost / cl.probability; + //cout< +void accumu_costs_wsv_adf(warm_cb& data, multi_learner& base) +{ + uint32_t ws_vali_size = data.ws_vali_size; + //only update cumulative costs every warm_start_period iterations + if ( data.inter_iter >= 1 && abs( log2(data.inter_iter+1) - floor(log2(data.inter_iter+1)) ) < 1e-4 ) + { + for (uint32_t i = 0; i < data.choices_lambda; i++) + data.cumulative_costs[i] = 0; + + uint32_t num_epochs = ceil(log2(data.inter_period)); + uint32_t epoch = log2(data.inter_iter+1) - 1; + float batch_vali_size = ((float) ws_vali_size) / num_epochs; + uint32_t lb, ub; + + if (data.vali_method == WS_VALI_SPLIT) + { + lb = ceil(batch_vali_size * epoch); + ub = ceil(batch_vali_size * (epoch + 1)); + } + else + { + lb = 0; + ub = ws_vali_size; + } + //cout<<"validation at iteration "<l.cs.costs, pred_label); + else + data.cumulative_costs[i] += loss(data, ec_vali->l.multi.label, pred_label); + + //cout< +void add_to_vali(warm_cb& data, example& ec) +{ + //TODO: set the first parameter properly + example* ec_copy = VW::alloc_examples(sizeof(polylabel), 1); + + if (use_cs) + VW::copy_example_data(false, ec_copy, &ec, 0, COST_SENSITIVE::cs_label.copy_label); + else + VW::copy_example_data(false, ec_copy, &ec, 0, MULTICLASS::mc_label.copy_label); + + data.ws_vali.push_back(ec_copy); +} + +uint32_t predict_sup_adf(warm_cb& data, multi_learner& base, example& ec) +{ + uint32_t argmin = find_min(data.cumulative_costs); + return predict_sublearner_adf(data, base, ec, argmin); +} + +template +void learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, int ec_type) +{ + copy_example_to_adf(data, ec); + //generate cost-sensitive label (for CSOAA's temporary use) + auto& csls = data.csls; + auto& cbls = data.cbls; + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + csls[a].costs[0].class_index = a+1; + if (use_cs) + csls[a].costs[0].x = loss_cs(data, ec.l.cs.costs, a+1); + else + 
csls[a].costs[0].x = loss(data, ec.l.multi.label, a+1); + } + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + { + cbls[a] = data.adf_data.ecs[a]->l.cb; + data.adf_data.ecs[a]->l.cs = csls[a]; + //cout< old_weights; + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + old_weights.push_back(data.adf_data.ecs[a]->weight); + + for (uint32_t i = 0; i < data.choices_lambda; i++) + { + float weight_multiplier = compute_weight_multiplier(data, i, ec_type); + //cout<<"weight multiplier in sup = "<weight = old_weights[a] * weight_multiplier; + multi_learner* cs_learner = as_multiline(data.all->cost_sensitive); + cs_learner->learn(data.adf_data.ecs, i); + + //cout<<"cost-sensitive increment = "<increment<weight = old_weights[a]; + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.adf_data.ecs[a]->l.cb = cbls[a]; +} + +template +void predict_or_learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, int ec_type) +{ + uint32_t action = predict_sup_adf(data, base, ec); + + if (ind_update(data, ec_type)) + learn_sup_adf(data, base, ec, ec_type); + + ec.pred.multiclass = action; +} + +uint32_t predict_bandit_adf(warm_cb& data, multi_learner& base, example& ec) +{ + uint32_t argmin = find_min(data.cumulative_costs); + + copy_example_to_adf(data, ec); + base.predict(data.adf_data.ecs, argmin); + + auto& out_ec = *data.adf_data.ecs[0]; + uint32_t chosen_action; + if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(out_ec.pred.a_s), end_scores(out_ec.pred.a_s), chosen_action)) + THROW("Failed to sample from pdf"); + + //cout<<"predict using sublearner "<< argmin <weight = old_weights[a] * weight_multiplier; + base.learn(data.adf_data.ecs, i); + + //cout<<"cb-explore increment = "<ft_offset; + //multiline_learn_or_predict(base, data.adf_data.ecs, offset, i); + } + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + data.adf_data.ecs[a]->weight = old_weights[a]; +} + +template +void predict_or_learn_bandit_adf(warm_cb& data, multi_learner& base, example& ec, int ec_type) +{ + uint32_t chosen_action = predict_bandit_adf(data, base, ec); + + auto& cl = data.cl_adf; + auto& a_s = data.a_s_adf; + cl.action = a_s[chosen_action].action + 1; + cl.probability = a_s[chosen_action].score; + + //cout<(data, base); + + ec.pred.multiclass = cl.action; +} + +void accumu_var_adf(warm_cb& data, multi_learner& base, example& ec) +{ + size_t pred_best_approx = predict_sup_adf(data, base, ec); + float temp_var; + + for (size_t a = 0; a < data.adf_data.num_actions; ++a) + if (pred_best_approx == data.a_s_adf[a].action + 1) + temp_var = 1.0 / data.a_s_adf[a].score; + + data.cumu_var += temp_var; + + //cout<<"variance at bandit round "<< data.inter_iter << " = " << temp_var << endl; + //cout< +void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec) +{ + // Corrupt labels (only corrupting multiclass labels as of now) + + if (use_cs) + data.cs_label = ec.l.cs; + else + { + data.mc_label = ec.l.multi; + if (data.ws_iter < data.ws_period) + ec.l.multi.label = corrupt_action(data, data.mc_label.label, WARM_START); + else if (data.inter_iter < data.inter_period) + ec.l.multi.label = corrupt_action(data, data.mc_label.label, INTERACTION); + } + + // Warm start phase + if (data.ws_iter < data.ws_period) + { + if (data.ws_iter < data.ws_train_size) + { + if (data.ws_type == SUPERVISED_WS) + predict_or_learn_sup_adf(data, base, ec, WARM_START); + else if (data.ws_type == BANDIT_WS) + predict_or_learn_bandit_adf(data, base, ec, WARM_START); + } + else + 
add_to_vali(data, ec); + ec.weight = 0; + data.ws_iter++; + } + // Interaction phase + else if (data.inter_iter < data.inter_period) + { + predict_or_learn_bandit_adf(data, base, ec, INTERACTION); + accumu_var_adf(data, base, ec); + data.a_s_adf.clear(); + data.inter_iter++; + } + // Skipping the rest of the examples + else + ec.weight = 0; + + // Store the original labels back + if (use_cs) + ec.l.cs = data.cs_label; + else + ec.l.multi = data.mc_label; + +} + +void init_adf_data(warm_cb& data, const size_t num_actions) +{ + auto& adf_data = data.adf_data; + adf_data.num_actions = num_actions; + + adf_data.ecs.resize(num_actions); + for (size_t a=0; a < num_actions; ++a) + { + adf_data.ecs[a] = VW::alloc_examples(CB::cb_label.label_size, 1); + auto& lab = adf_data.ecs[a]->l.cb; + CB::cb_label.default_label(&lab); + } + + // The rest of the initialization is for warm start CB + data.csls = calloc_or_throw(num_actions); + for (uint32_t a=0; a < num_actions; ++a) + { + COST_SENSITIVE::cs_label.default_label(&data.csls[a]); + data.csls[a].costs.push_back({0, a+1, 0, 0}); + } + data.cbls = calloc_or_throw(num_actions); + + if (data.vali_method == WS_VALI_SPLIT || data.vali_method == WS_VALI_NOSPLIT) + { + data.ws_train_size = ceil(data.ws_period / 2.0); + data.ws_vali_size = data.ws_period - data.ws_train_size; + } + else + { + data.ws_train_size = data.ws_period; + data.ws_vali_size = 0; + } + data.ws_iter = 0; + data.inter_iter = 0; + + setup_lambdas(data); + for (uint32_t i = 0; i < data.choices_lambda; i++) + data.cumulative_costs.push_back(0.f); + data.cumu_var = 0.f; +} + +base_learner* warm_cb_setup(arguments& arg) +{ + uint32_t num_actions=0; + auto data = scoped_calloc_or_throw(); + bool use_cs; + + if (arg.new_options("Make Multiclass into Contextual Bandit") + .critical("warm_cb", num_actions, "Convert multiclass on classes into a contextual bandit problem") + (use_cs, "warm_cb_cs", "consume cost-sensitive classification examples instead of multiclass") + ("loss0", data->loss0, 0.f, "loss for correct label") + ("loss1", data->loss1, 1.f, "loss for incorrect label") + ("warm_start", data->ws_period, 0U, "number of training examples for warm start phase") + ("interaction", data->inter_period, UINT32_MAX, "number of examples for the interactive contextual bandit learning phase") + ("warm_start_update", data->upd_ws, true, "indicator of warm start updates") + ("interaction_update", data->upd_inter, true, "indicator of interaction updates") + ("corrupt_type_warm_start", data->cor_type_ws, UAR, "type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") + ("corrupt_prob_warm_start", data->cor_prob_ws, 0.f, "probability of label corruption in the warm start phase") + ("corrupt_type_interaction", data->cor_type_inter, UAR, "type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") + ("corrupt_prob_interaction", data->cor_prob_inter, 0.f, "probability of label corruption in the interaction phase") + ("choices_lambda", data->choices_lambda, 1U, "the number of candidate lambdas to aggregate (lambda is the importance weight parameter between the two sources) ") + ("lambda_scheme", data->lambda_scheme, ABS_CENTRAL, "The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min lambda=0, max lambda=1, 3: center lambda=epsilon/(#actions+epsilon), 4: center lambda=epsilon/(#actions+epsilon), min lambda=0, max lambda=1); the rest of 
candidate lambda values are generated using a doubling scheme") + ("weighting_scheme", data->wt_scheme, INSTANCE_WT, "weighting scheme (1: per instance weighting, where for every lambda, each contextual bandit example have weight lambda/(1-lambda) times that of each warm start example, 2: per dataset weighting, where for every lambda, the contextual bandit dataset has total weight lambda/(1-lambda) times that of the warm start dataset)") + ("validation_method", data->vali_method, INTER_VALI, "lambda selection criterion (1: using contextual bandit examples with progressive validation, 2: using warm start examples, with fresh validation examples at each epoch, 3: using warm start examples, with a single validation set throughout)") + ("overwrite_label", data->overwrite_label, 1U, "the label used by type 3 corruptions (overwriting)") + ("warm_start_type", data->ws_type, SUPERVISED_WS, "update method of utilizing warm start examples (1: using supervised updates, 2: using contextual bandit updates)").missing()) + return nullptr; + + data->use_adf = count(arg.args.begin(), arg.args.end(),"--cb_explore_adf") > 0; + data->app_seed = uniform_hash("vw", 2, 0); + data->a_s = v_init(); + data->all = arg.all; + + data->num_actions = num_actions; + data->use_cs = use_cs; + + if (data->use_adf) + init_adf_data(*data.get(), num_actions); + + if (count(arg.args.begin(), arg.args.end(),"--cb_explore") == 0 && !data->use_adf) + { + arg.args.push_back("--cb_explore"); + stringstream ss; + ss << num_actions; + arg.args.push_back(ss.str()); + } + if (data->use_adf) + { + arg.args.push_back("--cb_min_cost"); + arg.args.push_back(to_string(data->loss0)); + arg.args.push_back("--cb_max_cost"); + arg.args.push_back(to_string(data->loss1)); + } + if (count(arg.args.begin(), arg.args.end(), "--baseline")) + { + arg.args.push_back("--lr_multiplier"); + stringstream ss; + ss << max(abs(data->loss0), abs(data->loss1)) / (data->loss1 - data->loss0); + arg.args.push_back(ss.str()); + } + + learner* l; + + if (data->use_adf) + { + multi_learner* base = as_multiline(setup_base(arg)); + // Note: the current version of warm start CB can only support epsilon greedy exploration + // algorithm - we need to wait for the default epsilon value to be passed from cb_explore + // is there is one + //cout<<"count: "<epsilon = arg.vm.count("epsilon") > 0 ? arg.vm["epsilon"].as() : 0.0f; + + if (use_cs) + l = &init_cost_sensitive_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); + else + l = &init_multiclass_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); + + //cout<<"warm_cb increment = "<increment<, predict_or_learn, arg.all->p, 1); + else + l = &init_multiclass_learner(data, base, predict_or_learn, predict_or_learn, arg.all->p, 1); + } + l->set_finish(finish); + arg.all->delete_prediction = nullptr; + + return make_base(*l); +} diff --git a/vowpalwabbit/warm_cb.h b/vowpalwabbit/warm_cb.h new file mode 100644 index 00000000000..2d645774f88 --- /dev/null +++ b/vowpalwabbit/warm_cb.h @@ -0,0 +1 @@ +LEARNER::base_learner* warm_cb_setup(arguments& arg); From 3f037851ec374554e456956fa75d176dab0133c3 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 14:34:52 -0400 Subject: [PATCH 108/127] . 
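
Use a plain "hash.h" include in the new warm_cb.cc instead of the
relative ../explore/hash.h path, matching the identical include fix
applied to cbify.cc when warm_cb was split out in the previous commit:

    #include "hash.h"   // was: #include "../explore/hash.h"

so both files resolve the hash header the same way.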
--- vowpalwabbit/warm_cb.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc index 389359539e1..180d7aa19f6 100644 --- a/vowpalwabbit/warm_cb.cc +++ b/vowpalwabbit/warm_cb.cc @@ -4,7 +4,7 @@ #include "rand48.h" #include "bs.h" #include "vw.h" -#include "../explore/hash.h" +#include "hash.h" #include "explore.h" #include From 452e4aa1b5cc577964c646c9334867cc8734f4b8 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 14:58:43 -0400 Subject: [PATCH 109/127] removed part on non-adf --- vowpalwabbit/warm_cb.cc | 265 ++++++++++++++-------------------------- 1 file changed, 93 insertions(+), 172 deletions(-) diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc index 180d7aa19f6..bdded4c25f8 100644 --- a/vowpalwabbit/warm_cb.cc +++ b/vowpalwabbit/warm_cb.cc @@ -38,14 +38,6 @@ using namespace std; #define MINIMAX_CENTRAL_ZEROONE 4 -struct warm_cb; - -struct warm_cb_adf_data -{ - multi_ex ecs; - size_t num_actions; -}; - struct warm_cb { CB::label cb_label; @@ -54,8 +46,7 @@ struct warm_cb // used as the seed size_t example_counter; vw* all; - bool use_adf; // if true, reduce to cb_explore_adf instead of cb_explore - warm_cb_adf_data adf_data; + multi_ex ecs; float loss0; float loss1; @@ -140,53 +131,50 @@ void finish(warm_cb& data) { CB::cb_label.delete_label(&data.cb_label); data.a_s.delete_v(); - if (data.use_adf) - { - cout<<"average variance estimate = "<pred.a_s.delete_v(); - VW::dealloc_example(CB::cb_label.delete_label, *data.adf_data.ecs[a]); - free_it(data.adf_data.ecs[a]); - } - data.adf_data.ecs.~vector(); + for (size_t a = 0; a < data.num_actions; ++a) + { + COST_SENSITIVE::cs_label.delete_label(&data.csls[a]); + } + free(data.csls); + free(data.cbls); - data.lambdas.~vector(); - data.cumulative_costs.~vector(); + for (size_t a = 0; a < data.num_actions; ++a) + { + data.ecs[a]->pred.a_s.delete_v(); + VW::dealloc_example(CB::cb_label.delete_label, *data.ecs[a]); + free_it(data.ecs[a]); + } + data.ecs.~vector(); - data.a_s_adf.delete_v(); - for (size_t i = 0; i < data.ws_vali.size(); ++i) - { - if (data.use_cs) - VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, *data.ws_vali[i]); - else - VW::dealloc_example(MULTICLASS::mc_label.delete_label, *data.ws_vali[i]); - free(data.ws_vali[i]); - } - data.ws_vali.~vector(); - } + data.lambdas.~vector(); + data.cumulative_costs.~vector(); + + data.a_s_adf.delete_v(); + for (size_t i = 0; i < data.ws_vali.size(); ++i) + { + if (data.use_cs) + VW::dealloc_example(COST_SENSITIVE::cs_label.delete_label, *data.ws_vali[i]); + else + VW::dealloc_example(MULTICLASS::mc_label.delete_label, *data.ws_vali[i]); + free(data.ws_vali[i]); + } + data.ws_vali.~vector(); } void copy_example_to_adf(warm_cb& data, example& ec) { - auto& adf_data = data.adf_data; const uint64_t ss = data.all->weights.stride_shift(); const uint64_t mask = data.all->weights.mask(); - for (size_t a = 0; a < adf_data.num_actions; ++a) + for (size_t a = 0; a < data.num_actions; ++a) { - auto& eca = *adf_data.ecs[a]; + auto& eca = *data.ecs[a]; // clear label auto& lab = eca.l.cb; CB::cb_label.default_label(&lab); @@ -345,63 +333,15 @@ float compute_weight_multiplier(warm_cb& data, size_t i, int ec_type) return weight_multiplier; } - -template -void predict_or_learn(warm_cb& data, single_learner& base, example& ec) -{ - //Store the multiclass or cost-sensitive input label - MULTICLASS::label_t ld; - COST_SENSITIVE::label csl; - if (use_cs) - csl = ec.l.cs; - else - ld = ec.l.multi; - - 
data.cb_label.costs.clear(); - ec.l.cb = data.cb_label; - ec.pred.a_s = data.a_s; - - //Call the cb_explore algorithm. It returns a vector of probabilities for each action - base.predict(ec); - //data.probs = ec.pred.scalars; - - uint32_t chosen_action; - if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(ec.pred.a_s), end_scores(ec.pred.a_s), chosen_action)) - THROW("Failed to sample from pdf"); - - CB::cb_class cl; - cl.action = chosen_action + 1; - cl.probability = ec.pred.a_s[chosen_action].score; - - if(!cl.action) - THROW("No action with non-zero probability found!"); - if (use_cs) - cl.cost = loss_cs(data, csl.costs, cl.action); - else - cl.cost = loss(data, ld.label, cl.action); - - //Create a new cb label - data.cb_label.costs.push_back(cl); - ec.l.cb = data.cb_label; - base.learn(ec); - data.a_s.clear(); - data.a_s = ec.pred.a_s; - if (use_cs) - ec.l.cs = csl; - else - ec.l.multi = ld; - ec.pred.multiclass = cl.action; -} - uint32_t predict_sublearner_adf(warm_cb& data, multi_learner& base, example& ec, uint32_t i) { //cout<<"predict using sublearner "<< i <ft_offset; - //multiline_learn_or_predict(base, data.adf_data.ecs, offset, i); - base.predict(data.adf_data.ecs, i); - //cout<<"greedy label = " << data.adf_data.ecs[0]->pred.a_s[0].action+1 << endl; - return data.adf_data.ecs[0]->pred.a_s[0].action+1; + //uint32_t offset = data.ecs[0]->ft_offset; + //multiline_learn_or_predict(base, data.ecs, offset, i); + base.predict(data.ecs, i); + //cout<<"greedy label = " << data.ecs[0]->pred.a_s[0].action+1 << endl; + return data.ecs[0]->pred.a_s[0].action+1; } void accumu_costs_iv_adf(warm_cb& data, multi_learner& base, example& ec) @@ -493,7 +433,7 @@ void learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, int ec_type) //generate cost-sensitive label (for CSOAA's temporary use) auto& csls = data.csls; auto& cbls = data.cbls; - for (size_t a = 0; a < data.adf_data.num_actions; ++a) + for (size_t a = 0; a < data.num_actions; ++a) { csls[a].costs[0].class_index = a+1; if (use_cs) @@ -501,36 +441,36 @@ void learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, int ec_type) else csls[a].costs[0].x = loss(data, ec.l.multi.label, a+1); } - for (size_t a = 0; a < data.adf_data.num_actions; ++a) + for (size_t a = 0; a < data.num_actions; ++a) { - cbls[a] = data.adf_data.ecs[a]->l.cb; - data.adf_data.ecs[a]->l.cs = csls[a]; + cbls[a] = data.ecs[a]->l.cb; + data.ecs[a]->l.cs = csls[a]; //cout< old_weights; - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - old_weights.push_back(data.adf_data.ecs[a]->weight); + for (size_t a = 0; a < data.num_actions; ++a) + old_weights.push_back(data.ecs[a]->weight); for (uint32_t i = 0; i < data.choices_lambda; i++) { float weight_multiplier = compute_weight_multiplier(data, i, ec_type); //cout<<"weight multiplier in sup = "<weight = old_weights[a] * weight_multiplier; + for (size_t a = 0; a < data.num_actions; ++a) + data.ecs[a]->weight = old_weights[a] * weight_multiplier; multi_learner* cs_learner = as_multiline(data.all->cost_sensitive); - cs_learner->learn(data.adf_data.ecs, i); + cs_learner->learn(data.ecs, i); //cout<<"cost-sensitive increment = "<increment<weight = old_weights[a]; + for (size_t a = 0; a < data.num_actions; ++a) + data.ecs[a]->weight = old_weights[a]; - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - data.adf_data.ecs[a]->l.cb = cbls[a]; + for (size_t a = 0; a < data.num_actions; ++a) + data.ecs[a]->l.cb = cbls[a]; } template @@ -549,15 +489,15 @@ uint32_t 
predict_bandit_adf(warm_cb& data, multi_learner& base, example& ec) uint32_t argmin = find_min(data.cumulative_costs); copy_example_to_adf(data, ec); - base.predict(data.adf_data.ecs, argmin); + base.predict(data.ecs, argmin); - auto& out_ec = *data.adf_data.ecs[0]; + auto& out_ec = *data.ecs[0]; uint32_t chosen_action; if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(out_ec.pred.a_s), end_scores(out_ec.pred.a_s), chosen_action)) THROW("Failed to sample from pdf"); //cout<<"predict using sublearner "<< argmin <weight = old_weights[a] * weight_multiplier; - base.learn(data.adf_data.ecs, i); + for (size_t a = 0; a < data.num_actions; ++a) + data.ecs[a]->weight = old_weights[a] * weight_multiplier; + base.learn(data.ecs, i); //cout<<"cb-explore increment = "<ft_offset; - //multiline_learn_or_predict(base, data.adf_data.ecs, offset, i); + //uint32_t offset = data.ecs[0]->ft_offset; + //multiline_learn_or_predict(base, data.ecs, offset, i); } - for (size_t a = 0; a < data.adf_data.num_actions; ++a) - data.adf_data.ecs[a]->weight = old_weights[a]; + for (size_t a = 0; a < data.num_actions; ++a) + data.ecs[a]->weight = old_weights[a]; } template @@ -634,9 +574,9 @@ void predict_or_learn_bandit_adf(warm_cb& data, multi_learner& base, example& ec void accumu_var_adf(warm_cb& data, multi_learner& base, example& ec) { size_t pred_best_approx = predict_sup_adf(data, base, ec); - float temp_var; + float temp_var = 0.f; - for (size_t a = 0; a < data.adf_data.num_actions; ++a) + for (size_t a = 0; a < data.num_actions; ++a) if (pred_best_approx == data.a_s_adf[a].action + 1) temp_var = 1.0 / data.a_s_adf[a].score; @@ -699,14 +639,12 @@ void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec) void init_adf_data(warm_cb& data, const size_t num_actions) { - auto& adf_data = data.adf_data; - adf_data.num_actions = num_actions; - - adf_data.ecs.resize(num_actions); + data.num_actions = num_actions; + data.ecs.resize(num_actions); for (size_t a=0; a < num_actions; ++a) { - adf_data.ecs[a] = VW::alloc_examples(CB::cb_label.label_size, 1); - auto& lab = adf_data.ecs[a]->l.cb; + data.ecs[a] = VW::alloc_examples(CB::cb_label.label_size, 1); + auto& lab = data.ecs[a]->l.cb; CB::cb_label.default_label(&lab); } @@ -765,31 +703,18 @@ base_learner* warm_cb_setup(arguments& arg) ("warm_start_type", data->ws_type, SUPERVISED_WS, "update method of utilizing warm start examples (1: using supervised updates, 2: using contextual bandit updates)").missing()) return nullptr; - data->use_adf = count(arg.args.begin(), arg.args.end(),"--cb_explore_adf") > 0; data->app_seed = uniform_hash("vw", 2, 0); data->a_s = v_init(); data->all = arg.all; - - data->num_actions = num_actions; data->use_cs = use_cs; - if (data->use_adf) - init_adf_data(*data.get(), num_actions); + init_adf_data(*data.get(), num_actions); + + arg.args.push_back("--cb_min_cost"); + arg.args.push_back(to_string(data->loss0)); + arg.args.push_back("--cb_max_cost"); + arg.args.push_back(to_string(data->loss1)); - if (count(arg.args.begin(), arg.args.end(),"--cb_explore") == 0 && !data->use_adf) - { - arg.args.push_back("--cb_explore"); - stringstream ss; - ss << num_actions; - arg.args.push_back(ss.str()); - } - if (data->use_adf) - { - arg.args.push_back("--cb_min_cost"); - arg.args.push_back(to_string(data->loss0)); - arg.args.push_back("--cb_max_cost"); - arg.args.push_back(to_string(data->loss1)); - } if (count(arg.args.begin(), arg.args.end(), "--baseline")) { arg.args.push_back("--lr_multiplier"); @@ -800,30 
+725,26 @@ base_learner* warm_cb_setup(arguments& arg) learner* l; - if (data->use_adf) + multi_learner* base = as_multiline(setup_base(arg)); + // Note: the current version of warm start CB can only support epsilon greedy exploration + // algorithm - we need to wait for the default epsilon value to be passed from cb_explore + // is there is one + //cout<<"count: "<epsilon = arg.vm.count("epsilon") > 0 ? arg.vm["epsilon"].as() : 0.0f; - - if (use_cs) - l = &init_cost_sensitive_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); - else - l = &init_multiclass_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); - - //cout<<"warm_cb increment = "<increment<epsilon = 0.05f; } else - { - single_learner* base = as_singleline(setup_base(arg)); - if (use_cs) - l = &init_cost_sensitive_learner(data, base, predict_or_learn, predict_or_learn, arg.all->p, 1); - else - l = &init_multiclass_learner(data, base, predict_or_learn, predict_or_learn, arg.all->p, 1); - } + data->epsilon = arg.vm["epsilon"].as(); + + if (use_cs) + l = &init_cost_sensitive_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); + else + l = &init_multiclass_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); + + //cout<<"warm_cb increment = "<increment<set_finish(finish); arg.all->delete_prediction = nullptr; From aa9e9f7d44572108f3273884d87d01194c45d1e1 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 15:25:06 -0400 Subject: [PATCH 110/127] redoing the importance weight scaling by a factor of 1/k --- vowpalwabbit/cb_adf.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index a5291eb24a2..67faafbf129 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc @@ -113,7 +113,7 @@ void learn_MTR(cb_adf& mydata, multi_learner& base, multi_ex& examples) gen_cs_example_mtr(mydata.gen_cs, examples, mydata.cs_labels); uint32_t nf = (uint32_t)examples[mydata.gen_cs.mtr_example]->num_features; float old_weight = examples[mydata.gen_cs.mtr_example]->weight; - examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum); + examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / examples.size()); GEN_CS::call_cs_ldf(base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset); examples[mydata.gen_cs.mtr_example]->num_features = nf; examples[mydata.gen_cs.mtr_example]->weight = old_weight; From 52439aa4ebbe79e9ffaf6442459a60b0f532c1e4 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 15:32:52 -0400 Subject: [PATCH 111/127] . 
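
Make the float conversion explicit in the 1/k scaling of the MTR
example weight introduced by the previous commit. Writing
k = examples.size() and p = the logged probability of the chosen
action, the intended update in learn_MTR is (a sketch of the full
expression, with the long member names such as
mydata.gen_cs.mtr_example abbreviated):

    examples[mtr_example]->weight *=
        (1.f / p) * ((float)event_sum / (float)action_sum)
        * (1.f / (float)k);

i.e. an inverse-propensity weight, rescaled by the event-to-action
ratio and by 1/k.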
--- vowpalwabbit/cb_adf.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index 67faafbf129..374dabd9e15 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc @@ -113,7 +113,7 @@ void learn_MTR(cb_adf& mydata, multi_learner& base, multi_ex& examples) gen_cs_example_mtr(mydata.gen_cs, examples, mydata.cs_labels); uint32_t nf = (uint32_t)examples[mydata.gen_cs.mtr_example]->num_features; float old_weight = examples[mydata.gen_cs.mtr_example]->weight; - examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / examples.size()); + examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / (float)examples.size()) GEN_CS::call_cs_ldf(base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset); examples[mydata.gen_cs.mtr_example]->num_features = nf; examples[mydata.gen_cs.mtr_example]->weight = old_weight; From e5db84414f95fc71d78a3d23abe6285ebbdeb7e4 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Mon, 6 Aug 2018 16:14:11 -0400 Subject: [PATCH 112/127] comma typo --- test/RunTests | 16 ++++++++-------- vowpalwabbit/cb_adf.cc | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/test/RunTests b/test/RunTests index 833b295b9ab..f33df041fa0 100755 --- a/test/RunTests +++ b/test/RunTests @@ -1637,34 +1637,34 @@ echo "1 | feature:1" | {VW} -a --initial_weight 0.1 --initial_t 0.3 {VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 -d train-sets/multiclass train-sets/ref/cbify_ws.stderr -# Test 176 cbify warm start with lambda set containing 0/1 +# Test 176 warm_cb warm start with lambda set containing 0/1 {VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --lambda_scheme 2 -d train-sets/multiclass train-sets/ref/cbify_ws_lambda_zeroone.stderr -# Test 177 cbify warm start with warm start update turned off +# Test 177 warm_cb warm start with warm start update turned off {VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_update false -d train-sets/multiclass train-sets/ref/cbify_ws_no_ws_upd.stderr -# Test 178 cbify warm start with interaction update turned off +# Test 178 warm_cb warm start with interaction update turned off {VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --interaction_update false -d train-sets/multiclass train-sets/ref/cbify_ws_no_int_upd.stderr -# Test 179 cbify warm start with bandit warm start type (Sim-Bandit) +# Test 179 warm_cb warm start with bandit warm start type (Sim-Bandit) {VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_type 2 -d train-sets/multiclass train-sets/ref/cbify_ws_simbandit.stderr -# Test 180 cbify warm start with UAR supervised corruption +# Test 180 warm_cb warm start with UAR supervised corruption {VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 1 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass 
train-sets/ref/cbify_ws_uar.stderr -# Test 181 cbify warm start with CYC supervised corruption +# Test 181 warm_cb warm start with CYC supervised corruption {VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 2 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass train-sets/ref/cbify_ws_cyc.stderr -# Test 182 cbify warm start with MAJ supervised corruption +# Test 182 warm_cb warm start with MAJ supervised corruption {VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 3 --corrupt_prob_warm_start 0.5 --overwrite_label 1 -d train-sets/multiclass train-sets/ref/cbify_ws_maj.stderr -# Test 183 cbify warm start with warm start distribution being the ground truth +# Test 183 warm_cb warm start with warm start distribution being the ground truth {VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_interaction 2 --corrupt_prob_interaction 0.5 --weighting_scheme 2 --validation_method 2 -d train-sets/multiclass train-sets/ref/cbify_ws_wsgt.stderr diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index 374dabd9e15..8da4f76c5b4 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc @@ -113,7 +113,7 @@ void learn_MTR(cb_adf& mydata, multi_learner& base, multi_ex& examples) gen_cs_example_mtr(mydata.gen_cs, examples, mydata.cs_labels); uint32_t nf = (uint32_t)examples[mydata.gen_cs.mtr_example]->num_features; float old_weight = examples[mydata.gen_cs.mtr_example]->weight; - examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / (float)examples.size()) + examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / (float)examples.size()); GEN_CS::call_cs_ldf(base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset); examples[mydata.gen_cs.mtr_example]->num_features = nf; examples[mydata.gen_cs.mtr_example]->weight = old_weight; From 2011b7ac97a1ad0a307b5dd59c14736305762bd7 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Thu, 16 Aug 2018 14:16:49 -0400 Subject: [PATCH 113/127] removed redundant comments --- vowpalwabbit/warm_cb.cc | 68 ++++++----------------------------------- 1 file changed, 10 insertions(+), 58 deletions(-) diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc index bdded4c25f8..492ab7198e4 100644 --- a/vowpalwabbit/warm_cb.cc +++ b/vowpalwabbit/warm_cb.cc @@ -117,7 +117,6 @@ uint32_t find_min(vector arr) for (uint32_t i = 0; i < arr.size(); i++) { - //cout<ft_offset; - //multiline_learn_or_predict(base, data.ecs, offset, i); base.predict(data.ecs, i); - //cout<<"greedy label = " << data.ecs[0]->pred.a_s[0].action+1 << endl; return data.ecs[0]->pred.a_s[0].action+1; } @@ -354,16 +340,14 @@ void accumu_costs_iv_adf(warm_cb& data, multi_learner& base, example& ec) if (action == cl.action) data.cumulative_costs[i] += cl.cost / cl.probability; - //cout< void accumu_costs_wsv_adf(warm_cb& data, multi_learner& base) { uint32_t ws_vali_size = data.ws_vali_size; - //only update cumulative costs every warm_start_period iterations + //only update cumulative costs at the end of every epoch if ( 
data.inter_iter >= 1 && abs( log2(data.inter_iter+1) - floor(log2(data.inter_iter+1)) ) < 1e-4 ) { for (uint32_t i = 0; i < data.choices_lambda; i++) @@ -384,9 +368,7 @@ void accumu_costs_wsv_adf(warm_cb& data, multi_learner& base) lb = 0; ub = ws_vali_size; } - //cout<<"validation at iteration "<l.cs.costs, pred_label); else data.cumulative_costs[i] += loss(data, ec_vali->l.multi.label, pred_label); - - //cout< void learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, int ec_type) { copy_example_to_adf(data, ec); - //generate cost-sensitive label (for CSOAA's temporary use) + //generate cost-sensitive label (for cost-sensitive learner's temporary use) auto& csls = data.csls; auto& cbls = data.cbls; for (size_t a = 0; a < data.num_actions; ++a) @@ -445,7 +424,6 @@ void learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, int ec_type) { cbls[a] = data.ecs[a]->l.cb; data.ecs[a]->l.cs = csls[a]; - //cout< old_weights; @@ -455,17 +433,12 @@ void learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, int ec_type) for (uint32_t i = 0; i < data.choices_lambda; i++) { float weight_multiplier = compute_weight_multiplier(data, i, ec_type); - //cout<<"weight multiplier in sup = "<weight = old_weights[a] * weight_multiplier; multi_learner* cs_learner = as_multiline(data.all->cost_sensitive); cs_learner->learn(data.ecs, i); - - //cout<<"cost-sensitive increment = "<increment<weight = old_weights[a]; @@ -496,10 +469,6 @@ uint32_t predict_bandit_adf(warm_cb& data, multi_learner& base, example& ec) if (sample_after_normalizing(data.app_seed + data.example_counter++, begin_scores(out_ec.pred.a_s), end_scores(out_ec.pred.a_s), chosen_action)) THROW("Failed to sample from pdf"); - //cout<<"predict using sublearner "<< argmin <weight = old_weights[a] * weight_multiplier; base.learn(data.ecs, i); - - //cout<<"cb-explore increment = "<ft_offset; - //multiline_learn_or_predict(base, data.ecs, offset, i); } for (size_t a = 0; a < data.num_actions; ++a) @@ -547,8 +510,6 @@ void predict_or_learn_bandit_adf(warm_cb& data, multi_learner& base, example& ec cl.action = a_s[chosen_action].action + 1; cl.probability = a_s[chosen_action].score; - //cout< void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec) { // Corrupt labels (only corrupting multiclass labels as of now) - if (use_cs) data.cs_label = ec.l.cs; else @@ -629,7 +584,7 @@ void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec) else ec.weight = 0; - // Store the original labels back + // Restore the original labels if (use_cs) ec.l.cs = data.cs_label; else @@ -726,10 +681,9 @@ base_learner* warm_cb_setup(arguments& arg) learner* l; multi_learner* base = as_multiline(setup_base(arg)); - // Note: the current version of warm start CB can only support epsilon greedy exploration - // algorithm - we need to wait for the default epsilon value to be passed from cb_explore - // is there is one - //cout<<"count: "<, predict_or_learn_adf, arg.all->p, data->choices_lambda); - //cout<<"warm_cb increment = "<increment<set_finish(finish); arg.all->delete_prediction = nullptr; From 4d8811de01733ada04abdc0acb2c5cc22f5b1868 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Wed, 6 Feb 2019 23:54:59 -0500 Subject: [PATCH 114/127] resolve conflicts --- vowpalwabbit/cb_adf.cc | 7 ------- vowpalwabbit/learner.h | 10 ---------- 2 files changed, 17 deletions(-) diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc index e3e3cff9692..c632b57c77e 100644 --- a/vowpalwabbit/cb_adf.cc +++ b/vowpalwabbit/cb_adf.cc 
@@ -111,15 +111,8 @@ void learn_MTR(cb_adf& mydata, multi_learner& base, multi_ex& examples) gen_cs_example_mtr(mydata.gen_cs, examples, mydata.cs_labels); uint32_t nf = (uint32_t)examples[mydata.gen_cs.mtr_example]->num_features; float old_weight = examples[mydata.gen_cs.mtr_example]->weight; -<<<<<<< HEAD examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / (float)examples.size()); GEN_CS::call_cs_ldf(base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset); -======= - examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * - ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum); - GEN_CS::call_cs_ldf( - base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset); ->>>>>>> c9110426d9e8585e8410403d12f0194d5e6673fa examples[mydata.gen_cs.mtr_example]->num_features = nf; examples[mydata.gen_cs.mtr_example]->weight = old_weight; swap(examples[0]->pred.a_s, mydata.a_s); diff --git a/vowpalwabbit/learner.h b/vowpalwabbit/learner.h index db1ef826dcf..b6bd1b5b61c 100644 --- a/vowpalwabbit/learner.h +++ b/vowpalwabbit/learner.h @@ -406,19 +406,9 @@ learner& init_learner(free_ptr& dat, L* base, void (*learn)(T&, L&, E&) { auto ret = &learner::init_learner(dat.get(), base, learn, predict, 1, base->pred_type); -<<<<<<< HEAD - template - void multiline_learn_or_predict(multi_learner& base, multi_ex& examples, const uint64_t offset, const uint32_t id = 0) - { std::vector saved_offsets; - for (auto ec : examples) - { saved_offsets.push_back(ec->ft_offset); - ec->ft_offset = offset; - } -======= dat.release(); return *ret; } ->>>>>>> c9110426d9e8585e8410403d12f0194d5e6673fa // Reduction with no data. 
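// [Editor's note] A hedged sketch of the handoff kept in the resolved init_learner
// hunk above: the learner is constructed from the raw pointer first, and the smart
// pointer releases only afterwards, so the data block has exactly one owner at
// every point. Toy types below, not the VW API.
#include <memory>

struct toy_learner
{
  void* data;
  explicit toy_learner(void* d) : data(d) {}
};

template <typename T>
toy_learner make_toy_learner(std::unique_ptr<T>& dat)
{
  toy_learner ret(dat.get());  // construct while dat still owns the allocation
  dat.release();               // then hand ownership to the learner just built
  return ret;
}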
template From 7bc56af05f3325bb8c61a10287b067767645b24c Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Thu, 7 Feb 2019 00:36:13 -0500 Subject: [PATCH 115/127] compile error on peeking epsilon in warm_cb.cc --- vowpalwabbit/CMakeLists.txt | 4 +- vowpalwabbit/cb_explore_adf.cc | 5 +-- vowpalwabbit/warm_cb.cc | 75 ++++++++++++++++++---------------- vowpalwabbit/warm_cb.h | 3 +- 4 files changed, 45 insertions(+), 42 deletions(-) diff --git a/vowpalwabbit/CMakeLists.txt b/vowpalwabbit/CMakeLists.txt index 06f580bbcf4..e45eb19b0a5 100644 --- a/vowpalwabbit/CMakeLists.txt +++ b/vowpalwabbit/CMakeLists.txt @@ -32,7 +32,7 @@ set(vw_all_headers binary.h nn.h boosting.h ftrl.h no_label.h spanning_tree.h bs.h gd.h noop.h stable_unique.h cache.h gd_mf.h oaa.h stagewise_poly.h cb_adf.h gd_predict.h OjaNewton.h svrg.h cb_algs.h gen_cs_example.h parse_args.h topk.h cb_explore_adf.h parse_dispatch_loop.h unique_sort.h - interact.h interactions.h parse_example_json.h cbify.h interactions_predict.h vw_allreduce.h + interact.h interactions.h parse_example_json.h cbify.h warm_cb.h interactions_predict.h vw_allreduce.h classweight.h parse_regressor.h kernel_svm.h confidence.h label_dictionary.h config.h.in primitives.h lda_core.h print.h vw_versions.h ) @@ -46,7 +46,7 @@ set(vw_all_sources search_multiclasstask.cc search_entityrelationtask.cc search_graph.cc parse_example.cc scorer.cc network.cc parse_args.cc accumulate.cc gd.cc learner.cc mwt.cc lda_core.cc gd_mf.cc mf.cc bfgs.cc noop.cc print.cc example.cc parser.cc loss_functions.cc sender.cc nn.cc confidence.cc bs.cc - cbify.cc explore_eval.cc topk.cc stagewise_poly.cc log_multi.cc recall_tree.cc active.cc + cbify.cc warm_cb.cc explore_eval.cc topk.cc stagewise_poly.cc log_multi.cc recall_tree.cc active.cc active_cover.cc cs_active.cc kernel_svm.cc best_constant.cc ftrl.cc svrg.cc lrqfa.cc interact.cc comp_io.cc interactions.cc vw_validate.cc audit_regressor.cc gen_cs_example.cc cb_explore.cc action_score.cc cb_explore_adf.cc OjaNewton.cc parse_example_json.cc baseline.cc classweight.cc diff --git a/vowpalwabbit/cb_explore_adf.cc b/vowpalwabbit/cb_explore_adf.cc index b38d96427d4..e40f27a944c 100644 --- a/vowpalwabbit/cb_explore_adf.cc +++ b/vowpalwabbit/cb_explore_adf.cc @@ -269,12 +269,9 @@ void predict_or_learn_first(cb_explore_adf& data, multi_learner& base, multi_ex& template void predict_or_learn_greedy(cb_explore_adf& data, multi_learner& base, multi_ex& examples) { -<<<<<<< HEAD data.offset = examples[0]->ft_offset; //Explore uniform random an epsilon fraction of the time. -======= - // Explore uniform random an epsilon fraction of the time. 
->>>>>>> c9110426d9e8585e8410403d12f0194d5e6673fa + if (is_learn && test_adf_sequence(examples) != nullptr) multiline_learn_or_predict(base, examples, data.offset); else diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc index 492ab7198e4..89ab71504d3 100644 --- a/vowpalwabbit/warm_cb.cc +++ b/vowpalwabbit/warm_cb.cc @@ -13,6 +13,7 @@ using namespace LEARNER; using namespace exploration; using namespace ACTION_SCORE; using namespace std; +using namespace VW::config; #define WARM_START 1 #define INTERACTION 2 @@ -631,74 +632,78 @@ void init_adf_data(warm_cb& data, const size_t num_actions) data.cumu_var = 0.f; } -base_learner* warm_cb_setup(arguments& arg) +base_learner* warm_cb_setup(options_i& options, vw& all) { uint32_t num_actions=0; auto data = scoped_calloc_or_throw(); bool use_cs; - if (arg.new_options("Make Multiclass into Contextual Bandit") - .critical("warm_cb", num_actions, "Convert multiclass on classes into a contextual bandit problem") - (use_cs, "warm_cb_cs", "consume cost-sensitive classification examples instead of multiclass") - ("loss0", data->loss0, 0.f, "loss for correct label") - ("loss1", data->loss1, 1.f, "loss for incorrect label") - ("warm_start", data->ws_period, 0U, "number of training examples for warm start phase") - ("interaction", data->inter_period, UINT32_MAX, "number of examples for the interactive contextual bandit learning phase") - ("warm_start_update", data->upd_ws, true, "indicator of warm start updates") - ("interaction_update", data->upd_inter, true, "indicator of interaction updates") - ("corrupt_type_warm_start", data->cor_type_ws, UAR, "type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") - ("corrupt_prob_warm_start", data->cor_prob_ws, 0.f, "probability of label corruption in the warm start phase") - ("corrupt_type_interaction", data->cor_type_inter, UAR, "type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)") - ("corrupt_prob_interaction", data->cor_prob_inter, 0.f, "probability of label corruption in the interaction phase") - ("choices_lambda", data->choices_lambda, 1U, "the number of candidate lambdas to aggregate (lambda is the importance weight parameter between the two sources) ") - ("lambda_scheme", data->lambda_scheme, ABS_CENTRAL, "The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min lambda=0, max lambda=1, 3: center lambda=epsilon/(#actions+epsilon), 4: center lambda=epsilon/(#actions+epsilon), min lambda=0, max lambda=1); the rest of candidate lambda values are generated using a doubling scheme") - ("weighting_scheme", data->wt_scheme, INSTANCE_WT, "weighting scheme (1: per instance weighting, where for every lambda, each contextual bandit example have weight lambda/(1-lambda) times that of each warm start example, 2: per dataset weighting, where for every lambda, the contextual bandit dataset has total weight lambda/(1-lambda) times that of the warm start dataset)") - ("validation_method", data->vali_method, INTER_VALI, "lambda selection criterion (1: using contextual bandit examples with progressive validation, 2: using warm start examples, with fresh validation examples at each epoch, 3: using warm start examples, with a single validation set throughout)") - ("overwrite_label", data->overwrite_label, 1U, "the label used by type 3 corruptions (overwriting)") - ("warm_start_type", data->ws_type, SUPERVISED_WS, "update method of 
utilizing warm start examples (1: using supervised updates, 2: using contextual bandit updates)").missing()) - return nullptr; + option_group_definition new_options("Make Multiclass into Warm-starting Contextual Bandit"); + + new_options + .add(make_option("warm_cb", num_actions) + .keep() + .help("Convert multiclass on classes into a contextual bandit problem")) + .add(make_option("warm_cb_cs", use_cs).help("consume cost-sensitive classification examples instead of multiclass")) + .add(make_option("loss0", data->loss0).default_value(0.f).help("loss for correct label")) + .add(make_option("loss1", data->loss1).default_value(1.f).help("loss for incorrect label")) + .add(make_option("warm_start", data->ws_period).default_value(0U).help("number of training examples for warm start phase")) + .add(make_option("interaction", data->inter_period).default_value(UINT32_MAX).help("number of examples for the interactive contextual bandit learning phase")) + .add(make_option("warm_start_update", data->upd_ws).default_value(true).help("indicator of warm start updates")) + .add(make_option("interaction_update", data->upd_inter).default_value(true).help("indicator of interaction updates")) + .add(make_option("corrupt_type_warm_start", data->cor_type_ws).default_value(UAR).help("type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)")) + .add(make_option("corrupt_prob_warm_start", data->cor_prob_ws).default_value(0.f).help("probability of label corruption in the warm start phase")) + .add(make_option("corrupt_type_interaction", data->cor_type_inter).default_value(UAR).help("type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)")) + .add(make_option("corrupt_prob_interaction", data->cor_prob_inter).default_value(0.f).help("probability of label corruption in the interaction phase")) + .add(make_option("choices_lambda", data->choices_lambda).default_value(1U).help("the number of candidate lambdas to aggregate (lambda is the importance weight parameter between the two sources)")) + .add(make_option("lambda_scheme", data->lambda_scheme).default_value(ABS_CENTRAL).help("The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min lambda=0, max lambda=1, 3: center lambda=epsilon/(#actions+epsilon), 4: center lambda=epsilon/(#actions+epsilon), min lambda=0, max lambda=1); the rest of candidate lambda values are generated using a doubling scheme")) + .add(make_option("weighting_scheme", data->wt_scheme).default_value(INSTANCE_WT).help("weighting scheme (1: per instance weighting, where for every lambda, each contextual bandit example have weight lambda/(1-lambda) times that of each warm start example, 2: per dataset weighting, where for every lambda, the contextual bandit dataset has total weight lambda/(1-lambda) times that of the warm start dataset)")) + .add(make_option("validation_method", data->vali_method).default_value(INTER_VALI).help("lambda selection criterion (1: using contextual bandit examples with progressive validation, 2: using warm start examples, with fresh validation examples at each epoch, 3: using warm start examples, with a single validation set throughout)")) + .add(make_option("overwrite_label", data->overwrite_label).default_value(1U).help("the label used by type 3 corruptions (overwriting)")) + .add(make_option("warm_start_type", data->ws_type).default_value(SUPERVISED_WS).help("update method of utilizing warm start 
examples (1: using supervised updates, 2: using contextual bandit updates)")); + + options.add_and_parse(new_options); data->app_seed = uniform_hash("vw", 2, 0); data->a_s = v_init(); - data->all = arg.all; + data->all = &all; data->use_cs = use_cs; init_adf_data(*data.get(), num_actions); - arg.args.push_back("--cb_min_cost"); - arg.args.push_back(to_string(data->loss0)); - arg.args.push_back("--cb_max_cost"); - arg.args.push_back(to_string(data->loss1)); + options.insert("cb_min_cost", to_string(data->loss0)); + options.insert("cb_max_cost", to_string(data->loss1)); - if (count(arg.args.begin(), arg.args.end(), "--baseline")) + if (options.was_supplied("baseline")) { - arg.args.push_back("--lr_multiplier"); stringstream ss; ss << max(abs(data->loss0), abs(data->loss1)) / (data->loss1 - data->loss0); - arg.args.push_back(ss.str()); + options.insert("lr_multiplier", ss.str()); } learner* l; - multi_learner* base = as_multiline(setup_base(arg)); + multi_learner* base = as_multiline(setup_base(options, all)); // Note: the current version of warm start CB can only support epsilon-greedy exploration // We need to wait for the epsilon value to be passed from the base // cb_explore learner, if there is one - if (arg.vm.count("epsilon") == 0) + + + if (!options.was_supplied("epsilon")) { cerr<<"Warning: no epsilon (greedy parameter) specified; resetting to 0.05"<epsilon = 0.05f; } else - data->epsilon = arg.vm["epsilon"].as(); + data->epsilon = *options.get_option("epsilon"); if (use_cs) - l = &init_cost_sensitive_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); + l = &init_cost_sensitive_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, data->choices_lambda); else - l = &init_multiclass_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, arg.all->p, data->choices_lambda); + l = &init_multiclass_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, data->choices_lambda); l->set_finish(finish); - arg.all->delete_prediction = nullptr; + all.delete_prediction = nullptr; return make_base(*l); } diff --git a/vowpalwabbit/warm_cb.h b/vowpalwabbit/warm_cb.h index 2d645774f88..1f211135d85 100644 --- a/vowpalwabbit/warm_cb.h +++ b/vowpalwabbit/warm_cb.h @@ -1 +1,2 @@ -LEARNER::base_learner* warm_cb_setup(arguments& arg); +//LEARNER::base_learner* warm_cb_setup(arguments& arg); +LEARNER::base_learner* warm_cb_setup(VW::config::options_i& options, vw& all); From 49714779ce011aee8d924ca0a8d3d8492f002d8d Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Thu, 7 Feb 2019 11:42:48 -0500 Subject: [PATCH 116/127] fixed sim-bandit option, disallow cost-sensitive corruption --- vowpalwabbit/learner.h | 2 +- vowpalwabbit/warm_cb.cc | 36 ++++++++++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/vowpalwabbit/learner.h b/vowpalwabbit/learner.h index b6bd1b5b61c..902f86b09c4 100644 --- a/vowpalwabbit/learner.h +++ b/vowpalwabbit/learner.h @@ -468,7 +468,7 @@ single_learner* as_singleline(learner* l) template void multiline_learn_or_predict(multi_learner& base, multi_ex& examples, const uint64_t offset, const uint32_t id = 0) { - std::vector saved_offsets(examples.size()); + std::vector saved_offsets; for (auto ec : examples) { saved_offsets.push_back(ec->ft_offset); diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc index 89ab71504d3..cd4378be91b 100644 --- a/vowpalwabbit/warm_cb.cc +++ b/vowpalwabbit/warm_cb.cc @@ -6,6 +6,7 @@ #include "vw.h" #include "hash.h" 
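// [Editor's note] A small self-contained illustration of the saved_offsets fix in
// the learner.h hunk above (toy helpers, not VW source). Constructing the vector
// with a size and then calling push_back appends AFTER the default-initialized
// elements, so the restore loop would first walk offsets.size() zeros:
#include <cstdint>
#include <vector>

inline std::vector<uint64_t> save_offsets_buggy(const std::vector<uint64_t>& offsets)
{
  std::vector<uint64_t> saved(offsets.size());    // already holds offsets.size() zeros
  for (uint64_t o : offsets) saved.push_back(o);  // grows to twice the intended length
  return saved;
}

inline std::vector<uint64_t> save_offsets_fixed(const std::vector<uint64_t>& offsets)
{
  std::vector<uint64_t> saved;                    // start empty, as in the fix
  saved.reserve(offsets.size());                  // optional: avoids reallocations
  for (uint64_t o : offsets) saved.push_back(o);
  return saved;
}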
#include "explore.h" +#include "vw_exception.h" #include @@ -66,6 +67,7 @@ struct warm_cb int lambda_scheme; uint32_t overwrite_label; int ws_type; + bool sim_bandit; //auxiliary variables uint32_t num_actions; @@ -132,10 +134,14 @@ void finish(warm_cb& data) CB::cb_label.delete_label(&data.cb_label); data.a_s.delete_v(); - cout<<"average variance estimate = "<quiet) + { + cerr << "average variance estimate = " << data.cumu_var / data.inter_iter << endl; + cerr << "theoretical average variance = " << data.num_actions / data.epsilon << endl; + cerr << "last lambda chosen = " << data.lambdas[argmin] << " among lambdas ranging from " << data.lambdas[0] << " to " << data.lambdas[data.choices_lambda-1] << endl; + } for (size_t a = 0; a < data.num_actions; ++a) { @@ -596,6 +602,10 @@ void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec) void init_adf_data(warm_cb& data, const size_t num_actions) { data.num_actions = num_actions; + if (data.sim_bandit) + data.ws_type = BANDIT_WS; + else + data.ws_type = SUPERVISED_WS; data.ecs.resize(num_actions); for (size_t a=0; a < num_actions; ++a) { @@ -648,9 +658,10 @@ base_learner* warm_cb_setup(options_i& options, vw& all) .add(make_option("loss0", data->loss0).default_value(0.f).help("loss for correct label")) .add(make_option("loss1", data->loss1).default_value(1.f).help("loss for incorrect label")) .add(make_option("warm_start", data->ws_period).default_value(0U).help("number of training examples for warm start phase")) + .add(make_option("epsilon", data->epsilon).keep().help("epsilon-greedy exploration")) .add(make_option("interaction", data->inter_period).default_value(UINT32_MAX).help("number of examples for the interactive contextual bandit learning phase")) - .add(make_option("warm_start_update", data->upd_ws).default_value(true).help("indicator of warm start updates")) - .add(make_option("interaction_update", data->upd_inter).default_value(true).help("indicator of interaction updates")) + .add(make_option("warm_start_update", data->upd_ws).help("indicator of warm start updates")) + .add(make_option("interaction_update", data->upd_inter).help("indicator of interaction updates")) .add(make_option("corrupt_type_warm_start", data->cor_type_ws).default_value(UAR).help("type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)")) .add(make_option("corrupt_prob_warm_start", data->cor_prob_ws).default_value(0.f).help("probability of label corruption in the warm start phase")) .add(make_option("corrupt_type_interaction", data->cor_type_inter).default_value(UAR).help("type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)")) @@ -660,10 +671,21 @@ base_learner* warm_cb_setup(options_i& options, vw& all) .add(make_option("weighting_scheme", data->wt_scheme).default_value(INSTANCE_WT).help("weighting scheme (1: per instance weighting, where for every lambda, each contextual bandit example have weight lambda/(1-lambda) times that of each warm start example, 2: per dataset weighting, where for every lambda, the contextual bandit dataset has total weight lambda/(1-lambda) times that of the warm start dataset)")) .add(make_option("validation_method", data->vali_method).default_value(INTER_VALI).help("lambda selection criterion (1: using contextual bandit examples with progressive validation, 2: using warm start examples, with fresh validation examples at each epoch, 3: using warm start examples, with a single 
validation set throughout)")) .add(make_option("overwrite_label", data->overwrite_label).default_value(1U).help("the label used by type 3 corruptions (overwriting)")) - .add(make_option("warm_start_type", data->ws_type).default_value(SUPERVISED_WS).help("update method of utilizing warm start examples (1: using supervised updates, 2: using contextual bandit updates)")); + .add(make_option("sim_bandit", data->sim_bandit).help("simulate contextual bandit updates on warm start examples")); options.add_and_parse(new_options); + if(use_cs && (options.was_supplied("corrupt_type_warm_start") || options.was_supplied("corrupt_prob_warm_start") || options.was_supplied("corrupt_type_interaction") || options.was_supplied("corrupt_prob_interaction") )) + { + THROW("label corruption on cost-sensitive examples not currently supported"); + } + + + if(!options.was_supplied("warm_cb")) + { + return nullptr; + } + data->app_seed = uniform_hash("vw", 2, 0); data->a_s = v_init(); data->all = &all; @@ -694,8 +716,6 @@ base_learner* warm_cb_setup(options_i& options, vw& all) cerr<<"Warning: no epsilon (greedy parameter) specified; resetting to 0.05"<epsilon = 0.05f; } - else - data->epsilon = *options.get_option("epsilon"); if (use_cs) l = &init_cost_sensitive_learner(data, base, predict_or_learn_adf, predict_or_learn_adf, all.p, data->choices_lambda); From db1da5e55c5fd1ba94deae6820d6843cd50b7833 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Thu, 7 Feb 2019 22:06:50 -0500 Subject: [PATCH 117/127] begin fixing importance weight in cs examples --- vowpalwabbit/cost_sensitive.cc | 203 ++++++++++++++++----------------- vowpalwabbit/warm_cb.cc | 120 ++++--------------- 2 files changed, 118 insertions(+), 205 deletions(-) diff --git a/vowpalwabbit/cost_sensitive.cc b/vowpalwabbit/cost_sensitive.cc index 2d239e6d612..80e8e5c4438 100644 --- a/vowpalwabbit/cost_sensitive.cc +++ b/vowpalwabbit/cost_sensitive.cc @@ -6,42 +6,43 @@ using namespace std; namespace COST_SENSITIVE { -void name_value(substring& s, v_array& name, float& v) + +void name_value(substring &s, v_array& name, float &v) { tokenize(':', s, name); switch (name.size()) { - case 0: - case 1: - v = 1.; - break; - case 2: - v = float_of_substring(name[1]); - if (nanpattern(v)) - THROW("error NaN value for: " << name[0]); - break; - default: - cerr << "example with a wierd name. What is '"; - cerr.write(s.begin, s.end - s.begin); - cerr << "'?\n"; + case 0: + case 1: + v = 1.; + break; + case 2: + v = float_of_substring(name[1]); + if (nanpattern(v)) + THROW("error NaN value for: " << name[0]); + break; + default: + cerr << "example with a wierd name. 
What is '"; + cerr.write(s.begin, s.end - s.begin); + cerr << "'?\n"; } } char* bufread_label(label* ld, char* c, io_buf& cache) { - size_t num = *(size_t*)c; + size_t num = *(size_t *)c; ld->costs.clear(); c += sizeof(size_t); - size_t total = sizeof(wclass) * num; - if (cache.buf_read(c, (int)total) < total) + size_t total = sizeof(wclass)*num; + if (buf_read(cache, c, (int)total) < total) { cout << "error in demarshal of cost data" << endl; return c; } - for (size_t i = 0; i < num; i++) + for (size_t i = 0; icosts.push_back(temp); } @@ -51,26 +52,29 @@ char* bufread_label(label* ld, char* c, io_buf& cache) size_t read_cached_label(shared_data*, void* v, io_buf& cache) { - label* ld = (label*)v; + label* ld = (label*) v; ld->costs.clear(); - char* c; + char *c; size_t total = sizeof(size_t); - if (cache.buf_read(c, (int)total) < total) + if (buf_read(cache, c, (int)total) < total) return 0; - bufread_label(ld, c, cache); + bufread_label(ld,c, cache); return total; } -float weight(void*) { return 1.; } +float weight(void*) +{ + return 1.; +} char* bufcache_label(label* ld, char* c) { - *(size_t*)c = ld->costs.size(); + *(size_t *)c = ld->costs.size(); c += sizeof(size_t); - for (unsigned int i = 0; i < ld->costs.size(); i++) + for (unsigned int i = 0; i< ld->costs.size(); i++) { - *(wclass*)c = ld->costs[i]; + *(wclass *)c = ld->costs[i]; c += sizeof(wclass); } return c; @@ -78,37 +82,36 @@ char* bufcache_label(label* ld, char* c) void cache_label(void* v, io_buf& cache) { - char* c; - label* ld = (label*)v; - cache.buf_write(c, sizeof(size_t) + sizeof(wclass) * ld->costs.size()); - bufcache_label(ld, c); + char *c; + label* ld = (label*) v; + buf_write(cache, c, sizeof(size_t)+sizeof(wclass)*ld->costs.size()); + bufcache_label(ld,c); } void default_label(void* v) { - label* ld = (label*)v; + label* ld = (label*) v; ld->costs.clear(); } bool test_label(void* v) { - label* ld = (label*)v; + label* ld = (label*) v; if (ld->costs.size() == 0) return true; - for (unsigned int i = 0; i < ld->costs.size(); i++) + for (unsigned int i=0; icosts.size(); i++) if (FLT_MAX != ld->costs[i].x) return false; return true; } -void delete_label(void* v) + void delete_label(void* v) { label* ld = (label*)v; - if (ld) - ld->costs.delete_v(); + if (ld) ld->costs.delete_v(); } -void copy_label(void* dst, void* src) +void copy_label(void*dst, void*src) { if (dst && src) { @@ -120,14 +123,13 @@ void copy_label(void* dst, void* src) bool substring_eq(substring ss, const char* str) { - size_t len_ss = ss.end - ss.begin; + size_t len_ss = ss.end - ss.begin; size_t len_str = strlen(str); - if (len_ss != len_str) - return false; + if (len_ss != len_str) return false; return (strncmp(ss.begin, str, len_ss) == 0); } -void parse_label(parser* p, shared_data* sd, void* v, v_array& words) +void parse_label(parser* p, shared_data*sd, void* v, v_array& words) { label* ld = (label*)v; ld->costs.clear(); @@ -138,31 +140,29 @@ void parse_label(parser* p, shared_data* sd, void* v, v_array& words) float fx; name_value(words[0], p->parse_name, fx); bool eq_shared = substring_eq(p->parse_name[0], "***shared***"); - bool eq_label = substring_eq(p->parse_name[0], "***label***"); - if (!sd->ldict) + bool eq_label = substring_eq(p->parse_name[0], "***label***"); + if (! 
sd->ldict) { eq_shared |= substring_eq(p->parse_name[0], "shared"); - eq_label |= substring_eq(p->parse_name[0], "label"); + eq_label |= substring_eq(p->parse_name[0], "label"); } if (eq_shared || eq_label) { if (eq_shared) { - if (p->parse_name.size() != 1) - cerr << "shared feature vectors should not have costs on: " << words[0] << endl; + if (p->parse_name.size() != 1) cerr << "shared feature vectors should not have costs on: " << words[0] << endl; else { - wclass f = {-FLT_MAX, 0, 0., 0.}; + wclass f = { -FLT_MAX, 0, 0., 0.}; ld->costs.push_back(f); } } if (eq_label) { - if (p->parse_name.size() != 2) - cerr << "label feature vectors should have exactly one cost on: " << words[0] << endl; + if (p->parse_name.size() != 2) cerr << "label feature vectors should have exactly one cost on: " << words[0] << endl; else { - wclass f = {float_of_substring(p->parse_name[1]), 0, 0., 0.}; + wclass f = { float_of_substring(p->parse_name[1]), 0, 0., 0.}; ld->costs.push_back(f); } } @@ -173,7 +173,7 @@ void parse_label(parser* p, shared_data* sd, void* v, v_array& words) // otherwise this is a "real" example for (unsigned int i = 0; i < words.size(); i++) { - wclass f = {0., 0, 0., 0.}; + wclass f = {0.,0,0.,0.}; name_value(words[i], p->parse_name, f.x); if (p->parse_name.size() == 0) @@ -181,8 +181,7 @@ void parse_label(parser* p, shared_data* sd, void* v, v_array& words) if (p->parse_name.size() == 1 || p->parse_name.size() == 2 || p->parse_name.size() == 3) { - f.class_index = - sd->ldict ? (uint32_t)sd->ldict->get(p->parse_name[0]) : (uint32_t)hashstring(p->parse_name[0], 0); + f.class_index = sd->ldict ? (uint32_t)sd->ldict->get(p->parse_name[0]) : (uint32_t)hashstring(p->parse_name[0], 0); if (p->parse_name.size() == 1 && f.x >= 0) // test examples are specified just by un-valued class #s f.x = FLT_MAX; } @@ -193,8 +192,13 @@ void parse_label(parser* p, shared_data* sd, void* v, v_array& words) } } -label_parser cs_label = {default_label, parse_label, cache_label, read_cached_label, delete_label, weight, copy_label, - test_label, sizeof(label)}; +label_parser cs_label = {default_label, parse_label, + cache_label, read_cached_label, + delete_label, weight, + copy_label, + test_label, + sizeof(label) + }; void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool action_scores, uint32_t prediction) { @@ -213,10 +217,10 @@ void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool act const example& first_ex = *(*ec_seq)[0]; v_array costs = first_ex.l.cs.costs; - if (costs.size() == 1 && costs[0].class_index == 0 && costs[0].x < 0) - ecc++; + if (costs.size() == 1 && costs[0].class_index == 0 && costs[0].x < 0) ecc++; - for (; ecc != &(*ec_seq->cend()); ecc++) num_current_features += (*ecc)->num_features; + for (; ecc!=&(*ec_seq->cend()); ecc++) + num_current_features += (*ecc)->num_features; } std::string label_buf; @@ -232,22 +236,17 @@ void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool act pred_buf << std::setw(all.sd->col_current_predict) << std::right << std::setfill(' '); if (all.sd->ldict) { - if (action_scores) - pred_buf << all.sd->ldict->get(ec.pred.a_s[0].action); - else - pred_buf << all.sd->ldict->get(prediction); + if (action_scores) pred_buf << all.sd->ldict->get(ec.pred.a_s[0].action); + else pred_buf << all.sd->ldict->get(prediction); } - else - pred_buf << ec.pred.a_s[0].action; - if (action_scores) - pred_buf << "....."; - all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, pred_buf.str(), 
num_current_features, - all.progress_add, all.progress_arg); - ; + else pred_buf << ec.pred.a_s[0].action; + if (action_scores) pred_buf <<"....."; + all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, pred_buf.str(), + num_current_features, all.progress_add, all.progress_arg);; } else - all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, prediction, num_current_features, - all.progress_add, all.progress_arg); + all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, prediction, + num_current_features, all.progress_add, all.progress_arg); } } @@ -257,31 +256,31 @@ void output_example(vw& all, example& ec) float loss = 0.; if (!test_label(&ld)) - { - // need to compute exact loss - size_t pred = (size_t)ec.pred.multiclass; - - float chosen_loss = FLT_MAX; - float min = FLT_MAX; - for (auto& cl : ld.costs) { - if (cl.class_index == pred) - chosen_loss = cl.x; - if (cl.x < min) - min = cl.x; - } - if (chosen_loss == FLT_MAX) - cerr << "warning: csoaa predicted an invalid class. Are all multi-class labels in the {1..k} range?" << endl; + //need to compute exact loss + size_t pred = (size_t)ec.pred.multiclass; - loss = chosen_loss - min; - // TODO(alberto): add option somewhere to allow using absolute loss instead? - // loss = chosen_loss; - } + float chosen_loss = FLT_MAX; + float min = FLT_MAX; + for (auto& cl : ld.costs) + { + if (cl.class_index == pred) + chosen_loss = cl.x; + if (cl.x < min) + min = cl.x; + } + if (chosen_loss == FLT_MAX) + cerr << "warning: csoaa predicted an invalid class. Are all multi-class labels in the {1..k} range?" << endl; + + loss = chosen_loss - min; + // TODO(alberto): add option somewhere to allow using absolute loss instead? + // loss = chosen_loss; + } all.sd->update(ec.test_only, !test_label(&ld), loss, 1.f, ec.num_features); for (int sink : all.final_prediction_sink) - if (!all.sd->ldict) + if (! 
all.sd->ldict) all.print(sink, (float)ec.pred.multiclass, 0, ec.tag); else { @@ -295,8 +294,7 @@ void output_example(vw& all, example& ec) for (unsigned int i = 0; i < ld.costs.size(); i++) { wclass cl = ld.costs[i]; - if (i > 0) - outputStringStream << ' '; + if (i > 0) outputStringStream << ' '; outputStringStream << cl.class_index << ':' << cl.partial_prediction; } all.print_text(all.raw_prediction, outputStringStream.str(), ec.tag); @@ -314,23 +312,18 @@ void finish_example(vw& all, example& ec) bool example_is_test(example& ec) { v_array costs = ec.l.cs.costs; - if (costs.size() == 0) - return true; - for (size_t j = 0; j < costs.size(); j++) - if (costs[j].x != FLT_MAX) - return false; + if (costs.size() == 0) return true; + for (size_t j=0; j costs = ec.l.cs.costs; - if (costs.size() != 1) - return false; - if (costs[0].class_index != 0) - return false; - if (costs[0].x != -FLT_MAX) - return false; + if (costs.size() != 1) return false; + if (costs[0].class_index != 0) return false; + if (costs[0].x != -FLT_MAX) return false; return true; } -} // namespace COST_SENSITIVE +} diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc index cd4378be91b..a796708dcbf 100644 --- a/vowpalwabbit/warm_cb.cc +++ b/vowpalwabbit/warm_cb.cc @@ -27,13 +27,6 @@ using namespace VW::config; #define CIRCULAR 2 #define OVERWRITE 3 -#define INTER_VALI 1 -#define WS_VALI_SPLIT 2 -#define WS_VALI_NOSPLIT 3 - -#define INSTANCE_WT 1 -#define DATASET_WT 2 - #define ABS_CENTRAL 1 #define ABS_CENTRAL_ZEROONE 2 #define MINIMAX_CENTRAL 3 @@ -60,8 +53,6 @@ struct warm_cb bool upd_inter; int cor_type_ws; float cor_prob_ws; - int cor_type_inter; - float cor_prob_inter; int vali_method; int wt_scheme; int lambda_scheme; @@ -205,7 +196,7 @@ void copy_example_to_adf(warm_cb& data, example& ec) } } -float minimax_lambda(float epsilon, size_t num_actions, size_t warm_start_period, size_t interaction_period) +float minimax_lambda(float epsilon, size_t num_actions) { return epsilon / (num_actions + epsilon); } @@ -238,7 +229,7 @@ void setup_lambdas(warm_cb& data) if (data.lambda_scheme == ABS_CENTRAL || data.lambda_scheme == ABS_CENTRAL_ZEROONE) lambdas[mid] = 0.5; else - lambdas[mid] = minimax_lambda(data.epsilon, data.num_actions, data.ws_period, data.inter_period); + lambdas[mid] = minimax_lambda(data.epsilon, data.num_actions); for (uint32_t i = mid; i > 0; i--) lambdas[i-1] = lambdas[i] / 2.0; @@ -276,11 +267,6 @@ uint32_t corrupt_action(warm_cb& data, uint32_t action, int ec_type) cor_prob = data.cor_prob_ws; cor_type = data.cor_type_ws; } - else - { - cor_prob = data.cor_prob_inter; - cor_type = data.cor_type_inter; - } float randf = merand48(data.all->random_state); if (randf < cor_prob) @@ -313,20 +299,11 @@ float compute_weight_multiplier(warm_cb& data, size_t i, int ec_type) float total_train_size = ws_train_size + inter_train_size; float total_weight = (1-data.lambdas[i]) * ws_train_size + data.lambdas[i] * inter_train_size; - if (data.wt_scheme == INSTANCE_WT) - { - if (ec_type == WARM_START) - weight_multiplier = (1-data.lambdas[i]) * total_train_size / (total_weight + FLT_MIN); - else - weight_multiplier = data.lambdas[i] * total_train_size / (total_weight + FLT_MIN); - } + if (ec_type == WARM_START) + weight_multiplier = (1-data.lambdas[i]) * total_train_size / (total_weight + FLT_MIN); else - { - if (ec_type == WARM_START) - weight_multiplier = (1-data.lambdas[i]) * total_train_size / ws_train_size; - else - weight_multiplier = data.lambdas[i] * total_train_size / inter_train_size; - } + 
weight_multiplier = data.lambdas[i] * total_train_size / (total_weight + FLT_MIN); + return weight_multiplier; } @@ -350,48 +327,6 @@ void accumu_costs_iv_adf(warm_cb& data, multi_learner& base, example& ec) } } -template -void accumu_costs_wsv_adf(warm_cb& data, multi_learner& base) -{ - uint32_t ws_vali_size = data.ws_vali_size; - //only update cumulative costs at the end of every epoch - if ( data.inter_iter >= 1 && abs( log2(data.inter_iter+1) - floor(log2(data.inter_iter+1)) ) < 1e-4 ) - { - for (uint32_t i = 0; i < data.choices_lambda; i++) - data.cumulative_costs[i] = 0; - - uint32_t num_epochs = ceil(log2(data.inter_period)); - uint32_t epoch = log2(data.inter_iter+1) - 1; - float batch_vali_size = ((float) ws_vali_size) / num_epochs; - uint32_t lb, ub; - - if (data.vali_method == WS_VALI_SPLIT) - { - lb = ceil(batch_vali_size * epoch); - ub = ceil(batch_vali_size * (epoch + 1)); - } - else - { - lb = 0; - ub = ws_vali_size; - } - - for (uint32_t i = 0; i < data.choices_lambda; i++) - { - for (uint32_t j = lb; j < ub; j++) - { - example* ec_vali = data.ws_vali[j]; - uint32_t pred_label = predict_sublearner_adf(data, base, *ec_vali, i); - - if (use_cs) - data.cumulative_costs[i] += loss_cs(data, ec_vali->l.cs.costs, pred_label); - else - data.cumulative_costs[i] += loss(data, ec_vali->l.multi.label, pred_label); - } - } - } -} - template void add_to_vali(warm_cb& data, example& ec) { @@ -413,7 +348,7 @@ uint32_t predict_sup_adf(warm_cb& data, multi_learner& base, example& ec) } template -void learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, int ec_type) +void learn_sup_adf(warm_cb& data, example& ec, int ec_type) { copy_example_to_adf(data, ec); //generate cost-sensitive label (for cost-sensitive learner's temporary use) @@ -459,7 +394,7 @@ void predict_or_learn_sup_adf(warm_cb& data, multi_learner& base, example& ec, i uint32_t action = predict_sup_adf(data, base, ec); if (ind_update(data, ec_type)) - learn_sup_adf(data, base, ec, ec_type); + learn_sup_adf(data, ec, ec_type); ec.pred.multiclass = action; } @@ -525,15 +460,12 @@ void predict_or_learn_bandit_adf(warm_cb& data, multi_learner& base, example& ec else cl.cost = loss(data, ec.l.multi.label, cl.action); - if (ec_type == INTERACTION && data.vali_method == INTER_VALI) + if (ec_type == INTERACTION) accumu_costs_iv_adf(data, base, ec); if (ind_update(data, ec_type)) learn_bandit_adf(data, base, ec, ec_type); - if (ec_type == INTERACTION && (data.vali_method == WS_VALI_SPLIT || data.vali_method == WS_VALI_NOSPLIT)) - accumu_costs_wsv_adf(data, base); - ec.pred.multiclass = cl.action; } @@ -560,22 +492,16 @@ void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec) data.mc_label = ec.l.multi; if (data.ws_iter < data.ws_period) ec.l.multi.label = corrupt_action(data, data.mc_label.label, WARM_START); - else if (data.inter_iter < data.inter_period) - ec.l.multi.label = corrupt_action(data, data.mc_label.label, INTERACTION); } // Warm start phase if (data.ws_iter < data.ws_period) { - if (data.ws_iter < data.ws_train_size) - { if (data.ws_type == SUPERVISED_WS) predict_or_learn_sup_adf(data, base, ec, WARM_START); else if (data.ws_type == BANDIT_WS) predict_or_learn_bandit_adf(data, base, ec, WARM_START); - } - else - add_to_vali(data, ec); + ec.weight = 0; data.ws_iter++; } @@ -589,7 +515,10 @@ void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec) } // Skipping the rest of the examples else - ec.weight = 0; + { + ec.weight = 0; + ec.pred.multiclass = 1; + } // Restore the 
original labels if (use_cs) @@ -597,6 +526,8 @@ void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec) else ec.l.multi = data.mc_label; + cout<(num_actions); - if (data.vali_method == WS_VALI_SPLIT || data.vali_method == WS_VALI_NOSPLIT) - { - data.ws_train_size = ceil(data.ws_period / 2.0); - data.ws_vali_size = data.ws_period - data.ws_train_size; - } - else - { - data.ws_train_size = data.ws_period; - data.ws_vali_size = 0; - } + data.ws_train_size = data.ws_period; + data.ws_vali_size = 0; + data.ws_iter = 0; data.inter_iter = 0; @@ -664,18 +588,14 @@ base_learner* warm_cb_setup(options_i& options, vw& all) .add(make_option("interaction_update", data->upd_inter).help("indicator of interaction updates")) .add(make_option("corrupt_type_warm_start", data->cor_type_ws).default_value(UAR).help("type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)")) .add(make_option("corrupt_prob_warm_start", data->cor_prob_ws).default_value(0.f).help("probability of label corruption in the warm start phase")) - .add(make_option("corrupt_type_interaction", data->cor_type_inter).default_value(UAR).help("type of label corruption in the interaction phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)")) - .add(make_option("corrupt_prob_interaction", data->cor_prob_inter).default_value(0.f).help("probability of label corruption in the interaction phase")) .add(make_option("choices_lambda", data->choices_lambda).default_value(1U).help("the number of candidate lambdas to aggregate (lambda is the importance weight parameter between the two sources)")) .add(make_option("lambda_scheme", data->lambda_scheme).default_value(ABS_CENTRAL).help("The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min lambda=0, max lambda=1, 3: center lambda=epsilon/(#actions+epsilon), 4: center lambda=epsilon/(#actions+epsilon), min lambda=0, max lambda=1); the rest of candidate lambda values are generated using a doubling scheme")) - .add(make_option("weighting_scheme", data->wt_scheme).default_value(INSTANCE_WT).help("weighting scheme (1: per instance weighting, where for every lambda, each contextual bandit example have weight lambda/(1-lambda) times that of each warm start example, 2: per dataset weighting, where for every lambda, the contextual bandit dataset has total weight lambda/(1-lambda) times that of the warm start dataset)")) - .add(make_option("validation_method", data->vali_method).default_value(INTER_VALI).help("lambda selection criterion (1: using contextual bandit examples with progressive validation, 2: using warm start examples, with fresh validation examples at each epoch, 3: using warm start examples, with a single validation set throughout)")) .add(make_option("overwrite_label", data->overwrite_label).default_value(1U).help("the label used by type 3 corruptions (overwriting)")) .add(make_option("sim_bandit", data->sim_bandit).help("simulate contextual bandit updates on warm start examples")); options.add_and_parse(new_options); - if(use_cs && (options.was_supplied("corrupt_type_warm_start") || options.was_supplied("corrupt_prob_warm_start") || options.was_supplied("corrupt_type_interaction") || options.was_supplied("corrupt_prob_interaction") )) + if( use_cs && ( options.was_supplied("corrupt_type_warm_start") || options.was_supplied("corrupt_prob_warm_start") ) ) { THROW("label corruption on cost-sensitive examples not currently supported"); } From 
12b36b99ad73495cae2883ab1ef823dbf5f72869 Mon Sep 17 00:00:00 2001 From: Chicheng Zhang Date: Thu, 7 Feb 2019 22:10:27 -0500 Subject: [PATCH 118/127] revert cost_sensitive.cc --- vowpalwabbit/cost_sensitive.cc | 203 +++++++++++++++++---------------- 1 file changed, 105 insertions(+), 98 deletions(-) diff --git a/vowpalwabbit/cost_sensitive.cc b/vowpalwabbit/cost_sensitive.cc index 80e8e5c4438..2d239e6d612 100644 --- a/vowpalwabbit/cost_sensitive.cc +++ b/vowpalwabbit/cost_sensitive.cc @@ -6,43 +6,42 @@ using namespace std; namespace COST_SENSITIVE { - -void name_value(substring &s, v_array& name, float &v) +void name_value(substring& s, v_array& name, float& v) { tokenize(':', s, name); switch (name.size()) { - case 0: - case 1: - v = 1.; - break; - case 2: - v = float_of_substring(name[1]); - if (nanpattern(v)) - THROW("error NaN value for: " << name[0]); - break; - default: - cerr << "example with a wierd name. What is '"; - cerr.write(s.begin, s.end - s.begin); - cerr << "'?\n"; + case 0: + case 1: + v = 1.; + break; + case 2: + v = float_of_substring(name[1]); + if (nanpattern(v)) + THROW("error NaN value for: " << name[0]); + break; + default: + cerr << "example with a wierd name. What is '"; + cerr.write(s.begin, s.end - s.begin); + cerr << "'?\n"; } } char* bufread_label(label* ld, char* c, io_buf& cache) { - size_t num = *(size_t *)c; + size_t num = *(size_t*)c; ld->costs.clear(); c += sizeof(size_t); - size_t total = sizeof(wclass)*num; - if (buf_read(cache, c, (int)total) < total) + size_t total = sizeof(wclass) * num; + if (cache.buf_read(c, (int)total) < total) { cout << "error in demarshal of cost data" << endl; return c; } - for (size_t i = 0; icosts.push_back(temp); } @@ -52,29 +51,26 @@ char* bufread_label(label* ld, char* c, io_buf& cache) size_t read_cached_label(shared_data*, void* v, io_buf& cache) { - label* ld = (label*) v; + label* ld = (label*)v; ld->costs.clear(); - char *c; + char* c; size_t total = sizeof(size_t); - if (buf_read(cache, c, (int)total) < total) + if (cache.buf_read(c, (int)total) < total) return 0; - bufread_label(ld,c, cache); + bufread_label(ld, c, cache); return total; } -float weight(void*) -{ - return 1.; -} +float weight(void*) { return 1.; } char* bufcache_label(label* ld, char* c) { - *(size_t *)c = ld->costs.size(); + *(size_t*)c = ld->costs.size(); c += sizeof(size_t); - for (unsigned int i = 0; i< ld->costs.size(); i++) + for (unsigned int i = 0; i < ld->costs.size(); i++) { - *(wclass *)c = ld->costs[i]; + *(wclass*)c = ld->costs[i]; c += sizeof(wclass); } return c; @@ -82,36 +78,37 @@ char* bufcache_label(label* ld, char* c) void cache_label(void* v, io_buf& cache) { - char *c; - label* ld = (label*) v; - buf_write(cache, c, sizeof(size_t)+sizeof(wclass)*ld->costs.size()); - bufcache_label(ld,c); + char* c; + label* ld = (label*)v; + cache.buf_write(c, sizeof(size_t) + sizeof(wclass) * ld->costs.size()); + bufcache_label(ld, c); } void default_label(void* v) { - label* ld = (label*) v; + label* ld = (label*)v; ld->costs.clear(); } bool test_label(void* v) { - label* ld = (label*) v; + label* ld = (label*)v; if (ld->costs.size() == 0) return true; - for (unsigned int i=0; icosts.size(); i++) + for (unsigned int i = 0; i < ld->costs.size(); i++) if (FLT_MAX != ld->costs[i].x) return false; return true; } - void delete_label(void* v) +void delete_label(void* v) { label* ld = (label*)v; - if (ld) ld->costs.delete_v(); + if (ld) + ld->costs.delete_v(); } -void copy_label(void*dst, void*src) +void copy_label(void* dst, void* src) { if (dst && 
src) {
@@ -123,13 +120,14 @@ void copy_label(void*dst, void*src)
 bool substring_eq(substring ss, const char* str)
 {
-  size_t len_ss = ss.end - ss.begin;
+  size_t len_ss = ss.end - ss.begin;
   size_t len_str = strlen(str);
-  if (len_ss != len_str) return false;
+  if (len_ss != len_str)
+    return false;
   return (strncmp(ss.begin, str, len_ss) == 0);
 }
 
-void parse_label(parser* p, shared_data*sd, void* v, v_array<substring>& words)
+void parse_label(parser* p, shared_data* sd, void* v, v_array<substring>& words)
 {
   label* ld = (label*)v;
   ld->costs.clear();
@@ -140,29 +138,31 @@ void parse_label(parser* p, shared_data*sd, void* v, v_array<substring>& words)
     float fx;
     name_value(words[0], p->parse_name, fx);
     bool eq_shared = substring_eq(p->parse_name[0], "***shared***");
-    bool eq_label = substring_eq(p->parse_name[0], "***label***");
-    if (! sd->ldict)
+    bool eq_label = substring_eq(p->parse_name[0], "***label***");
+    if (!sd->ldict)
     {
       eq_shared |= substring_eq(p->parse_name[0], "shared");
-      eq_label |= substring_eq(p->parse_name[0], "label");
+      eq_label |= substring_eq(p->parse_name[0], "label");
     }
     if (eq_shared || eq_label)
     {
       if (eq_shared)
       {
-        if (p->parse_name.size() != 1) cerr << "shared feature vectors should not have costs on: " << words[0] << endl;
+        if (p->parse_name.size() != 1)
+          cerr << "shared feature vectors should not have costs on: " << words[0] << endl;
         else
         {
-          wclass f = { -FLT_MAX, 0, 0., 0.};
+          wclass f = {-FLT_MAX, 0, 0., 0.};
           ld->costs.push_back(f);
         }
       }
       if (eq_label)
       {
-        if (p->parse_name.size() != 2) cerr << "label feature vectors should have exactly one cost on: " << words[0] << endl;
+        if (p->parse_name.size() != 2)
+          cerr << "label feature vectors should have exactly one cost on: " << words[0] << endl;
         else
         {
-          wclass f = { float_of_substring(p->parse_name[1]), 0, 0., 0.};
+          wclass f = {float_of_substring(p->parse_name[1]), 0, 0., 0.};
           ld->costs.push_back(f);
         }
       }
@@ -173,7 +173,7 @@ void parse_label(parser* p, shared_data*sd, void* v, v_array<substring>& words)
   // otherwise this is a "real" example
   for (unsigned int i = 0; i < words.size(); i++)
   {
-    wclass f = {0.,0,0.,0.};
+    wclass f = {0., 0, 0., 0.};
     name_value(words[i], p->parse_name, f.x);
 
     if (p->parse_name.size() == 0)
@@ -181,7 +181,8 @@
 
     if (p->parse_name.size() == 1 || p->parse_name.size() == 2 || p->parse_name.size() == 3)
     {
-      f.class_index = sd->ldict ? (uint32_t)sd->ldict->get(p->parse_name[0]) : (uint32_t)hashstring(p->parse_name[0], 0);
+      f.class_index =
+          sd->ldict ? (uint32_t)sd->ldict->get(p->parse_name[0]) : (uint32_t)hashstring(p->parse_name[0], 0);
       if (p->parse_name.size() == 1 && f.x >= 0)  // test examples are specified just by un-valued class #s
         f.x = FLT_MAX;
     }
@@ -192,13 +193,8 @@
   }
 }
 
-label_parser cs_label = {default_label, parse_label,
-                         cache_label, read_cached_label,
-                         delete_label, weight,
-                         copy_label,
-                         test_label,
-                         sizeof(label)
-                        };
+label_parser cs_label = {default_label, parse_label, cache_label, read_cached_label, delete_label, weight, copy_label,
+    test_label, sizeof(label)};
 
 void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool action_scores, uint32_t prediction)
 {
@@ -217,10 +213,10 @@ void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool act
       const example& first_ex = *(*ec_seq)[0];
 
       v_array<wclass> costs = first_ex.l.cs.costs;
-      if (costs.size() == 1 && costs[0].class_index == 0 && costs[0].x < 0) ecc++;
+      if (costs.size() == 1 && costs[0].class_index == 0 && costs[0].x < 0)
+        ecc++;
 
-      for (; ecc!=&(*ec_seq->cend()); ecc++)
-        num_current_features += (*ecc)->num_features;
+      for (; ecc != &(*ec_seq->cend()); ecc++) num_current_features += (*ecc)->num_features;
     }
 
     std::string label_buf;
@@ -236,17 +232,22 @@ void print_update(vw& all, bool is_test, example& ec, multi_ex* ec_seq, bool act
       pred_buf << std::setw(all.sd->col_current_predict) << std::right << std::setfill(' ');
       if (all.sd->ldict)
       {
-        if (action_scores) pred_buf << all.sd->ldict->get(ec.pred.a_s[0].action);
-        else pred_buf << all.sd->ldict->get(prediction);
+        if (action_scores)
+          pred_buf << all.sd->ldict->get(ec.pred.a_s[0].action);
+        else
+          pred_buf << all.sd->ldict->get(prediction);
       }
-      else pred_buf << ec.pred.a_s[0].action;
-      if (action_scores) pred_buf <<".....";
-      all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, pred_buf.str(),
-                           num_current_features, all.progress_add, all.progress_arg);;
+      else
+        pred_buf << ec.pred.a_s[0].action;
+      if (action_scores)
+        pred_buf << ".....";
+      all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, pred_buf.str(), num_current_features,
+          all.progress_add, all.progress_arg);
+      ;
     }
     else
-      all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, prediction,
-                           num_current_features, all.progress_add, all.progress_arg);
+      all.sd->print_update(all.holdout_set_off, all.current_pass, label_buf, prediction, num_current_features,
+          all.progress_add, all.progress_arg);
   }
 }
 
@@ -256,31 +257,31 @@
   float loss = 0.;
 
   if (!test_label(&ld))
-    {
-      //need to compute exact loss
-      size_t pred = (size_t)ec.pred.multiclass;
-
-      float chosen_loss = FLT_MAX;
-      float min = FLT_MAX;
-      for (auto& cl : ld.costs)
-      {
-        if (cl.class_index == pred)
-          chosen_loss = cl.x;
-        if (cl.x < min)
-          min = cl.x;
-      }
-      if (chosen_loss == FLT_MAX)
-        cerr << "warning: csoaa predicted an invalid class. Are all multi-class labels in the {1..k} range?" << endl;
+  {
+    // need to compute exact loss
+    size_t pred = (size_t)ec.pred.multiclass;
 
-      loss = chosen_loss - min;
-      // TODO(alberto): add option somewhere to allow using absolute loss instead?
-      // loss = chosen_loss;
+    float chosen_loss = FLT_MAX;
+    float min = FLT_MAX;
+    for (auto& cl : ld.costs)
+    {
+      if (cl.class_index == pred)
+        chosen_loss = cl.x;
+      if (cl.x < min)
+        min = cl.x;
     }
+    if (chosen_loss == FLT_MAX)
+      cerr << "warning: csoaa predicted an invalid class. Are all multi-class labels in the {1..k} range?" << endl;
+
+    loss = chosen_loss - min;
+    // TODO(alberto): add option somewhere to allow using absolute loss instead?
+    // loss = chosen_loss;
+  }
 
   all.sd->update(ec.test_only, !test_label(&ld), loss, 1.f, ec.num_features);
 
   for (int sink : all.final_prediction_sink)
-    if (! all.sd->ldict)
+    if (!all.sd->ldict)
       all.print(sink, (float)ec.pred.multiclass, 0, ec.tag);
     else
     {
@@ -294,7 +295,8 @@
       for (unsigned int i = 0; i < ld.costs.size(); i++)
       {
         wclass cl = ld.costs[i];
-        if (i > 0) outputStringStream << ' ';
+        if (i > 0)
+          outputStringStream << ' ';
         outputStringStream << cl.class_index << ':' << cl.partial_prediction;
       }
       all.print_text(all.raw_prediction, outputStringStream.str(), ec.tag);
@@ -312,18 +314,23 @@ void finish_example(vw& all, example& ec)
 bool example_is_test(example& ec)
 {
   v_array<wclass> costs = ec.l.cs.costs;
-  if (costs.size() == 0) return true;
-  for (size_t j=0; j<costs.size(); j++)
-    if (costs[j].x != FLT_MAX) return false;
+  if (costs.size() == 0)
+    return true;
+  for (size_t j = 0; j < costs.size(); j++)
+    if (costs[j].x != FLT_MAX)
+      return false;
   return true;
 }
 
 bool ec_is_example_header(example& ec)
 {
   v_array<wclass> costs = ec.l.cs.costs;
-  if (costs.size() != 1) return false;
-  if (costs[0].class_index != 0) return false;
-  if (costs[0].x != -FLT_MAX) return false;
+  if (costs.size() != 1)
+    return false;
+  if (costs[0].class_index != 0)
+    return false;
+  if (costs[0].x != -FLT_MAX)
+    return false;
   return true;
 }
-}
+}  // namespace COST_SENSITIVE

From 1c0400be938a541f9aa18867e5efa48b37a63a02 Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Thu, 7 Feb 2019 22:35:01 -0500
Subject: [PATCH 119/127] fixed the weighting issue in cs examples
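warm_cb feeds warm-start supervised examples to the cost-sensitive learner
with non-unit importance weights, but csoaa's progressive validation ignored
ec.weight: every example entered the running average with weight 1. The diff
below scales the reported regret by ec.weight and passes ec.weight (rather
than 1.f) to shared_data::update. A minimal sketch of the intended
accounting, with illustrative names rather than VW's:

    // average printed loss = sum_loss / weighted_examples
    struct loss_totals { double sum_loss = 0.; double weighted_examples = 0.; };
    void record(loss_totals& t, float regret, float weight)
    {
      t.sum_loss += regret * weight;   // per-example loss scaled by its weight
      t.weighted_examples += weight;   // denominator grows by weight, not 1.f
    }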
---
 vowpalwabbit/cost_sensitive.cc | 4 ++--
 vowpalwabbit/warm_cb.cc        | 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/vowpalwabbit/cost_sensitive.cc b/vowpalwabbit/cost_sensitive.cc
index 2d239e6d612..9021308cc46 100644
--- a/vowpalwabbit/cost_sensitive.cc
+++ b/vowpalwabbit/cost_sensitive.cc
@@ -273,12 +273,12 @@ void output_example(vw& all, example& ec)
     if (chosen_loss == FLT_MAX)
       cerr << "warning: csoaa predicted an invalid class. Are all multi-class labels in the {1..k} range?" << endl;
 
-    loss = chosen_loss - min;
+    loss = (chosen_loss - min) * ec.weight;
     // TODO(alberto): add option somewhere to allow using absolute loss instead?
     // loss = chosen_loss;
   }
 
-  all.sd->update(ec.test_only, !test_label(&ld), loss, 1.f, ec.num_features);
+  all.sd->update(ec.test_only, !test_label(&ld), loss, ec.weight, ec.num_features);
 
   for (int sink : all.final_prediction_sink)
     if (!all.sd->ldict)
diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc
index a796708dcbf..2a61a1a935d 100644
--- a/vowpalwabbit/warm_cb.cc
+++ b/vowpalwabbit/warm_cb.cc
@@ -526,8 +526,6 @@ void predict_or_learn_adf(warm_cb& data, multi_learner& base, example& ec)
   else
     ec.l.multi = data.mc_label;
 
-  cout<

From: Chicheng Zhang
Date: Thu, 7 Feb 2019 23:05:23 -0500
Subject: [PATCH 120/127] .
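Remove the autotools build definition vowpalwabbit/Makefile.am (deleted
below); it was the one place the new warm_cb.cc source still had to be
listed by hand, and the Visual Studio project file is updated to carry the
warm_cb sources in the next commit.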
---
 vowpalwabbit/Makefile.am | 61 ----------------------------------------
 1 file changed, 61 deletions(-)
 delete mode 100644 vowpalwabbit/Makefile.am

diff --git a/vowpalwabbit/Makefile.am b/vowpalwabbit/Makefile.am
deleted file mode 100644
index cfab1395555..00000000000
--- a/vowpalwabbit/Makefile.am
+++ /dev/null
@@ -1,61 +0,0 @@
-lib_LTLIBRARIES = liballreduce.la libvw.la libvw_c_wrapper.la
-
-liballreduce_la_SOURCES = allreduce_sockets.cc allreduce_threads.cc vw_exception.cc
-
-bin_PROGRAMS = vw active_interactor
-
-libvw_la_SOURCES = parser_helper.cc global_data.cc io_buf.cc parse_regressor.cc parse_primitives.cc unique_sort.cc cache.cc rand48.cc no_label.cc simple_label.cc multiclass.cc oaa.cc multilabel_oaa.cc boosting.cc ect.cc marginal.cc autolink.cc binary.cc lrq.cc cost_sensitive.cc multilabel.cc label_dictionary.cc csoaa.cc cb.cc cb_adf.cc cb_algs.cc search.cc search_meta.cc search_sequencetask.cc search_dep_parser.cc search_hooktask.cc search_multiclasstask.cc search_entityrelationtask.cc search_graph.cc parse_example.cc scorer.cc network.cc parse_args.cc accumulate.cc gd.cc learner.cc mwt.cc lda_core.cc gd_mf.cc mf.cc bfgs.cc noop.cc print.cc example.cc parser.cc loss_functions.cc sender.cc nn.cc confidence.cc bs.cc cbify.cc explore_eval.cc topk.cc stagewise_poly.cc log_multi.cc recall_tree.cc active.cc active_cover.cc cs_active.cc kernel_svm.cc best_constant.cc ftrl.cc svrg.cc lrqfa.cc interact.cc comp_io.cc interactions.cc vw_exception.cc vw_validate.cc audit_regressor.cc gen_cs_example.cc cb_explore.cc action_score.cc cb_explore_adf.cc OjaNewton.cc parse_example_json.cc baseline.cc classweight.cc warm_cb.cc
-
-libvw_c_wrapper_la_SOURCES = vwdll.cpp
-
-# accumulate.cc uses all_reduce
-libvw_la_LIBADD = liballreduce.la
-libvw_c_wrapper_la_LIBADD = libvw.la
-
-ACLOCAL_AMFLAGS = -I acinclude.d
-
-AM_CXXFLAGS = ${BOOST_CPPFLAGS} ${ZLIB_CPPFLAGS} ${PTHREAD_CFLAGS} -Wall -Wno-unused-local-typedefs
-LIBS = ${BOOST_LDFLAGS} ${BOOST_PROGRAM_OPTIONS_LIB} ${ZLIB_LDFLAGS} ${PTHREAD_LIBS}
-
-CXXOPTIMIZE =
-
-if PROFILE
-CXXOPTIMIZE += -pg
-endif
-
-if VWBUG
-CXXOPTIMIZE += -g -O1
-else
-CXXOPTIMIZE += -O3 -fomit-frame-pointer -DNDEBUG
-endif
-
-if NITPICK
-AM_CXXFLAGS += -Wextra -Wundef -Wshadow -Wunsafe-loop-optimizations -Wconversion -Wmissing-format-attribute
-AM_CXXFLAGS += -Wredundant-decls -ansi -Wmissing-noreturn
-endif
-
-if PARALLELIZE
-AM_CXXFLAGS += -Wno-strict-aliasing -fopenmp
-endif
-
-if FREEBSD
-AM_CXXFLAGS += -l compat
-else
-AM_CXXFLAGS += -pedantic
-endif
-
-if CLANG_LIBCXX
-AM_CXXFLAGS += -stdlib=libc++
-endif
-
-AM_CXXFLAGS += -I ../rapidjson/include -I ../explore
-
-AM_CXXFLAGS += $(CXXOPTIMIZE)
-
-vw_SOURCES = main.cc
-vw_CXXFLAGS = $(AM_CXXFLAGS)
-
-vw_LDADD = libvw.la liballreduce.la
-vw_DEPENDENCIES = libvw.la liballreduce.la
-
-active_interactor_SOURCES = active_interactor.cc

From 63d8c40b61961a6dc760596eb389e345eb375f29 Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Thu, 7 Feb 2019 23:26:11 -0500
Subject: [PATCH 121/127] edited vw_core.vcxproj

---
 vowpalwabbit/vw_core.vcxproj | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vowpalwabbit/vw_core.vcxproj b/vowpalwabbit/vw_core.vcxproj
index 7e7515c689e..a68eb8b43a0 100644
--- a/vowpalwabbit/vw_core.vcxproj
+++ b/vowpalwabbit/vw_core.vcxproj
@@ -171,6 +171,7 @@
+    <ClInclude Include="warm_cb.h" />
@@ -269,6 +270,7 @@
+    <ClCompile Include="warm_cb.cc" />
@@ -372,4 +374,4 @@
-</Project>
\ No newline at end of file
+</Project>

From 99d642b8ceb7f99d00e0d921089670a3e401fd7d Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Fri, 8 Feb 2019 10:59:35 -0500
Subject: [PATCH 122/127] added new warm cb test cases
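Rename the warm-start test references from cbify_ws* to warm_cb* and bring
the invocations in line with the current flags: --warm_start_update and
--interaction_update are now opt-in booleans passed explicitly, Sim-Bandit
warm start is requested with --sim_bandit instead of --warm_start_type 2,
and a new test feeds cost-sensitive warm-start examples via --warm_cb_cs.
The reference stderr files gain three trailer lines reporting the average
variance estimate, the theoretical average variance, and the last lambda
chosen from the candidate grid.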
---
 test/RunTests                                 | 40 ++++++++-----------
 test/train-sets/ref/cbify_ws_maj.stderr       | 19 ---------
 test/train-sets/ref/cbify_ws_uar.stderr       | 19 ---------
 test/train-sets/ref/cbify_ws_wsgt.stderr      | 19 ---------
 ...fy_ws_no_int_upd.stderr => warm_cb.stderr} |  3 ++
 test/train-sets/ref/warm_cb_cs.stderr         | 21 ++++++++++
 ...cbify_ws_cyc.stderr => warm_cb_cyc.stderr} |  3 ++
 ...e.stderr => warm_cb_lambda_zeroone.stderr} |  3 ++
 ...fy_ws.stderr => warm_cb_no_int_upd.stderr} |  3 ++
 ...ws_upd.stderr => warm_cb_no_ws_upd.stderr} |  3 ++
 ...bandit.stderr => warm_cb_simbandit.stderr} |  3 ++
 11 files changed, 55 insertions(+), 81 deletions(-)
 delete mode 100644 test/train-sets/ref/cbify_ws_maj.stderr
 delete mode 100644 test/train-sets/ref/cbify_ws_uar.stderr
 delete mode 100644 test/train-sets/ref/cbify_ws_wsgt.stderr
 rename test/train-sets/ref/{cbify_ws_no_int_upd.stderr => warm_cb.stderr} (80%)
 create mode 100644 test/train-sets/ref/warm_cb_cs.stderr
 rename test/train-sets/ref/{cbify_ws_cyc.stderr => warm_cb_cyc.stderr} (80%)
 rename test/train-sets/ref/{cbify_ws_lambda_zeroone.stderr => warm_cb_lambda_zeroone.stderr} (80%)
 rename test/train-sets/ref/{cbify_ws.stderr => warm_cb_no_int_upd.stderr} (81%)
 rename test/train-sets/ref/{cbify_ws_no_ws_upd.stderr => warm_cb_no_ws_upd.stderr} (80%)
 rename test/train-sets/ref/{cbify_ws_simbandit.stderr => warm_cb_simbandit.stderr} (80%)

diff --git a/test/RunTests b/test/RunTests
index 1679bedccbb..41cec2685a5 100755
--- a/test/RunTests
+++ b/test/RunTests
@@ -1638,37 +1638,29 @@ echo "1 | feature:1" | {VW} -a --initial_weight 0.1 --initial_t 0.3
     train-sets/ref/no_shared_features.stderr
 
 # Test 176 warm_cb warm start
-{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 -d train-sets/multiclass
-    train-sets/ref/cbify_ws.stderr
+{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_update --interaction_update -d train-sets/multiclass
+    train-sets/ref/warm_cb.stderr
 
 # Test 177 warm_cb warm start with lambda set containing 0/1
-{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --lambda_scheme 2 -d train-sets/multiclass
-    train-sets/ref/cbify_ws_lambda_zeroone.stderr
+{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --lambda_scheme 2 --warm_start_update --interaction_update -d train-sets/multiclass
+    train-sets/ref/warm_cb_lambda_zeroone.stderr
 
 # Test 178 warm_cb warm start with warm start update turned off
-{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_update false -d train-sets/multiclass
-    train-sets/ref/cbify_ws_no_ws_upd.stderr
+{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --interaction_update -d train-sets/multiclass
+    train-sets/ref/warm_cb_no_ws_upd.stderr
 
 # Test 179 warm_cb warm start with interaction update turned off
-{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --interaction_update false -d train-sets/multiclass
-    train-sets/ref/cbify_ws_no_int_upd.stderr
+{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.0 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_update -d train-sets/multiclass
+    train-sets/ref/warm_cb_no_int_upd.stderr
 
 # Test 180 warm_cb warm start with bandit warm start type (Sim-Bandit)
-{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_type 2 -d train-sets/multiclass
-    train-sets/ref/cbify_ws_simbandit.stderr
+{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 1 --warm_start_update --interaction_update --sim_bandit -d train-sets/multiclass
+    train-sets/ref/warm_cb_simbandit.stderr
 
-# Test 181 warm_cb warm start with UAR supervised corruption
-{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 1 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass
-    train-sets/ref/cbify_ws_uar.stderr
+# Test 181 warm_cb warm start with CYC supervised corruption
+{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --warm_start_update --interaction_update --corrupt_type_warm_start 2 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass
+    train-sets/ref/warm_cb_cyc.stderr
 
-# Test 182 warm_cb warm start with CYC supervised corruption
-{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 2 --corrupt_prob_warm_start 0.5 -d train-sets/multiclass
-    train-sets/ref/cbify_ws_cyc.stderr
-
-# Test 183 warm_cb warm start with MAJ supervised corruption
-{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_warm_start 3 --corrupt_prob_warm_start 0.5 --overwrite_label 1 -d train-sets/multiclass
-    train-sets/ref/cbify_ws_maj.stderr
-
-# Test 184 warm_cb warm start with warm start distribution being the ground truth
-{VW} --warm_cb 10 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 3 --interaction 7 --choices_lambda 8 --corrupt_type_interaction 2 --corrupt_prob_interaction 0.5 --weighting_scheme 2 --validation_method 2 -d train-sets/multiclass
-    train-sets/ref/cbify_ws_wsgt.stderr
+# Test 182 warm_cb warm start with input cost-sensitive examples
+{VW} --warm_cb 3 --cb_explore_adf --cb_type mtr --epsilon 0.05 --warm_start 1 --interaction 2 --choices_lambda 8 --warm_start_update --interaction_update --warm_cb_cs -d train-sets/cs_cb
+    train-sets/ref/warm_cb_cs.stderr
diff --git a/test/train-sets/ref/cbify_ws_maj.stderr b/test/train-sets/ref/cbify_ws_maj.stderr
deleted file mode 100644
index 2a12135dfa0..00000000000
--- a/test/train-sets/ref/cbify_ws_maj.stderr
+++ /dev/null
@@ -1,19 +0,0 @@
-Num weight bits = 18
-learning rate = 0.5
-initial_t = 0
-power_t = 0.5
-using no cache
-Reading datafile = train-sets/multiclass
-num sources = 1
-average since example example current current current
-loss last counter weight label predict features
-1.000000 1.000000 4 1.0 4 1 2
-1.000000 1.000000 5 2.0 5 1 2
-1.000000 1.000000 7 4.0 7 1 2
-
-finished run
-number of examples = 10
-weighted example sum = 7.000000
-weighted label sum = 0.000000
-average loss = 1.000000
-total feature number = 20
diff --git a/test/train-sets/ref/cbify_ws_uar.stderr b/test/train-sets/ref/cbify_ws_uar.stderr
deleted file mode 100644
index 6d05ba5a0db..00000000000
--- a/test/train-sets/ref/cbify_ws_uar.stderr
+++ /dev/null
@@ -1,19 +0,0 @@
-Num weight bits = 18
-learning rate = 0.5
-initial_t = 0
-power_t = 0.5
-using no cache
-Reading datafile = train-sets/multiclass
-num sources = 1
-average since example example current current current
-loss last counter weight label predict features
-1.000000 1.000000 4 1.0 4 3 2
-1.000000 1.000000 5 2.0 5 3 2
-1.000000 1.000000 7 4.0 7 3 2
-
-finished run
-number of examples = 10
-weighted example sum = 7.000000
-weighted label sum = 0.000000
-average loss = 1.000000
-total feature number = 20
diff --git a/test/train-sets/ref/cbify_ws_wsgt.stderr b/test/train-sets/ref/cbify_ws_wsgt.stderr
deleted file mode 100644
index d05436ac3a2..00000000000
--- a/test/train-sets/ref/cbify_ws_wsgt.stderr
+++ /dev/null
@@ -1,19 +0,0 @@
-Num weight bits = 18
-learning rate = 0.5
-initial_t = 0
-power_t = 0.5
-using no cache
-Reading datafile = train-sets/multiclass
-num sources = 1
-average since example example current current current
-loss last counter weight label predict features
-1.000000 1.000000 4 1.0 4 2 2
-1.000000 1.000000 5 2.0 5 2 2
-1.000000 1.000000 7 4.0 7 2 2
-
-finished run
-number of examples = 10
-weighted example sum = 7.000000
-weighted label sum = 0.000000
-average loss = 1.000000
-total feature number = 20
diff --git a/test/train-sets/ref/cbify_ws_no_int_upd.stderr b/test/train-sets/ref/warm_cb.stderr
similarity index 80%
rename from test/train-sets/ref/cbify_ws_no_int_upd.stderr
rename to test/train-sets/ref/warm_cb.stderr
index 6d05ba5a0db..542eedca77d 100644
--- a/test/train-sets/ref/cbify_ws_no_int_upd.stderr
+++ b/test/train-sets/ref/warm_cb.stderr
@@ -17,3 +17,6 @@ weighted example sum = 7.000000
 weighted label sum = 0.000000
 average loss = 1.000000
 total feature number = 20
+average variance estimate = 171.578140
+theoretical average variance = 200.000000
+last lambda chosen = 0.031250 among lambdas ranging from 0.031250 to 0.937500
diff --git a/test/train-sets/ref/warm_cb_cs.stderr b/test/train-sets/ref/warm_cb_cs.stderr
new file mode 100644
index 00000000000..0fa13e7b3e3
--- /dev/null
+++ b/test/train-sets/ref/warm_cb_cs.stderr
@@ -0,0 +1,21 @@
+Num weight bits = 18
+learning rate = 0.5
+initial_t = 0
+power_t = 0.5
+using no cache
+Reading datafile = train-sets/cs_cb
+num sources = 1
+average since example example current current current
+loss last counter weight label predict features
+0.000000 0.000000 2 1.0 known 2 4
+0.000000 0.000000 3 2.0 known 2 4
+
+finished run
+number of examples = 3
+weighted example sum = 2.000000
+weighted label sum = 0.000000
+average loss = 0.000000
+total feature number = 12
+average variance estimate = 1.034483
+theoretical average variance = 60.000000
+last lambda chosen = 0.031250 among lambdas ranging from 0.031250 to 0.937500
diff --git a/test/train-sets/ref/cbify_ws_cyc.stderr b/test/train-sets/ref/warm_cb_cyc.stderr
similarity index 80%
rename from test/train-sets/ref/cbify_ws_cyc.stderr
rename to test/train-sets/ref/warm_cb_cyc.stderr
index a1affe4ec96..0f2fa85641b 100644
--- a/test/train-sets/ref/cbify_ws_cyc.stderr
+++ b/test/train-sets/ref/warm_cb_cyc.stderr
@@ -17,3 +17,6 @@ weighted example sum = 7.000000
 weighted label sum = 0.000000
 average loss = 0.857143
 total feature number = 20
+average variance estimate = 143.156311
+theoretical average variance = 200.000000
+last lambda chosen = 0.937500 among lambdas ranging from 0.031250 to 0.937500
diff --git a/test/train-sets/ref/cbify_ws_lambda_zeroone.stderr b/test/train-sets/ref/warm_cb_lambda_zeroone.stderr
similarity index 80%
rename from test/train-sets/ref/cbify_ws_lambda_zeroone.stderr
rename to test/train-sets/ref/warm_cb_lambda_zeroone.stderr
index 344c43a5335..0b01cc9af71 100644
--- a/test/train-sets/ref/cbify_ws_lambda_zeroone.stderr
+++ b/test/train-sets/ref/warm_cb_lambda_zeroone.stderr
@@ -17,3 +17,6 @@ weighted example sum = 7.000000
 weighted label sum = 0.000000
 average loss = 0.857143
 total feature number = 20
+average variance estimate = 60.903835
+theoretical average variance = 200.000000
+last lambda chosen = 0.000000 among lambdas ranging from 0.000000 to 1.000000
diff --git a/test/train-sets/ref/cbify_ws.stderr b/test/train-sets/ref/warm_cb_no_int_upd.stderr
similarity index 81%
rename from test/train-sets/ref/cbify_ws.stderr
rename to test/train-sets/ref/warm_cb_no_int_upd.stderr
index 6d05ba5a0db..2eb6f8c199b 100644
--- a/test/train-sets/ref/cbify_ws.stderr
+++ b/test/train-sets/ref/warm_cb_no_int_upd.stderr
@@ -17,3 +17,6 @@ weighted example sum = 7.000000
 weighted label sum = 0.000000
 average loss = 1.000000
 total feature number = 20
+average variance estimate = 1.000000
+theoretical average variance = inf
+last lambda chosen = 0.000000 among lambdas ranging from 0.000000 to 0.000000
diff --git a/test/train-sets/ref/cbify_ws_no_ws_upd.stderr b/test/train-sets/ref/warm_cb_no_ws_upd.stderr
similarity index 80%
rename from test/train-sets/ref/cbify_ws_no_ws_upd.stderr
rename to test/train-sets/ref/warm_cb_no_ws_upd.stderr
index 4b334d4e73b..9b172856ee1 100644
--- a/test/train-sets/ref/cbify_ws_no_ws_upd.stderr
+++ b/test/train-sets/ref/warm_cb_no_ws_upd.stderr
@@ -17,3 +17,6 @@ weighted example sum = 7.000000
 weighted label sum = 0.000000
 average loss = 0.714286
 total feature number = 20
+average variance estimate = 7.512840
+theoretical average variance = 200.000000
+last lambda chosen = 1.000000 among lambdas ranging from 1.000000 to 1.000000
diff --git a/test/train-sets/ref/cbify_ws_simbandit.stderr b/test/train-sets/ref/warm_cb_simbandit.stderr
similarity index 80%
rename from test/train-sets/ref/cbify_ws_simbandit.stderr
rename to test/train-sets/ref/warm_cb_simbandit.stderr
index 6d935a38a61..84e75bd328c 100644
--- a/test/train-sets/ref/cbify_ws_simbandit.stderr
+++ b/test/train-sets/ref/warm_cb_simbandit.stderr
@@ -17,3 +17,6 @@ weighted example sum = 7.000000
 weighted label sum = 0.000000
 average loss = 0.857143
 total feature number = 20
+average variance estimate = 4.685901
+theoretical average variance = 200.000000
+last lambda chosen = 0.500000 among lambdas ranging from 0.500000 to 0.500000

From 3ad0f7bdda05703368aecd9288461700e3e849a3 Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Sun, 24 Feb 2019 23:08:09 -0500
Subject: [PATCH 123/127] overwrote regcb test results, as we further divide
 importance weights of each example in the mtr reduction by 1/num_actions
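With --cb_type mtr, only the example for the logged action is trained on,
carrying an importance weight that undoes the logging probability and
applies the multiple-try correction. At this point in the series the weight
is additionally divided by the number of actions K, which shifts the
regcb/regcbopt predictions enough to change the recorded outputs. Roughly,
as a sketch (see learn_MTR in cb_adf.cc; p is the probability of the logged
action):

    // mtr importance weight as of this commit; the 1/K factor is the new part
    float mtr_weight(float p, float event_sum, float action_sum, float K)
    {
      return (1.f / p) * (event_sum / action_sum) * (1.f / K);
    }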
---
 test/train-sets/ref/cbify_regcb.stderr    |  8 ++++----
 test/train-sets/ref/cbify_regcbopt.stderr | 10 +++++-----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/test/train-sets/ref/cbify_regcb.stderr b/test/train-sets/ref/cbify_regcb.stderr
index dd5883333e1..42d1733cb12 100644
--- a/test/train-sets/ref/cbify_regcb.stderr
+++ b/test/train-sets/ref/cbify_regcb.stderr
@@ -3,18 +3,18 @@ learning rate = 0.5
 initial_t = 0
 power_t = 0.5
 using no cache
-Reading datafile = train-sets/multiclass
+Reading datafile = ../../test/train-sets/multiclass
 num sources = 1
 average since example example current current current
 loss last counter weight label predict features
 1.000000 1.000000 1 1.0 1 5 2
 1.000000 1.000000 2 2.0 2 10 2
-0.750000 0.500000 4 4.0 4 7 2
-0.750000 0.750000 8 8.0 8 4 2
+0.750000 0.500000 4 4.0 4 8 2
+0.875000 1.000000 8 8.0 8 4 2
 
 finished run
 number of examples = 10
 weighted example sum = 10.000000
 weighted label sum = 0.000000
-average loss = 0.800000
+average loss = 0.900000
 total feature number = 20
diff --git a/test/train-sets/ref/cbify_regcbopt.stderr b/test/train-sets/ref/cbify_regcbopt.stderr
index 8bb64392725..1a4a367eb37 100644
--- a/test/train-sets/ref/cbify_regcbopt.stderr
+++ b/test/train-sets/ref/cbify_regcbopt.stderr
@@ -3,18 +3,18 @@ learning rate = 0.5
 initial_t = 0
 power_t = 0.5
 using no cache
-Reading datafile = train-sets/multiclass
+Reading datafile = ../../test/train-sets/multiclass
 num sources = 1
 average since example example current current current
 loss last counter weight label predict features
 1.000000 1.000000 1 1.0 1 5 2
-1.000000 1.000000 2 2.0 2 9 2
-1.000000 1.000000 4 4.0 4 7 2
-0.875000 0.750000 8 8.0 8 4 2
+1.000000 1.000000 2 2.0 2 10 2
+0.750000 0.500000 4 4.0 4 7 2
+0.750000 0.750000 8 8.0 8 4 2
 
 finished run
 number of examples = 10
 weighted example sum = 10.000000
 weighted label sum = 0.000000
-average loss = 0.900000
+average loss = 0.800000
 total feature number = 20

From 2fa610e42f86de24c337ed816f833839696c2a8d Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Sun, 24 Feb 2019 23:25:09 -0500
Subject: [PATCH 124/127] corrected a mistake in new regcb test result

---
 test/train-sets/ref/cbify_regcb.stderr    | 2 +-
 test/train-sets/ref/cbify_regcbopt.stderr | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/train-sets/ref/cbify_regcb.stderr b/test/train-sets/ref/cbify_regcb.stderr
index 42d1733cb12..1b713f8a1e7 100644
--- a/test/train-sets/ref/cbify_regcb.stderr
+++ b/test/train-sets/ref/cbify_regcb.stderr
@@ -3,7 +3,7 @@ learning rate = 0.5
 initial_t = 0
 power_t = 0.5
 using no cache
-Reading datafile = ../../test/train-sets/multiclass
+Reading datafile = train-sets/multiclass
 num sources = 1
 average since example example current current current
 loss last counter weight label predict features
diff --git a/test/train-sets/ref/cbify_regcbopt.stderr b/test/train-sets/ref/cbify_regcbopt.stderr
index 1a4a367eb37..dd5883333e1 100644
--- a/test/train-sets/ref/cbify_regcbopt.stderr
+++ b/test/train-sets/ref/cbify_regcbopt.stderr
@@ -3,7 +3,7 @@ learning rate = 0.5
 initial_t = 0
 power_t = 0.5
 using no cache
-Reading datafile = ../../test/train-sets/multiclass
+Reading datafile = train-sets/multiclass
 num sources = 1
 average since example example current current current
 loss last counter weight label predict features

From 5e923d808c45fdf3898ed036beb645dbb80d8712 Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Wed, 20 Mar 2019 16:17:50 -0400
Subject: [PATCH 125/127] reorder reduction stack
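warm_cb invokes a cb_explore / cb_explore_adf learner as its base, so its
setup entry has to be pushed after cb_explore_setup and cb_explore_adf_setup
when the reduction stack is assembled; previously it was pushed between mwt
and cb_explore, which left it on the wrong side of the exploration
reductions it stacks on top of.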
---
 vowpalwabbit/parse_args.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vowpalwabbit/parse_args.cc b/vowpalwabbit/parse_args.cc
index 1e31cec9971..63a5a2d9fdc 100644
--- a/vowpalwabbit/parse_args.cc
+++ b/vowpalwabbit/parse_args.cc
@@ -1270,9 +1270,9 @@ void parse_reductions(options_i& options, vw& all)
   all.reduction_stack.push(cb_algs_setup);
   all.reduction_stack.push(cb_adf_setup);
   all.reduction_stack.push(mwt_setup);
-  all.reduction_stack.push(warm_cb_setup);
   all.reduction_stack.push(cb_explore_setup);
   all.reduction_stack.push(cb_explore_adf_setup);
+  all.reduction_stack.push(warm_cb_setup);
   all.reduction_stack.push(cbify_setup);
   all.reduction_stack.push(cbifyldf_setup);
   all.reduction_stack.push(explore_eval_setup);

From c71d3e3d197df1ff58cba5141818a6e237c332e5 Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Wed, 27 Mar 2019 16:16:56 -0400
Subject: [PATCH 126/127] changed the weight scaling back without 1/K; changed the central value of lambda
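Earlier in this series learn_MTR gained an extra 1/K factor on each bandit
example; this commit removes it, so bandit examples weigh K times more than
they just did. The minimax center of the candidate lambda grid moves
accordingly. One way to read the change: lambda is the weight on warm-start
examples and (1 - lambda) the weight on interaction examples, so in terms of
their ratio

    old center:  lambda / (1 - lambda) = eps / K  =>  lambda = eps / (K + eps)
    new center:  lambda / (1 - lambda) = eps      =>  lambda = eps / (1 + eps)

The in-code comment on minimax_lambda and the --lambda_scheme help text are
updated to match, and the warm_cb reference outputs pick up the new chosen
lambdas.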
---
 test/train-sets/ref/warm_cb.stderr     | 2 +-
 test/train-sets/ref/warm_cb_cyc.stderr | 2 +-
 vowpalwabbit/cb_adf.cc                 | 3 ++-
 vowpalwabbit/warm_cb.cc                | 7 +++++--
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/test/train-sets/ref/warm_cb.stderr b/test/train-sets/ref/warm_cb.stderr
index 542eedca77d..1ed30fba719 100644
--- a/test/train-sets/ref/warm_cb.stderr
+++ b/test/train-sets/ref/warm_cb.stderr
@@ -19,4 +19,4 @@ average loss = 1.000000
 total feature number = 20
 average variance estimate = 171.578140
 theoretical average variance = 200.000000
-last lambda chosen = 0.031250 among lambdas ranging from 0.031250 to 0.937500
+last lambda chosen = 0.937500 among lambdas ranging from 0.031250 to 0.937500
diff --git a/test/train-sets/ref/warm_cb_cyc.stderr b/test/train-sets/ref/warm_cb_cyc.stderr
index 0f2fa85641b..4254bb45d3f 100644
--- a/test/train-sets/ref/warm_cb_cyc.stderr
+++ b/test/train-sets/ref/warm_cb_cyc.stderr
@@ -19,4 +19,4 @@ average loss = 0.857143
 total feature number = 20
 average variance estimate = 143.156311
 theoretical average variance = 200.000000
-last lambda chosen = 0.937500 among lambdas ranging from 0.031250 to 0.937500
+last lambda chosen = 0.750000 among lambdas ranging from 0.031250 to 0.937500
diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc
index c632b57c77e..f0634489180 100644
--- a/vowpalwabbit/cb_adf.cc
+++ b/vowpalwabbit/cb_adf.cc
@@ -39,6 +39,7 @@ struct cb_adf
   uint64_t offset;
   bool no_predict;
   bool rank_all;
+
 };
 
 CB::cb_class get_observed_cost(multi_ex& examples)
@@ -111,7 +112,7 @@ void learn_MTR(cb_adf& mydata, multi_learner& base, multi_ex& examples)
   gen_cs_example_mtr(mydata.gen_cs, examples, mydata.cs_labels);
   uint32_t nf = (uint32_t)examples[mydata.gen_cs.mtr_example]->num_features;
   float old_weight = examples[mydata.gen_cs.mtr_example]->weight;
-  examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum) * (1.f / (float)examples.size());
+  examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum);
   GEN_CS::call_cs_ldf(base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset);
   examples[mydata.gen_cs.mtr_example]->num_features = nf;
   examples[mydata.gen_cs.mtr_example]->weight = old_weight;
diff --git a/vowpalwabbit/warm_cb.cc b/vowpalwabbit/warm_cb.cc
index 2a61a1a935d..b8af0c4c489 100644
--- a/vowpalwabbit/warm_cb.cc
+++ b/vowpalwabbit/warm_cb.cc
@@ -196,9 +196,12 @@ void copy_example_to_adf(warm_cb& data, example& ec)
   }
 }
 
+// Changing the minimax value from eps/(K+eps)
+// to eps/(1+eps) to accomodate for
+// weight scaling of bandit examples by factor 1/K in mtr reduction
 float minimax_lambda(float epsilon, size_t num_actions)
 {
-  return epsilon / (num_actions + epsilon);
+  return epsilon / (1.0f + epsilon);
 }
 
 void setup_lambdas(warm_cb& data)
@@ -587,7 +590,7 @@ base_learner* warm_cb_setup(options_i& options, vw& all)
       .add(make_option("corrupt_type_warm_start", data->cor_type_ws).default_value(UAR).help("type of label corruption in the warm start phase (1: uniformly at random, 2: circular, 3: replacing with overwriting label)"))
       .add(make_option("corrupt_prob_warm_start", data->cor_prob_ws).default_value(0.f).help("probability of label corruption in the warm start phase"))
       .add(make_option("choices_lambda", data->choices_lambda).default_value(1U).help("the number of candidate lambdas to aggregate (lambda is the importance weight parameter between the two sources)"))
-      .add(make_option("lambda_scheme", data->lambda_scheme).default_value(ABS_CENTRAL).help("The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min lambda=0, max lambda=1, 3: center lambda=epsilon/(#actions+epsilon), 4: center lambda=epsilon/(#actions+epsilon), min lambda=0, max lambda=1); the rest of candidate lambda values are generated using a doubling scheme"))
+      .add(make_option("lambda_scheme", data->lambda_scheme).default_value(ABS_CENTRAL).help("The scheme for generating candidate lambda set (1: center lambda=0.5, 2: center lambda=0.5, min lambda=0, max lambda=1, 3: center lambda=epsilon/(1+epsilon), 4: center lambda=epsilon/(1+epsilon), min lambda=0, max lambda=1); the rest of candidate lambda values are generated using a doubling scheme"))
       .add(make_option("overwrite_label", data->overwrite_label).default_value(1U).help("the label used by type 3 corruptions (overwriting)"))
       .add(make_option("sim_bandit", data->sim_bandit).help("simulate contextual bandit updates on warm start examples"));

From 13bf77c584ef525e121fa7784410d41fa8522ce6 Mon Sep 17 00:00:00 2001
From: Chicheng Zhang
Date: Wed, 27 Mar 2019 16:34:12 -0400
Subject: [PATCH 127/127] changed back regcbopt test results; undo changes in
 cb_adf.cc
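With the 1/K factor gone again, the regcb and regcbopt reference outputs
return to their values from before patch 123, and the learn_MTR hunk in
cb_adf.cc goes back to the upstream line wrapping, so the file carries only
the intended behavior and no leftover formatting drift.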
---
 test/train-sets/ref/cbify_regcb.stderr    | 6 +++---
 test/train-sets/ref/cbify_regcbopt.stderr | 8 ++++----
 vowpalwabbit/cb_adf.cc                    | 7 ++++---
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/test/train-sets/ref/cbify_regcb.stderr b/test/train-sets/ref/cbify_regcb.stderr
index 1b713f8a1e7..dd5883333e1 100644
--- a/test/train-sets/ref/cbify_regcb.stderr
+++ b/test/train-sets/ref/cbify_regcb.stderr
@@ -9,12 +9,12 @@ average since example example current current current
 loss last counter weight label predict features
 1.000000 1.000000 1 1.0 1 5 2
 1.000000 1.000000 2 2.0 2 10 2
-0.750000 0.500000 4 4.0 4 8 2
-0.875000 1.000000 8 8.0 8 4 2
+0.750000 0.500000 4 4.0 4 7 2
+0.750000 0.750000 8 8.0 8 4 2
 
 finished run
 number of examples = 10
 weighted example sum = 10.000000
 weighted label sum = 0.000000
-average loss = 0.900000
+average loss = 0.800000
 total feature number = 20
diff --git a/test/train-sets/ref/cbify_regcbopt.stderr b/test/train-sets/ref/cbify_regcbopt.stderr
index dd5883333e1..8bb64392725 100644
--- a/test/train-sets/ref/cbify_regcbopt.stderr
+++ b/test/train-sets/ref/cbify_regcbopt.stderr
@@ -8,13 +8,13 @@ num sources = 1
 average since example example current current current
 loss last counter weight label predict features
 1.000000 1.000000 1 1.0 1 5 2
-1.000000 1.000000 2 2.0 2 10 2
-0.750000 0.500000 4 4.0 4 7 2
-0.750000 0.750000 8 8.0 8 4 2
+1.000000 1.000000 2 2.0 2 9 2
+1.000000 1.000000 4 4.0 4 7 2
+0.875000 0.750000 8 8.0 8 4 2
 
 finished run
 number of examples = 10
 weighted example sum = 10.000000
 weighted label sum = 0.000000
-average loss = 0.800000
+average loss = 0.900000
 total feature number = 20
diff --git a/vowpalwabbit/cb_adf.cc b/vowpalwabbit/cb_adf.cc
index f0634489180..ccc9cefe131 100644
--- a/vowpalwabbit/cb_adf.cc
+++ b/vowpalwabbit/cb_adf.cc
@@ -39,7 +39,6 @@ struct cb_adf
   uint64_t offset;
   bool no_predict;
   bool rank_all;
-
 };
 
 CB::cb_class get_observed_cost(multi_ex& examples)
@@ -111,7 +112,7 @@ void learn_MTR(cb_adf& mydata, multi_learner& base, multi_ex& examples)
   gen_cs_example_mtr(mydata.gen_cs, examples, mydata.cs_labels);
   uint32_t nf = (uint32_t)examples[mydata.gen_cs.mtr_example]->num_features;
   float old_weight = examples[mydata.gen_cs.mtr_example]->weight;
-  examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability * ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum);
-  GEN_CS::call_cs_ldf(base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset);
+  examples[mydata.gen_cs.mtr_example]->weight *= 1.f / examples[mydata.gen_cs.mtr_example]->l.cb.costs[0].probability *
+      ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum);
+  GEN_CS::call_cs_ldf(
+      base, mydata.gen_cs.mtr_ec_seq, mydata.cb_labels, mydata.cs_labels, mydata.prepped_cs_labels, mydata.offset);
   examples[mydata.gen_cs.mtr_example]->num_features = nf;
   examples[mydata.gen_cs.mtr_example]->weight = old_weight;
   swap(examples[0]->pred.a_s, mydata.a_s);