-
Notifications
You must be signed in to change notification settings - Fork 1.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Warm start for cbify #1534
Warm start for cbify #1534
Changes from 139 commits
c891ae8
cc0ac23
4a27941
539b1e4
961a5a5
0fbc26a
369b3ea
8f096a5
904134f
e271344
8879525
ced4bbd
ac71d8d
9debba8
e295aff
ed2f2bf
0da506a
122c8a3
c01f8cc
0d4d633
ded8f53
68d8600
aace037
41127f8
94c8103
0a25495
46d91c0
fad3955
1351a31
f921051
e7384bb
630fd5f
a7d5360
601a756
f2f9bb6
8298ec6
543bab9
c9beeb0
2343af4
32d33ba
f1355b7
caac66e
9a4eef5
b30d987
6735a02
024d9cc
bd5fe57
e3cda02
3f64541
87f9afa
2e10698
4b54dc0
5e993af
24c79e8
502d593
b06e454
1922659
f6539b5
b471ddb
6259c67
c330450
6fa0031
7240acb
621b392
d1fbfd7
6bddc96
f529db0
e84c7d9
7e6b889
6f7fc00
1a5b3e0
9e431ed
d157858
bc94f6c
1bab4c3
0f6e8db
a32c2e7
4ab1d8c
5d7dc31
a4fb02f
f8d14ab
67ffd89
7b6e2ba
8fce742
0f3b946
cf3b488
e9ec432
b60e872
df4267d
a350e0a
f7f1366
9d6a364
a47aba8
b4d87e0
19c161c
405622d
e2502f6
ed980e3
36b174a
4c3eed3
8117896
e12a8da
4e639bc
6540308
be93a25
5fa45cb
2020c50
8a51d16
24648e2
f95d154
558f1a2
648f0d9
5561a12
6069739
d9573e1
01bf93e
83da642
3f03785
452e4aa
aa9e9f7
52439aa
2f146e7
e5db844
4f12bb6
2011b7a
24f970c
4d8811d
7bc56af
4971477
db1da5e
12b36b9
1c0400b
0e3b7bc
63d8c40
99d642b
217ee32
6cf41b6
3ad0f7b
2fa610e
5775bd6
753d885
5e923d8
35d9ab0
c71d3e3
94c7147
a7408f0
3df666e
13bf77c
cca8449
5776849
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
Num weight bits = 18 | ||
learning rate = 0.5 | ||
initial_t = 0 | ||
power_t = 0.5 | ||
using no cache | ||
Reading datafile = train-sets/multiclass | ||
num sources = 1 | ||
average since example example current current current | ||
loss last counter weight label predict features | ||
1.000000 1.000000 4 1.0 4 3 2 | ||
1.000000 1.000000 5 2.0 5 3 2 | ||
1.000000 1.000000 7 4.0 7 3 2 | ||
|
||
finished run | ||
number of examples = 10 | ||
weighted example sum = 7.000000 | ||
weighted label sum = 0.000000 | ||
average loss = 1.000000 | ||
total feature number = 20 | ||
average variance estimate = 171.578140 | ||
theoretical average variance = 200.000000 | ||
last lambda chosen = 0.031250 among lambdas ranging from 0.031250 to 0.937500 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
Num weight bits = 18 | ||
learning rate = 0.5 | ||
initial_t = 0 | ||
power_t = 0.5 | ||
using no cache | ||
Reading datafile = train-sets/cs_cb | ||
num sources = 1 | ||
average since example example current current current | ||
loss last counter weight label predict features | ||
0.000000 0.000000 2 1.0 known 2 4 | ||
0.000000 0.000000 3 2.0 known 2 4 | ||
|
||
finished run | ||
number of examples = 3 | ||
weighted example sum = 2.000000 | ||
weighted label sum = 0.000000 | ||
average loss = 0.000000 | ||
total feature number = 12 | ||
average variance estimate = 1.034483 | ||
theoretical average variance = 60.000000 | ||
last lambda chosen = 0.031250 among lambdas ranging from 0.031250 to 0.937500 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
Num weight bits = 18 | ||
learning rate = 0.5 | ||
initial_t = 0 | ||
power_t = 0.5 | ||
using no cache | ||
Reading datafile = train-sets/multiclass | ||
num sources = 1 | ||
average since example example current current current | ||
loss last counter weight label predict features | ||
0.000000 0.000000 4 1.0 4 4 2 | ||
0.500000 1.000000 5 2.0 5 4 2 | ||
0.750000 1.000000 7 4.0 7 3 2 | ||
|
||
finished run | ||
number of examples = 10 | ||
weighted example sum = 7.000000 | ||
weighted label sum = 0.000000 | ||
average loss = 0.857143 | ||
total feature number = 20 | ||
average variance estimate = 143.156311 | ||
theoretical average variance = 200.000000 | ||
last lambda chosen = 0.937500 among lambdas ranging from 0.031250 to 0.937500 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
Num weight bits = 18 | ||
learning rate = 0.5 | ||
initial_t = 0 | ||
power_t = 0.5 | ||
using no cache | ||
Reading datafile = train-sets/multiclass | ||
num sources = 1 | ||
average since example example current current current | ||
loss last counter weight label predict features | ||
1.000000 1.000000 4 1.0 4 3 2 | ||
1.000000 1.000000 5 2.0 5 9 2 | ||
0.750000 0.500000 7 4.0 7 7 2 | ||
|
||
finished run | ||
number of examples = 10 | ||
weighted example sum = 7.000000 | ||
weighted label sum = 0.000000 | ||
average loss = 0.857143 | ||
total feature number = 20 | ||
average variance estimate = 60.903835 | ||
theoretical average variance = 200.000000 | ||
last lambda chosen = 0.000000 among lambdas ranging from 0.000000 to 1.000000 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
Num weight bits = 18 | ||
learning rate = 0.5 | ||
initial_t = 0 | ||
power_t = 0.5 | ||
using no cache | ||
Reading datafile = train-sets/multiclass | ||
num sources = 1 | ||
average since example example current current current | ||
loss last counter weight label predict features | ||
1.000000 1.000000 4 1.0 4 3 2 | ||
1.000000 1.000000 5 2.0 5 3 2 | ||
1.000000 1.000000 7 4.0 7 3 2 | ||
|
||
finished run | ||
number of examples = 10 | ||
weighted example sum = 7.000000 | ||
weighted label sum = 0.000000 | ||
average loss = 1.000000 | ||
total feature number = 20 | ||
average variance estimate = 1.000000 | ||
theoretical average variance = inf | ||
last lambda chosen = 0.000000 among lambdas ranging from 0.000000 to 0.000000 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
Num weight bits = 18 | ||
learning rate = 0.5 | ||
initial_t = 0 | ||
power_t = 0.5 | ||
using no cache | ||
Reading datafile = train-sets/multiclass | ||
num sources = 1 | ||
average since example example current current current | ||
loss last counter weight label predict features | ||
1.000000 1.000000 4 1.0 4 5 2 | ||
1.000000 1.000000 5 2.0 5 9 2 | ||
0.750000 0.500000 7 4.0 7 7 2 | ||
|
||
finished run | ||
number of examples = 10 | ||
weighted example sum = 7.000000 | ||
weighted label sum = 0.000000 | ||
average loss = 0.714286 | ||
total feature number = 20 | ||
average variance estimate = 7.512840 | ||
theoretical average variance = 200.000000 | ||
last lambda chosen = 1.000000 among lambdas ranging from 1.000000 to 1.000000 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
Num weight bits = 18 | ||
learning rate = 0.5 | ||
initial_t = 0 | ||
power_t = 0.5 | ||
using no cache | ||
Reading datafile = train-sets/multiclass | ||
num sources = 1 | ||
average since example example current current current | ||
loss last counter weight label predict features | ||
1.000000 1.000000 4 1.0 4 7 2 | ||
1.000000 1.000000 5 2.0 5 1 2 | ||
0.750000 0.500000 7 4.0 7 10 2 | ||
|
||
finished run | ||
number of examples = 10 | ||
weighted example sum = 7.000000 | ||
weighted label sum = 0.000000 | ||
average loss = 0.857143 | ||
total feature number = 20 | ||
average variance estimate = 4.685901 | ||
theoretical average variance = 200.000000 | ||
last lambda chosen = 0.500000 among lambdas ranging from 0.500000 to 0.500000 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -269,7 +269,9 @@ void predict_or_learn_first(cb_explore_adf& data, multi_learner& base, multi_ex& | |
template <bool is_learn> | ||
void predict_or_learn_greedy(cb_explore_adf& data, multi_learner& base, multi_ex& examples) | ||
{ | ||
// Explore uniform random an epsilon fraction of the time. | ||
data.offset = examples[0]->ft_offset; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we set something which is never used? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe we need this additional line - we had an email discussion on this on July 7, 2018: If we don't have this line, in warm_cb.cc, base.learn(data.adf_data.ecs, 0) / base.learn(data.adf_data.ecs, 1)'s updates seem to be on the same set of weights. |
||
//Explore uniform random an epsilon fraction of the time. | ||
|
||
if (is_learn && test_adf_sequence(examples) != nullptr) | ||
multiline_learn_or_predict<true>(base, examples, data.offset); | ||
else | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I am a little confused about this line - what is ((float)mydata.gen_cs.event_sum / (float)mydata.gen_cs.action_sum)? Is it 1/K?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's more like 1 / average K. These are defined via this: https://github.com/VowpalWabbit/vowpal_wabbit/blob/master/vowpalwabbit/gen_cs_example.cc#L171