diff --git a/test/train-sets/ref/rcv1_raw_cb_explore.stderr b/test/train-sets/ref/rcv1_raw_cb_explore.stderr index 966ecc82cc9..d70d35e4b0c 100644 --- a/test/train-sets/ref/rcv1_raw_cb_explore.stderr +++ b/test/train-sets/ref/rcv1_raw_cb_explore.stderr @@ -10,20 +10,20 @@ Reading datafile = train-sets/rcv1_raw_cb_small.vw num sources = 1 average since example example current current current loss last counter weight label predict features -0.050000 0.050000 1 1.0 1 1:0.975000 280 -0.025000 0.000000 2 2.0 1 2:0.975000 598 -0.012500 0.000000 4 4.0 1 2:0.975000 784 -0.250000 0.487500 8 8.0 1 2:0.975000 850 -0.368750 0.487500 16 16.0 1 2:0.975000 118 -0.245312 0.121875 32 32.0 1 1:0.975000 166 -0.216406 0.187500 64 64.0 2 1:0.975000 340 -0.216406 0.216406 128 128.0 1 2:0.975000 610 -0.300195 0.383984 256 256.0 2 2:0.975000 712 -0.333984 0.367773 512 512.0 2 1:0.975000 424 +1.950000 1.950000 1 1.0 1 1:0.975000 280 +0.975000 0.000000 2 2.0 1 2:0.975000 598 +0.487500 0.000000 4 4.0 1 2:0.975000 784 +0.250000 0.012500 8 8.0 1 2:0.975000 850 +0.618750 0.987500 16 16.0 1 2:0.975000 118 +0.318750 0.018750 32 32.0 1 1:0.975000 166 +0.321875 0.325000 64 64.0 2 1:0.975000 340 +0.244922 0.167969 128 128.0 1 2:0.975000 610 +0.231250 0.217578 256 256.0 2 2:0.975000 712 +0.216992 0.202734 512 512.0 2 1:0.975000 424 finished run number of examples = 1000 weighted example sum = 1000.000000 weighted label sum = 0.000000 -average loss = 0.362700 +average loss = 0.160650 total feature number = 440020 diff --git a/vowpalwabbit/cb_explore.cc b/vowpalwabbit/cb_explore.cc index 6fd15e15c57..16d7f05b68e 100644 --- a/vowpalwabbit/cb_explore.cc +++ b/vowpalwabbit/cb_explore.cc @@ -241,7 +241,7 @@ void output_example(vw& all, cb_explore& data, example& ec, CB::label& ld) if ((c.known_cost = get_observed_cost(ld)) != nullptr) for (uint32_t i = 0; i < ec.pred.a_s.size(); i++) - loss += get_unbiased_cost(c.known_cost, c.pred_scores, i) * ec.pred.a_s[i].score; + loss += get_unbiased_cost(c.known_cost, c.pred_scores, i+1) * ec.pred.a_s[i].score; all.sd->update(ec.test_only, get_observed_cost(ld) != nullptr, loss, 1.f, ec.num_features);