Skip to content

Commit

Permalink
fix!: save/load entire tag in flat_example + bump version to 9.6 (#4266)
Browse files Browse the repository at this point in the history
* fix!: save/load entire tag in flat_example

* clang

* update flat_example tag to v_array

* remove unused

* bump version

* clang

Co-authored-by: Jack Gerrits <jackgerrits@users.noreply.github.com>
  • Loading branch information
bassmang and jackgerrits authored Nov 4, 2022
1 parent 5489584 commit 2b1874f
Show file tree
Hide file tree
Showing 49 changed files with 140 additions and 58 deletions.
28 changes: 28 additions & 0 deletions test/core.vwtest.json
Original file line number Diff line number Diff line change
Expand Up @@ -5414,5 +5414,33 @@
"depends_on": [
418
]
},
{
"id": 420,
"desc": "Save model using ksvm to test flat_example save_load",
"vw_command": "--ksvm -d train-sets/tagged_data.dat -f models/ksvm_flat.model",
"diff_files": {
"stderr": "train-sets/ref/ksvm_model_save.stderr",
"stdout": "train-sets/ref/ksvm_model_save.stdout"
},
"input_files": [
"train-sets/tagged_data.dat"
]
},
{
"id": 421,
"desc": "Load model using ksvm to test flat_example save_load",
"vw_command": "--ksvm -d train-sets/tagged_data.dat -i models/ksvm_flat.model",
"diff_files": {
"stderr": "train-sets/ref/ksvm_model_load.stderr",
"stdout": "train-sets/ref/ksvm_model_load.stdout"
},
"input_files": [
"train-sets/tagged_data.dat",
"models/ksvm_flat.model"
],
"depends_on": [
420
]
}
]
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:-1
Max label:0
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:-1
Max label:0
Expand Down
2 changes: 1 addition & 1 deletion test/pred-sets/ref/ccb_lots_of_interactions.inv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:-1
Max label:0
Expand Down
2 changes: 1 addition & 1 deletion test/pred-sets/ref/ccb_quad.inv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:-1
Max label:0
Expand Down
2 changes: 1 addition & 1 deletion test/pred-sets/ref/ccb_quad_save_resume.inv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:-1
Max label:0
Expand Down
2 changes: 1 addition & 1 deletion test/pred-sets/ref/readable_model_privacy.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:1
Expand Down
2 changes: 1 addition & 1 deletion test/pred-sets/ref/readable_model_privacy_no_tags.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:1
Expand Down
2 changes: 1 addition & 1 deletion test/pred-sets/ref/slates_w_interactions.inv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:0.8
Expand Down
2 changes: 1 addition & 1 deletion test/pred-sets/ref/t288.readable
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:1
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/automl_readable.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:-1
Max label:0
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/automl_readable_cubic.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:-1
Max label:0
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/cbadf_automl_readable.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:-1
Max label:0
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/cbadf_automl_readable_predictonly.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:-1
Max label:0
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/cbzo_constant_invert_hash.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:-1.13836
Max label:3.60884
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/cbzo_constant_readable_model.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:-1.13836
Max label:3.60884
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/cbzo_linear_invert_hash.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:-3.35097
Max label:3.13689
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/cbzo_linear_readable_model.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:-3.35097
Max label:3.13689
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/coin.readable
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:2
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/coin_model_overflow.invert.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:1
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/dupeindex_self_cubic.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:1
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/dupeindex_self_quadratic.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:1
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/dupeindex_self_quartic.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:1
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/ftrl.readable
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:2
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/ignore_feature.interactions
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:-1
Max label:0
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/ignore_feature_default_ns.interactions
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:-1
Max label:0
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/ignore_multiple_features.interactions
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:-1
Max label:0
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/inv_hash_load_model.invert.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:2
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/inv_hash_load_model.readable.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:2
Expand Down
28 changes: 28 additions & 0 deletions test/train-sets/ref/ksvm_model_load.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
Lambda = 1
Kernel = linear
using no cache
Reading datafile = train-sets/tagged_data.dat
num sources = 1
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
Enabled reductions: ksvm, scorer-identity, count_label
Input label = SIMPLE
Output pred = SCALAR
average since example example current current current
loss last counter weight label predict features
0.549445 0.549445 1 1.0 1.0000 0.4506 50
0.558138 0.566831 2 2.0 -1.0000 -0.4332 103

finished run
number of examples = 2
weighted example sum = 2.000000
weighted label sum = 0.000000
average loss = 0.558138
best constant = -1.000000
best constant's loss = 1.000000
total feature number = 153
Num support = 4
Number of kernel evaluations = 5 Number of cache queries = 12
Total loss = 1.116275
Empty file.
29 changes: 29 additions & 0 deletions test/train-sets/ref/ksvm_model_save.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
final_regressor = models/ksvm_flat.model
Lambda = 1
Kernel = linear
using no cache
Reading datafile = train-sets/tagged_data.dat
num sources = 1
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
Enabled reductions: ksvm, scorer-identity, count_label
Input label = SIMPLE
Output pred = SCALAR
average since example example current current current
loss last counter weight label predict features
1.000000 1.000000 1 1.0 1.0000 0.0000 50
1.266108 1.532215 2 2.0 -1.0000 0.5322 103

finished run
number of examples = 2
weighted example sum = 2.000000
weighted label sum = 0.000000
average loss = 1.266108
best constant = -1.000000
best constant's loss = 1.000000
total feature number = 153
Num support = 2
Number of kernel evaluations = 1 Number of cache queries = 4
Total loss = 2.532215
Empty file.
2 changes: 1 addition & 1 deletion test/train-sets/ref/l1_l2_default_model.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:0
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/l1_l2_override_model.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:0
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/l1_override_l2_leave_model.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:0
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/l1l2_migrate.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:0
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/l1l2_migrate_override.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:0
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:1
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/pistol.readable
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:0
Max label:2
Expand Down
2 changes: 1 addition & 1 deletion test/train-sets/ref/w_out_slot_ns.interactions
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Version 9.5.0
Version 9.6.0
Id
Min label:-1
Max label:0
Expand Down
2 changes: 2 additions & 0 deletions test/train-sets/tagged_data.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
1 tag|f 13:3.9656971e-02 24:3.4781646e-02 69:4.6296168e-02 85:6.1853945e-02 140:3.2349996e-02 156:1.0290844e-01 175:6.8493910e-02 188:2.8366476e-02 229:7.4871540e-02 230:9.1505975e-02 234:5.4200061e-02 236:4.4855952e-02 238:5.3422898e-02 387:1.4059304e-01 394:7.5131744e-02 433:1.1118756e-01 434:1.2540409e-01 438:6.5452829e-02 465:2.2644201e-01 468:8.5926279e-02 518:1.0214076e-01 534:9.4191484e-02 613:7.0990764e-02 646:8.7701865e-02 660:7.2289191e-02 709:9.0660661e-02 752:1.0580081e-01 757:6.7965068e-02 812:2.2685185e-01 932:6.8250686e-02 1028:4.8203137e-02 1122:1.2381379e-01 1160:1.3038123e-01 1189:7.1542501e-02 1530:9.2655659e-02 1664:6.5160148e-02 1865:8.5823394e-02 2524:1.6407280e-01 2525:1.1528353e-01 2526:9.7131468e-02 2536:5.7415009e-01 2543:1.4978983e-01 2848:1.0446861e-01 3370:9.2423186e-02 3960:1.5554591e-01 7052:1.2632671e-01 16893:1.9762035e-01 24036:3.2674628e-01 24303:2.2660980e-01
-1 tag|f 9:8.5609287e-02 14:2.9904654e-02 19:6.1031535e-02 20:2.1757640e-02 24:1.3484491e-02 39:5.0661467e-02 45:2.5001373e-02 49:6.0599580e-02 50:2.5216307e-02 55:4.5040369e-02 64:5.1786009e-02 65:1.2385602e-01 69:5.0108045e-02 70:3.8740762e-02 73:3.9056923e-02 82:8.0505788e-02 90:3.5707459e-02 99:4.7706675e-02 104:5.1884215e-02 105:1.5568805e-01 106:1.3511626e-01 130:1.0544337e-01 133:8.1089266e-02 171:5.9266284e-02 180:7.7435717e-02 217:6.8277337e-02 233:2.6908301e-02 234:3.5577789e-02 254:1.0109196e-01 286:3.6022667e-02 300:1.2054443e-01 305:3.7950054e-02 326:5.6624860e-02 337:6.8930335e-02 348:3.7790950e-02 400:4.4774704e-02 417:4.3467607e-02 434:8.2317248e-02 441:1.1299837e-01 465:9.6445926e-02 476:5.0166391e-02 481:1.0036784e-01 495:1.0148438e-01 497:4.0623948e-02 510:4.2873766e-02 514:4.4061519e-02 518:6.7046829e-02 548:9.7496063e-02 606:4.7255926e-02 655:5.1991425e-02 678:3.3746067e-02 724:3.5048731e-02 759:6.3103504e-02 768:4.6424236e-02 802:2.4630768e-02 820:4.3894887e-02 910:5.6646861e-02 934:7.6288253e-02 995:4.2333681e-02 1011:4.5684557e-02 1091:6.5844811e-02 1100:1.5925008e-01 1288:4.4281408e-02 1321:5.2120164e-02 1340:1.5260276e-01 1574:7.5433277e-02 1629:6.3325211e-02 1654:3.2249656e-02 1712:1.6111535e-01 1796:8.0251180e-02 1930:8.7665550e-02 2031:1.4472182e-01 2036:8.9130148e-02 2039:9.6134968e-02 2277:8.1756182e-02 2330:7.0579961e-02 2334:8.2388259e-02 2343:8.3097421e-02 2344:2.0492174e-01 2348:7.7909611e-02 2360:9.3834393e-02 2362:3.7530366e-02 2376:7.0720568e-02 2493:1.8794763e-01 2495:5.9343126e-02 2520:1.1095246e-01 2949:5.7359278e-02 3370:2.5833043e-01 4523:1.9765969e-01 4525:1.1142892e-01 5307:9.4257712e-02 5401:7.0384808e-02 5593:8.1356630e-02 6093:7.4096188e-02 9217:1.0568235e-01 11017:8.1550762e-02 12301:1.2676764e-01 12332:2.9680410e-01 12338:9.4060794e-02 12339:2.5156361e-01 12340:2.6923507e-01 12341:1.5476021e-01
2 changes: 1 addition & 1 deletion vcpkg.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"$schema": "https://raw.githubusercontent.com/microsoft/vcpkg/master/scripts/vcpkg.schema.json",
"name": "vowpal-wabbit",
"version": "9.5.0",
"version": "9.6.0",
"dependencies": [
"boost-math",
"eigen3",
Expand Down
2 changes: 1 addition & 1 deletion version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
9.5.0
9.6.0
3 changes: 1 addition & 2 deletions vowpalwabbit/core/include/vw/core/example.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,7 @@ class flat_example
polylabel l;
reduction_features ex_reduction_features;

size_t tag_len;
char* tag; // An identifier for the example.
VW::v_array<char> tag; // An identifier for the example.

size_t example_counter;
uint64_t ft_offset;
Expand Down
2 changes: 1 addition & 1 deletion vowpalwabbit/core/include/vw/core/model_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ inline size_t read_model_field(io_buf& io, std::string& str)
bytes += read_model_field(io, str_size);
char* cs = nullptr;
bytes += io.buf_read(cs, str_size * sizeof(char));
str = std::string(cs);
str = std::string(cs, str_size);
return bytes;
}

Expand Down
3 changes: 3 additions & 0 deletions vowpalwabbit/core/include/vw/core/vw_versions.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,5 +63,8 @@ constexpr VW::version_struct VERSION_FILE_WITH_ACTIVE_SEEN_LABELS{9, 0, 0};
/// Moved option values from command line to model data
constexpr VW::version_struct VERSION_FILE_WITH_L1_AND_L2_STATE_IN_MODEL_DATA{9, 0, 0};

/// Fixed flat_example save/load to serialize the entire tag
constexpr VW::version_struct VERSION_FILE_WITH_FLAT_EXAMPLE_TAG_FIX{9, 6, 0};

} // namespace version_definitions
} // namespace VW
Loading

0 comments on commit 2b1874f

Please sign in to comment.