Skip to content

Commit

Permalink
Merge branch 'master' into model_ver_save_load
Browse files Browse the repository at this point in the history
  • Loading branch information
peterychang authored Mar 14, 2024
2 parents 557c8f4 + 9837a0e commit ba53b71
Show file tree
Hide file tree
Showing 71 changed files with 3,590 additions and 432 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/asan.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [windows-latest, ubuntu-latest, macos-latest]
#os: [windows-latest, ubuntu-latest, macos-latest]
os: [ubuntu-latest, macos-latest] # Windows ASan is temporarily disabled; restore the commented-out matrix above to re-enable it
preset: [vcpkg-asan-debug, vcpkg-ubsan-debug]
exclude:
# UBSan not supported by MSVC on Windows
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build_windows_cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
CMAKE_BUILD_DIR: ${{ github.workspace }}/vw/build
SOURCE_DIR: ${{ github.workspace }}/vw
VCPKG_ROOT: ${{ github.workspace }}/vw/ext_libs/vcpkg
VCPKG_REF: 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1
VCPKG_REF: 53bef8994c541b6561884a8395ea35715ece75db

steps:
- uses: actions/checkout@v3
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ jobs:
runs-on: windows-2019
env:
VCPKG_ROOT: ${{ github.workspace }}\\vcpkg
VCPKG_REF: 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1
VCPKG_REF: 53bef8994c541b6561884a8395ea35715ece75db
VCPKG_DEFAULT_BINARY_CACHE: ${{ github.workspace }}\vcpkg_binary_cache
strategy:
matrix:
Expand Down
16 changes: 16 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
// Single C++ debugger launch entry. The program and its arguments are not
// hard-coded: they come from the ${cmake.testProgram} / ${cmake.testArgs}
// substitutions, and the working directory is the workspace root.
// NOTE(review): these substitutions are presumably provided by the VS Code
// CMake Tools extension when a CTest test is debugged — confirm against its docs.
"name": "(ctest) Launch",
"type": "cppdbg",
"cwd": "${workspaceFolder}",
"request": "launch",
"program": "${cmake.testProgram}",
"args": [ "${cmake.testArgs}" ]
}
]
}
58 changes: 29 additions & 29 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,10 @@ if(VW_FEAT_LDA AND NOT BUILD_PYTHON)
list(APPEND VCPKG_MANIFEST_FEATURES "lda")
endif()

option(BUILD_TESTING "Build tests" ON)
if(BUILD_TESTING)
list(APPEND VCPKG_MANIFEST_FEATURES "tests")
endif()
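# NOTE(review): the BUILD_TESTING option declaration and the vcpkg "tests" manifest
# feature are commented out below. Either restore them or delete these lines outright
# rather than keeping dead code.
#option(BUILD_TESTING "Build tests" ON)
#if(BUILD_TESTING)
# list(APPEND VCPKG_MANIFEST_FEATURES "tests")
#endif()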

option(BUILD_BENCHMARKS "Build benchmarks" OFF)
if(BUILD_BENCHMARKS)
Expand Down Expand Up @@ -100,6 +100,31 @@ set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_VISIBILITY_INLINES_HIDDEN TRUE)
set(CMAKE_CXX_VISIBILITY_PRESET "hidden")

option(VW_USE_ASAN "Compile with AddressSanitizer" OFF)
option(VW_USE_UBSAN "Compile with UndefinedBehaviorSanitizer" OFF)

# AddressSanitizer support, enabled with -DVW_USE_ASAN=ON.
# Adds the VW_USE_ASAN preprocessor definition plus the toolchain-specific ASan
# compile and link options. All of these are directory-scoped commands.
if(VW_USE_ASAN)
  if(NOT MSVC)
    # Non-MSVC toolchains: the same flag set is passed to both the compile and link steps.
    add_compile_options(-fsanitize=address -fno-omit-frame-pointer -g3)
    add_link_options(-fsanitize=address -fno-omit-frame-pointer -g3)
  else()
    # MSVC spelling of the sanitizer switch, with its own set of linker options.
    add_compile_options(/fsanitize=address)
    add_link_options(/InferASanLibs /incremental:no /debug)
  endif()

  # Lets sources detect an ASan build via the VW_USE_ASAN macro.
  add_compile_definitions(VW_USE_ASAN)
endif()

# UndefinedBehaviorSanitizer support, enabled with -DVW_USE_UBSAN=ON.
# Configuration aborts on MSVC; otherwise the VW_USE_UBSAN definition and the UBSan
# flags are added at directory scope.
if(VW_USE_UBSAN)
  # Guard clause: FATAL_ERROR stops processing, so nothing below runs on MSVC.
  if(MSVC)
    message(FATAL_ERROR "UBSan not supported on MSVC")
  endif()

  # Lets sources detect a UBSan build via the VW_USE_UBSAN macro.
  add_compile_definitions(VW_USE_UBSAN)

  # The identical flag set is passed to both the compile and the link step.
  add_compile_options(-fsanitize=undefined -fno-sanitize-recover -fno-omit-frame-pointer -g3)
  add_link_options(-fsanitize=undefined -fno-sanitize-recover -fno-omit-frame-pointer -g3)
endif()


include(VowpalWabbitUtils)

if(MSVC)
Expand Down Expand Up @@ -152,33 +177,8 @@ option(VW_SSE2NEON_SYS_DEP "Override using the submodule for SSE2Neon dependency
option(VW_BUILD_VW_C_WRAPPER "Enable building the c_wrapper project" ON)
option(vw_BUILD_NET_CORE "Build .NET Core targets" OFF)
option(vw_BUILD_NET_FRAMEWORK "Build .NET Framework targets" OFF)
option(VW_USE_ASAN "Compile with AddressSanitizer" OFF)
option(VW_USE_UBSAN "Compile with UndefinedBehaviorSanitizer" OFF)
option(VW_BUILD_WASM "Add WASM target" OFF)

if(VW_USE_ASAN)
add_compile_definitions(VW_USE_ASAN)
if(MSVC)
add_compile_options(/fsanitize=address /GS- /wd5072)
add_link_options(/InferASanLibs /incremental:no /debug)
# Workaround for MSVC ASan issue here: https://developercommunity.visualstudio.com/t/VS2022---Address-sanitizer-on-x86-Debug-/10116361
add_compile_definitions(_DISABLE_STRING_ANNOTATION)
else()
add_compile_options(-fsanitize=address -fno-omit-frame-pointer -g3)
add_link_options(-fsanitize=address -fno-omit-frame-pointer -g3)
endif()
endif()

if(VW_USE_UBSAN)
add_compile_definitions(VW_USE_UBSAN)
if(MSVC)
message(FATAL_ERROR "UBSan not supported on MSVC")
else()
add_compile_options(-fsanitize=undefined -fno-sanitize-recover -fno-omit-frame-pointer -g3)
add_link_options(-fsanitize=undefined -fno-sanitize-recover -fno-omit-frame-pointer -g3)
endif()
endif()

if(VW_INSTALL AND NOT VW_ZLIB_SYS_DEP)
message(WARNING "Installing with a vendored version of zlib is not recommended. Use VW_ZLIB_SYS_DEP to use a system dependency or specify VW_INSTALL=OFF to silence this warning.")
endif()
Expand Down
2 changes: 1 addition & 1 deletion CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
},
"VW_GTEST_SYS_DEP": {
"type": "BOOL",
"value": "ON"
"value": "OFF"
},
"VW_EIGEN_SYS_DEP": {
"type": "BOOL",
Expand Down
2 changes: 1 addition & 1 deletion cmake/VowpalWabbitUtils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
include(FetchContent)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/refs/tags/release-1.11.0.zip
URL https://github.com/google/googletest/archive/refs/tags/v1.13.0.zip
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
Expand Down
4 changes: 2 additions & 2 deletions ext_libs/ext_libs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ if(RAPIDJSON_SYS_DEP)
# Since EXACT is not specified, any version compatible with 1.1.0 is accepted (>= 1.1.0)
find_package(RapidJSON 1.1.0 CONFIG REQUIRED)
add_library(RapidJSON INTERFACE)
target_include_directories(RapidJSON INTERFACE ${RapidJSON_INCLUDE_DIRS})
target_include_directories(RapidJSON INTERFACE ${RapidJSON_INCLUDE_DIRS} ${RAPIDJSON_INCLUDE_DIRS})
else()
add_library(RapidJSON INTERFACE)
target_include_directories(RapidJSON SYSTEM INTERFACE "${CMAKE_CURRENT_LIST_DIR}/rapidjson/include")
Expand Down Expand Up @@ -127,4 +127,4 @@ if(VW_FEAT_CB_GRAPH_FEEDBACK)
target_include_directories(mlpack_ensmallen SYSTEM INTERFACE ${CMAKE_CURRENT_LIST_DIR}/armadillo-code/include)

target_include_directories(mlpack_ensmallen SYSTEM INTERFACE ${CMAKE_CURRENT_LIST_DIR}/ensmallen/include)
endif()
endif()
2 changes: 1 addition & 1 deletion ext_libs/vcpkg
Submodule vcpkg updated 5927 files
14 changes: 8 additions & 6 deletions python/docs/source/tutorials/DFtoVW_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -802,15 +802,17 @@
"\n",
"# Adding columns for easier visualization\n",
"weights_df[\"feature_name\"] = weights_df.apply(\n",
" lambda row: row.vw_feature_name.split(\"=\")[0]\n",
" if row.is_cat\n",
" else row.vw_feature_name,\n",
" lambda row: (\n",
" row.vw_feature_name.split(\"=\")[0] if row.is_cat else row.vw_feature_name\n",
" ),\n",
" axis=1,\n",
")\n",
"weights_df[\"feature_value\"] = weights_df.apply(\n",
" lambda row: row.vw_feature_name.split(\"=\")[1].zfill(2)\n",
" if row.is_cat\n",
" else row.vw_feature_name,\n",
" lambda row: (\n",
" row.vw_feature_name.split(\"=\")[1].zfill(2)\n",
" if row.is_cat\n",
" else row.vw_feature_name\n",
" ),\n",
" axis=1,\n",
")\n",
"weights_df.sort_values([\"feature_name\", \"feature_value\"], inplace=True)"
Expand Down
2 changes: 1 addition & 1 deletion python/docs/source/tutorials/cmd_first_steps.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,6 @@ The model predicted a value of **0**. This result means our house will not need
## More to explore

- See [Python tutorial](python_first_steps.ipynb) for a quick introduction to the basics of training and testing your model.
- To learn more about how to approach a contextual bandits problem using tVowpal Wabbit — including how to work with different contextual bandits approaches, how to format data, and understand the results — see the [Contextual Bandit Reinforcement Learning Tutorial](python_Contextual_bandits_and_Vowpal_Wabbit.ipynb).
- To learn more about how to approach a contextual bandits problem using Vowpal Wabbit — including how to work with different contextual bandits approaches, how to format data, and understand the results — see the [Contextual Bandit Reinforcement Learning Tutorial](python_Contextual_bandits_and_Vowpal_Wabbit.ipynb).
- For more on the contextual bandits approach to reinforcement learning, including a content personalization scenario, see the [Contextual Bandit Simulation Tutorial](python_Simulating_a_news_personalization_scenario_using_Contextual_Bandits.ipynb).
- See the [Linear Regression Tutorial](cmd_linear_regression.md) for a different look at the roof replacement problem and learn more about Vowpal Wabbit's format and understanding the results.
3 changes: 1 addition & 2 deletions python/tests/confidence_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,5 @@ def lblogwealth(self, *, t, sumXt, v, eta, s, alpha):

return max(
0,
(sumXt - sqrt(gamma1**2 * ll * v + gamma2**2 * ll**2) - gamma2 * ll)
/ t,
(sumXt - sqrt(gamma1**2 * ll * v + gamma2**2 * ll**2) - gamma2 * ll) / t,
)
32 changes: 17 additions & 15 deletions python/tests/crminustwo.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,21 +440,23 @@ def intervaldiff(
candidates.append(
(
gstar,
None
if isclose(kappa, 0)
else {
"kappastar": kappa,
"betastar": beta,
"gammastar": gamma,
"taustar": tau,
"ufake": ufake,
"wfake": wfake,
"rfake": rex,
"qfunc": lambda c, u, w, r, k=kappa, g=gamma, b=beta, t=tau, s=sign, num=n: -c
* (b + g * u + t * w + s * (u - w) * r)
/ ((num + 1) * k),
"mle": mle,
},
(
None
if isclose(kappa, 0)
else {
"kappastar": kappa,
"betastar": beta,
"gammastar": gamma,
"taustar": tau,
"ufake": ufake,
"wfake": wfake,
"rfake": rex,
"qfunc": lambda c, u, w, r, k=kappa, g=gamma, b=beta, t=tau, s=sign, num=n: -c
* (b + g * u + t * w + s * (u - w) * r)
/ ((num + 1) * k),
"mle": mle,
}
),
)
)

Expand Down
6 changes: 3 additions & 3 deletions python/vowpalwabbit/pyvw.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,9 +532,9 @@ def parse(
for ex in str_ex
]
):
str_ex: List[
Example
] = str_ex # pytype: disable=annotation-type-mismatch
str_ex: List[Example] = (
str_ex # pytype: disable=annotation-type-mismatch
)
return str_ex

if not isinstance(str_ex, (list, str)):
Expand Down
29 changes: 29 additions & 0 deletions test/core.vwtest.json
Original file line number Diff line number Diff line change
Expand Up @@ -6073,5 +6073,34 @@
"depends_on": [
467
]
},
{
"id": 469,
"desc": "https://github.com/VowpalWabbit/vowpal_wabbit/issues/4669",
"vw_command": "--ccb_explore_adf --dsjson -d train-sets/issue4669.dsjson -f issue4669.model",
"diff_files": {
"stderr": "train-sets/ref/issue4669_train.stderr",
"stdout": "train-sets/ref/issue4669_train.stdout"
},
"input_files": [
"train-sets/issue4669.dsjson"
]
},
{
"id": 470,
"desc": "https://github.com/VowpalWabbit/vowpal_wabbit/issues/4669",
"vw_command": "--ccb_explore_adf --dsjson --all_slots_loss --epsilon 0 -t -i issue4669.model -d train-sets/issue4669.dsjson -p issue4669_test_pred.txt",
"diff_files": {
"stderr": "train-sets/ref/issue4669_test.stderr",
"stdout": "train-sets/ref/issue4669_test.stdout",
"issue4669_test_pred.txt": "train-sets/ref/issue4669_test_pred.txt"
},
"input_files": [
"train-sets/issue4669.dsjson",
"issue4669.model"
],
"depends_on": [
469
]
}
]
16 changes: 10 additions & 6 deletions test/run_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,21 @@ def _are_same(expected: Any, actual: Any, key: str) -> Tuple[bool, str]:
elif isinstance(expected, (int, bool, str)):
return (
expected == actual,
f"Key '{key}' value mismatch. Expected: '{expected}', but found: '{actual}'"
if expected != actual
else "",
(
f"Key '{key}' value mismatch. Expected: '{expected}', but found: '{actual}'"
if expected != actual
else ""
),
)
elif isinstance(expected, (float)):
delta = abs(expected - actual)
return (
delta < epsilon,
f"Key '{key}' value mismatch. Expected: '{expected}', but found: '{actual}' (using epsilon: '{epsilon}')"
if delta >= epsilon
else "",
(
f"Key '{key}' value mismatch. Expected: '{expected}', but found: '{actual}' (using epsilon: '{epsilon}')"
if delta >= epsilon
else ""
),
)
elif isinstance(expected, dict):
expected_keys = set(expected.keys())
Expand Down
1 change: 1 addition & 0 deletions test/save_resume_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Test that the models generated with and without --predict_only_model produce the same predictions when loaded in test_mode.
"""

import sys
import os
import optparse
Expand Down
Binary file modified test/train-sets/0001.fb
Binary file not shown.
Binary file modified test/train-sets/ccb.fb
Binary file not shown.
Binary file modified test/train-sets/cs.fb
Binary file not shown.
1 change: 1 addition & 0 deletions test/train-sets/issue4669.dsjson
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"c":{"_multi":[{"f":"1"},{"f":"2"}],"_slots":[{"_inc":[0,1]},{"_inc":[1]}]},"_outcomes":[{"_label_cost":1.0,"_a":[0,1],"_p":[0.5,0.5]},{"_label_cost":0.0,"_a":[1],"_p":[1]}]}
Binary file modified test/train-sets/multiclass.fb
Binary file not shown.
Binary file modified test/train-sets/multilabel.fb
Binary file not shown.
Binary file modified test/train-sets/rcv1_cb_eval.fb
Binary file not shown.
Binary file modified test/train-sets/rcv1_raw_cb_small.fb
Binary file not shown.
19 changes: 6 additions & 13 deletions test/train-sets/ref/active-simulation.t24.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,13 @@ Output pred = SCALAR
average since example example current current current
loss last counter weight label predict features
1.000000 1.000000 1 1.0 -1.0000 0.0000 128
0.791125 0.755288 2 6.8 -1.0000 -0.1309 44
1.274829 1.444750 8 26.3 1.0000 -0.2020 34
1.083985 0.895011 73 52.8 1.0000 0.0214 21
0.887295 0.693362 130 106.3 -1.0000 -0.3071 146
0.788245 0.690009 233 213.6 -1.0000 0.0421 47
0.664628 0.541195 398 427.4 -1.0000 -0.1863 68
0.634406 0.604328 835 856.9 -1.0000 -0.4327 40

finished run
number of examples = 1000
weighted example sum = 1014.004519
weighted label sum = -68.618036
average loss = 0.630964
best constant = -0.067670
best constant's loss = 0.995421
weighted example sum = 1.000000
weighted label sum = -1.000000
average loss = 1.000000
best constant = -1.000000
best constant's loss = 0.000000
total feature number = 78739
total queries = 474
total queries = 1
8 changes: 6 additions & 2 deletions test/train-sets/ref/help.stdout
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,12 @@ Weight Options:
[Reduction] Active Learning Options:
--active Enable active learning (type: bool, keep, necessary)
--simulation Active learning simulation mode (type: bool)
--mellowness arg Active learning mellowness parameter c_0. Default 8 (type: float,
default: 8, keep)
--direct Active learning via the tag and predictions interface. Tag should
start with "query?" to get query decision. Returned prediction
is either -1 for no or the importance weight for yes. (type:
bool)
--mellowness arg Active learning mellowness parameter c_0. Default 1. (type: float,
default: 1, keep)
[Reduction] Active Learning with Cover Options:
--active_cover Enable active learning with cover (type: bool, keep, necessary)
--mellowness arg Active learning mellowness parameter c_0 (type: float, default:
Expand Down
23 changes: 23 additions & 0 deletions test/train-sets/ref/issue4669_test.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
only testing
predictions = issue4669_test_pred.txt
using no cache
Reading datafile = train-sets/issue4669.dsjson
num sources = 1
Num weight bits = 18
learning rate = 0.5
initial_t = 1
power_t = 0.5
cb_type = mtr
Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, cb_sample, shared_feature_merger, ccb_explore_adf
Input label = CCB
Output pred = DECISION_PROBS
average since example example current current current
loss last counter weight label predict features
0.000000 0.000000 1 1.0 0:1,1:0 1,None 9

finished run
number of examples = 1
weighted example sum = 1.000000
weighted label sum = 0.000000
average loss = 0.000000
total feature number = 9
Empty file.
Loading

0 comments on commit ba53b71

Please sign in to comment.