diff --git a/python/tests/assert_job.py b/python/tests/assert_job.py
index 3268dc83ca9..4378730eb7a 100644
--- a/python/tests/assert_job.py
+++ b/python/tests/assert_job.py
@@ -1,5 +1,5 @@
 import numpy as np
-from numpy.testing import assert_allclose, assert_array_almost_equal
+from numpy.testing import assert_allclose, assert_almost_equal
 from vw_executor.vw import ExecutionStatus
 
 
@@ -9,11 +9,13 @@ def get_from_kwargs(kwargs, key, default=None):
     else:
         return default
 
+
 def majority_close(arr1, arr2, rtol, atol, threshold):
     # Check if the majority of elements are close
     close_count = np.count_nonzero(np.isclose(arr1, arr2, rtol=rtol, atol=atol))
     return close_count > len(arr1) * threshold
 
+
 def assert_weight(job, **kwargs):
     atol = get_from_kwargs(kwargs, "atol", 10e-8)
     rtol = get_from_kwargs(kwargs, "rtol", 10e-5)
@@ -23,23 +25,34 @@ def assert_weight(job, **kwargs):
     with open(data[0], "r") as f:
         data = f.readlines()
     data = [i.strip() for i in data]
-    weights = job[0].model9('--readable_model').weights
+    weights = job[0].model9("--readable_model").weights
     weights = weights["weight"].to_list()
-    assert_allclose(weights, expected_weights, atol=atol, rtol=rtol), f"weights should be {expected_weights}"
-
-def assert_prediction(job, **kwargs):
-    assert job.status == ExecutionStatus.Success, "job should be successful"
-    atol = kwargs.get("atol", 10e-8)
-    rtol = kwargs.get("rtol", 10e-5)
-    threshold = kwargs.get("threshold", 0.9)
-    constant = kwargs["expected_value"]
-    predictions = job.outputs['-p']
-    with open(predictions[0], "r") as f:
-        predictions = f.readlines()
-    predictions = [float(i) for i in predictions[1:]]
-    assert majority_close(predictions, [constant]*len(predictions), rtol=rtol, atol=atol, threshold=threshold), f"predicted value should be {constant}"
-
+    assert_allclose(
+        weights, expected_weights, atol=atol, rtol=rtol
+    ), f"weights should be {expected_weights}"
 
 
-def assert_functions():
-    return
\ No newline at end of file
+def assert_prediction(job, **kwargs):
+    assert job.status == ExecutionStatus.Success, "job should be successful"
+    atol = kwargs.get("atol", 10e-8)
+    rtol = kwargs.get("rtol", 10e-5)
+    threshold = kwargs.get("threshold", 0.9)
+    constant = kwargs["expected_value"]
+    predictions = job.outputs["-p"]
+    with open(predictions[0], "r") as f:
+        predictions = [i.strip() for i in f.readlines()]
+    predictions = [i for i in predictions if i != ""]
+    predictions = [float(i) for i in predictions[1:]]
+    assert majority_close(
+        predictions,
+        [constant] * len(predictions),
+        rtol=rtol,
+        atol=atol,
+        threshold=threshold,
+    ), f"predicted value should be {constant}"
+
+
+def assert_loss(job, **kwargs):
+    assert job.status == ExecutionStatus.Success, "job should be successful"
+    assert type(job[0].loss) == float, "loss should be an float"
+    assert_almost_equal(job[0].loss, kwargs["expected_loss"])
diff --git a/python/tests/test_regression.py b/python/tests/core.py
similarity index 57%
rename from python/tests/test_regression.py
rename to python/tests/core.py
index 21779f4143b..55c95bc9dbf 100644
--- a/python/tests/test_regression.py
+++ b/python/tests/core.py
@@ -5,25 +5,36 @@
 import numpy as np
 import pytest
 import os
-from test_helper import json_to_dict_list, dynamic_function_call, get_function_object, generate_string_combinations
+from test_helper import (
+    json_to_dict_list,
+    dynamic_function_call,
+    get_function_object,
+    generate_string_combinations,
+)
 
 CURR_DICT = os.path.dirname(os.path.abspath(__file__))
 
+
 def combine_list_cmds_grids(cmds, base_grid):
     list_of_key_val = []
     grids = []
     for key, value in cmds.items():
         value = [i for i in value if i != ""]
         if str(value).isdigit():
-            list_of_key_val.append([f" {key} {format(li, '.5f').rstrip('0').rstrip('.') }" for li in value])
+            list_of_key_val.append(
+                [f" {key} {format(li, '.5f').rstrip('0').rstrip('.') }" for li in value]
+            )
         else:
             list_of_key_val.append([f" {key} {li}" for li in value])
-    for new_cmd in generate_string_combinations([base_grid["#base"][0]], *list_of_key_val):
+    for new_cmd in generate_string_combinations(
+        [base_grid["#base"][0]], *list_of_key_val
+    ):
         tmp_grid = base_grid.copy()
         tmp_grid["#base"][0] = new_cmd
         grids.append(tmp_grid)
     return grids
 
+
 def cleanup_data_file():
     script_directory = os.path.dirname(os.path.realpath(__file__))
     # List all files in the directory
@@ -33,11 +44,12 @@ def cleanup_data_file():
         if file.endswith(".txt"):
             file_path = os.path.join(script_directory, file)
             os.remove(file_path)
-    
+
+
 @pytest.fixture
 def test_description(request):
     resource = request.param
-    yield resource  # 
+    yield resource  #
     cleanup_data_file()
 
 
@@ -48,23 +60,35 @@ def core_test(files, grid, outputs, job_assert, job_assert_args):
         job_assert(j, **job_assert_args)
 
 
-@pytest.mark.parametrize('test_description', json_to_dict_list("pytest.json"), indirect=True)
+@pytest.mark.parametrize(
+    "test_description", json_to_dict_list("test_cb.json"), indirect=True
+)
 def test_all(test_description):
-
     mutiply = test_description.get("*", None)
     plus = test_description.get("+", None)
-    base_grid = test_description['grid']
+    base_grid = test_description["grid"]
     grids = []
     if mutiply:
         grids = combine_list_cmds_grids(mutiply, base_grid)
     else:
         grids.append(base_grid)
-    
+
     for grid in grids:
-        options = Grid(
-            grid
+        options = Grid(grid)
+        data = dynamic_function_call(
+            "data_generation",
+            test_description["data_func"],
+            *test_description["data_func_args"].values(),
         )
-        data = dynamic_function_call("data_generation", test_description['data_func'], *test_description["data_func_args"])
-        assert_job = get_function_object("assert_job", test_description['assert_func'])
-        core_test(data, options, test_description['output'], assert_job, test_description['assert_func_args'])
+        for assert_func in test_description["assert_functions"]:
+            assert_job = get_function_object("assert_job", assert_func["assert_func"])
+            script_directory = os.path.dirname(os.path.realpath(__file__))
+            core_test(
+                script_directory + data,
+                options,
+                test_description["output"],
+                assert_job,
+                assert_func["assert_func_args"],
+            )
diff --git a/python/tests/data_generation.py b/python/tests/data_generation.py
index d2ea66cb53f..cfe2bf8c71f 100644
--- a/python/tests/data_generation.py
+++ b/python/tests/data_generation.py
@@ -1,10 +1,44 @@
 import random
+import os
+from test_helper import get_function_object
+
+script_directory = os.path.dirname(os.path.realpath(__file__))
+
 
 def constant_function(no_sample, constant, lower_bound, upper_bound):
     dataFile = f"constant_func_{no_sample}_{constant}_{upper_bound}_{lower_bound}.txt"
-    with open(dataFile, "w") as f:
+    with open(script_directory + "/" + dataFile, "w") as f:
         random.seed(10)
         for _ in range(no_sample):
             x = random.uniform(lower_bound, upper_bound)
             f.write(f"{constant} |f x:{x}\n")
     return dataFile
+
+
+def random_number_items(items):
+    num_items_to_select = random.randint(1, len(items))
+    return random.sample(items, num_items_to_select)
+
+
+def generate_cb_data(
+    num_examples, num_features, num_actions, reward_function, probability_function
+):
+    reward_function_obj = get_function_object(
"reward_functions", reward_function["name"] + ) + probability_function_obj = get_function_object( + "probability_functions", probability_function["name"] + ) + dataFile = f"cb_test_{num_examples}_{num_actions}_{num_features}.txt" + features = [f"feature{index}" for index in range(1, num_features + 1)] + with open(script_directory + "/" + dataFile, "w") as f: + for _ in range(num_examples): + chosen_action = random.randint(1, num_actions) + cost = reward_function_obj(chosen_action, **reward_function["params"]) + probability = probability_function_obj( + chosen_action, **probability_function["params"] + ) + f.write( + f'{chosen_action}:{cost}:{probability} | {" ".join(random_number_items(features))}\n' + ) + return dataFile diff --git a/python/tests/probability_functions.py b/python/tests/probability_functions.py new file mode 100644 index 00000000000..d8a4843fbac --- /dev/null +++ b/python/tests/probability_functions.py @@ -0,0 +1,2 @@ +def constant_probability(chosen_action=None): + return 1 diff --git a/python/tests/pytest.json b/python/tests/pytest.json deleted file mode 100644 index 8d0ae43a11f..00000000000 --- a/python/tests/pytest.json +++ /dev/null @@ -1,54 +0,0 @@ -[ - { - "data_func": "constant_function", - "data_func_args": [2000,5,1,100], - "assert_func": "assert_prediction", - "assert_func_args": { - "expected_value": 5, - "threshold":0.5 - }, - "grid": { - "#base": ["-P 50000 --preserve_performance_counters --save_resume "], - "#reg": ["", "--coin"] - }, - "output": ["--readable_model", "-p"] - }, - { - "data_func": "constant_function", - "data_func_args": [2000,5,1,100], - "assert_func": "assert_weight", - "assert_func_args": { - "expected_weights":[5, 0], - "atol": 1, - "rtol": 0.01 - }, - "grid": { - "#base": ["-P 50000 --preserve_performance_counters --save_resume "], - "#reg": ["", "--coin"] - }, - "output": ["--readable_model", "-p"] - }, - { - "data_func": "constant_function", - "data_func_args": [2000,5,1,100], - "assert_func": "assert_weight", - "assert_func_args": { - "expected_weights":[5, 0], - "atol": 100, - "rtol": 10 - }, - "grid": { - "#base": ["-P 1 --preserve_performance_counters --save_resume"], - "#reg": ["", "--coin", "--ftrl", "--pistol"] - }, - "output": ["--readable_model", "-p"], - "*" : { - "--learning_rate": [0.01, 0.001, 0.1], - "--loss_function": ["absolute", "quantile"], - "--power_t": [0.2, 0.5, ""] - }, - "+" :{ - "--learning_rate": [0.01, 0.001, 0.1] - } - } -] \ No newline at end of file diff --git a/python/tests/reward_functions.py b/python/tests/reward_functions.py new file mode 100644 index 00000000000..1f2a16f1e37 --- /dev/null +++ b/python/tests/reward_functions.py @@ -0,0 +1,2 @@ +def constant_reward(chosen_action=None): + return 1 diff --git a/python/tests/test_cb.json b/python/tests/test_cb.json new file mode 100644 index 00000000000..7a6a89ccd08 --- /dev/null +++ b/python/tests/test_cb.json @@ -0,0 +1,60 @@ +[ + { + "data_func": "generate_cb_data", + "data_func_args": { + "num_examples": 100, + "num_features": 1, + "num_action": 1, + "reward_function": { + "name": "constant_reward", + "params": {} + }, + "probability_function": { + "name": "constant_probability", + "params": {} + } + }, + "assert_functions": [ + { + "assert_func": "assert_loss", + "assert_func_args": {"expected_loss": 1} + }, + { + "assert_func": "assert_prediction", + "assert_func_args": { + "expected_value": 0, + "threshold": 0.5 + } + }, + { + "assert_func": "assert_weight", + "assert_func_args": { + "expected_weights": [ + 5, + 0 + ], + "atol": 100, + "rtol": 100 + 
+                }
+            }
+        ],
+        "grid": {
+            "#base": [
+                "--cb 1 -P 10000 --preserve_performance_counters --save_resume"
+            ],
+            "--cb_type": [
+                "ips",
+                "mtr"
+            ]
+        },
+        "*": {
+            "--cb": [
+                1
+            ]
+        },
+        "output": [
+            "--readable_model",
+            "-p"
+        ]
+    }
+]
\ No newline at end of file
diff --git a/python/tests/test_helper.py b/python/tests/test_helper.py
index fdcddbd7d8f..04460d9486f 100644
--- a/python/tests/test_helper.py
+++ b/python/tests/test_helper.py
@@ -7,11 +7,12 @@
 # Get the current directory
 current_dir = os.path.dirname(os.path.abspath(__file__))
 
+
 def json_to_dict_list(file):
-    with open(current_dir + "/" + file, 'r') as file:
+    with open(current_dir + "/" + file, "r") as file:
         # Load the JSON data
         return json.load(file)
-    
+
 
 def dynamic_function_call(module_name, function_name, *args, **kwargs):
     try:
@@ -36,7 +37,6 @@ def get_function_object(module_name, function_name):
         print(f"Function '{function_name}' not found in module '{module_name}'.")
 
 
-
 def generate_test_function(test_data):
     @pytest.dynamic
     def test_dynamic():
@@ -59,9 +59,9 @@ def generate_pytest_from_json(filepath):
     for test_case in json_data:
         test_function = generate_test_function(test_case)
         globals()[test_function.__name__] = test_function
-    
+
 
 def generate_string_combinations(*lists):
     combinations = list(itertools.product(*lists))
-    combinations = [''.join(combination) for combination in combinations]
+    combinations = ["".join(combination) for combination in combinations]
     return combinations
diff --git a/python/tests/test_regs.json b/python/tests/test_regs.json
new file mode 100644
index 00000000000..aea0a8f5a62
--- /dev/null
+++ b/python/tests/test_regs.json
@@ -0,0 +1,46 @@
+[
+    {
+        "data_func": "constant_function",
+        "data_func_args": {
+            "no_sample": 2000,
+            "constant": 5,
+            "lower_bound": 1,
+            "upper_bound": 100
+        },
+        "assert_functions": [
+            {
+                "assert_func": "assert_prediction",
+                "assert_func_args": {
+                    "expected_value": 5,
+                    "threshold": 0.5
+                }
+            },
+            {
+                "assert_func": "assert_weight",
+                "assert_func_args": {
+                    "expected_weights": [
+                        5,
+                        0
+                    ],
+                    "atol": 100,
+                    "rtol": 10
+                }
+            }
+        ],
+        "grid": {
+            "#base": [
+                "-P 50000 --preserve_performance_counters --save_resume "
+            ],
+            "#reg": [
+                "",
+                "--coin",
+                "--ftrl",
+                "--pistol"
+            ]
+        },
+        "output": [
+            "--readable_model",
+            "-p"
+        ]
+    }
+]
\ No newline at end of file