diff --git a/python/tests/test_framework/assert_job.py b/python/tests/test_framework/assert_job.py
index b8c51606565..415500c9e81 100644
--- a/python/tests/test_framework/assert_job.py
+++ b/python/tests/test_framework/assert_job.py
@@ -68,11 +68,17 @@ def assert_loss(job, **kwargs):
     assert job.status == ExecutionStatus.Success, "job should be successful"
     assert type(job[0].loss) == float, "loss should be an float"
     decimal = kwargs.get("decimal", 2)
-    if job[0].loss < kwargs["expected_loss"]:
-        return
     assert_almost_equal(job[0].loss, kwargs["expected_loss"], decimal=decimal)
 
 
+def assert_loss_below(job, **kwargs):
+    assert job.status == ExecutionStatus.Success, "job should be successful"
+    assert type(job[0].loss) == float, "loss should be an float"
+    assert (
+        job[0].loss <= kwargs["expected_loss"]
+    ), f"loss should be below {kwargs['expected_loss']}"
+
+
 def assert_prediction_with_generated_data(job, **kwargs):
     assert job.status == ExecutionStatus.Success, "job should be successful"
     expected_class = []
diff --git a/python/tests/test_framework/cb/data_generation.py b/python/tests/test_framework/cb/data_generation.py
index 2b218da999a..82359233c69 100644
--- a/python/tests/test_framework/cb/data_generation.py
+++ b/python/tests/test_framework/cb/data_generation.py
@@ -17,8 +17,7 @@ def generate_cb_data(
     num_actions,
     reward_function,
     logging_policy,
-    no_context=1,
-    context_name=None,
+    context_name=["1"],
 ):
 
     dataFile = f"cb_test_{num_examples}_{num_actions}_{num_features}.txt"
@@ -32,16 +31,20 @@ def generate_cb_data(
     features = [f"feature{index}" for index in range(1, num_features + 1)]
     with open(os.path.join(script_directory, dataFile), "w") as f:
         for _ in range(num_examples):
+            no_context = len(context_name)
             if no_context > 1:
                 context = random.randint(1, no_context)
-            if not context_name:
-                context_name = [f"{index}" for index in range(1, no_context + 1)]
+            else:
+                context = 1
 
             def return_cost_probability(chosen_action, context=1):
-                cost = reward_function_obj(
+                cost = -reward_function_obj(
                     chosen_action, context, **reward_function["params"]
                 )
+                if "params" not in logging_policy:
+                    logging_policy["params"] = {}
                 logging_policy["params"]["chosen_action"] = chosen_action
+                logging_policy["params"]["num_actions"] = num_actions
                 probability = logging_policy_obj(**logging_policy["params"])
                 return cost, probability
 
diff --git a/python/tests/test_framework/classification/data_generation.py b/python/tests/test_framework/classification/data_generation.py
index fb03ccf5065..48640735117 100644
--- a/python/tests/test_framework/classification/data_generation.py
+++ b/python/tests/test_framework/classification/data_generation.py
@@ -7,20 +7,20 @@
 
 
 def generate_classification_data(
-    num_sample,
+    num_example,
     num_classes,
     num_features,
     classify_func,
     bounds=None,
 ):
-    dataFile = f"classification_{num_classes}_{num_features}_{num_sample}.txt"
+    dataFile = f"classification_{num_classes}_{num_features}_{num_example}.txt"
     classify_func_obj = get_function_object(
         "classification.classification_functions", classify_func["name"]
     )
     if not bounds:
         bounds = [[0, 1] for _ in range(num_features)]
     with open(os.path.join(script_directory, dataFile), "w") as f:
-        for _ in range(num_sample):
+        for _ in range(num_example):
             x = [
                 random.uniform(bounds[index][0], bounds[index][1])
                 for index in range(num_features)
diff --git a/python/tests/test_framework/slate/action_space.py b/python/tests/test_framework/slate/action_space.py
new file mode 100644
index 00000000000..b7015023dc7
--- /dev/null
+++ b/python/tests/test_framework/slate/action_space.py
@@ -0,0 +1,7 @@
+def new_action_after_threshold(**kwargs):
+    iteration = kwargs.get("iteration", 0)
+    threshold = kwargs.get("threshold", 0)
+    # before iteration 500, it is sunny and after it is raining
+    if iteration > threshold:
+        return kwargs["after"]
+    return kwargs["before"]
diff --git a/python/tests/test_framework/slate/assert_job.py b/python/tests/test_framework/slate/assert_job.py
new file mode 100644
index 00000000000..887b05a050d
--- /dev/null
+++ b/python/tests/test_framework/slate/assert_job.py
@@ -0,0 +1,43 @@
+from numpy.testing import assert_allclose, assert_almost_equal
+from vw_executor.vw import ExecutionStatus
+import numpy as np
+
+
+def majority_close(arr1, arr2, rtol, atol, threshold):
+    # Check if the majority of elements are close
+    close_count = np.count_nonzero(np.isclose(arr1, arr2, rtol=rtol, atol=atol))
+    return close_count >= len(arr1) * threshold
+
+
+def assert_prediction(job, **kwargs):
+    assert job.status == ExecutionStatus.Success, "job should be successful"
+    atol = kwargs.get("atol", 10e-8)
+    rtol = kwargs.get("rtol", 10e-5)
+    threshold = kwargs.get("threshold", 0.9)
+    expected_value = kwargs["expected_value"]
+    predictions = job.outputs["-p"]
+    res = []
+    with open(predictions[0], "r") as f:
+        exampleRes = []
+        while True:
+            line = f.readline()
+            if not line:
+                break
+            if line.count(":") == 0:
+                res.append(exampleRes)
+                exampleRes = []
+                continue
+            slotRes = [0] * line.count(":")
+            slot = line.split(",")
+            for i in range(len(slot)):
+                actionInd = int(slot[i].split(":")[0])
+                slotRes[i] = float(slot[actionInd].split(":")[1])
+            exampleRes.append(slotRes)
+
+    assert majority_close(
+        res,
+        [expected_value] * len(res),
+        rtol=rtol,
+        atol=atol,
+        threshold=threshold,
+    ), f"predicted value should be {expected_value}, \n actual values are {res}"
diff --git a/python/tests/test_framework/slate/data_generation.py b/python/tests/test_framework/slate/data_generation.py
new file mode 100644
index 00000000000..9f2955f15d0
--- /dev/null
+++ b/python/tests/test_framework/slate/data_generation.py
@@ -0,0 +1,70 @@
+import random
+import os
+from test_helper import get_function_object
+
+script_directory = os.path.dirname(os.path.realpath(__file__))
+random.seed(10)
+
+
+def generate_slate_data(
+    num_examples,
+    reward_function,
+    logging_policy,
+    action_space,
+    context_name=["1"],
+):
+
+    action_space_obj = get_function_object("slate.action_space", action_space["name"])
+
+    reward_function_obj = get_function_object(
+        "slate.reward_functions", reward_function["name"]
+    )
+    logging_policy_obj = get_function_object(
+        "slate.logging_policies", logging_policy["name"]
+    )
+
+    def return_cost_probability(chosen_action, chosen_slot, context):
+        cost = -reward_function_obj(
+            chosen_action, context, chosen_slot, **reward_function["params"]
+        )
+        logging_policy["params"]["num_action"] = num_actions[chosen_slot - 1]
+        logging_policy["params"]["chosen_action"] = chosen_action
+        probability = logging_policy_obj(**logging_policy["params"])
+        return cost, probability
+
+    dataFile = f"slate_test_{num_examples}_{generate_slate_data.__name__}.txt"
+    with open(os.path.join(script_directory, dataFile), "w") as f:
+        for i in range(num_examples):
+            action_space["params"]["iteration"] = i
+            action_spaces = action_space_obj(**action_space["params"])
+            num_slots = len(action_spaces)
+            num_actions = [len(slot) for slot in action_spaces]
+            slot_name = [f"slot_{index}" for index in range(1, num_slots + 1)]
+            chosen_actions = []
+            num_context = len(context_name)
+            if num_context > 1:
+                context = random.randint(1, num_context)
+            else:
+                context = 1
+            for s in range(num_slots):
+                chosen_actions.append(random.randint(1, num_actions[s]))
+            chosen_actions_cost_prob = [
+                return_cost_probability(action, slot + 1, context)
+                for slot, action in enumerate(chosen_actions)
+            ]
+            total_cost = sum([cost for cost, _ in chosen_actions_cost_prob])
+
+            f.write(f"slates shared {total_cost} |User {context_name[context-1]}\n")
+            # write actions
+            for ind, slot in enumerate(action_spaces):
+                for a in slot:
+                    f.write(
+                        f"slates action {ind} |Action {a}\n",
+                    )
+
+            for s in range(num_slots):
+                f.write(
+                    f"slates slot {chosen_actions[s]}:{chosen_actions_cost_prob[s][1]} |Slot {slot_name[s]}\n"
+                )
+            f.write("\n")
+    return os.path.join(script_directory, dataFile)
diff --git a/python/tests/test_framework/slate/logging_policies.py b/python/tests/test_framework/slate/logging_policies.py
new file mode 100644
index 00000000000..1b80912ffe7
--- /dev/null
+++ b/python/tests/test_framework/slate/logging_policies.py
@@ -0,0 +1,3 @@
+def even_probability(chosen_action, **kwargs):
+    num_actions = kwargs["num_action"]
+    return round(1 / num_actions, 2)
diff --git a/python/tests/test_framework/slate/reward_functions.py b/python/tests/test_framework/slate/reward_functions.py
new file mode 100644
index 00000000000..2a95169a508
--- /dev/null
+++ b/python/tests/test_framework/slate/reward_functions.py
@@ -0,0 +1,12 @@
+def fixed_reward(chosen_action, context, slot, **kwargs):
+    reward = kwargs["reward"]
+    return reward[slot - 1][chosen_action - 1]
+
+
+def reverse_reward_after_threshold(chosen_action, context, slot, **kwargs):
+    reward = kwargs["reward"]
+    iteration = kwargs.get("iteration", 0)
+    threshold = kwargs.get("threshold", 0)
+    if iteration > threshold:
+        reward = [i[::-1] for i in reward]
+    return reward[slot - 1][chosen_action - 1]
diff --git a/python/tests/test_framework/test_configs/cb.json b/python/tests/test_framework/test_configs/cb.json
index 880141d444c..26d8ae09119 100644
--- a/python/tests/test_framework/test_configs/cb.json
+++ b/python/tests/test_framework/test_configs/cb.json
@@ -6,7 +6,7 @@
             "params": {
                 "num_examples": 100,
                 "num_features": 1,
-                "num_action": 2,
+                "num_actions": 2,
                 "reward_function": {
                     "name": "constant_reward",
                     "params": {
@@ -19,17 +19,17 @@
                 "logging_policy": {
                     "name": "even_probability",
                     "params": {
-                        "num_actions": 2
                     }
                 },
-                "no_context": 2
+                "context_name": ["1", "2"]
             }
         },
         "assert_functions": [
             {
                 "name": "assert_loss",
                 "params": {
-                    "expected_loss": 0.1
+                    "expected_loss": -1,
+                    "decimal": 1
                 }
             },
             {
@@ -44,61 +44,61 @@
             }
         ],
         "grids": {
-            "g0": {
+            "cb": {
                 "#base": [
                     "--cb_explore 2"
                 ]
             },
-            "g1": {
+            "epsilon": {
                 "--epsilon": [
                     0.1,
                     0.2,
                     0.3
                 ]
            },
-            "g2": {
+            "first": {
                 "--first": [
                     1,
                     2
                 ]
            },
-            "g3": {
+            "bag": {
                 "--bag": [
                     5,
                     6,
                     7
                 ]
            },
-            "g4": {
+            "cover": {
                 "--cover": [
                     1,
                     2,
                     3
                 ]
            },
-            "g5": {
+            "squarecb": {
                 "--squarecb": [
                     "--gamma_scale 1000",
                     "--gamma_scale 10000"
                 ]
            },
-            "g6": {
+            "synthcover": {
                 "--synthcover": [
                     ""
                 ]
            },
-            "g7": {
+            "regcb": {
                 "--regcb": [
                     ""
                 ]
            },
-            "g8": {
+            "softmax": {
                 "--softmax": [
                     ""
                 ]
             }
         },
-        "grids_expression": "g0 * (g1 + g2 + g3 + g4 + g5 +g6 + g7 + g8)",
+        "grids_expression": "cb * (epsilon + first + bag + cover + squarecb + synthcover + regcb + softmax)",
         "output": [
             "--readable_model",
             "-p"
@@ -111,16 +111,13 @@
             "params": {
                 "num_examples": 100,
                 "num_features": 1,
-                "num_action": 1,
+                "num_actions": 1,
                 "reward_function": {
                     "name": "fixed_reward",
                     "params": {}
                 },
                 "logging_policy": {
-                    "name": "even_probability",
-                    "params": {
-                        "num_actions": 1
-                    }
+                    "name": "even_probability"
                 }
             }
         },
@@ -128,7 +125,7 @@
             {
                 "name": "assert_loss",
                 "params": {
-                    "expected_loss": 1
+                    "expected_loss": -1
                 }
             },
             {
@@ -167,7 +164,7 @@
             "params": {
                 "num_examples": 100,
                 "num_features": 2,
-                "num_action": 2,
+                "num_actions": 2,
                 "reward_function": {
                     "name": "fixed_reward_two_action",
                     "params": {}
@@ -175,17 +172,16 @@
                 "logging_policy": {
                     "name": "even_probability",
                     "params": {
-                        "num_actions": 2
                     }
                 },
-                "no_context": 2
+                "context_name": ["1", "2"]
             }
         },
         "assert_functions": [
             {
                 "name": "assert_loss",
                 "params": {
-                    "expected_loss": 0.6,
+                    "expected_loss": -0.4,
                     "decimal": 1
                 }
             },
             {
@@ -203,61 +199,61 @@
             }
         ],
         "grids": {
-            "g0": {
+            "cb": {
                 "#base": [
                     "--cb_explore_adf"
                 ]
             },
-            "g1": {
+            "epsilon": {
                 "--epsilon": [
                     0.1,
                     0.2,
                     0.3
                 ]
            },
-            "g2": {
+            "first": {
                 "--first": [
                     1,
                     2
                 ]
            },
-            "g3": {
+            "bag": {
                 "--bag": [
                     5,
                     6,
                     7
                 ]
            },
-            "g4": {
+            "cover": {
                 "--cover": [
                     1,
                     2,
                     3
                 ]
            },
-            "g5": {
+            "squarecb": {
                 "--squarecb": [
                     "--gamma_scale 1000",
                     "--gamma_scale 10000"
                 ]
            },
-            "g6": {
+            "synthcover": {
                 "--synthcover": [
                     ""
                 ]
            },
-            "g7": {
+            "regcb": {
                 "--regcb": [
                     ""
                 ]
            },
-            "g8": {
+            "softmax": {
                 "--softmax": [
                     ""
                 ]
             }
         },
-        "grids_expression": "g0 * (g1 + g2 + g3 + g4 + g5 +g6 + g7 + g8)",
+        "grids_expression": "cb * (epsilon + first + bag + cover + squarecb + synthcover + regcb + softmax)",
         "output": [
             "--readable_model",
             "-p"
diff --git a/python/tests/test_framework/test_configs/classification.json b/python/tests/test_framework/test_configs/classification.json
index bcb269f8910..bcd29a04012 100644
--- a/python/tests/test_framework/test_configs/classification.json
+++ b/python/tests/test_framework/test_configs/classification.json
@@ -4,9 +4,9 @@
         "data_func": {
             "name": "generate_classification_data",
             "params": {
-                "no_sample": 2000,
-                "no_class": 2,
-                "no_features": 1,
+                "num_example": 2000,
+                "num_classes": 2,
+                "num_features": 1,
                 "classify_func": {
                     "name": "binary_classification_one_feature",
                     "params": {}
@@ -26,7 +26,7 @@
                     "data_func": {
                         "name": "generate_classification_data",
                         "params": {
-                            "no_sample": 100,
+                            "num_example": 100,
                             "no_class": 2,
                             "no_features": 1,
                             "classify_func": {
@@ -57,9 +57,9 @@
         "data_func": {
             "name": "generate_classification_data",
             "params": {
-                "no_sample": 100000,
-                "no_class": 25,
-                "no_features": 2,
+                "num_example": 100000,
+                "num_classes": 25,
+                "num_features": 2,
                 "classify_func": {
                     "name": "multi_classification_two_features",
                     "params": {}
@@ -73,16 +73,16 @@
                     "data_func": {
                         "name": "generate_classification_data",
                         "params": {
-                            "no_sample": 500,
-                            "no_class": 25,
-                            "no_features": 2,
+                            "num_example": 500,
+                            "num_classes": 25,
+                            "num_features": 2,
                             "classify_func": {
                                 "name": "multi_classification_two_features",
                                 "params": {}
                             }
                         }
                     },
-                    "accuracy_threshold": 0.5
+                    "accuracy_threshold": 0.4
                 }
             }
         ],
diff --git a/python/tests/test_framework/test_configs/slate.json b/python/tests/test_framework/test_configs/slate.json
new file mode 100644
index 00000000000..bc7f59b49cf
--- /dev/null
+++ b/python/tests/test_framework/test_configs/slate.json
@@ -0,0 +1,144 @@
+[
+    {
+        "test_name": "slates",
+        "data_func": {
+            "name": "generate_slate_data",
+            "params": {
+                "num_examples": 1000,
+                "reward_function": {
+                    "name": "reverse_reward_after_threshold",
+                    "params": {
+                        "reward": [
+                            [
+                                1,
+                                0
+                            ],
+                            [
+                                0,
+                                1
+                            ]
+                        ],
+                        "threshold": 500
+                    }
+                },
+                "logging_policy": {
+                    "name": "even_probability",
+                    "params": {}
+                },
+                "action_space": {
+                    "name": "new_action_after_threshold",
+                    "params": {
+                        "threshold": 500,
+                        "before": [
[ + "longshirt", + "tshirt" + ], + [ + "shorts", + "jeans" + ] + ], + "after": [ + [ + "rainshirt", + "buttonupshirt" + ], + [ + "formalpants", + "rainpants" + ] + ] + } + } + } + }, + "assert_functions": [ + { + "name": "assert_loss", + "params": { + "expected_loss": -1.9, + "decimal": 0.1 + } + }, + { + "name": "assert_prediction", + "params": { + "expected_value": [ + [ + 0.1, + 0.9 + ], + [ + 0.9, + 0.1 + ] + ], + "threshold": 0.8, + "atol": 0.01, + "rtol": 0.01 + } + } + ], + "grids": { + "slate": { + "#base": [ + "--slates" + ] + }, + "epsilon": { + "--epsilon": [ + 0.1, + 0.2, + 0.3 + ] + }, + "first": { + "--first": [ + 1, + 2 + ] + }, + "bag": { + "--bag": [ + 5, + 6, + 7 + ] + }, + "cover": { + "--cover": [ + 1, + 2, + 3 + ] + }, + "squarecb": { + "--squarecb": [ + "--gamma_scale 1000", + "--gamma_scale 10000" + ] + }, + "synthcover": { + "--synthcover": [ + "" + ] + }, + "regcb": { + "--regcb": [ + "" + ] + }, + "softmax": { + "--softmax": [ + "" + ] + } + }, + "grids_expression": "slate * (epsilon + first + bag + cover + squarecb + synthcover + regcb + softmax)", + "output": [ + "--readable_model", + "-p" + ] + } +] \ No newline at end of file diff --git a/python/tests/test_framework/test_core.py b/python/tests/test_framework/test_core.py index 56f58ddfa6b..9e0f6ae056b 100644 --- a/python/tests/test_framework/test_core.py +++ b/python/tests/test_framework/test_core.py @@ -7,11 +7,11 @@ import logging from test_helper import ( json_to_dict_list, - dynamic_function_call, - get_function_object, evaluate_expression, - variable_mapping, copy_file, + call_function_with_dirs, + custom_sort, + get_function_obj_with_dirs, ) from conftest import STORE_OUTPUT @@ -43,7 +43,7 @@ def cleanup_data_file(): @pytest.fixture def test_descriptions(request): resource = request.param - yield resource # + yield resource cleanup_data_file() @@ -86,30 +86,28 @@ def get_options(grids, expression): @pytest.mark.usefixtures("test_descriptions", TEST_CONFIG_FILES) def init_all(test_descriptions): for tIndex, tests in enumerate(test_descriptions): + task_folder = TEST_CONFIG_FILES_NAME[tIndex].split(".")[0] + package_name = [task_folder + ".", ""] + package_name = custom_sort(task_folder, package_name) + package_name.append(".") if type(tests) is not list: tests = [tests] for test_description in tests: options = get_options( test_description["grids"], test_description["grids_expression"] ) - task_folder = TEST_CONFIG_FILES_NAME[tIndex].split(".")[0] - package_name = [task_folder + ".", ""] - for dir in package_name: - try: - data = dynamic_function_call( - dir + "data_generation", - test_description["data_func"]["name"], - *test_description["data_func"]["params"].values(), - ) - if data: - break - except: - pass + data = call_function_with_dirs( + package_name, + "data_generation", + test_description["data_func"]["name"], + **test_description["data_func"]["params"], + ) for assert_func in test_description["assert_functions"]: - assert_job = get_function_object("assert_job", assert_func["name"]) - if not assert_job: - continue + + assert_job = get_function_obj_with_dirs( + package_name, "assert_job", assert_func["name"] + ) script_directory = os.path.dirname(os.path.realpath(__file__)) core_test( os.path.join(script_directory, data), diff --git a/python/tests/test_framework/test_helper.py b/python/tests/test_framework/test_helper.py index 01092a12adf..237ff9ac2b1 100644 --- a/python/tests/test_framework/test_helper.py +++ b/python/tests/test_framework/test_helper.py @@ -26,13 +26,6 @@ def 
evaluate_expression(expression, variables): return result -def variable_mapping(grids): - variables_map = {} - for i in range(len(grids)): - variables_map["g" + str(len(variables_map))] = grids[i] - return variables_map - - def dynamic_function_call(module_name, function_name, *args, **kwargs): try: calling_frame = inspect.stack()[1] @@ -43,12 +36,13 @@ def dynamic_function_call(module_name, function_name, *args, **kwargs): result = function(*args, **kwargs) return result except ImportError: - print(f"Module '{module_name}' not found.") + pass except AttributeError: - print(f"Function '{function_name}' not found in module '{module_name}'.") + pass def get_function_object(module_name, function_name): + function = None try: calling_frame = inspect.stack()[1] calling_module = inspect.getmodule(calling_frame[0]) @@ -57,9 +51,9 @@ def get_function_object(module_name, function_name): function = getattr(module, function_name) return function except ImportError: - print(f"Module '{module_name}' not found.") + pass except AttributeError: - print(f"Function '{function_name}' not found in module '{module_name}'.") + pass def generate_string_combinations(*lists): @@ -78,3 +72,54 @@ def copy_file(source_file, destination_file): print( f"Permission denied. Unable to copy '{source_file}' to '{destination_file}'." ) + + +def call_function_with_dirs(dirs, module_name, function_name, **kargs): + + for dir in dirs: + try: + data = dynamic_function_call( + dir + module_name, + function_name, + **kargs, + ) + if data: + return data + except Exception as error: + if type(error) not in [ModuleNotFoundError]: + raise error + + +def get_function_obj_with_dirs(dirs, module_name, function_name): + obj = None + for dir in dirs: + try: + obj = get_function_object( + dir + module_name, + function_name, + ) + if obj: + return obj + except Exception as error: + if type(error) not in [ModuleNotFoundError]: + raise error + if not obj: + raise ModuleNotFoundError( + f"Module '{module_name}' not found in any of the directories {dirs}." + ) + + +def calculate_similarity(word, string): + # Calculate the similarity score between the string and the word + score = 0 + for char in word: + if char in string: + score += 1 + return score + + +def custom_sort(word, strings): + # Sort the list of strings based on their similarity to the word + return sorted( + strings, key=lambda string: calculate_similarity(word, string), reverse=True + )