VowpalWabbit · ataymano · May 31, 2023 · May 17, 2023 · May 24, 2023 · May 26, 2023
diff --git a/python/tests/assert_job.py b/python/tests/assert_job.py
@@ -0,0 +1,45 @@
+import numpy as np
+from numpy.testing import assert_allclose, assert_array_almost_equal
+from vw_executor.vw import ExecutionStatus
+
+
+def get_from_kwargs(kwargs, key, default=None):
+    """Return ``kwargs[key]`` when ``key`` is present, otherwise ``default``.
+
+    Args:
+        kwargs: Dictionary of option names to values.
+        key: Option name to look up.
+        default: Value returned when ``key`` is absent.
+
+    Returns:
+        The stored value (even when it is ``None`` or falsy), or ``default``
+        if the key is missing.
+    """
+    return kwargs.get(key, default)
+
+def majority_close(arr1, arr2, rtol, atol, threshold):
+    """Tell whether enough element pairs of ``arr1`` and ``arr2`` are close.
+
+    Args:
+        arr1: First sequence of numbers; its length sets the required count.
+        arr2: Second sequence, compared element-wise with ``arr1``.
+        rtol: Relative tolerance forwarded to ``np.isclose``.
+        atol: Absolute tolerance forwarded to ``np.isclose``.
+        threshold: Fraction of ``len(arr1)`` that must be exceeded.
+
+    Returns:
+        True when the number of close pairs is strictly greater than
+        ``len(arr1) * threshold``.
+    """
+    is_close = np.isclose(arr1, arr2, rtol=rtol, atol=atol)
+    required = len(arr1) * threshold
+    return np.count_nonzero(is_close) > required
+
+def assert_weight(job, **kwargs):
+    """Assert that a finished job produced the expected model weights.
+
+    Args:
+        job: Executed job; its first task must expose a ``--readable_model``
+            output that ``model9`` can parse.
+        **kwargs: ``expected_weights`` (required) plus optional ``atol`` and
+            ``rtol`` tolerances forwarded to ``assert_allclose``.
+
+    Raises:
+        AssertionError: If the job did not succeed, or the weights differ from
+            ``expected_weights`` by more than the tolerances.
+        KeyError: If ``expected_weights`` is not supplied.
+    """
+    # NOTE: 10e-8 equals 1e-7 and 10e-5 equals 1e-4.
+    atol = kwargs.get("atol", 10e-8)
+    rtol = kwargs.get("rtol", 10e-5)
+    expected_weights = kwargs["expected_weights"]
+    assert job.status == ExecutionStatus.Success, f"{job.opts} job should be successful"
+    weights = job[0].model9('--readable_model').weights
+    weights = weights["weight"].to_list()
+    # The message has to be passed as err_msg: assert_allclose raises on its
+    # own, so a trailing ", message" after the call would never be reported.
+    assert_allclose(
+        weights,
+        expected_weights,
+        atol=atol,
+        rtol=rtol,
+        err_msg=f"weights should be {expected_weights}",
+    )
+
+def assert_prediction(job, **kwargs):
+    """Assert that most predictions of a finished job match a constant.
+
+    The first line of the ``-p`` prediction file is skipped; every remaining
+    line is parsed as a float and compared with ``expected_value``.
+
+    Args:
+        job: Executed job exposing ``status`` and an ``outputs['-p']`` list
+            whose first entry is the prediction file path.
+        **kwargs: ``expected_value`` (required) plus optional ``atol``,
+            ``rtol`` and ``threshold`` (fraction of predictions that must be
+            exceeded, default 0.9).
+
+    Raises:
+        AssertionError: If the job did not succeed or too few predictions are
+            close to ``expected_value``.
+        KeyError: If ``expected_value`` is not supplied.
+    """
+    assert job.status == ExecutionStatus.Success, "job should be successful"
+    atol = kwargs.get("atol", 10e-8)
+    rtol = kwargs.get("rtol", 10e-5)
+    threshold = kwargs.get("threshold", 0.9)
+    constant = kwargs["expected_value"]
+    prediction_path = job.outputs['-p'][0]
+    with open(prediction_path, "r") as f:
+        lines = f.readlines()
+    # The first prediction line is deliberately left out of the comparison.
+    values = [float(line) for line in lines[1:]]
+    expected = [constant] * len(values)
+    assert majority_close(values, expected, rtol=rtol, atol=atol, threshold=threshold), f"predicted value should be {constant}"
+
+
+
+def assert_functions():
+    """Placeholder with no behavior yet; always returns ``None``."""
+    return None
diff --git a/python/tests/data_generation.py b/python/tests/data_generation.py
@@ -0,0 +1,10 @@
+import random
+
+def constant_function(no_sample, constant, lower_bound, upper_bound):
+    """Write a data file in which every example is labelled ``constant``.
+
+    Each line has the form ``<constant> |f x:<x>`` where ``x`` is drawn
+    uniformly from ``[lower_bound, upper_bound]`` with a fixed seed, so the
+    same arguments always produce the same file content.
+
+    Args:
+        no_sample: Number of lines to write.
+        constant: Label written at the start of every line.
+        lower_bound: Lower limit for the random feature value.
+        upper_bound: Upper limit for the random feature value.
+
+    Returns:
+        The name of the file that was written, relative to the current
+        working directory.
+    """
+    data_file = f"constant_func_{no_sample}_{constant}_{upper_bound}_{lower_bound}.txt"
+    # A private generator keeps the data reproducible without reseeding the
+    # process-wide ``random`` state as a side effect.
+    rng = random.Random(10)
+    with open(data_file, "w") as f:
+        for _ in range(no_sample):
+            x = rng.uniform(lower_bound, upper_bound)
+            f.write(f"{constant} |f x:{x}\n")
+    return data_file
diff --git a/python/tests/pytest.json b/python/tests/pytest.json
@@ -0,0 +1,54 @@
+[
+    {
+        "data_func": "constant_function",
+        "data_func_args": [2000,5,1,100],
+        "assert_func": "assert_prediction",
+        "assert_func_args": {
+            "expected_value": 5,
+            "threshold":0.5
+        },
+        "grid":   {
+            "#base": ["-P 50000 --preserve_performance_counters --save_resume "],
+            "#reg": ["", "--coin"]
+        },
+        "output": ["--readable_model", "-p"]
+    },
+    {
+        "data_func": "constant_function",
+        "data_func_args": [2000,5,1,100],
+        "assert_func": "assert_weight",
+        "assert_func_args": {
+            "expected_weights":[5, 0],
+            "atol": 1,
+            "rtol": 0.01
+        },
+        "grid":   {
+            "#base": ["-P 50000 --preserve_performance_counters --save_resume "],
+            "#reg": ["", "--coin"]
+        },
+        "output": ["--readable_model", "-p"]
+    },
+    {
+        "data_func": "constant_function",
+        "data_func_args": [2000,5,1,100],
+        "assert_func": "assert_weight",
+        "assert_func_args": {
+            "expected_weights":[5, 0],
+            "atol": 100,
+            "rtol": 10
+        },
+        "grid":   {
+            "#base": ["-P 1 --preserve_performance_counters --save_resume"],
+            "#reg": ["", "--coin", "--ftrl", "--pistol"]
+        },
+        "output": ["--readable_model", "-p"],
+        "*" : {
+            "--learning_rate": [0.01, 0.001, 0.1],
+            "--loss_function": ["absolute", "quantile"],
+            "--power_t": [0.2, 0.5, ""]
+        },
+        "+" :{
+            "--learning_rate": [0.01, 0.001, 0.1]
+        }
+    }
+]
diff --git a/python/tests/test_helper.py b/python/tests/test_helper.py
@@ -0,0 +1,67 @@
+import json
+import importlib
+import pytest
+import os
+import itertools
+
+# Directory that contains this helper module; relative JSON file names are
+# resolved against it.
+current_dir = os.path.dirname(os.path.abspath(__file__))
+
+def json_to_dict_list(file):
+    """Parse a JSON file located in the same directory as this module.
+
+    Args:
+        file: File name relative to this module's directory.
+
+    Returns:
+        The decoded JSON document.
+    """
+    json_path = current_dir + "/" + file
+    with open(json_path, 'r') as handle:
+        return json.load(handle)
+
+
+def dynamic_function_call(module_name, function_name, *args, **kwargs):
+    """Import ``module_name`` and call its ``function_name`` attribute.
+
+    Args:
+        module_name: Importable module name.
+        function_name: Name of the callable inside that module.
+        *args: Positional arguments forwarded to the callable.
+        **kwargs: Keyword arguments forwarded to the callable.
+
+    Returns:
+        Whatever the callable returns.
+
+    Raises:
+        ImportError: If the module cannot be imported.
+        AttributeError: If the module has no attribute ``function_name``.
+    """
+    try:
+        module = importlib.import_module(module_name)
+    except ImportError as err:
+        raise ImportError(f"Module '{module_name}' not found.") from err
+    try:
+        function = getattr(module, function_name)
+    except AttributeError as err:
+        raise AttributeError(
+            f"Function '{function_name}' not found in module '{module_name}'."
+        ) from err
+    # Called outside the try blocks so that errors raised by the function
+    # itself propagate unchanged instead of being reported as lookup failures.
+    return function(*args, **kwargs)
+
+def get_function_object(module_name, function_name):
+    """Import ``module_name`` and return its ``function_name`` attribute.
+
+    Args:
+        module_name: Importable module name.
+        function_name: Name of the attribute to fetch from that module.
+
+    Returns:
+        The object bound to ``function_name`` in the module.
+
+    Raises:
+        ImportError: If the module cannot be imported.
+        AttributeError: If the module has no attribute ``function_name``.
+    """
+    # Fail loudly: returning None here would only surface later as an
+    # unrelated "NoneType is not callable" error in the caller.
+    try:
+        module = importlib.import_module(module_name)
+    except ImportError as err:
+        raise ImportError(f"Module '{module_name}' not found.") from err
+    try:
+        return getattr(module, function_name)
+    except AttributeError as err:
+        raise AttributeError(
+            f"Function '{function_name}' not found in module '{module_name}'."
+        ) from err
+
+
+
+def generate_test_function(test_data):
+    """Build a placeholder test function named after ``test_data["name"]``.
+
+    Args:
+        test_data: Test description; must contain a ``"name"`` entry.
+
+    Returns:
+        A zero-argument function whose ``__name__`` is ``test_data["name"]``.
+
+    Raises:
+        KeyError: If ``test_data`` has no ``"name"`` entry.
+    """
+    # No decorator is applied: pytest has no ``dynamic`` attribute, so
+    # decorating with it would raise AttributeError on every call.
+    def test_dynamic():
+        # Placeholder body: the checks driven by test_data are not
+        # implemented yet.
+        pass
+
+    # Set a custom name for the test function
+    test_dynamic.__name__ = test_data["name"]
+
+    return test_dynamic
+
+
+def generate_pytest_from_json(filepath):
+    """Create one test function per entry of a JSON file.
+
+    Each generated function is stored in this module's global namespace
+    under its own ``__name__``.
+
+    Args:
+        filepath: Path of the JSON file holding a list of test descriptions.
+    """
+    with open(filepath, "r") as handle:
+        test_cases = json.load(handle)
+
+    module_namespace = globals()
+    for case in test_cases:
+        generated = generate_test_function(case)
+        module_namespace[generated.__name__] = generated
+
+
+def generate_string_combinations(*lists):
+    """Concatenate one string from each list, for every possible choice.
+
+    Args:
+        *lists: Iterables of strings.
+
+    Returns:
+        A list with the joined string of every element of the Cartesian
+        product of ``lists``, in ``itertools.product`` order.
+    """
+    return [''.join(parts) for parts in itertools.product(*lists)]
diff --git a/python/tests/test_regression.py b/python/tests/test_regression.py
@@ -0,0 +1,70 @@
+from vw_executor.vw import Vw
+from vw_executor.vw_opts import Grid
+from numpy.testing import assert_allclose
+import pandas as pd
+import numpy as np
+import pytest
+import os
+from test_helper import json_to_dict_list, dynamic_function_call, get_function_object, generate_string_combinations
+
+# Directory that contains this test module; used as the parent of the
+# ".vw_cache" path handed to Vw.
+CURR_DICT = os.path.dirname(os.path.abspath(__file__))
+
+def combine_list_cmds_grids(cmds, base_grid):
+    """Expand ``base_grid`` into one grid per combination of option values.
+
+    Every option in ``cmds`` contributes one `` <option> <value>`` fragment
+    per non-empty value. Each element of the Cartesian product of those
+    fragments is appended to the first ``#base`` command of ``base_grid``.
+
+    Args:
+        cmds: Mapping of option name to a list of candidate values; entries
+            equal to ``""`` are skipped.
+        base_grid: Grid description whose ``"#base"`` entry is a non-empty
+            list of command strings. It is left unmodified.
+
+    Returns:
+        A list of grid dicts, one per combination, each with its own
+        ``"#base"`` list whose first command carries the extra options.
+    """
+    # Values are rendered with their default string form (0.001 -> "0.001").
+    fragments_per_option = [
+        [f" {key} {value}" for value in values if value != ""]
+        for key, values in cmds.items()
+    ]
+    base_cmds = base_grid["#base"]
+    grids = []
+    for new_cmd in generate_string_combinations([base_cmds[0]], *fragments_per_option):
+        # Give every grid its own "#base" list. A shallow dict copy would
+        # share the list, so assigning into it would overwrite the command in
+        # all previously built grids and in the caller's base_grid.
+        grid = dict(base_grid)
+        grid["#base"] = [new_cmd, *base_cmds[1:]]
+        grids.append(grid)
+    return grids
+
+def cleanup_data_file():
+    """Delete every ``.txt`` file located in this script's directory."""
+    script_directory = os.path.dirname(os.path.realpath(__file__))
+    for entry in os.listdir(script_directory):
+        # Anything that is not a .txt file is left untouched.
+        if not entry.endswith(".txt"):
+            continue
+        os.remove(os.path.join(script_directory, entry))
+
+@pytest.fixture
+def test_description(request):
+    """Hand the parametrized test description to the test, then clean up.
+
+    Yields:
+        The value of ``request.param`` unchanged. Once the test is done,
+        ``cleanup_data_file`` is called.
+    """
+    yield request.param
+    cleanup_data_file()
+
+
+def core_test(files, grid, outputs, job_assert, job_assert_args):
+    """Train over ``grid`` and run ``job_assert`` on every resulting job.
+
+    Args:
+        files: Input passed to ``Vw.train`` as the data argument.
+        grid: Options grid passed to ``Vw.train``.
+        outputs: Output names passed to ``Vw.train``.
+        job_assert: Callable invoked as ``job_assert(job, **job_assert_args)``.
+        job_assert_args: Keyword arguments for ``job_assert``.
+    """
+    cache_dir = CURR_DICT + "/.vw_cache"
+    executor = Vw(cache_dir, reset=True, handler=None)
+    for job in executor.train(files, grid, outputs):
+        job_assert(job, **job_assert_args)
+
+
+@pytest.mark.parametrize('test_description', json_to_dict_list("pytest.json"), indirect=True)
+def test_all(test_description):
+    """Run one JSON-described scenario across all of its option grids.
+
+    The description supplies the data generator (``data_func`` and
+    ``data_func_args``), the job assertion (``assert_func`` and
+    ``assert_func_args``), the base ``grid``, the requested ``output`` list
+    and, optionally, a ``"*"`` mapping of extra options that is expanded into
+    one grid per combination.
+
+    NOTE: a ``"+"`` entry in the description is currently not consumed.
+    """
+    multiply = test_description.get("*")
+    base_grid = test_description['grid']
+    if multiply:
+        grids = combine_list_cmds_grids(multiply, base_grid)
+    else:
+        grids = [base_grid]
+
+    # The data set and the assertion do not depend on the grid, so they are
+    # resolved once instead of once per grid.
+    data = dynamic_function_call("data_generation", test_description['data_func'], *test_description["data_func_args"])
+    assert_job = get_function_object("assert_job", test_description['assert_func'])
+
+    for grid in grids:
+        options = Grid(grid)
+        core_test(data, options, test_description['output'], assert_job, test_description['assert_func_args'])