Skip to content

Commit

Permalink
check tables size before running benchmarks (#10317)
Browse files Browse the repository at this point in the history
  • Loading branch information
iddqdex authored Oct 11, 2024
1 parent 7bae47f commit 8372e70
Show file tree
Hide file tree
Showing 6 changed files with 194 additions and 36 deletions.
54 changes: 49 additions & 5 deletions ydb/tests/olap/load/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from ydb.tests.olap.lib.ydb_cli import YdbCliHelper, WorkloadType
from ydb.tests.olap.lib.allure_utils import allure_test_description
from ydb.tests.olap.lib.results_processor import ResultsProcessor
from ydb.tests.olap.scenario.helpers.scenario_tests_helper import ScenarioTestHelper
from time import time
from typing import Optional
from allure_commons._core import plugin_manager
Expand Down Expand Up @@ -45,6 +46,24 @@ def _get_timeout(cls, query_num: int) -> float:
def _test_name(cls, query_num: int) -> str:
return f'Query{query_num:02d}'

@allure.step('check tables size')
def check_tables_size(self, folder: Optional[str], tables: dict[str, int]):
    """Compare actual table sizes with the expected ones and fail on any mismatch.

    Args:
        folder: prefix prepended to every table name; with ``None`` the names
            from ``tables`` are used as they are.
        tables: mapping of table name to the expected value returned by
            ``ScenarioTestHelper.get_table_rows_count`` for that table.

    Every table is checked before reporting, so a single ``pytest.fail``
    lists all mismatches at once.
    """
    def _full_name(name: str) -> str:
        if folder is None:
            return name
        # Do not insert a separator when either side already carries one.
        separator = '' if folder.endswith('/') or name.startswith('/') else '/'
        return f'{folder}{separator}{name}'

    helper = ScenarioTestHelper(None)
    mismatches: list[str] = []
    for name, expected in tables.items():
        actual = helper.get_table_rows_count(_full_name(name))
        if actual != expected:
            mismatches.append(f'table `{name}`: expect {expected}, but actually is {actual};')
    if mismatches:
        details = "\n".join(mismatches)
        pytest.fail(f'Unexpected tables size in `{folder}`:\n {details}')

def process_query_result(self, result: YdbCliHelper.WorkloadRunResult, query_num: int, iterations: int, upload: bool):
def _get_duraton(stats, field):
if stats is None:
Expand Down Expand Up @@ -131,12 +150,37 @@ def _attach_plans(plan: YdbCliHelper.QueryPlan) -> None:
exc = exc.with_traceback(result.traceback)
raise exc

def setup_class(self) -> None:
    """Run the optional ``do_setup_class`` hook and upload its outcome.

    Returns immediately when the suite defines no ``do_setup_class``.
    Otherwise the hook is executed, a ``_Verification`` result for
    ``self.suite`` is uploaded through ``ResultsProcessor`` whether the hook
    succeeded or not, and a captured failure is re-raised afterwards as a
    pytest failure carrying the original traceback.
    """
    if not hasattr(self, 'do_setup_class'):
        return
    failure = None
    started_at = time()
    try:
        # The hook receives ``self`` explicitly.
        self.do_setup_class(self)
    except BaseException as e:
        # Remember the message and traceback only: the result has to be
        # uploaded before the failure is reported.
        failure = (str(e), e.__traceback__)
    ResultsProcessor.upload_results(
        kind='Load',
        suite=self.suite,
        test='_Verification',
        timestamp=started_at,
        is_successful=(failure is None)
    )
    if failure is None:
        return
    message, trace = failure
    exc = pytest.fail.Exception(message)
    exc.with_traceback(trace)
    raise exc

def run_workload_test(self, path: str, query_num: int) -> None:
allure_listener = next(filter(lambda x: isinstance(x, AllureListener), plugin_manager.get_plugin_manager().get_plugins()))
allure_test_result = allure_listener.allure_logger.get_test(None)
query_num_param = next(filter(lambda x: x.name == 'query_num', allure_test_result.parameters), None)
if query_num_param:
query_num_param.mode = allure.parameter_mode.HIDDEN.value
for plugin in plugin_manager.get_plugin_manager().get_plugins():
if isinstance(plugin, AllureListener):
allure_test_result = plugin.allure_logger.get_test(None)
if allure_test_result is not None:
for param in allure_test_result.parameters:
if param.name == 'query_num':
param.mode = allure.parameter_mode.HIDDEN.value
start_time = time()
result = YdbCliHelper.workload_run(
path=path,
Expand Down
25 changes: 7 additions & 18 deletions ydb/tests/olap/load/test_clickbench.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import allure
import pytest
from time import time
from conftest import LoadSuiteBase
from os import getenv
from ydb.tests.olap.lib.results_processor import ResultsProcessor
from ydb.tests.olap.lib.ydb_cli import WorkloadType, YdbCliHelper
from ydb.tests.olap.lib.ydb_cluster import YdbCluster
from ydb.tests.olap.lib.utils import get_external_param
Expand All @@ -13,19 +11,20 @@ class TestClickbench(LoadSuiteBase):
suite = 'Clickbench'
workload_type: WorkloadType = WorkloadType.Clickbench
refference: str = 'CH.60'
path = get_external_param('table-path-clickbench', f'{YdbCluster.tables_path}/clickbench/hits')

def setup_class(self):
def do_setup_class(self):
if getenv('NO_VERIFY_DATA', '0') == '1' or getenv('NO_VERIFY_DATA_CLICKBECNH', '0') == '1':
return
root_path = YdbCluster.tables_path
path = get_external_param('table-path-clickbench', f'{root_path}/clickbench/hits')

self.check_tables_size(folder=None, tables={'clickbench/hits': 99997497})

fail_count = 0
start_time = time()
for query_num in range(0, 43):
try:
with allure.step(f'request {query_num}'):
result = YdbCliHelper.workload_run(
path=path,
path=self.path,
query_num=query_num,
iterations=1,
workload_type=self.workload_type,
Expand All @@ -36,19 +35,9 @@ def setup_class(self):
except BaseException:
fail_count += 1

test = '_Verification'
ResultsProcessor.upload_results(
kind='Load',
suite=self.suite,
test=test,
timestamp=start_time,
is_successful=(fail_count == 0)
)
if fail_count > 0:
pytest.fail(f'{fail_count} verification queries failed')

@pytest.mark.parametrize('query_num', [i for i in range(0, 43)])
def test_clickbench(self, query_num):
root_path = YdbCluster.tables_path
path = get_external_param(f'table-path-{self.suite}', f'{root_path}/clickbench/hits')
self.run_workload_test(path, query_num)
self.run_workload_test(self.path, query_num)
94 changes: 89 additions & 5 deletions ydb/tests/olap/load/test_tpcds.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,94 @@
from __future__ import annotations
import pytest
from conftest import LoadSuiteBase
from os import getenv
from ydb.tests.olap.lib.ydb_cli import WorkloadType
from ydb.tests.olap.lib.utils import get_external_param
from ydb.tests.olap.lib.ydb_cluster import YdbCluster


class TpcdsSuiteBase(LoadSuiteBase):
size: int = None
size: int = 0
workload_type: WorkloadType = WorkloadType.TPC_DS
iterations: int = 3
tables_size: dict[str, int] = {}

def _get_tables_size(self) -> dict[str, int]:
result: dict[str, int] = {
'customer_demographics': 1920800,
'date_dim': 73049,
'household_demographics': 7200,
'income_band': 20,
'ship_mode': 20,
'time_dim': 86400,
}
result.update(self.tables_size)
return result

def _get_path(self, full: bool = True) -> str:
    """Return the path of the tables folder for this suite.

    Args:
        full: when true, the base is the ``table-path-tpcds`` external
            parameter, defaulting to ``<YdbCluster.tables_path>/tpcds``;
            otherwise the base is the bare ``tpcds`` folder name.

    The suite-specific external parameter ``table-path-<suite>`` is looked up
    with ``<base>/s<size>`` as its default.
    """
    base = get_external_param('table-path-tpcds', f'{YdbCluster.tables_path}/tpcds') if full else 'tpcds'
    return get_external_param(f'table-path-{self.suite}', f'{base}/s{self.size}')

def do_setup_class(self):
    """Check TPC-DS table sizes unless verification is disabled via environment.

    Verification is skipped when any of the following variables equals ``'1'``:
    ``NO_VERIFY_DATA``, ``NO_VERIFY_DATA_TPCDS``, ``NO_VERIFY_DATA_TPCDS_<size>``
    and, for backward compatibility, ``NO_VERIFY_DATA_TPCH`` and
    ``NO_VERIFY_DATA_TPCH_<size>``. Any other value (including ``'0'``) keeps
    verification enabled.
    """
    skip_switches = (
        'NO_VERIFY_DATA',
        'NO_VERIFY_DATA_TPCDS',
        f'NO_VERIFY_DATA_TPCDS_{self.size}',
        # Legacy names: this suite used to look only at the TPC-H switches,
        # so they are still honoured to keep existing setups working.
        'NO_VERIFY_DATA_TPCH',
        f'NO_VERIFY_DATA_TPCH_{self.size}',
    )
    if any(getenv(name, '0') == '1' for name in skip_switches):
        return
    # Helpers are invoked with ``self`` passed explicitly, the same way this
    # hook itself is called.
    self.check_tables_size(self, folder=self._get_path(self, False), tables=self._get_tables_size(self))

@pytest.mark.parametrize('query_num', [i for i in range(1, 100)])
def test_tpcds(self, query_num: int):
root_path = YdbCluster.tables_path
tpcds_path = get_external_param('table-path-tpcds', f'{root_path}/tpcds')
path = get_external_param(f'table-path-{self.suite}', f'{tpcds_path}/s{self.size}')
self.run_workload_test(path, query_num)
self.run_workload_test(self._get_path(), query_num)


class TestTpcds1(TpcdsSuiteBase):
    size: int = 1
    # Expected sizes of the tables checked for this suite, in addition to the
    # ones listed in ``TpcdsSuiteBase._get_tables_size``.
    tables_size: dict[str, int] = {
        'call_center': 6,
        'catalog_page': 11_718,
        'catalog_returns': 144_067,
        'catalog_sales': 1_441_548,
        'customer_address': 50_000,
        'customer': 100_000,
        'inventory': 11_745_000,
        'item': 18_000,
        'promotion': 300,
        'reason': 35,
        'store': 12,
        'store_returns': 287_514,
        'store_sales': 2_880_404,
        'warehouse': 5,
        'web_page': 60,
        'web_returns': 71_763,
        'web_sales': 719_384,
        'web_site': 30,
    }


class TestTpcds10(TpcdsSuiteBase):
    size: int = 10
    timeout = max(TpcdsSuiteBase.timeout, 300.)
    # Expected sizes of the tables checked for this suite, in addition to the
    # ones listed in ``TpcdsSuiteBase._get_tables_size``.
    tables_size: dict[str, int] = {
        'call_center': 24,
        'catalog_page': 12_000,
        'catalog_returns': 1_439_749,
        'catalog_sales': 14_401_261,
        'customer': 500_000,
        'customer_address': 250_000,
        'inventory': 133_110_000,
        'item': 102_000,
        'promotion': 500,
        'reason': 45,
        'store': 102,
        'store_returns': 2_875_432,
        'store_sales': 28_800_991,
        'warehouse': 10,
        'web_page': 200,
        'web_returns': 719_217,
        'web_sales': 7_197_566,
        'web_site': 42,
    }


class TestTpcds100(TpcdsSuiteBase):
Expand All @@ -35,6 +99,26 @@ class TestTpcds100(TpcdsSuiteBase):
14: LoadSuiteBase.QuerySettings(timeout=max(TpcdsSuiteBase.timeout, 7200.)),
72: LoadSuiteBase.QuerySettings(timeout=max(TpcdsSuiteBase.timeout, 7200.)),
}
tables_size: dict[str, int] = {
'call_center': 30,
'catalog_page': 20400,
'catalog_returns': 14404374,
'catalog_sales': 143997065,
'customer': 2000000,
'customer_address': 1000000,
'inventory': 399330000,
'item': 204000,
'promotion': 1000,
'reason': 55,
'store': 402,
'store_returns': 28795080,
'store_sales': 287997024,
'warehouse': 15,
'web_page': 2040,
'web_returns': 7197670,
'web_sales': 72001237,
'web_site': 24,
}


class TestTpcds1000(TpcdsSuiteBase):
Expand Down
50 changes: 45 additions & 5 deletions ydb/tests/olap/load/test_tpch.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,81 @@
from __future__ import annotations
import pytest
from conftest import LoadSuiteBase
from os import getenv
from ydb.tests.olap.lib.ydb_cli import WorkloadType
from ydb.tests.olap.lib.utils import get_external_param
from ydb.tests.olap.lib.ydb_cluster import YdbCluster


class TpchSuiteBase(LoadSuiteBase):
size: int = None
size: int = 0
workload_type: WorkloadType = WorkloadType.TPC_H
iterations: int = 3
tables_size: dict[str, int] = {}

def _get_tables_size(self) -> dict[str, int]:
result: dict[str, int] = {
'customer': 150000 * self.size,
'nation': 25,
'orders': 1500000 * self.size,
'part': 200000 * self.size,
'partsupp': 800000 * self.size,
'region': 5,
'supplier': 10000 * self.size,
}
result.update(self.tables_size)
return result

def _get_path(self, full: bool = True) -> str:
    """Return the path of the tables folder for this suite.

    Args:
        full: when true, the base is the ``table-path-tpch`` external
            parameter, defaulting to ``<YdbCluster.tables_path>/tpch``;
            otherwise the base is the bare ``tpch`` folder name.

    The suite-specific external parameter ``table-path-<suite>`` is looked up
    with ``<base>/s<size>`` as its default.
    """
    base = get_external_param('table-path-tpch', f'{YdbCluster.tables_path}/tpch') if full else 'tpch'
    return get_external_param(f'table-path-{self.suite}', f'{base}/s{self.size}')

def do_setup_class(self):
    """Check TPC-H table sizes unless verification is disabled via environment.

    Verification is skipped when any of ``NO_VERIFY_DATA``,
    ``NO_VERIFY_DATA_TPCH`` or ``NO_VERIFY_DATA_TPCH_<size>`` equals ``'1'``.
    All three switches are compared the same way, so a size-specific switch
    set to ``'0'`` no longer disables verification.
    """
    skip_switches = (
        'NO_VERIFY_DATA',
        'NO_VERIFY_DATA_TPCH',
        f'NO_VERIFY_DATA_TPCH_{self.size}',
    )
    if any(getenv(name, '0') == '1' for name in skip_switches):
        return
    # Helpers are invoked with ``self`` passed explicitly, the same way this
    # hook itself is called.
    self.check_tables_size(self, folder=self._get_path(self, False), tables=self._get_tables_size(self))

@pytest.mark.parametrize('query_num', [i for i in range(1, 23)])
def test_tpch(self, query_num: int):
root_path = YdbCluster.tables_path
tpch_path = get_external_param('table-path-tpch', f'{root_path}/tpch')
path = get_external_param(f'table-path-{self.suite}', f'{tpch_path}/s{self.size}')
self.run_workload_test(path, query_num)
self.run_workload_test(self._get_path(), query_num)


class TestTpch1(TpchSuiteBase):
    size: int = 1
    # Expected size of ``lineitem`` is listed per suite instead of being
    # computed from ``size`` in ``TpchSuiteBase._get_tables_size``.
    tables_size: dict[str, int] = {'lineitem': 6_001_215}


class TestTpch10(TpchSuiteBase):
    size: int = 10
    # Expected size of ``lineitem`` is listed per suite instead of being
    # computed from ``size`` in ``TpchSuiteBase._get_tables_size``.
    tables_size: dict[str, int] = {'lineitem': 59_986_052}


class TestTpch100(TpchSuiteBase):
    size: int = 100
    timeout = max(TpchSuiteBase.timeout, 300.)
    # Expected size of ``lineitem`` is listed per suite instead of being
    # computed from ``size`` in ``TpchSuiteBase._get_tables_size``.
    tables_size: dict[str, int] = {'lineitem': 600_037_902}


class TestTpch1000(TpchSuiteBase):
    size: int = 1000
    timeout = max(TpchSuiteBase.timeout, 1000.)
    # Expected size of ``lineitem`` is listed per suite instead of being
    # computed from ``size`` in ``TpchSuiteBase._get_tables_size``.
    tables_size: dict[str, int] = {'lineitem': 5_999_989_709}


class TestTpch10000(TpchSuiteBase):
    size: int = 10000
    timeout = max(TpchSuiteBase.timeout, 3600.)
    # Expected size of ``lineitem`` is listed per suite instead of being
    # computed from ``size`` in ``TpchSuiteBase._get_tables_size``.
    tables_size: dict[str, int] = {'lineitem': 59_999_994_267}
1 change: 1 addition & 0 deletions ydb/tests/olap/load/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ PY3TEST()
contrib/python/allure-python-commons
ydb/public/sdk/python/enable_v3_new_behavior
ydb/tests/olap/lib
ydb/tests/olap/scenario/helpers
library/python/testing/yatest_common
ydb/public/sdk/python
)
Expand Down
6 changes: 3 additions & 3 deletions ydb/tests/olap/scenario/helpers/scenario_tests_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import json
from ydb.tests.olap.lib.ydb_cluster import YdbCluster
from abc import abstractmethod, ABC
from typing import Set, List, Dict, Any, Callable
from typing import Set, List, Dict, Any, Callable, Optional
from time import sleep


Expand Down Expand Up @@ -223,7 +223,7 @@ def EOF(self) -> bool:

pass

def __init__(self, context: TestContext) -> None:
def __init__(self, context: Optional[TestContext]) -> None:
"""Constructor.
Args:
Expand Down Expand Up @@ -256,7 +256,7 @@ def _add_not_empty(p: str, dir: str):

@staticmethod
def _run_with_expected_status(
operation: callable,
operation: Callable,
expected_status: ydb.StatusCode | Set[ydb.StatusCode],
retriable_status: ydb.StatusCode | Set[ydb.StatusCode] = {},
n_retries=0,
Expand Down

0 comments on commit 8372e70

Please sign in to comment.