Revert "Revert "Revert "CHIA-414 fixup datalayer benchmark""" #18223

Merged
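(Net effect: an odd number of stacked reverts reduces to a single revert, so this PR removes the big_o-based complexity fitting from the data layer benchmark and restores the fixed-limit, datacases-driven version of test_benchmark_batch_insert_speed.)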
142 changes: 65 additions & 77 deletions chia/_tests/core/data_layer/test_data_store.py
@@ -14,8 +14,6 @@
 
 import aiohttp
 import aiosqlite
-import big_o
-import big_o.complexities
 import pytest
 
 from chia._tests.core.data_layer.util import Example, add_0123_example, add_01234567_example
@@ -1517,101 +1515,91 @@ async def test_clear_pending_roots_returns_root(
     assert cleared_root == pending_root
 
 
+@dataclass
+class BatchInsertBenchmarkCase:
+    pre: int
+    count: int
+    limit: float
+    marks: Marks = ()
+
+    @property
+    def id(self) -> str:
+        return f"pre={self.pre},count={self.count}"
+
+
+@dataclass
+class BatchesInsertBenchmarkCase:
+    count: int
+    batch_count: int
+    limit: float
+    marks: Marks = ()
+
+    @property
+    def id(self) -> str:
+        return f"count={self.count},batch_count={self.batch_count}"
+
+
+@datacases(
+    BatchInsertBenchmarkCase(
+        pre=0,
+        count=100,
+        limit=2.2,
+    ),
+    BatchInsertBenchmarkCase(
+        pre=1_000,
+        count=100,
+        limit=4,
+    ),
+    BatchInsertBenchmarkCase(
+        pre=0,
+        count=1_000,
+        limit=30,
+    ),
+    BatchInsertBenchmarkCase(
+        pre=1_000,
+        count=1_000,
+        limit=36,
+    ),
+    BatchInsertBenchmarkCase(
+        pre=10_000,
+        count=25_000,
+        limit=52,
+    ),
+)
 @pytest.mark.anyio
 async def test_benchmark_batch_insert_speed(
     data_store: DataStore,
     store_id: bytes32,
     benchmark_runner: BenchmarkRunner,
+    case: BatchInsertBenchmarkCase,
 ) -> None:
     r = random.Random()
     r.seed("shadowlands", version=2)
 
-    test_size = 100
-    max_pre_size = 20_000
-    # may not be needed if big_o already considers the effect
-    # TODO: must be > 0 to avoid an issue with the log class?
-    lowest_considered_n = 2000
-    simplicity_bias_percentage = 10 / 100
-
-    batch_count, remainder = divmod(max_pre_size, test_size)
-    assert remainder == 0, "the last batch would be a different size"
-
     changelist = [
         {
             "action": "insert",
             "key": x.to_bytes(32, byteorder="big", signed=False),
             "value": bytes(r.getrandbits(8) for _ in range(1200)),
         }
-        for x in range(max_pre_size)
+        for x in range(case.pre + case.count)
     ]
 
-    pre = changelist[:max_pre_size]
-
-    records: Dict[int, float] = {}
-
-    total_inserted = 0
-    pre_iter = iter(pre)
-    with benchmark_runner.print_runtime(
-        label="overall",
-        clock=time.monotonic,
-    ):
-        while True:
-            pre_batch = list(itertools.islice(pre_iter, test_size))
-            if len(pre_batch) == 0:
-                break
-
-            with benchmark_runner.print_runtime(
-                label="count",
-                clock=time.monotonic,
-            ) as f:
-                await data_store.insert_batch(
-                    store_id=store_id,
-                    changelist=pre_batch,
-                    # TODO: does this mess up test accuracy?
-                    status=Status.COMMITTED,
-                )
+    pre = changelist[: case.pre]
+    batch = changelist[case.pre : case.pre + case.count]
 
-            records[total_inserted] = f.result().duration
-            total_inserted += len(pre_batch)
-
-    considered_durations = {n: duration for n, duration in records.items() if n >= lowest_considered_n}
-    ns = list(considered_durations.keys())
-    durations = list(considered_durations.values())
-    best_class, fitted = big_o.infer_big_o_class(ns=ns, time=durations)
-    simplicity_bias = simplicity_bias_percentage * fitted[best_class]
-    best_class, fitted = big_o.infer_big_o_class(ns=ns, time=durations, simplicity_bias=simplicity_bias)
-
-    print(f"allowed simplicity bias: {simplicity_bias}")
-    print(big_o.reports.big_o_report(best=best_class, others=fitted))
-
-    assert isinstance(
-        best_class, (big_o.complexities.Constant, big_o.complexities.Linear)
-    ), f"must be constant or linear: {best_class}"
-
-    coefficient_maximums = [0.65, 0.000_25, *(10**-n for n in range(5, 100))]
-
-    coefficients = best_class.coefficients()
-    paired = list(zip(coefficients, coefficient_maximums))
-    assert len(paired) == len(coefficients)
-    for index, [actual, maximum] in enumerate(paired):
-        benchmark_runner.record_value(
-            value=actual,
-            limit=maximum,
-            label=f"{type(best_class).__name__} coefficient {index}",
+    if case.pre > 0:
+        await data_store.insert_batch(
+            store_id=store_id,
+            changelist=pre,
+            status=Status.COMMITTED,
         )
-        assert actual <= maximum, f"(coefficient {index}) {actual} > {maximum}: {paired}"
 
-
-@dataclass
-class BatchesInsertBenchmarkCase:
-    count: int
-    batch_count: int
-    limit: float
-    marks: Marks = ()
-
-    @property
-    def id(self) -> str:
-        return f"count={self.count},batch_count={self.batch_count}"
+    with benchmark_runner.assert_runtime(seconds=case.limit):
+        await data_store.insert_batch(
+            store_id=store_id,
+            changelist=batch,
+        )
 
 
 @datacases(
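For orientation, the restored benchmark boils down to a simple pattern: pre-populate the store outside the timed region, then assert that one batch insert of `case.count` items finishes under a fixed wall-clock limit. Below is a minimal, self-contained sketch of that pattern; `Case`, `insert_many`, and `run_case` are hypothetical stand-ins for illustration, not chia APIs.

```python
import time
from dataclasses import dataclass
from typing import Dict


@dataclass
class Case:
    pre: int  # entries inserted before timing starts
    count: int  # entries inserted inside the timed region
    limit: float  # wall-clock limit in seconds


def insert_many(store: Dict[int, bytes], n: int, offset: int = 0) -> None:
    # stand-in for DataStore.insert_batch
    for x in range(offset, offset + n):
        store[x] = x.to_bytes(32, byteorder="big")


def run_case(case: Case) -> None:
    store: Dict[int, bytes] = {}
    if case.pre > 0:
        insert_many(store, case.pre)  # setup, not timed

    start = time.monotonic()
    insert_many(store, case.count, offset=case.pre)
    duration = time.monotonic() - start
    assert duration <= case.limit, f"{duration:.3f} s > {case.limit} s"


for case in [Case(pre=0, count=100, limit=2.2), Case(pre=1_000, count=100, limit=4.0)]:
    run_case(case)
```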
42 changes: 8 additions & 34 deletions chia/_tests/process_junit.py
@@ -189,32 +189,6 @@ def main(
     )
 
 
-def format_number(n: float) -> str:
-    complete = f"{n:.999f}"
-    integral_digits, decimal_separator, decimal_digits = complete.partition(".")
-    for index, digit in enumerate(decimal_digits):
-        if digit != "0":
-            places = index + 1
-            break
-    else:
-        places = 0
-
-    group_size = 3
-
-    places = ((places + group_size) // group_size) * group_size
-    decimal_digits = decimal_digits[:places]
-
-    result = ""
-    result += ",".join(
-        [integral_digits[start : start + group_size] for start in range(0, len(integral_digits), group_size)]
-    )
-    result += "."
-    result += " ".join(
-        [decimal_digits[start : start + group_size] for start in range(0, len(decimal_digits), group_size)]
-    )
-    return result
-
-
 def output_benchmark(
     link_line_separator: str,
     link_prefix: str,
@@ -241,17 +215,17 @@
     three_sigma_str = "-"
     if len(result.durations) > 1:
         durations_mean = mean(result.durations)
-        mean_str = f"{format_number(durations_mean)} s"
+        mean_str = f"{durations_mean:.3f} s"
 
         try:
-            three_sigma_str = f"{format_number(durations_mean + 3 * stdev(result.durations))} s"
+            three_sigma_str = f"{durations_mean + 3 * stdev(result.durations):.3f} s"
         except StatisticsError:
             pass
 
     durations_max = max(result.durations)
-    max_str = f"{format_number(durations_max)} s"
+    max_str = f"{durations_max:.3f} s"
 
-    limit_str = f"{format_number(result.limit)} s"
+    limit_str = f"{result.limit:.3f} s"
 
     percent = 100 * durations_max / result.limit
     if percent >= 100:
@@ -318,17 +292,17 @@ def output_time_out_assert(
     three_sigma_str = "-"
     if len(result.durations) > 1:
         durations_mean = mean(result.durations)
-        mean_str = f"{format_number(durations_mean)} s"
+        mean_str = f"{durations_mean:.3f} s"
 
         try:
-            three_sigma_str = f"{format_number(durations_mean + 3 * stdev(result.durations))} s"
+            three_sigma_str = f"{durations_mean + 3 * stdev(result.durations):.3f} s"
         except StatisticsError:
            pass
 
     durations_max = max(result.durations)
-    max_str = f"{format_number(durations_max)} s"
+    max_str = f"{durations_max:.3f} s"
 
-    limit_str = f"{format_number(result.limit)} s"
+    limit_str = f"{result.limit:.3f} s"
 
     percent = 100 * durations_max / result.limit
     if percent >= 100:
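To make the formatting change concrete, here is the removed helper (lightly condensed from the diff above) run side by side with the `:.3f` f-string that replaces it; the sample values are chosen for illustration only. One observable quirk of the old helper is that it groups integral digits left to right, so a four-digit value does not get standard thousands grouping.

```python
def format_number(n: float) -> str:
    # the removed helper, condensed from the diff above
    complete = f"{n:.999f}"
    integral_digits, _, decimal_digits = complete.partition(".")
    for index, digit in enumerate(decimal_digits):
        if digit != "0":
            places = index + 1
            break
    else:
        places = 0

    group_size = 3
    # keep decimals up to the first non-zero digit, rounded up to a group of 3
    places = ((places + group_size) // group_size) * group_size
    decimal_digits = decimal_digits[:places]

    integral = ",".join(
        integral_digits[start : start + group_size] for start in range(0, len(integral_digits), group_size)
    )
    decimal = " ".join(
        decimal_digits[start : start + group_size] for start in range(0, len(decimal_digits), group_size)
    )
    return f"{integral}.{decimal}"


print(format_number(0.000123456))  # 0.000 123 -- keeps sub-millisecond detail
print(f"{0.000123456:.3f}")        # 0.000     -- the replacement drops it
print(format_number(1234.5))       # 123,4.500 -- left-to-right grouping quirk
print(f"{1234.5:.3f}")             # 1234.500
```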
28 changes: 0 additions & 28 deletions chia/_tests/util/misc.py
@@ -390,34 +390,6 @@ def assert_runtime(self, *args: Any, **kwargs: Any) -> _AssertRuntime:
         kwargs.setdefault("overhead", self.overhead)
         return _AssertRuntime(*args, **kwargs)
 
-    def print_runtime(self, *args: Any, **kwargs: Any) -> _AssertRuntime:
-        kwargs.setdefault("enable_assertion", False)
-        # TODO: ick
-        kwargs.setdefault("seconds", 1)
-        kwargs.setdefault("overhead", self.overhead)
-        return _AssertRuntime(*args, **kwargs)
-
-    def record_value(self, value: float, limit: float, label: str) -> None:
-        if ether.record_property is not None:
-            file, line = caller_file_and_line(
-                relative_to=(
-                    pathlib.Path(chia.__file__).parent.parent,
-                    pathlib.Path(chia._tests.__file__).parent.parent,
-                )
-            )
-            data = BenchmarkData(
-                duration=value,
-                path=pathlib.Path(file),
-                line=line,
-                limit=limit,
-                label=label,
-            )
-
-            ether.record_property(  # pylint: disable=E1102
-                data.tag,
-                json.dumps(data.marshal(), ensure_ascii=True, sort_keys=True),
-            )
-
 
 @contextlib.contextmanager
 def assert_rpc_error(error: str) -> Iterator[None]:
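With `print_runtime` and `record_value` gone, timing goes back through `assert_runtime` alone. As a rough, self-contained sketch of the pattern (an illustration, not the actual `_AssertRuntime` implementation), an assert-on-exit timing context manager can be as small as:

```python
import contextlib
import time
from typing import Iterator


@contextlib.contextmanager
def assert_runtime(seconds: float, label: str = "") -> Iterator[None]:
    # time the managed block and fail if it ran past the limit
    start = time.monotonic()
    yield
    duration = time.monotonic() - start
    assert duration <= seconds, f"{label}: {duration:.3f} s exceeded the {seconds} s limit"


# usage: the assertion fires only if the block exceeds the limit
with assert_runtime(seconds=0.5, label="sleep"):
    time.sleep(0.01)
```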
1 change: 0 additions & 1 deletion setup.py
@@ -45,7 +45,6 @@
 ]
 
 dev_dependencies = [
-    "big-o==0.11.0",
     "build==1.2.1",
     "coverage==7.5.3",
     "diff-cover==9.0.0",