Skip to content

Commit

Permalink
Revert "Revert "CHIA-414 fixup datalayer benchmark"" (#18107)
Browse files Browse the repository at this point in the history
* Revert "Revert "CHIA-414 fixup datalayer benchmark" (#18054)"

This reverts commit b49eb0a.

* repeats

* report coefficients

* hardcode timeout

* 5

* no benchmarks

* 0.6

* repeat name

* format_number()

* higher limits

* 10x

* tidy
  • Loading branch information
altendky committed Jun 18, 2024
1 parent 60ae4bc commit e87d51a
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 73 deletions.
142 changes: 77 additions & 65 deletions chia/_tests/core/data_layer/test_data_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

import aiohttp
import aiosqlite
import big_o
import big_o.complexities
import pytest

from chia._tests.core.data_layer.util import Example, add_0123_example, add_01234567_example
Expand Down Expand Up @@ -1515,91 +1517,101 @@ async def test_clear_pending_roots_returns_root(
assert cleared_root == pending_root


@dataclass
class BatchInsertBenchmarkCase:
    # One parameter set for the single-batch insert benchmark.
    pre: int  # entries inserted before the timed batch
    count: int  # entries inserted in the timed batch
    limit: float  # allowed duration — presumably seconds; TODO confirm against the runner
    marks: Marks = ()

    @property
    def id(self) -> str:
        # pytest case id, e.g. "pre=1000,count=100"
        parts = (f"pre={self.pre}", f"count={self.count}")
        return ",".join(parts)


@dataclass
class BatchesInsertBenchmarkCase:
    # One parameter set for the repeated-batches insert benchmark.
    count: int  # entries per batch
    batch_count: int  # number of batches inserted
    limit: float  # allowed duration — presumably seconds; TODO confirm against the runner
    marks: Marks = ()

    @property
    def id(self) -> str:
        # pytest case id, e.g. "count=100,batch_count=10"
        parts = (f"count={self.count}", f"batch_count={self.batch_count}")
        return ",".join(parts)


# NOTE(review): this whole span is a diff-extraction artifact — lines removed by
# the commit and lines added by it are interleaved with no +/- markers, so parts
# of it are not valid Python as shown.  Reconcile against the repository before
# relying on it.  The @datacases decorator and the `case` parameter appear to
# belong to the pre-change version — TODO confirm.
@datacases(
    BatchInsertBenchmarkCase(
        pre=0,
        count=100,
        limit=2.2,
    ),
    BatchInsertBenchmarkCase(
        pre=1_000,
        count=100,
        limit=4,
    ),
    BatchInsertBenchmarkCase(
        pre=0,
        count=1_000,
        limit=30,
    ),
    BatchInsertBenchmarkCase(
        pre=1_000,
        count=1_000,
        limit=36,
    ),
    BatchInsertBenchmarkCase(
        pre=10_000,
        count=25_000,
        limit=52,
    ),
)
@pytest.mark.anyio
async def test_benchmark_batch_insert_speed(
    data_store: DataStore,
    store_id: bytes32,
    benchmark_runner: BenchmarkRunner,
    case: BatchInsertBenchmarkCase,
) -> None:
    # Seeded RNG so every run inserts identical keys and values.
    r = random.Random()
    r.seed("shadowlands", version=2)

    test_size = 100  # rows per timed batch
    max_pre_size = 20_000  # total rows inserted over all batches
    # may not be needed if big_o already considers the effect
    # TODO: must be > 0 to avoid an issue with the log class?
    lowest_considered_n = 2000
    simplicity_bias_percentage = 10 / 100

    batch_count, remainder = divmod(max_pre_size, test_size)
    assert remainder == 0, "the last batch would be a different size"

    changelist = [
        {
            "action": "insert",
            "key": x.to_bytes(32, byteorder="big", signed=False),
            "value": bytes(r.getrandbits(8) for _ in range(1200)),
        }
        # NOTE(review): two consecutive `for` clauses — the first is the removed
        # pre-change line, the second is the added post-change line.
        for x in range(case.pre + case.count)
        for x in range(max_pre_size)
    ]

    # NOTE(review): duplicate assignments to `pre` — old and new diff lines.
    pre = changelist[: case.pre]
    batch = changelist[case.pre : case.pre + case.count]
    pre = changelist[:max_pre_size]

    if case.pre > 0:
        await data_store.insert_batch(
            store_id=store_id,
            changelist=pre,
            status=Status.COMMITTED,
        )
    # Maps rows-already-inserted -> duration of the next batch insert.
    records: Dict[int, float] = {}

    # NOTE(review): the assert_runtime block below is removed pre-change code; its
    # insert_batch call was left unterminated by the diff extraction (syntax error
    # as shown).
    with benchmark_runner.assert_runtime(seconds=case.limit):
        await data_store.insert_batch(
            store_id=store_id,
            changelist=batch,
    total_inserted = 0
    pre_iter = iter(pre)
    with benchmark_runner.print_runtime(
        label="overall",
        clock=time.monotonic,
    ):
        while True:
            # Fixed-size batches so each timing sample covers the same work.
            pre_batch = list(itertools.islice(pre_iter, test_size))
            if len(pre_batch) == 0:
                break

            with benchmark_runner.print_runtime(
                label="count",
                clock=time.monotonic,
            ) as f:
                await data_store.insert_batch(
                    store_id=store_id,
                    changelist=pre_batch,
                    # TODO: does this mess up test accuracy?
                    status=Status.COMMITTED,
                )

            records[total_inserted] = f.result().duration
            total_inserted += len(pre_batch)

    # Only fit against large-enough n to reduce small-n noise.
    considered_durations = {n: duration for n, duration in records.items() if n >= lowest_considered_n}
    ns = list(considered_durations.keys())
    durations = list(considered_durations.values())
    # First fit provides a scale for the simplicity bias; the second fit applies it.
    best_class, fitted = big_o.infer_big_o_class(ns=ns, time=durations)
    simplicity_bias = simplicity_bias_percentage * fitted[best_class]
    best_class, fitted = big_o.infer_big_o_class(ns=ns, time=durations, simplicity_bias=simplicity_bias)

    print(f"allowed simplicity bias: {simplicity_bias}")
    print(big_o.reports.big_o_report(best=best_class, others=fitted))

    # Insert time must not grow super-linearly with store size.
    assert isinstance(
        best_class, (big_o.complexities.Constant, big_o.complexities.Linear)
    ), f"must be constant or linear: {best_class}"

    # Per-coefficient ceilings; entries past the second are effectively unreachable.
    coefficient_maximums = [0.65, 0.000_25, *(10**-n for n in range(5, 100))]

    coefficients = best_class.coefficients()
    paired = list(zip(coefficients, coefficient_maximums))
    assert len(paired) == len(coefficients)
    for index, [actual, maximum] in enumerate(paired):
        # Record each coefficient so CI reporting can track it over time.
        benchmark_runner.record_value(
            value=actual,
            limit=maximum,
            label=f"{type(best_class).__name__} coefficient {index}",
        )
        assert actual <= maximum, f"(coefficient {index}) {actual} > {maximum}: {paired}"


@dataclass
class BatchesInsertBenchmarkCase:
    # NOTE(review): this definition also appears earlier in the extracted text;
    # that is a diff artifact of the class being moved — only one copy exists in
    # the actual file.
    count: int  # entries per batch
    batch_count: int  # number of batches inserted
    limit: float  # allowed duration — presumably seconds; TODO confirm against the runner
    marks: Marks = ()

    @property
    def id(self) -> str:
        # pytest case id used by @datacases, e.g. "count=100,batch_count=10"
        return f"count={self.count},batch_count={self.batch_count}"


@datacases(
Expand Down
42 changes: 34 additions & 8 deletions chia/_tests/process_junit.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,32 @@ def main(
)


def format_number(n: float) -> str:
    """Format *n* with comma-grouped integral digits and space-grouped decimals.

    Enough decimal places are kept to reach the first significant fractional
    digit, rounded up to a whole group of three (always at least one group, so
    integers still render as e.g. ``5.000``).

    Examples: ``1234.5`` -> ``"1,234.500"``, ``0.000244140625`` -> ``"0.000 244"``.
    """
    # 999 fractional digits is more than enough to locate the first
    # significant digit of any finite float.
    complete = f"{n:.999f}"
    integral_digits, _, decimal_digits = complete.partition(".")

    # How many decimal places are needed to reach the first non-zero
    # fractional digit; zero when the fraction is all zeros.
    for index, digit in enumerate(decimal_digits):
        if digit != "0":
            places = index + 1
            break
    else:
        places = 0

    group_size = 3

    places = ((places + group_size) // group_size) * group_size
    decimal_digits = decimal_digits[:places]

    # Group the integral part from the RIGHT (standard thousands separators).
    # The original grouped from the left, rendering 1234 as "123,4".  The sign
    # is split off first so that e.g. -0.5 keeps its "-0.500" rendering.
    if integral_digits.startswith("-"):
        sign, magnitude = "-", integral_digits[1:]
    else:
        sign, magnitude = "", integral_digits
    integral_part = f"{int(magnitude):,}"

    # Group the decimal part from the left, three digits per space-separated group.
    decimal_part = " ".join(
        decimal_digits[start : start + group_size] for start in range(0, len(decimal_digits), group_size)
    )
    return f"{sign}{integral_part}.{decimal_part}"


def output_benchmark(
link_line_separator: str,
link_prefix: str,
Expand All @@ -215,17 +241,17 @@ def output_benchmark(
three_sigma_str = "-"
if len(result.durations) > 1:
durations_mean = mean(result.durations)
mean_str = f"{durations_mean:.3f} s"
mean_str = f"{format_number(durations_mean)} s"

try:
three_sigma_str = f"{durations_mean + 3 * stdev(result.durations):.3f} s"
three_sigma_str = f"{format_number(durations_mean + 3 * stdev(result.durations))} s"
except StatisticsError:
pass

durations_max = max(result.durations)
max_str = f"{durations_max:.3f} s"
max_str = f"{format_number(durations_max)} s"

limit_str = f"{result.limit:.3f} s"
limit_str = f"{format_number(result.limit)} s"

percent = 100 * durations_max / result.limit
if percent >= 100:
Expand Down Expand Up @@ -292,17 +318,17 @@ def output_time_out_assert(
three_sigma_str = "-"
if len(result.durations) > 1:
durations_mean = mean(result.durations)
mean_str = f"{durations_mean:.3f} s"
mean_str = f"{format_number(durations_mean)} s"

try:
three_sigma_str = f"{durations_mean + 3 * stdev(result.durations):.3f} s"
three_sigma_str = f"{format_number(durations_mean + 3 * stdev(result.durations))} s"
except StatisticsError:
pass

durations_max = max(result.durations)
max_str = f"{durations_max:.3f} s"
max_str = f"{format_number(durations_max)} s"

limit_str = f"{result.limit:.3f} s"
limit_str = f"{format_number(result.limit)} s"

percent = 100 * durations_max / result.limit
if percent >= 100:
Expand Down
28 changes: 28 additions & 0 deletions chia/_tests/util/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,34 @@ def assert_runtime(self, *args: Any, **kwargs: Any) -> _AssertRuntime:
kwargs.setdefault("overhead", self.overhead)
return _AssertRuntime(*args, **kwargs)

def print_runtime(self, *args: Any, **kwargs: Any) -> _AssertRuntime:
    """Like assert_runtime(), but with the runtime assertion disabled by default."""
    # TODO: ick — the seconds value is irrelevant here, it only satisfies
    # _AssertRuntime's required argument.
    defaults = {"enable_assertion": False, "seconds": 1, "overhead": self.overhead}
    for name, value in defaults.items():
        kwargs.setdefault(name, value)
    return _AssertRuntime(*args, **kwargs)

def record_value(self, value: float, limit: float, label: str) -> None:
    """Emit a labeled benchmark value (with its limit) as a JUnit property.

    Silently does nothing when no property recorder is active.
    """
    recorder = ether.record_property
    if recorder is None:
        return

    roots = (
        pathlib.Path(chia.__file__).parent.parent,
        pathlib.Path(chia._tests.__file__).parent.parent,
    )
    # Attribute the value to the caller's source location, relative to the repo.
    file, line = caller_file_and_line(relative_to=roots)

    entry = BenchmarkData(
        duration=value,
        path=pathlib.Path(file),
        line=line,
        limit=limit,
        label=label,
    )
    recorder(  # pylint: disable=E1102
        entry.tag,
        json.dumps(entry.marshal(), ensure_ascii=True, sort_keys=True),
    )


@contextlib.contextmanager
def assert_rpc_error(error: str) -> Iterator[None]:
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
]

dev_dependencies = [
"big-o==0.11.0",
"build==1.2.1",
"coverage==7.5.3",
"diff-cover==9.0.0",
Expand Down

0 comments on commit e87d51a

Please sign in to comment.