ruff
xadupre committed Aug 29, 2024
1 parent 035e23c commit f782915
Showing 71 changed files with 548 additions and 496 deletions.
2 changes: 1 addition & 1 deletion _cmake/clang_format.sh
@@ -1,7 +1,7 @@
#!/bin/bash
clear
echo "--ruff--"
ruff .
ruff check .
echo "--cython-lint--"
cython-lint .
echo "--clang-format--"
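Note: recent ruff releases deprecated the bare "ruff ." invocation in favor of the explicit subcommand, so "ruff check ." is the forward-compatible spelling; formatting is handled separately by "ruff format".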
8 changes: 4 additions & 4 deletions _doc/examples/plot_bench_cypy_ort.py
@@ -89,7 +89,7 @@
#############################################
# Benchmark
# +++++++++
dims = list(int(i) for i in script_args.dims.split(","))
dims = [int(i) for i in script_args.dims.split(",")]

data = []
for dim in tqdm(dims):
@@ -99,18 +99,18 @@
number, repeat = script_args.number * 5, script_args.repeat * 5
x = numpy.random.randn(dim, dim).astype(numpy.float32)
t_ort = measure_time(
lambda: sess_ort.run(None, {"X": x})[0], number=number, repeat=50
lambda x=x: sess_ort.run(None, {"X": x})[0], number=number, repeat=50
)
t_ort["name"] = "ort"
t_ort["dim"] = dim
data.append(t_ort)

t_ext = measure_time(lambda: sess_ext.run([x])[0], number=number, repeat=repeat)
t_ext = measure_time(lambda x=x: sess_ext.run([x])[0], number=number, repeat=repeat)
t_ext["name"] = "ext"
t_ext["dim"] = dim
data.append(t_ext)

t_ext2 = measure_time(lambda: sess_ext.run_1_1(x), number=number, repeat=repeat)
t_ext2 = measure_time(lambda x=x: sess_ext.run_1_1(x), number=number, repeat=repeat)
t_ext2["name"] = "ext_1_1"
t_ext2["dim"] = dim
data.append(t_ext2)
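The two rewrites above are typical ruff autofixes: list(int(i) for i in ...) collapses to a plain list comprehension (rule C400, unnecessary generator call), and each benchmarked lambda now freezes the loop-dependent x through a default argument (rule B023, which flags functions defined inside a loop because closures capture variables by reference, not by value). A minimal sketch of that pitfall, independent of the commit:

# Closures look up loop variables when they are called, not when they
# are defined, so every lambda below sees the final value of i.
late = [lambda: i for i in range(3)]
print([f() for f in late])  # [2, 2, 2]

# A default argument is evaluated at definition time, which is what the
# "lambda x=x: ..." rewrite above relies on.
early = [lambda i=i: i for i in range(3)]
print([f() for f in early])  # [0, 1, 2]

Here the lambdas are executed by measure_time before the loop advances, so the change mainly silences the lint rule; the binding becomes essential whenever the callable outlives its iteration.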
6 changes: 3 additions & 3 deletions _doc/examples/plot_bench_gemm_f8.py
@@ -111,14 +111,14 @@ def type2string(dt):
dims.append((m, n, k))
else:
dims.append(int(d))
tests = list(int(i) for i in script_args.tests.split(","))
tests = [int(i) for i in script_args.tests.split(",")]

pbar = tqdm(list(product(tests, dims)))
obs = []
for test, dim in pbar:
pbar.set_description(f"type={test} dim={dim}")
if test in {8, 9, 10, 12, 13}:
warnings.warn(f"unsupported configuration {test}.")
warnings.warn(f"unsupported configuration {test}.", stacklevel=0)
continue
mdim = dim if isinstance(dim, int) else max(dim)
if mdim < 128:
@@ -187,7 +187,7 @@ def type2string(dt):
# ++++++++++++++++++++++++

if df.shape[0] > 0:
dfi = df[col_def + ["~dim", "mnk", "t-total", "t-gemm_sync"]]
dfi = df[[*col_def, "~dim", "mnk", "t-total", "t-gemm_sync"]]
print(dfi)

###################################
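Two further rules surface in this file: warnings.warn gains an explicit stacklevel argument (rule B028), and list concatenation col_def + [...] becomes a single literal with unpacking (rule RUF005). The stacklevel=0 used above merely satisfies the linter; stacklevel=2, as in this sketch with made-up names, attributes the warning to the caller, which is usually more useful:

import warnings

def check(value):
    # stacklevel=2 reports the warning at check()'s call site rather
    # than at this warn() line.
    if value < 0:
        warnings.warn("negative value", stacklevel=2)

col_def = ["name", "dtype"]
# [*col_def, ...] builds one list directly instead of concatenating two.
dfi_cols = [*col_def, "~dim", "mnk", "t-total", "t-gemm_sync"]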
16 changes: 11 additions & 5 deletions _doc/examples/plot_bench_gemm_ort.py
@@ -237,15 +237,15 @@ def create_cast(to, cuda=False):
#
# The benchmark will run the following configurations.

types = list(getattr(TensorProto, a) for a in script_args.types.split(","))
types = [getattr(TensorProto, a) for a in script_args.types.split(",")]
engine = [InferenceSession, CReferenceEvaluator]
providers = [
["CUDAExecutionProvider", "CPUExecutionProvider"],
["CPUExecutionProvider"],
]
# M, N, K
# we use multiples of 8; otherwise, float8 does not work.
dims = [list(int(i) for i in line.split(",")) for line in script_args.dims.split(";")]
dims = [[int(i) for i in line.split(",")] for line in script_args.dims.split(";")]
domains = ["onnx_extended.ortops.tutorial.cuda", "", "com.microsoft"]


@@ -420,7 +420,11 @@ def rendering_obs(obs, dim, number, repeat, domain, provider, internal_time):
feeds = {"A": matrices[k1].numpy(), "B": matrices[k2].numpy()}
sess = engine(onx)
sess.run(None, feeds)
obs = measure_time(lambda: sess.run(None, feeds), repeat=repeat, number=number)
obs = measure_time(
lambda sess=sess, feeds=feeds: sess.run(None, feeds),
repeat=repeat,
number=number,
)

elif engine == InferenceSession:
if provider[0] not in get_available_providers():
@@ -441,13 +445,15 @@ def rendering_obs(obs, dim, number, repeat, domain, provider, internal_time):
out_names = ["C"]

# warmup
for i in range(script_args.warmup):
for _i in range(script_args.warmup):
sess._sess.run_with_ort_values(the_feeds, out_names, None)[0]

# benchmark
times = []

def fct_benchmarked():
def fct_benchmarked(
sess=sess, times=times, out_names=out_names, the_feeds=the_feeds
):
got = sess._sess.run_with_ort_values(the_feeds, out_names, None)
if len(got) > 1:
times.append(got[1])
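The same late-binding treatment is applied to a named function above: fct_benchmarked now takes sess, times, out_names, and the_feeds as default arguments, each evaluated once at definition time. Loop counters whose value is never read are also renamed from i to _i (rule B007), the conventional marker for an intentionally unused variable. A compact sketch of the named-function variant, with illustrative names:

fcts = []
for tag in ("a", "b"):

    def run(tag=tag):
        # tag is frozen per iteration by the default argument.
        return f"run {tag}"

    fcts.append(run)

print([f() for f in fcts])  # ['run a', 'run b']

for _i in range(3):  # counter intentionally unused
    pass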
22 changes: 16 additions & 6 deletions _doc/examples/plot_op_conv_denorm.py
@@ -185,9 +185,9 @@ def modify(onx, scale):
diff0 = numpy.abs(got0 / scale - expected).max()

# t1 = measure_time(lambda: sess1.run(None, feeds), repeat=2, number=5)
t2 = measure_time(lambda: sess2.run(None, feeds), repeat=2, number=5)
t3 = measure_time(lambda: sess3.run(None, feeds), repeat=2, number=5)
t4 = measure_time(lambda: sess4.run(None, feeds), repeat=2, number=5)
t2 = measure_time(lambda sess2=sess2: sess2.run(None, feeds), repeat=2, number=5)
t3 = measure_time(lambda sess3=sess3: sess3.run(None, feeds), repeat=2, number=5)
t4 = measure_time(lambda sess4=sess4: sess4.run(None, feeds), repeat=2, number=5)
obs = dict(
scale=scale,
ort=t2["average"],
@@ -204,7 +204,9 @@ def modify(onx, scale):
tb = torch.from_numpy(b)
torch.nn.functional.conv2d(tx, tw, tb, padding=1)
t3 = measure_time(
lambda: torch.nn.functional.conv2d(tx, tw, tb, padding=1),
lambda tx=tx, tw=tw, tb=tb: torch.nn.functional.conv2d(
tx, tw, tb, padding=1
),
repeat=2,
number=5,
)
@@ -223,8 +225,16 @@ def modify(onx, scale):
cuda_feeds = {"X": x_ortvalue}
sess2.run_with_ort_values(None, cuda_feeds)
sess3.run_with_ort_values(None, cuda_feeds)
t2 = measure_time(lambda: sess2.run(None, cuda_feeds), repeat=2, number=5)
t3 = measure_time(lambda: sess3.run(None, cuda_feeds), repeat=2, number=5)
t2 = measure_time(
lambda sess2=sess2, cuda_feeds=cuda_feeds: sess2.run(None, cuda_feeds),
repeat=2,
number=5,
)
t3 = measure_time(
lambda sess3=sess3, cuda_feeds=cuda_feeds: sess3.run(None, cuda_feeds),
repeat=2,
number=5,
)
obs["ort-cuda"] = t2["average"]
obs["ort-cuda-opt"] = t2["average"]

4 changes: 2 additions & 2 deletions _doc/examples/plot_op_conv_py_vs_c.py
@@ -105,8 +105,8 @@
W = np.ones((1, 1, 3, 3), dtype=np.float32)
B = np.array([[[[0]]]], dtype=np.float32)
feeds = {"X": X, "W": W, "B": B}
t1 = measure_time(lambda: sess1.run(None, feeds))
t2 = measure_time(lambda: sess2.run(None, feeds))
t1 = measure_time(lambda feeds=feeds: sess1.run(None, feeds))
t2 = measure_time(lambda feeds=feeds: sess2.run(None, feeds))
obs = dict(size=i, onnx=t1["average"], onnx_extended=t2["average"])
data.append(obs)
if unit_test_going() and len(data) >= 2:
4 changes: 2 additions & 2 deletions _doc/examples/plot_op_gemm2_cuda.py
@@ -195,13 +195,13 @@ def benchmark(sess, sizes, label):
bind, cuda_feeds = move_inputs(sess, feeds)

begin = time.perf_counter()
for i in range(script_args.warmup):
for _i in range(script_args.warmup):
# sess.run(None, feeds)
sess._sess.run_with_iobinding(bind, None)
warmup = time.perf_counter() - begin

times = []
for i in range(script_args.repeat):
for _i in range(script_args.repeat):
begin = time.perf_counter()
# sess.run(None, feeds)
sess._sess.run_with_iobinding(bind, None)
4 changes: 2 additions & 2 deletions _doc/examples/plot_op_mul_cuda.py
@@ -210,13 +210,13 @@ def benchmark(sess, sizes, label):
bind, cuda_feeds = move_inputs(sess, feeds)

begin = time.perf_counter()
for i in range(script_args.warmup):
for _i in range(script_args.warmup):
# sess.run(None, feeds)
sess._sess.run_with_iobinding(bind, None)
warmup = time.perf_counter() - begin

times = []
for i in range(script_args.repeat):
for _i in range(script_args.repeat):
begin = time.perf_counter()
# sess.run(None, feeds)
sess._sess.run_with_iobinding(bind, None)
4 changes: 2 additions & 2 deletions _doc/examples/plot_op_scatternd_cuda.py
@@ -302,13 +302,13 @@ def benchmark(
bind, cuda_feeds = move_inputs(sess, feeds)

begin = time.perf_counter()
for i in range(script_args.warmup):
for _i in range(script_args.warmup):
# sess.run(None, feeds)
sess._sess.run_with_iobinding(bind, None)
warmup = time.perf_counter() - begin

times = []
for i in range(script_args.repeat):
for _i in range(script_args.repeat):
begin = time.perf_counter()
# sess.run(None, feeds)
sess._sess.run_with_iobinding(bind, None)
4 changes: 2 additions & 2 deletions _doc/examples/plot_op_scatternd_mask_cuda.py
@@ -333,12 +333,12 @@ def benchmark(sizes, config, itype, times_col: int = 1, times_indices: int = 1):
sess = get_session(model)
bind, cuda_feeds = move_inputs(sess, feeds)
begin = time.perf_counter()
for i in range(script_args.warmup):
for _i in range(script_args.warmup):
sess._sess.run_with_iobinding(bind, None)
warmup = time.perf_counter() - begin

times = []
for i in range(script_args.repeat):
for _i in range(script_args.repeat):
begin = time.perf_counter()
# sess.run(None, feeds)
sess._sess.run_with_iobinding(bind, None)
9 changes: 6 additions & 3 deletions _doc/examples/plot_op_transpose_2d_cast_cuda.py
@@ -194,13 +194,13 @@ def benchmark(
bind, cuda_feeds = move_inputs(sess, feeds)

begin = time.perf_counter()
for i in range(script_args.warmup):
for _i in range(script_args.warmup):
# sess.run(None, feeds)
sess._sess.run_with_iobinding(bind, None)
warmup = time.perf_counter() - begin

times = []
for i in range(script_args.repeat):
for _i in range(script_args.repeat):
begin = time.perf_counter()
# sess.run(None, feeds)
sess._sess.run_with_iobinding(bind, None)
@@ -262,7 +262,10 @@ def benchmark(
ax = pivot[["Not Fused", "Fused"]].plot(
logx=True,
logy=True,
title=f"Not Fused/Fused implementation for Transpose + Cast on CUDA\nitype={itype}",
title=(
f"Not Fused/Fused implementation for Transpose + "
f"Cast on CUDA\nitype={itype}"
),
)
ax.get_figure().savefig("plot_op_transpose_2d_cast_cuda.png")

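The long plot title above is split using implicit concatenation of adjacent string literals: inside parentheses, the two f-strings are joined at compile time, so the rendered title is unchanged while each source line stays within the length limit (presumably ruff's E501). A minimal sketch:

itype = 1
title = (
    f"Not Fused/Fused implementation for Transpose + "
    f"Cast on CUDA\nitype={itype}"
)
# Identical to the single-line literal it replaces.
assert title == (
    f"Not Fused/Fused implementation for Transpose + Cast on CUDA\nitype={itype}"
)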
12 changes: 6 additions & 6 deletions _doc/examples/plot_op_tree_ensemble_implementations.py
@@ -256,7 +256,7 @@ def compile_tree(
compiler_options.SetMakeAllLeavesSameDepth(pipeline_width)
compiler_options.SetReorderTreesByDepth(reorder_tree_by_depth)
compiler_options.SetNumberOfFeatures(n_features)
assert 8 < batch_size
assert 8 < batch_size # noqa: SIM300
compiler_options.SetPipelineWidth(8)

if verbose:
@@ -344,7 +344,9 @@ def make_ort_assembly_session(

llc_exe = os.environ.get("TEST_LLC_EXE", "SKIP")
if llc_exe == "SKIP":
warnings.warn("Unable to find environment variable 'TEST_LLC_EXE'.")
warnings.warn(
"Unable to find environment variable 'TEST_LLC_EXE'.", stacklevel=0
)
return None

filename = "plot_op_tree_ensemble_implementation.onnx"
@@ -383,9 +385,7 @@ def transform_model(model, use_sparse=False, **kwargs):
onx = ModelProto()
onx.ParseFromString(model.SerializeToString())
att = get_node_attribute(onx.graph.node[0], "nodes_modes")
modes = ",".join(map(lambda s: s.decode("ascii"), att.strings)).replace(
"BRANCH_", ""
)
modes = ",".join([s.decode("ascii") for s in att.strings]).replace("BRANCH_", "")
if use_sparse and "new_op_type" not in kwargs:
kwargs["new_op_type"] = "TreeEnsembleRegressorSparse"
if use_sparse:
@@ -538,7 +538,7 @@ def enumerate_implementations(
disc = diff.mean()
max_disc = diff.max()
obs = measure_time(
lambda: sess.run(None, feeds),
lambda sess=sess, feeds=feeds: sess.run(None, feeds),
repeat=script_args.repeat,
number=script_args.number,
warmup=script_args.warmup,
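Two more patterns appear in this file: map with a lambda is rewritten as a list comprehension (ruff prefers comprehensions here, rule C417), and one rule is suppressed inline rather than auto-fixed, with "# noqa: SIM300" keeping the assertion's constant-first order. A small sketch with made-up inputs:

strings = [b"BRANCH_LEQ", b"BRANCH_LT"]

# map + lambda and the comprehension produce the same string; the
# comprehension avoids an extra function call per element.
modes_old = ",".join(map(lambda s: s.decode("ascii"), strings)).replace("BRANCH_", "")
modes_new = ",".join([s.decode("ascii") for s in strings]).replace("BRANCH_", "")
assert modes_old == modes_new == "LEQ,LT"

batch_size = 100
# SIM300 flags comparisons written with the constant on the left
# ("yoda conditions"); the commit keeps the original reading and
# opts out instead.
assert 8 < batch_size  # noqa: SIM300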
20 changes: 9 additions & 11 deletions _doc/examples/plot_op_tree_ensemble_optim.py
@@ -27,7 +27,7 @@
--max_depth=10
--n_features=50
--batch_size=100000
Another example with a full list of parameters:
python plot_op_tree_ensemble_optim.py
@@ -49,7 +49,7 @@
::
python plot_op_tree_ensemble_optim.py
--n_trees=100 --n_features=10 --batch_size=10000 --max_depth=8 -s SHORT
--n_trees=100 --n_features=10 --batch_size=10000 --max_depth=8 -s SHORT
"""

import logging
@@ -185,9 +185,7 @@ def transform_model(model, **kwargs):
onx = ModelProto()
onx.ParseFromString(model.SerializeToString())
att = get_node_attribute(onx.graph.node[0], "nodes_modes")
modes = ",".join(map(lambda s: s.decode("ascii"), att.strings)).replace(
"BRANCH_", ""
)
modes = ",".join([s.decode("ascii") for s in att.strings]).replace("BRANCH_", "")
return change_onnx_operator_domain(
onx,
op_type="TreeEnsembleRegressor",
@@ -307,12 +305,12 @@ def transform_model(model, **kwargs):
)
elif script_args.scenario == "CUSTOM":
optim_params = dict(
parallel_tree=list(int(i) for i in script_args.parallel_tree.split(",")),
parallel_tree_N=list(int(i) for i in script_args.parallel_tree_N.split(",")),
parallel_N=list(int(i) for i in script_args.parallel_N.split(",")),
batch_size_tree=list(int(i) for i in script_args.batch_size_tree.split(",")),
batch_size_rows=list(int(i) for i in script_args.batch_size_rows.split(",")),
use_node3=list(int(i) for i in script_args.use_node3.split(",")),
parallel_tree=[int(i) for i in script_args.parallel_tree.split(",")],
parallel_tree_N=[int(i) for i in script_args.parallel_tree_N.split(",")],
parallel_N=[int(i) for i in script_args.parallel_N.split(",")],
batch_size_tree=[int(i) for i in script_args.batch_size_tree.split(",")],
batch_size_rows=[int(i) for i in script_args.batch_size_rows.split(",")],
use_node3=[int(i) for i in script_args.use_node3.split(",")],
)
else:
raise ValueError(
20 changes: 9 additions & 11 deletions _doc/examples/plot_op_tree_ensemble_sparse.py
@@ -22,7 +22,7 @@
--n_features=50
--sparsity=0.9
--batch_size=100000
Another example with a full list of parameters:
python plot_op_tree_ensemble_sparse.py
@@ -45,7 +45,7 @@
::
python plot_op_tree_ensemble_sparse.py
--n_trees=100 --n_features=10 --batch_size=10000 --max_depth=8 -s SHORT
--n_trees=100 --n_features=10 --batch_size=10000 --max_depth=8 -s SHORT
"""

import logging
@@ -197,9 +197,7 @@ def transform_model(model, use_sparse=False, **kwargs):
onx = ModelProto()
onx.ParseFromString(model.SerializeToString())
att = get_node_attribute(onx.graph.node[0], "nodes_modes")
modes = ",".join(map(lambda s: s.decode("ascii"), att.strings)).replace(
"BRANCH_", ""
)
modes = ",".join([s.decode("ascii") for s in att.strings]).replace("BRANCH_", "")
if use_sparse and "new_op_type" not in kwargs:
kwargs["new_op_type"] = "TreeEnsembleRegressorSparse"
if use_sparse:
Expand Down Expand Up @@ -347,12 +345,12 @@ def transform_model(model, use_sparse=False, **kwargs):
)
elif script_args.scenario == "CUSTOM":
optim_params = dict(
parallel_tree=list(int(i) for i in script_args.parallel_tree.split(",")),
parallel_tree_N=list(int(i) for i in script_args.parallel_tree_N.split(",")),
parallel_N=list(int(i) for i in script_args.parallel_N.split(",")),
batch_size_tree=list(int(i) for i in script_args.batch_size_tree.split(",")),
batch_size_rows=list(int(i) for i in script_args.batch_size_rows.split(",")),
use_node3=list(int(i) for i in script_args.use_node3.split(",")),
parallel_tree=[int(i) for i in script_args.parallel_tree.split(",")],
parallel_tree_N=[int(i) for i in script_args.parallel_tree_N.split(",")],
parallel_N=[int(i) for i in script_args.parallel_N.split(",")],
batch_size_tree=[int(i) for i in script_args.batch_size_tree.split(",")],
batch_size_rows=[int(i) for i in script_args.batch_size_rows.split(",")],
use_node3=[int(i) for i in script_args.use_node3.split(",")],
)
else:
raise ValueError(
(Diff truncated: the remaining changed files in this commit are not shown.)
