Skip to content

Commit

Permalink
Merge branch 'main' into jcw/fix-win-protobuf
Browse files Browse the repository at this point in the history
  • Loading branch information
jcwchen authored Oct 19, 2023
2 parents d09b21f + bbe7011 commit 9cb487b
Show file tree
Hide file tree
Showing 15 changed files with 146 additions and 40 deletions.
2 changes: 1 addition & 1 deletion .azure-pipelines/Linux-CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ jobs:
- script: |
source venv/bin/activate
pytest -sv --cov=onnx --cov-report=xml --cov-append --cov-branch --junit-xml pytest.xml
pytest -sv --cov=onnx --cov-report=xml --cov-append --cov-branch --junit-xml pytest.xml -n auto --dist loadscope
if [ $? -ne 0 ]; then
echo "pytest failed"
exit 1
Expand Down
4 changes: 2 additions & 2 deletions .azure-pipelines/MacOS-CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ jobs:
displayName: 'Install dependencies and ONNX'
- script: |
pytest
pytest -n auto --dist loadscope
if [ $? -ne 0 ]; then
echo "pytest failed"
exit 1
Expand All @@ -83,7 +83,7 @@ jobs:
export ORT_MAX_IR_SUPPORTED_VERSION=8
export ORT_MAX_ML_OPSET_SUPPORTED_VERSION=3
export ORT_MAX_ONNX_OPSET_SUPPORTED_VERSION=19
pytest
pytest -n auto --dist loadscope
if [ $? -ne 0 ]; then
echo "pytest failed when testing onnx with onnxruntime"
exit 1
Expand Down
4 changes: 2 additions & 2 deletions .azure-pipelines/Windows-CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ jobs:
)
pip install -e ".[reference]" -v
pytest
pytest -n auto --dist loadscope
IF NOT %ERRORLEVEL% EQU 0 (
@echo "pytest failed"
EXIT 1
Expand Down Expand Up @@ -118,7 +118,7 @@ jobs:
pip uninstall -y onnx
pip install .
pytest
pytest -n auto --dist loadscope
IF NOT %ERRORLEVEL% EQU 0 (
@echo "pytest failed when testing onnx with libprotobuf=3.20"
EXIT 1
Expand Down
4 changes: 2 additions & 2 deletions docs/Changelog-ml.md
Original file line number Diff line number Diff line change
Expand Up @@ -1085,9 +1085,9 @@ This version of the operator has been available since version 3 of the 'ai.onnx.
<dt><tt>aggregate_function</tt> : string (default is SUM)</dt>
<dd>Defines how to aggregate leaf values within a target. <br>One of 'AVERAGE,' 'SUM,' 'MIN,' 'MAX.'</dd>
<dt><tt>base_values</tt> : list of floats</dt>
<dd>Base values for classification, added to final class score; the size must be the same as the classes or can be left unassigned (assumed 0)</dd>
<dd>Base values for regression, added to final prediction after applying aggregate_function; the size must be the same as the classes or can be left unassigned (assumed 0)</dd>
<dt><tt>base_values_as_tensor</tt> : tensor</dt>
<dd>Base values for classification, added to final class score; the size must be the same as the classes or can be left unassigned (assumed 0)</dd>
<dd>Base values for regression, added to final prediction after applying aggregate_function; the size must be the same as the classes or can be left unassigned (assumed 0)</dd>
<dt><tt>n_targets</tt> : int</dt>
<dd>The total number of targets.</dd>
<dt><tt>nodes_falsenodeids</tt> : list of ints</dt>
Expand Down
4 changes: 3 additions & 1 deletion docs/Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -21043,7 +21043,9 @@ This version of the operator has been available since version 17 of the default
Let `d[i]` indicate the i-th dimension of `X`.
If `X`'s shape is `[d[0], ..., d[axis-1], d[axis], ..., d[rank-1]]`,
the shape of `Mean` and `InvStdDev` is `[d[0], ..., d[axis-1], 1, ..., 1]`.
`Y` and `X` have the same shape.
`Y` and `X` have the same shape. This operator supports unidirectional broadcasting
(tensors `Scale` and `B` should be unidirectional broadcastable to tensor `X`);
for more details please check [the doc](Broadcasting.md).

#### Version

Expand Down
4 changes: 2 additions & 2 deletions docs/Operators-ml.md
Original file line number Diff line number Diff line change
Expand Up @@ -1038,9 +1038,9 @@ Other versions of this operator: <a href="Changelog-ml.md#ai.onnx.ml.TreeEnsembl
<dt><tt>aggregate_function</tt> : string (default is SUM)</dt>
<dd>Defines how to aggregate leaf values within a target. <br>One of 'AVERAGE,' 'SUM,' 'MIN,' 'MAX.'</dd>
<dt><tt>base_values</tt> : list of floats</dt>
<dd>Base values for classification, added to final class score; the size must be the same as the classes or can be left unassigned (assumed 0)</dd>
<dd>Base values for regression, added to final prediction after applying aggregate_function; the size must be the same as the classes or can be left unassigned (assumed 0)</dd>
<dt><tt>base_values_as_tensor</tt> : tensor</dt>
<dd>Base values for classification, added to final class score; the size must be the same as the classes or can be left unassigned (assumed 0)</dd>
<dd>Base values for regression, added to final prediction after applying aggregate_function; the size must be the same as the classes or can be left unassigned (assumed 0)</dd>
<dt><tt>n_targets</tt> : int</dt>
<dd>The total number of targets.</dd>
<dt><tt>nodes_falsenodeids</tt> : list of ints</dt>
Expand Down
4 changes: 3 additions & 1 deletion docs/Operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -13172,7 +13172,9 @@ expect(
Let `d[i]` indicate the i-th dimension of `X`.
If `X`'s shape is `[d[0], ..., d[axis-1], d[axis], ..., d[rank-1]]`,
the shape of `Mean` and `InvStdDev` is `[d[0], ..., d[axis-1], 1, ..., 1]`.
`Y` and `X` have the same shape.
`Y` and `X` have the same shape. This operator supports unidirectional broadcasting
(tensors `Scale` and `B` should be unidirectional broadcastable to tensor `X`);
for more details please check [the doc](Broadcasting.md).

#### Version

Expand Down
4 changes: 3 additions & 1 deletion onnx/defs/nn/defs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2516,7 +2516,9 @@ static const char* LayerNormalization_ver17_doc = R"DOC(
Let `d[i]` indicate the i-th dimension of `X`.
If `X`'s shape is `[d[0], ..., d[axis-1], d[axis], ..., d[rank-1]]`,
the shape of `Mean` and `InvStdDev` is `[d[0], ..., d[axis-1], 1, ..., 1]`.
`Y` and `X` have the same shape.
`Y` and `X` have the same shape. This operator supports unidirectional broadcasting
(tensors `Scale` and `B` should be unidirectional broadcastable to tensor `X`);
for more details please check [the doc](Broadcasting.md).
)DOC";

bool BuildContextDependentFunctionBodyLayerNormalization(
Expand Down
4 changes: 2 additions & 2 deletions onnx/defs/traditionalml/defs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -998,12 +998,12 @@ ONNX_ML_OPERATOR_SET_SCHEMA(
std::string("SUM"))
.Attr(
"base_values",
"Base values for classification, added to final class score; the size must be the same as the classes or can be left unassigned (assumed 0)",
"Base values for regression, added to final prediction after applying aggregate_function; the size must be the same as the classes or can be left unassigned (assumed 0)",
AttributeProto::FLOATS,
OPTIONAL_VALUE)
.Attr(
"base_values_as_tensor",
"Base values for classification, added to final class score; the size must be the same as the classes or can be left unassigned (assumed 0)",
"Base values for regression, added to final prediction after applying aggregate_function; the size must be the same as the classes or can be left unassigned (assumed 0)",
AttributeProto::TENSOR,
OPTIONAL_VALUE)
.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
Expand Down
4 changes: 3 additions & 1 deletion onnx/reference/ops/aionnxml/op_tree_ensemble_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,10 @@ def _run( # type: ignore
)
if aggregate_function == "AVERAGE":
res /= n_trees

# Convention is to add base_values after aggregate function
if base_values is not None:
res[:, :] = np.array(base_values).reshape((1, -1))
res[:, :] += np.array(base_values).reshape((1, -1))

if post_transform in (None, "NONE"):
return (res,)
Expand Down
11 changes: 6 additions & 5 deletions onnx/reference/ops/op_tfidf_vectorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ def __init__(self):
self.added_keys = []

def emplace(self, key, value):
if not isinstance(key, int):
raise TypeError(f"key must be a NGramPart not {type(key)}.")
if not isinstance(key, (int, str)):
raise TypeError(f"key must be a int or str not {type(key)}.")
if not isinstance(value, NgramPart):
raise TypeError(f"value must be a NGramPart not {type(value)}.")
if key not in self:
Expand Down Expand Up @@ -147,11 +147,12 @@ def __init__(self, onnx_node, run_params): # type: ignore
self.output_size_ = max(self.ngram_indexes_) + 1
self.weights_ = self.weights # type: ignore
self.pool_int64s_ = self.pool_int64s # type: ignore
self.pool_strings_ = self.pool_strings # type: ignore

self.int64_map_ = NgramPart(-10)
self.int64_map_.init()

total_items = len(self.pool_int64s_)
total_items = len(self.pool_int64s_ or self.pool_strings_)
ngram_id = 1 # start with 1, 0 - means no n-gram
# Load into dictionary only required gram sizes
ngram_size = 1
Expand All @@ -170,7 +171,7 @@ def __init__(self, onnx_node, run_params): # type: ignore
and ngram_size <= self.max_gram_length_
):
ngram_id = populate_grams(
self.pool_int64s_,
self.pool_int64s_ or self.pool_strings_,
start_idx,
ngrams,
ngram_size,
Expand Down Expand Up @@ -359,7 +360,7 @@ def _run( # type: ignore
# TfidfVectorizer returns a zero tensor of shape
# {b_dim, output_size} where b_dim is the number of received observations
# and output_size is the maximum value in the ngram_indexes attribute plus 1.
return self.output_result(B, frequencies) # type: ignore[arg-type]
return (self.output_result(B, frequencies),) # type: ignore[arg-type]

def fn(row_num):
self.compute_impl(
Expand Down
113 changes: 103 additions & 10 deletions onnx/test/reference_evaluator_ml_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import numpy as np # type: ignore
from numpy.testing import assert_allclose # type: ignore
from parameterized import parameterized

from onnx import ONNX_ML, TensorProto, TypeProto, ValueInfoProto
from onnx.checker import check_model
Expand Down Expand Up @@ -757,7 +758,7 @@ def test_linear_classifier_unary(self):

@staticmethod
def _get_test_tree_ensemble_regressor(
aggregate_function, rule="BRANCH_LEQ", unique_targets=False
aggregate_function, rule="BRANCH_LEQ", unique_targets=False, base_values=None
):
X = make_tensor_value_info("X", TensorProto.FLOAT, [None, None])
Y = make_tensor_value_info("Y", TensorProto.FLOAT, [None, None])
Expand Down Expand Up @@ -786,6 +787,7 @@ def _get_test_tree_ensemble_regressor(
domain="ai.onnx.ml",
n_targets=1,
aggregate_function=aggregate_function,
base_values=base_values,
nodes_falsenodeids=[4, 3, 0, 0, 0, 2, 0, 4, 0, 0],
nodes_featureids=[0, 2, 0, 0, 0, 0, 0, 2, 0, 0],
nodes_hitrates=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
Expand Down Expand Up @@ -828,23 +830,34 @@ def _get_test_tree_ensemble_regressor(
check_model(onx)
return onx

@parameterized.expand(
[
(f"{agg}_{base_values}", base_values, agg)
for base_values in (None, [1.0])
for agg in ("SUM", "AVERAGE", "MIN", "MAX")
]
)
@unittest.skipIf(not ONNX_ML, reason="onnx not compiled with ai.onnx.ml")
def test_tree_ensemble_regressor(self):
def test_tree_ensemble_regressor(self, name, base_values, agg):
self.assertTrue(ONNX_ML)
del name # variable only used to print test name
x = np.arange(9).reshape((-1, 3)).astype(np.float32) / 10 - 0.5
expected_agg = {
"SUM": np.array([[0.576923], [0.576923], [0.576923]], dtype=np.float32),
"AVERAGE": np.array([[0.288462], [0.288462], [0.288462]], dtype=np.float32),
"MIN": np.array([[0.076923], [0.076923], [0.076923]], dtype=np.float32),
"MAX": np.array([[0.5], [0.5], [0.5]], dtype=np.float32),
}
for agg in ("SUM", "AVERAGE", "MIN", "MAX"):
expected = expected_agg[agg]
with self.subTest(aggregate_function=agg):
onx = self._get_test_tree_ensemble_regressor(agg)
self._check_ort(onx, {"X": x}, equal=True)
sess = ReferenceEvaluator(onx)
got = sess.run(None, {"X": x})
assert_allclose(expected, got[0], atol=1e-6)

expected = expected_agg[agg]
if base_values is not None:
expected += base_values[0]
with self.subTest(aggregate_function=agg):
onx = self._get_test_tree_ensemble_regressor(agg, base_values=base_values)
self._check_ort(onx, {"X": x}, equal=True)
sess = ReferenceEvaluator(onx)
got = sess.run(None, {"X": x})
assert_allclose(expected, got[0], atol=1e-6)

@unittest.skipIf(not ONNX_ML, reason="onnx not compiled with ai.onnx.ml")
def test_tree_ensemble_regressor_rule(self):
Expand Down Expand Up @@ -1789,6 +1802,86 @@ def test_svm_regressor_linear_one_class(self):
got = sess.run(None, {"X": x})
assert_allclose(expected[0], got[0], atol=1e-6)

def test_onnxrt_tfidf_vectorizer_ints(self):
    """Run TfIdfVectorizer (TF mode, bigrams only) over an int64 n-gram pool.

    The pool holds four unigrams followed by three bigrams; because
    min_gram_length == max_gram_length == 2, only the bigrams are counted.
    """
    tokens = np.array([[1, 1, 3, 3, 3, 7], [8, 6, 7, 5, 6, 8]], dtype=np.int64)

    # Only the second row contains pooled bigrams: (5, 6) -> column 4 and
    # (6, 7) -> column 6. The first row matches nothing and stays all zero.
    expected = np.zeros((2, 7), dtype=np.float32)
    expected[1, [4, 6]] = 1.0

    unigrams = [2, 3, 5, 4]
    bigrams = [5, 6, 7, 8, 6, 7]  # flattened pairs: (5, 6), (7, 8), (6, 7)
    vectorizer = make_node(
        "TfIdfVectorizer",
        ["tokens"],
        ["out"],
        mode="TF",
        min_gram_length=2,
        max_gram_length=2,
        max_skip_count=0,
        # Pool offsets at which the 1-grams and the 2-grams start.
        ngram_counts=np.array([0, len(unigrams)], dtype=np.int64),
        ngram_indexes=np.arange(7, dtype=np.int64),
        pool_int64s=np.array(unigrams + bigrams, dtype=np.int64),
    )
    graph = make_graph(
        [vectorizer],
        "tfidf",
        [make_tensor_value_info("tokens", TensorProto.INT64, [None, None])],
        [make_tensor_value_info("out", TensorProto.FLOAT, [None, None])],
    )
    model = make_model_gen_version(graph, opset_imports=OPSETS)

    got = ReferenceEvaluator(model).run(None, {"tokens": tokens})
    self.assertEqual(expected.tolist(), got[0].tolist())

def test_onnxrt_tfidf_vectorizer_strings(self):
    """Run TfIdfVectorizer (TF mode, bigrams only) over a string n-gram pool.

    Mirrors the int64 variant of this test with every token ``k`` spelled
    ``"ik"``. Only the second row contains pooled bigrams, ("i5", "i6") and
    ("i6", "i7"), which land in output columns 4 and 6.
    """
    inputi = np.array(
        [["i1", "i1", "i3", "i3", "i3", "i7"], ["i8", "i6", "i7", "i5", "i6", "i8"]]
    )
    output = np.array(
        [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0]]
    ).astype(np.float32)

    # ngram_counts gives the pool offsets where the 1-grams and 2-grams start.
    ngram_counts = np.array([0, 4]).astype(np.int64)
    ngram_indexes = np.array([0, 1, 2, 3, 4, 5, 6]).astype(np.int64)
    pool_strings = np.array(
        [
            # unigrams
            "i2",
            "i3",
            "i5",
            "i4",
            # bigrams, stored as flattened pairs
            "i5",
            "i6",
            "i7",
            "i8",
            "i6",
            "i7",
        ]
    )

    model = make_model_gen_version(
        make_graph(
            [
                make_node(
                    "TfIdfVectorizer",
                    ["tokens"],
                    ["out"],
                    mode="TF",
                    min_gram_length=2,
                    max_gram_length=2,
                    max_skip_count=0,
                    ngram_counts=ngram_counts,
                    ngram_indexes=ngram_indexes,
                    pool_strings=pool_strings,
                )
            ],
            "tfidf",
            # The tokens fed to the model are strings, so the graph input
            # must be declared STRING (not INT64) to match pool_strings.
            [make_tensor_value_info("tokens", TensorProto.STRING, [None, None])],
            [make_tensor_value_info("out", TensorProto.FLOAT, [None, None])],
        ),
        opset_imports=OPSETS,
    )

    oinf = ReferenceEvaluator(model)
    res = oinf.run(None, {"tokens": inputi})
    self.assertEqual(output.tolist(), res[0].tolist())


if __name__ == "__main__":
unittest.main(verbosity=2)
22 changes: 12 additions & 10 deletions onnx/test/version_converter/automatic_upgrade_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@
# to the most recent version and runs checker and shape inference on the final upgraded model.
####################################################################################

tested_ops = []


class TestAutomaticUpgrade(automatic_conversion_test_base.TestAutomaticConversion):
@classmethod
def setUpClass(cls):
cls.tested_ops = []

def _test_op_upgrade(self, op, *args, **kwargs):
tested_ops.append(op)
self.tested_ops.append(op)
self._test_op_conversion(op, *args, **kwargs, is_upgrade=True)

def test_Abs(self) -> None:
Expand Down Expand Up @@ -1739,9 +1741,10 @@ def test_RegexFullMatch(self) -> None:
)

def test_ops_tested(self) -> None:
# NOTE: This test is order dependent and needs to run last in this class
all_schemas = onnx.defs.get_all_schemas()
all_op_names = [schema.name for schema in all_schemas if schema.domain == ""]
excluded_ops = [
all_op_names = {schema.name for schema in all_schemas if schema.domain == ""}
excluded_ops = {
# Sequence-based and Optional-based ops disabled because
# the version converter doesn't play nicely with sequences
"ConcatFromSequence",
Expand All @@ -1757,12 +1760,11 @@ def test_ops_tested(self) -> None:
"OptionalGetElement",
"OptionalHasElement",
"StringSplit",
]
all_op_names = [op for op in all_op_names if op not in excluded_ops]
}
expected_tested_ops = all_op_names - excluded_ops

untested_ops = set(all_op_names) - set(tested_ops)
print(untested_ops)
assert len(untested_ops) == 0
untested_ops = expected_tested_ops - set(self.tested_ops)
self.assertEqual(untested_ops, set())


if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ parameterized
protobuf
pytest
pytest-cov
pytest-xdist
setuptools
twine
wheel
Expand Down
1 change: 1 addition & 0 deletions requirements-release.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ parameterized
protobuf==4.21.12
pytest
pytest-cov
pytest-xdist
setuptools
twine
wheel
Expand Down

0 comments on commit 9cb487b

Please sign in to comment.