From be10d2687924ef27335c270c47689f158cccd6ea Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 14 Jan 2025 13:15:47 +0100 Subject: [PATCH] always assert closeness and not equality --- tests/onnxruntime/test_modeling.py | 188 +++++++++++++++-------------- 1 file changed, 100 insertions(+), 88 deletions(-) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 806753acb6..2a946ea891 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -128,13 +128,16 @@ logger = logging.get_logger() +ATOL = 1e-4 +RTOL = 1e-4 + class ORTModelIntegrationTest(unittest.TestCase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.TEST_MODEL_ID = "sshleifer/tiny-distilbert-base-cased-distilled-squad" self.LOCAL_MODEL_PATH = "tests/assets/onnx" self.ONNX_MODEL_ID = "philschmid/distilbert-onnx" + self.TINY_ONNX_MODEL_ID = "fxmarty/resnet-tiny-beans" self.FAIL_ONNX_MODEL_ID = "sshleifer/tiny-distilbert-base-cased-distilled-squad" self.ONNX_SEQ2SEQ_MODEL_ID = "optimum/t5-small" @@ -764,7 +767,6 @@ def test_seq2seq_model_on_gpu_id(self): model.decoder_with_past.session.get_provider_options()["CUDAExecutionProvider"]["device_id"], "1" ) - # test string device input for to() @require_torch_gpu @pytest.mark.cuda_ep_test def test_seq2seq_model_on_gpu_str(self): @@ -1255,9 +1257,7 @@ def test_trust_remote_code(self): ort_logits = ort_model(**inputs).logits - self.assertTrue( - torch.allclose(pt_logits, ort_logits, atol=1e-4), f" Maxdiff: {torch.abs(pt_logits - ort_logits).max()}" - ) + torch.testing.assert_close(pt_logits, ort_logits, atol=ATOL, rtol=RTOL) @parameterized.expand(("", "onnx")) def test_loading_with_config_not_from_subfolder(self, subfolder): @@ -1344,11 +1344,11 @@ def test_compare_to_transformers(self, model_arch): self.assertIsInstance(onnx_outputs.end_logits, self.TENSOR_ALIAS_TO_TYPE[input_type]) # Compare tensor outputs - self.assertTrue( - 
torch.allclose(torch.Tensor(onnx_outputs.start_logits), transformers_outputs.start_logits, atol=1e-4) + torch.testing.assert_close( + torch.Tensor(onnx_outputs.start_logits), transformers_outputs.start_logits, atol=ATOL, rtol=RTOL ) - self.assertTrue( - torch.allclose(torch.Tensor(onnx_outputs.end_logits), transformers_outputs.end_logits, atol=1e-4) + torch.testing.assert_close( + torch.Tensor(onnx_outputs.end_logits), transformers_outputs.end_logits, atol=ATOL, rtol=RTOL ) gc.collect() @@ -1465,8 +1465,10 @@ def test_compare_to_io_binding(self, model_arch): self.assertIsInstance(io_outputs.end_logits, torch.Tensor) # compare tensor outputs - self.assertTrue(torch.equal(onnx_outputs.start_logits, io_outputs.start_logits)) - self.assertTrue(torch.equal(onnx_outputs.end_logits, io_outputs.end_logits)) + torch.testing.assert_close( + torch.Tensor(io_outputs.start_logits), onnx_outputs.start_logits, atol=ATOL, rtol=RTOL + ) + torch.testing.assert_close(torch.Tensor(io_outputs.end_logits), onnx_outputs.end_logits, atol=ATOL, rtol=RTOL) gc.collect() @@ -1534,7 +1536,9 @@ def test_compare_to_transformers(self, model_arch): self.assertIsInstance(onnx_outputs.logits, self.TENSOR_ALIAS_TO_TYPE[input_type]) # compare tensor outputs - self.assertTrue(torch.allclose(torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=1e-4)) + torch.testing.assert_close( + torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=ATOL, rtol=RTOL + ) gc.collect() @@ -1637,7 +1641,7 @@ def test_compare_to_io_binding(self, model_arch): self.assertIsInstance(io_outputs.logits, torch.Tensor) # compare tensor outputs - self.assertTrue(torch.equal(onnx_outputs.logits, io_outputs.logits)) + torch.testing.assert_close(torch.Tensor(io_outputs.logits), onnx_outputs.logits, atol=ATOL, rtol=RTOL) gc.collect() @@ -1715,7 +1719,9 @@ def test_compare_to_transformers(self, model_arch): self.assertIsInstance(onnx_outputs.logits, self.TENSOR_ALIAS_TO_TYPE[input_type]) # compare tensor 
outputs - self.assertTrue(torch.allclose(torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=1e-4)) + torch.testing.assert_close( + torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=ATOL, rtol=RTOL + ) gc.collect() @@ -1845,7 +1851,7 @@ def test_compare_to_io_binding(self, model_arch): self.assertIsInstance(io_outputs.logits, torch.Tensor) # compare tensor outputs - self.assertTrue(torch.equal(onnx_outputs.logits, io_outputs.logits)) + torch.testing.assert_close(onnx_outputs.logits, io_outputs.logits, atol=ATOL, rtol=RTOL) gc.collect() @@ -1916,7 +1922,9 @@ def test_compare_to_transformers(self, model_arch): self.assertIsInstance(onnx_outputs.logits, self.TENSOR_ALIAS_TO_TYPE[input_type]) # compare tensor outputs - self.assertTrue(torch.allclose(torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=1e-4)) + torch.testing.assert_close( + torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=ATOL, rtol=RTOL + ) gc.collect() @@ -2025,7 +2033,7 @@ def test_compare_to_io_binding(self, model_arch): self.assertIsInstance(io_outputs.logits, torch.Tensor) # compare tensor outputs - self.assertTrue(torch.equal(onnx_outputs.logits, io_outputs.logits)) + torch.testing.assert_close(onnx_outputs.logits, io_outputs.logits, atol=ATOL, rtol=RTOL) gc.collect() @@ -2073,10 +2081,11 @@ def test_compare_to_transformers(self, model_arch): self.assertIsInstance(onnx_outputs.last_hidden_state, self.TENSOR_ALIAS_TO_TYPE[input_type]) # compare tensor outputs - self.assertTrue( - torch.allclose( - torch.Tensor(onnx_outputs.last_hidden_state), transformers_outputs.last_hidden_state, atol=1e-4 - ) + torch.testing.assert_close( + torch.Tensor(onnx_outputs.last_hidden_state), + transformers_outputs.last_hidden_state, + atol=ATOL, + rtol=RTOL, ) gc.collect() @@ -2183,7 +2192,7 @@ def test_compare_to_io_binding(self, model_arch): self.assertIsInstance(io_outputs.last_hidden_state, torch.Tensor) # compare tensor outputs - 
self.assertTrue(torch.equal(onnx_outputs.last_hidden_state, io_outputs.last_hidden_state)) + torch.testing.assert_close(onnx_outputs.last_hidden_state, io_outputs.last_hidden_state, atol=ATOL, rtol=RTOL) gc.collect() @@ -2196,7 +2205,9 @@ def test_default_token_type_ids(self): token_type_ids = tokens.pop("token_type_ids") outs = model(token_type_ids=token_type_ids, **tokens) outs_without_token_type_ids = model(**tokens) - self.assertTrue(np.allclose(outs.last_hidden_state, outs_without_token_type_ids.last_hidden_state)) + torch.testing.assert_close( + outs.last_hidden_state, outs_without_token_type_ids.last_hidden_state, atol=ATOL, rtol=RTOL + ) gc.collect() @@ -2264,7 +2275,9 @@ def test_compare_to_transformers(self, model_arch): self.assertIsInstance(onnx_outputs.logits, self.TENSOR_ALIAS_TO_TYPE[input_type]) # Compare tensor outputs - self.assertTrue(torch.allclose(torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=1e-4)) + torch.testing.assert_close( + torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=ATOL, rtol=RTOL + ) gc.collect() @@ -2303,7 +2316,7 @@ def test_compare_to_io_binding(self, model_arch): self.assertIsInstance(io_outputs.logits, torch.Tensor) # compare tensor outputs - self.assertTrue(torch.equal(onnx_outputs.logits, io_outputs.logits)) + torch.testing.assert_close(io_outputs.logits, onnx_outputs.logits, atol=ATOL, rtol=RTOL) gc.collect() @@ -2460,10 +2473,7 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach self.assertIsInstance(onnx_outputs.logits, torch.Tensor) # compare tensor outputs - self.assertTrue( - torch.allclose(onnx_outputs.logits, transformers_outputs.logits, atol=1e-4), - f"Maxdiff: {(onnx_outputs.logits - transformers_outputs.logits).abs()}", - ) + torch.testing.assert_close(onnx_outputs.logits, transformers_outputs.logits, atol=ATOL, rtol=RTOL) # Compare batched generation. 
tokenizer.pad_token_id = tokenizer.eos_token_id @@ -2505,13 +2515,11 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach set_seed(SEED) with torch.no_grad(): transformers_outputs = transformers_model.generate(**tokens, generation_config=gen_config) + set_seed(SEED) onnx_outputs = onnx_model.generate(**tokens, generation_config=gen_config) - self.assertTrue( - torch.equal(onnx_outputs, transformers_outputs), - f"Failed with generation config : {gen_config}, transformers outputs {transformers_outputs}, ONNX model outputs {onnx_outputs}", - ) + torch.testing.assert_close(onnx_outputs, transformers_outputs, atol=ATOL, rtol=RTOL) gc.collect() @@ -2680,7 +2688,7 @@ def test_compare_with_and_without_past_key_values(self, model_arch): **tokens, min_new_tokens=self.GENERATION_LENGTH, max_new_tokens=self.GENERATION_LENGTH, num_beams=1 ) - self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv)) + torch.testing.assert_close(outputs_model_with_pkv, outputs_model_without_pkv, atol=ATOL, rtol=RTOL) self.assertEqual(outputs_model_with_pkv.shape[1], tokens["input_ids"].shape[1] + self.GENERATION_LENGTH) self.assertEqual(outputs_model_without_pkv.shape[1], tokens["input_ids"].shape[1] + self.GENERATION_LENGTH) @@ -2725,7 +2733,7 @@ def test_compare_merged_and_not_merged_models_outputs(self, test_name: str, mode outputs_model_not_merged = model_not_merged.generate(**tokens) outputs_model_merged = model_merged.generate(**tokens) - self.assertTrue(torch.equal(outputs_model_merged, outputs_model_not_merged)) + torch.testing.assert_close(outputs_model_not_merged, outputs_model_merged, atol=ATOL, rtol=RTOL) @parameterized.expand( grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True], "use_merged": [False, True]}) @@ -2766,7 +2774,7 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: self.assertIsInstance(io_outputs.logits, torch.Tensor) # compare tensor outputs - 
self.assertTrue(torch.equal(onnx_outputs.logits, io_outputs.logits)) + torch.testing.assert_close(io_outputs.logits, onnx_outputs.logits, atol=ATOL, rtol=RTOL) gc.collect() @@ -2793,7 +2801,7 @@ def test_compare_generation_to_io_binding(self, test_name: str, model_arch: str, io_outputs = io_model.generate(**tokens) # compare tensor outputs - self.assertTrue(torch.equal(onnx_outputs, io_outputs)) + torch.testing.assert_close(io_outputs, onnx_outputs, atol=ATOL, rtol=RTOL) gc.collect() @@ -2890,7 +2898,7 @@ def test_compare_to_timm(self, model_arch): self.assertIsInstance(onnx_outputs.logits, self.TENSOR_ALIAS_TO_TYPE[input_type]) # compare tensor outputs - self.assertTrue(torch.allclose(torch.Tensor(onnx_outputs.logits), timm_outputs, atol=1e-4)) + torch.testing.assert_close(torch.Tensor(onnx_outputs.logits), timm_outputs, atol=ATOL, rtol=RTOL) gc.collect() @@ -2924,7 +2932,7 @@ def test_compare_to_transformers(self, model_arch): self.assertIsInstance(onnx_outputs.logits, self.TENSOR_ALIAS_TO_TYPE[input_type]) # compare tensor outputs - self.assertTrue(torch.allclose(torch.Tensor(onnx_outputs.logits), trtfs_outputs.logits, atol=1e-4)) + torch.testing.assert_close(torch.Tensor(onnx_outputs.logits), trtfs_outputs.logits, atol=ATOL, rtol=RTOL) gc.collect() @@ -3041,10 +3049,7 @@ def test_compare_to_io_binding(self, model_arch): self.assertIsInstance(io_outputs.logits, torch.Tensor) # compare tensor outputs - self.assertTrue( - torch.allclose(onnx_outputs.logits, io_outputs.logits, atol=1e-4), - f" Maxdiff: {torch.abs(onnx_outputs.logits - io_outputs.logits).max()}", - ) + torch.testing.assert_close(onnx_outputs.logits, io_outputs.logits, atol=ATOL, rtol=RTOL) gc.collect() @@ -3091,7 +3096,7 @@ def test_compare_to_transformers(self, model_arch): self.assertIsInstance(onnx_outputs.logits, self.TENSOR_ALIAS_TO_TYPE[input_type]) # compare tensor outputs - self.assertTrue(torch.allclose(torch.Tensor(onnx_outputs.logits), trtfs_outputs.logits, atol=1e-4)) + 
torch.testing.assert_close(torch.Tensor(onnx_outputs.logits), trtfs_outputs.logits, atol=ATOL, rtol=RTOL) gc.collect() @@ -3206,10 +3211,7 @@ def test_compare_to_io_binding(self, model_arch): self.assertIsInstance(io_outputs.logits, torch.Tensor) # compare tensor outputs - self.assertTrue( - torch.allclose(onnx_outputs.logits, io_outputs.logits, atol=1e-4), - f" Maxdiff: {torch.abs(onnx_outputs.logits - io_outputs.logits).max()}", - ) + torch.testing.assert_close(onnx_outputs.logits, io_outputs.logits, atol=ATOL, rtol=RTOL) gc.collect() @@ -3274,7 +3276,9 @@ def test_compare_to_transformers(self, model_arch): self.assertIsInstance(onnx_outputs.logits, self.TENSOR_ALIAS_TO_TYPE[input_type]) # compare tensor outputs - self.assertTrue(torch.allclose(torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=1e-4)) + torch.testing.assert_close( + torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=ATOL, rtol=RTOL + ) gc.collect() @@ -3390,7 +3394,7 @@ def test_compare_to_io_binding(self, model_arch): self.assertIsInstance(io_outputs.logits, torch.Tensor) # compare tensor outputs - self.assertTrue(torch.allclose(onnx_outputs.logits, io_outputs.logits, atol=1e-4)) + torch.testing.assert_close(onnx_outputs.logits, io_outputs.logits, atol=ATOL, rtol=RTOL) gc.collect() @@ -3453,7 +3457,9 @@ def test_compare_to_transformers(self, model_arch): self.assertIsInstance(onnx_outputs.logits, self.TENSOR_ALIAS_TO_TYPE[input_type]) # compare tensor outputs - self.assertTrue(torch.allclose(torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=1e-4)) + torch.testing.assert_close( + torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=ATOL, rtol=RTOL + ) gc.collect() @@ -3483,7 +3489,7 @@ def test_compare_to_io_binding(self, model_arch): self.assertIsInstance(io_outputs.logits, torch.Tensor) # compare tensor outputs - self.assertTrue(torch.allclose(torch.Tensor(onnx_outputs.logits), io_outputs.logits, atol=1e-1)) + 
torch.testing.assert_close(torch.Tensor(onnx_outputs.logits), io_outputs.logits, atol=ATOL, rtol=RTOL) gc.collect() @@ -3541,9 +3547,11 @@ def test_compare_to_transformers(self, model_arch): self.assertIsInstance(onnx_outputs.embeddings, self.TENSOR_ALIAS_TO_TYPE[input_type]) # compare tensor outputs - self.assertTrue(torch.allclose(torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=1e-4)) - self.assertTrue( - torch.allclose(torch.Tensor(onnx_outputs.embeddings), transformers_outputs.embeddings, atol=1e-4) + torch.testing.assert_close( + torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=ATOL, rtol=RTOL + ) + torch.testing.assert_close( + torch.Tensor(onnx_outputs.embeddings), transformers_outputs.embeddings, atol=ATOL, rtol=RTOL ) gc.collect() @@ -3575,8 +3583,8 @@ def test_compare_to_io_binding(self, model_arch): self.assertIsInstance(io_outputs.embeddings, torch.Tensor) # compare tensor outputs - self.assertTrue(torch.allclose(onnx_outputs.logits, io_outputs.logits, atol=1e-4)) - self.assertTrue(torch.allclose(onnx_outputs.embeddings, io_outputs.embeddings, atol=1e-4)) + torch.testing.assert_close(onnx_outputs.logits, io_outputs.logits, atol=ATOL, rtol=RTOL) + torch.testing.assert_close(onnx_outputs.embeddings, io_outputs.embeddings, atol=ATOL, rtol=RTOL) gc.collect() @@ -3624,6 +3632,7 @@ def test_compare_to_transformers(self, model_arch): with torch.no_grad(): transformers_outputs = transformers_model(**input_values) + for input_type in ["pt", "np"]: input_values = processor(self._generate_random_audio_data(), return_tensors=input_type) onnx_outputs = onnx_model(**input_values) @@ -3632,7 +3641,9 @@ def test_compare_to_transformers(self, model_arch): self.assertIsInstance(onnx_outputs.logits, self.TENSOR_ALIAS_TO_TYPE[input_type]) # compare tensor outputs - self.assertTrue(torch.allclose(torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=1e-4)) + torch.testing.assert_close( + 
torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=ATOL, rtol=RTOL + ) gc.collect() @@ -3866,8 +3877,8 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach self.assertIsInstance(onnx_outputs.logits, self.TENSOR_ALIAS_TO_TYPE[input_type]) # Compare tensor outputs - self.assertTrue( - torch.allclose(torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=1e-4) + torch.testing.assert_close( + torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=ATOL, rtol=RTOL ) gc.collect() @@ -4112,7 +4123,7 @@ def test_compare_with_and_without_past_key_values(self, model_arch: str): **tokens, min_new_tokens=self.GENERATION_LENGTH, max_new_tokens=self.GENERATION_LENGTH, num_beams=1 ) - self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv)) + torch.testing.assert_close(outputs_model_with_pkv, outputs_model_without_pkv, rtol=RTOL, atol=ATOL) self.assertEqual(outputs_model_with_pkv.shape[1], self.GENERATION_LENGTH + 1) self.assertEqual(outputs_model_without_pkv.shape[1], self.GENERATION_LENGTH + 1) @@ -4163,7 +4174,7 @@ def test_compare_merged_and_not_merged_models_outputs(self, test_name: str, mode outputs_model_not_merged = model_not_merged.generate(**tokens) outputs_model_merged = model_merged.generate(**tokens) - self.assertTrue(torch.equal(outputs_model_merged, outputs_model_not_merged)) + torch.testing.assert_close(outputs_model_not_merged, outputs_model_merged, rtol=RTOL, atol=ATOL) @parameterized.expand( grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True], "use_merged": [False, True]}) @@ -4217,7 +4228,7 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: self.assertIsInstance(io_outputs.logits, torch.Tensor) # compare tensor outputs - self.assertTrue(torch.equal(onnx_outputs.logits, io_outputs.logits)) + torch.testing.assert_close(onnx_outputs.logits, io_outputs.logits, atol=ATOL, rtol=RTOL) gc.collect() @@ -4274,7 
+4285,7 @@ def test_compare_generation_to_io_binding( io_outputs = io_model.generate(**tokens, num_beams=num_beams) # compare tensor outputs - self.assertTrue(torch.equal(onnx_outputs, io_outputs)) + torch.testing.assert_close(onnx_outputs, io_outputs, atol=ATOL, rtol=RTOL) gc.collect() @@ -4430,7 +4441,9 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach self.assertIsInstance(onnx_outputs.logits, self.TENSOR_ALIAS_TO_TYPE[input_type]) # Compare tensor outputs - self.assertTrue(torch.allclose(torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=1e-4)) + torch.testing.assert_close( + torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=ATOL, rtol=RTOL + ) new_tokens = 20 # because tiny random speech to text model has a max_position_embeddings of 20 @@ -4451,7 +4464,7 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach num_beams=1, ) - self.assertTrue(torch.equal(onnx_outputs, transformers_outputs)) + torch.testing.assert_close(torch.Tensor(onnx_outputs), transformers_outputs, atol=ATOL, rtol=RTOL) gc.collect() @@ -4573,7 +4586,7 @@ def test_compare_with_and_without_past_key_values(self, model_arch: str): **features, min_new_tokens=generation_length, max_new_tokens=generation_length, num_beams=1 ) - self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv)) + torch.testing.assert_close(outputs_model_with_pkv, outputs_model_without_pkv, rtol=RTOL, atol=ATOL) if model_arch == "whisper" and is_transformers_version(">=", "4.43"): gen_length = generation_length + 2 @@ -4629,7 +4642,7 @@ def test_compare_merged_and_not_merged_models_outputs(self, test_name: str, mode **features, min_new_tokens=generation_length, max_new_tokens=generation_length, num_beams=1 ) - self.assertTrue(torch.equal(outputs_model_merged, outputs_model_not_merged)) + torch.testing.assert_close(outputs_model_not_merged, outputs_model_merged, rtol=RTOL, atol=ATOL) @parameterized.expand( 
grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True], "use_merged": [False, True]}) @@ -4674,7 +4687,7 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: self.assertIsInstance(io_outputs.logits, torch.Tensor) # compare tensor outputs - self.assertTrue(torch.equal(onnx_outputs.logits, io_outputs.logits)) + torch.testing.assert_close(onnx_outputs.logits, io_outputs.logits, atol=ATOL, rtol=RTOL) gc.collect() @@ -4726,7 +4739,7 @@ def test_compare_generation_to_io_binding( io_outputs = io_model.generate(**features, num_beams=num_beams) # compare tensor outputs - self.assertTrue(torch.equal(onnx_outputs, io_outputs)) + torch.testing.assert_close(onnx_outputs, io_outputs, atol=ATOL, rtol=RTOL) gc.collect() @@ -4776,7 +4789,9 @@ def test_compare_to_transformers(self, model_arch: str): self.assertIsInstance(onnx_outputs, ImageSuperResolutionOutput) self.assertTrue("reconstruction" in onnx_outputs) self.assertIsInstance(onnx_outputs.reconstruction, torch.Tensor) - torch.testing.assert_close(onnx_outputs.reconstruction, transformers_outputs.reconstruction, atol=1e-4) + torch.testing.assert_close( + onnx_outputs.reconstruction, transformers_outputs.reconstruction, atol=ATOL, rtol=RTOL + ) gc.collect() @@ -4975,8 +4990,8 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach self.assertTrue("logits" in onnx_outputs) self.assertIsInstance(onnx_outputs.logits, self.TENSOR_ALIAS_TO_TYPE[input_type]) - self.assertTrue( - torch.allclose(torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=1e-3) + torch.testing.assert_close( + torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=ATOL, rtol=RTOL ) if use_cache: @@ -4990,11 +5005,7 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach for ort_pkv, trfs_pkv in zip( onnx_outputs["past_key_values"][i], transformers_outputs["past_key_values"][i] ): - ort_pkv = torch.Tensor(ort_pkv) - 
self.assertTrue( - torch.allclose(ort_pkv, trfs_pkv, atol=1e-3), - f" Maxdiff: {torch.abs(ort_pkv - trfs_pkv).max()}", - ) + torch.testing.assert_close(torch.Tensor(ort_pkv), trfs_pkv, atol=ATOL, rtol=RTOL) gc.collect() @@ -5095,7 +5106,6 @@ def test_pipeline_on_rocm_ep(self, test_name: str, model_arch: str, use_cache: b self.assertTrue(isinstance(outputs[0]["generated_text"], str)) @parameterized.expand(SUPPORTED_ARCHITECTURES[:1]) - @pytest.mark.cuda_ep_test # mark as GPU test as well to run the without/with cache timing test on the slow tests def test_compare_with_and_without_past_key_values(self, model_arch: str): model_args = {"test_name": model_arch + "_False", "model_arch": model_arch, "use_cache": False} self._setup(model_args) @@ -5124,7 +5134,7 @@ def test_compare_with_and_without_past_key_values(self, model_arch: str): **features, min_new_tokens=self.GENERATION_LENGTH, max_new_tokens=self.GENERATION_LENGTH, num_beams=1 ) - self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv)) + torch.testing.assert_close(outputs_model_with_pkv, outputs_model_without_pkv, rtol=RTOL, atol=ATOL) self.assertEqual(outputs_model_with_pkv.shape[1], self.GENERATION_LENGTH + 1) self.assertEqual(outputs_model_without_pkv.shape[1], self.GENERATION_LENGTH + 1) @@ -5171,7 +5181,7 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: self.assertIsInstance(io_outputs.logits, torch.Tensor) # compare tensor outputs - self.assertTrue(torch.equal(onnx_outputs.logits, io_outputs.logits)) + torch.testing.assert_close(onnx_outputs.logits, io_outputs.logits, atol=ATOL, rtol=RTOL) gc.collect() @@ -5218,7 +5228,7 @@ def test_compare_generation_to_io_binding( io_outputs = io_model.generate(**features, num_beams=num_beams) # compare tensor outputs - self.assertTrue(torch.equal(onnx_outputs, io_outputs)) + torch.testing.assert_close(onnx_outputs, io_outputs, atol=ATOL, rtol=RTOL) gc.collect() @@ -5308,7 +5318,7 @@ def 
test_compare_to_io_binding(self, *args, **kwargs): self.assertIsInstance(io_outputs.pooler_output, torch.Tensor) # compare tensor outputs - self.assertTrue(torch.equal(onnx_outputs.pooler_output, io_outputs.pooler_output)) + torch.testing.assert_close(onnx_outputs.pooler_output, io_outputs.pooler_output, atol=ATOL, rtol=RTOL) gc.collect() @@ -5453,7 +5463,9 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach self.assertTrue("logits" in onnx_outputs) self.assertIsInstance(onnx_outputs.logits, self.TENSOR_ALIAS_TO_TYPE[input_type]) - self.assertTrue(torch.allclose(torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=1e-4)) + torch.testing.assert_close( + torch.Tensor(onnx_outputs.logits), transformers_outputs.logits, atol=ATOL, rtol=RTOL + ) gc.collect() @@ -5490,7 +5502,7 @@ def test_compare_with_and_without_past_key_values(self, model_arch: str): **inputs, min_new_tokens=self.GENERATION_LENGTH, max_new_tokens=self.GENERATION_LENGTH, num_beams=1 ) - self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv)) + torch.testing.assert_close(outputs_model_with_pkv, outputs_model_without_pkv, rtol=RTOL, atol=ATOL) self.assertEqual(outputs_model_with_pkv.shape[1], self.GENERATION_LENGTH + 1) self.assertEqual(outputs_model_without_pkv.shape[1], self.GENERATION_LENGTH + 1) @@ -5536,7 +5548,7 @@ def test_compare_merged_and_not_merged_models_outputs(self, test_name: str, mode outputs_model_not_merged = model_not_merged.generate(**inputs) outputs_model_merged = model_merged.generate(**inputs) - self.assertTrue(torch.equal(outputs_model_merged, outputs_model_not_merged)) + torch.testing.assert_close(outputs_model_not_merged, outputs_model_merged, rtol=RTOL, atol=ATOL) @parameterized.expand( grid_parameters({"model_arch": SUPPORTED_ARCHITECTURES, "use_cache": [True], "use_merged": [False, True]}) @@ -5582,7 +5594,7 @@ def test_compare_to_io_binding(self, test_name: str, model_arch: str, use_cache: 
self.assertTrue("logits" in io_outputs) self.assertIsInstance(io_outputs.logits, torch.Tensor) - self.assertTrue(torch.allclose(onnx_outputs.logits, io_outputs.logits, atol=1e-4)) + torch.testing.assert_close(onnx_outputs.logits, io_outputs.logits, atol=ATOL, rtol=RTOL) gc.collect() @@ -5629,7 +5641,7 @@ def test_compare_generation_to_io_binding( io_outputs = io_model.generate(**inputs, num_beams=num_beams) # compare tensor outputs - self.assertTrue(torch.equal(onnx_outputs, io_outputs)) + torch.testing.assert_close(onnx_outputs, io_outputs, atol=ATOL, rtol=RTOL) gc.collect()