diff --git a/_doc/api/validation_cpu.rst b/_doc/api/validation_cpu.rst index 53430645..bf3b27bf 100644 --- a/_doc/api/validation_cpu.rst +++ b/_doc/api/validation_cpu.rst @@ -16,4 +16,12 @@ _validation .. autofunction:: onnx_extended.validation.cpu._validation.benchmark_cache_tree +.. autofunction:: onnx_extended.validation.cpu._validation.double2float_rn + .. autofunction:: onnx_extended.validation.cpu._validation.murmurhash3_bytes_s32 + +.. autofunction:: onnx_extended.validation.cpu._validation.float2half_rn + +.. autofunction:: onnx_extended.validation.cpu._validation.half2float + +.. autofunction:: onnx_extended.validation.cpu._validation.has_sse3 diff --git a/_unittests/ut_validation/test_cpu_fpemu.cpp b/_unittests/ut_validation/test_cpu_fpemu.cpp index bea48d8d..4d8f9c77 100644 --- a/_unittests/ut_validation/test_cpu_fpemu.cpp +++ b/_unittests/ut_validation/test_cpu_fpemu.cpp @@ -5,6 +5,9 @@ using namespace cpu_fpemu; void test_cast() { + +#if defined(__SSSE3__) + float f = 1.f; double d = 1.f; float ff = __double2float_rn(d); @@ -12,6 +15,9 @@ void test_cast() { unsigned short u = __float2half_rn(f); float bu = __half2float(u); ASSERT_THROW(f == bu); + +#endif + } int main(int, char**) { diff --git a/_unittests/ut_validation/test_cpu_fpemu.py b/_unittests/ut_validation/test_cpu_fpemu.py index 7c60e462..fa4a89bb 100644 --- a/_unittests/ut_validation/test_cpu_fpemu.py +++ b/_unittests/ut_validation/test_cpu_fpemu.py @@ -1,14 +1,17 @@ import unittest from onnx_extended.ext_test_case import ExtTestCase -from onnx_extended.validation.cpu._validation import ( - double2float_rn, - float2half_rn, - half2float, -) +from onnx_extended.validation.cpu._validation import has_sse3 class TestCpuFpEmu(ExtTestCase): + @unittest.skipIf(not has_sse3(), "SSE3 not available") def test_cast(self): + from onnx_extended.validation.cpu._validation import ( + double2float_rn, + float2half_rn, + half2float, + ) + self.assertEqual(double2float_rn(1), 1) 
self.assertEqual(half2float(float2half_rn(1)), 1) diff --git a/onnx_extended/validation/cpu/_validation.cpp b/onnx_extended/validation/cpu/_validation.cpp index d09ad332..ef079f10 100644 --- a/onnx_extended/validation/cpu/_validation.cpp +++ b/onnx_extended/validation/cpu/_validation.cpp @@ -87,6 +87,13 @@ The code is `benchmark_cache_tree :return: hash )pbdoc"); +#if defined(__SSSE3__) + + m.def( + "has_sse3", []() -> bool { return true; }, + R"pbdoc(Tells if SSE3 instructions are available. +They are needed to convert float to half and half to float.)pbdoc"); + m.def("double2float_rn", &cpu_fpemu::__double2float_rn, py::arg("d"), R"pbdoc(Converts a double into float.)pbdoc"); @@ -96,6 +103,15 @@ The code is `benchmark_cache_tree m.def("half2float", &cpu_fpemu::__half2float, py::arg("d"), R"pbdoc(Converts a half represented as an unsigned short into float.)pbdoc"); +#else + + m.def( + "has_sse3", []() -> bool { return false; }, + R"pbdoc(Tells if SSE3 instructions are available. +They are needed to convert float to half and half to float.)pbdoc"); + +#endif + m.def("sparse_struct_to_dense", &sparse_struct_to_dense, py::arg("v"), R"pbdoc(Converts a sparse structure stored in a float tensor into a dense vector.)pbdoc"); diff --git a/onnx_extended/validation/cpu/cpu_fpemu.hpp b/onnx_extended/validation/cpu/cpu_fpemu.hpp index d7b84f3a..25cef20e 100644 --- a/onnx_extended/validation/cpu/cpu_fpemu.hpp +++ b/onnx_extended/validation/cpu/cpu_fpemu.hpp @@ -13,10 +13,16 @@ #pragma once +#if defined(__SSSE3__) + #include <immintrin.h> +#endif + namespace cpu_fpemu { +#if defined(__SSSE3__) + inline float __double2float_rn(double inval) { float out[4] = {0}; __m128 vout = _mm_cvtpd_ps(_mm_set1_pd(inval)); @@ -47,4 +53,6 @@ inline float __half2float(unsigned short h_val) { return _cvtsh_ss(h_val); } #endif +#endif + } // namespace cpu_fpemu diff --git a/pyproject.toml b/pyproject.toml index d241d9cb..a08600b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -113,7 +113,7 @@ archs = 
["x86_64"] build = "cp*" skip = "pypy* *musllinux* cp36-* cp37-* cp38-* cp39-* cp312-* cp313-* cp314-*" manylinux-x86_64-image = "manylinux_2_28" -before-build = "pip install auditwheel-symbols abi3audit" +before-build = "pip install auditwheel-symbols abi3audit;python -c 'import sysconfig;print(sysconfig.get_platform())'" build-verbosity = 1 repair-wheel-command = "auditwheel-symbols --manylinux 2_28 {wheel} ; abi3audit {wheel} ; auditwheel repair -w {dest_dir} {wheel} || exit 0" test-command = "yum list installed&&python -m pip install onnxruntime&&python -c \"import onnx_extended;onnx_extended.check_installation(val=True,ortops=True)\"&&python -c \"import onnx_extended;onnx_extended.check_installation(val=True,ortcy=True,verbose=True)\"" @@ -121,7 +121,9 @@ test-command = "yum list installed&&python -m pip install onnxruntime&&python -c [tool.cibuildwheel.macos] archs = ["x86_64"] build = "cp*" +before-build = "brew install llvm libomp" skip = "pypy* pp* cp36-* cp37-* cp38-* cp39-* cp312-* cp313-* cp314-*" +# environment="LDFLAGS='-L/opt/homebrew/opt/llvm/lib -L/opt/homebrew/opt/libomp/lib' CPPFLAGS='-I/opt/homebrew/opt/llvm/include -I/opt/homebrew/opt/libomp/include'" [tool.cibuildwheel.windows] archs = ["AMD64"]