From 1a3c7f209163dfbb56005ecbf02a4a3b46c0d2f2 Mon Sep 17 00:00:00 2001 From: Elliot Saba Date: Tue, 11 Jun 2024 17:08:40 -0700 Subject: [PATCH 1/5] Add `LBT_FORCE_*` environment variable overrides This provides a flexible mechanism through which LBT's autodetection facilities can be overridden. It enables debugging strange behaviors without needing to recompile LBT from scratch to disable a certain layer of its behavior. --- src/Makefile | 2 +- src/autodetection.c | 32 +++++++++++++++++ src/env_utils.c | 62 ++++++++++++++++++++++++++++++++ src/libblastrampoline.c | 11 +++--- src/libblastrampoline_internal.h | 6 ++++ test/isamax_test/Makefile | 15 ++++++++ test/isamax_test/isamax_test.c | 16 +++++++++ test/runtests.jl | 17 +++++++-- 8 files changed, 151 insertions(+), 10 deletions(-) create mode 100644 src/env_utils.c create mode 100644 test/isamax_test/Makefile create mode 100644 test/isamax_test/isamax_test.c diff --git a/src/Makefile b/src/Makefile index 384a17a..859acf6 100644 --- a/src/Makefile +++ b/src/Makefile @@ -15,7 +15,7 @@ maintarget=$(word 1,$(TARGET_LIBRARIES)) all: $(maintarget) # Objects we'll build -MAIN_OBJS := libblastrampoline.o dl_utils.o config.o \ +MAIN_OBJS := libblastrampoline.o dl_utils.o env_utils.o config.o \ autodetection.o \ threading.o deepbindless.o trampolines/trampolines_$(ARCH).o diff --git a/src/autodetection.c b/src/autodetection.c index 86ee124..b339881 100644 --- a/src/autodetection.c +++ b/src/autodetection.c @@ -96,6 +96,12 @@ const char * autodetect_symbol_suffix(void * handle, const char * suffix_hint) { * incorrect `N` to cause it to change its return value based on how it is interpreting arugments. */ int32_t autodetect_blas_interface(void * isamax_addr) { + if (env_lowercase_match("LBT_FORCE_INTERFACE", "ilp64")) { + return LBT_INTERFACE_ILP64; + } + if (env_lowercase_match("LBT_FORCE_INTERFACE", "lp64")) { + return LBT_INTERFACE_LP64; + } // Typecast to function pointer for easier usage below int64_t (*isamax)(int64_t *, float *, int64_t *) = isamax_addr; @@ -145,6 +151,12 @@ int32_t autodetect_blas_interface(void * isamax_addr) { * and determine if the internal pointer dereferences were 32-bit or 64-bit. */ int32_t autodetect_lapack_interface(void * dpotrf_addr) { + if (env_lowercase_match("LBT_FORCE_INTERFACE", "ilp64")) { + return LBT_INTERFACE_ILP64; + } + if (env_lowercase_match("LBT_FORCE_INTERFACE", "lp64")) { + return LBT_INTERFACE_LP64; + } // Typecast to function pointer for easier usage below void (*dpotrf)(char *, int64_t *, double *, int64_t *, int64_t *) = dpotrf_addr; @@ -196,6 +208,12 @@ int32_t autodetect_interface(void * handle, const char * suffix) { #ifdef COMPLEX_RETSTYLE_AUTODETECTION int32_t autodetect_complex_return_style(void * handle, const char * suffix) { + if (env_lowercase_match("LBT_FORCE_RETSTYLE", "normal")) { + return LBT_COMPLEX_RETSTYLE_NORMAL; + } + if (env_lowercase_match("LBT_FORCE_RETSTYLE", "argument")) { + return LBT_COMPLEX_RETSTYLE_ARGUMENT; + } char symbol_name[MAX_SYMBOL_LEN]; build_symbol_name(symbol_name, "zdotc_", suffix); @@ -241,6 +259,13 @@ int32_t autodetect_complex_return_style(void * handle, const char * suffix) { #ifdef F2C_AUTODETECTION int32_t autodetect_f2c(void * handle, const char * suffix) { + if (env_lowercase_match("LBT_FORCE_F2C", "plain")) { + return LBT_F2C_PLAIN; + } + if (env_lowercase_match("LBT_FORCE_F2C", "required")) { + return LBT_F2C_REQUIRED; + } + char symbol_name[MAX_SYMBOL_LEN]; // Attempt BLAS `sdot()` test @@ -278,6 +303,13 @@ int32_t autodetect_f2c(void * handle, const char * suffix) { #ifdef CBLAS_DIVERGENCE_AUTODETECTION int32_t autodetect_cblas_divergence(void * handle, const char * suffix) { + if (env_lowercase_match("LBT_FORCE_CBLAS", "conformant")) { + return LBT_CBLAS_CONFORMANT; + } + if (env_lowercase_match("LBT_FORCE_CBLAS", "divergent")) { + return LBT_CBLAS_DIVERGENT; + } + char symbol_name[MAX_SYMBOL_LEN]; build_symbol_name(symbol_name, "zdotc_", suffix); diff --git a/src/env_utils.c b/src/env_utils.c new file mode 100644 index 0000000..c429fe7 --- /dev/null +++ b/src/env_utils.c @@ -0,0 +1,62 @@ +#include "libblastrampoline_internal.h" +#include + +const char * env_lowercase(const char * env_name) { + // Get environment value, if it's not set, return false + char * env_value = getenv(env_name); + if (env_value == NULL) { + return NULL; + } + + // If it is set, convert to lowercase. + env_value = strdup(env_value); + for (size_t idx=0; idx #include #include +#include #include // Load in our publicly-defined functions/types @@ -75,6 +76,11 @@ void * lookup_self_symbol(const char * symbol_name); const char * lookup_self_path(); void close_library(void * handle); +// Functions in `env_utils.c` +uint8_t env_lowercase_match(const char * env_name, const char * value); +uint8_t env_lowercase_match_any(const char * env_name, uint32_t num_values, ...); +uint8_t env_match_bool(const char * env_name, uint8_t default_value); + // Functions in `autodetection.c` void build_symbol_name(char * out, const char *symbol_name, const char *suffix); const char * autodetect_symbol_suffix(void * handle, const char * suffix_hint); diff --git a/test/isamax_test/Makefile b/test/isamax_test/Makefile new file mode 100644 index 0000000..ac46fb7 --- /dev/null +++ b/test/isamax_test/Makefile @@ -0,0 +1,15 @@ +include ../../src/Make.inc + +all: $(prefix)/isamax_test$(EXE) + +$(prefix): + @mkdir -p $@ + +$(prefix)/isamax_test$(EXE): isamax_test.c | $(prefix) + @$(CC) -o $@ $(CFLAGS) $^ $(LDFLAGS) + +clean: + @rm -f $(prefix)/isamax_test$(EXE) + +run: $(prefix)/isamax_test$(EXE) + @$(prefix)/isamax_test$(EXE) diff --git a/test/isamax_test/isamax_test.c b/test/isamax_test/isamax_test.c new file mode 100644 index 0000000..0fddcd5 --- /dev/null +++ b/test/isamax_test/isamax_test.c @@ -0,0 +1,16 @@ +#include +#include + +extern int64_t isamax_64_(int64_t *, float *, int64_t *); + +#define N 4 +int main() +{ + int64_t n = 0xffffffff00000003; + float X[3] = {1.0f, 2.0f, 1.0f}; + int64_t incx = 1; + + int64_t max_idx = isamax_64_(&n, X, &incx); + printf("max_idx: %lld\n", max_idx); + return 0; +} diff --git a/test/runtests.jl b/test/runtests.jl index 5ca76f9..e8bf37e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -54,7 +54,7 @@ function run_test((test_name, test_expected_outputs, expect_success), libblas_na cmd = `$(dir)/$(test_name)` p, output = capture_output(addenv(cmd, env)) - expected_return_value = success(p) ^ expect_success + expected_return_value = !xor(success(p), expect_success) if !expected_return_value @error("Test failed", env, p.exitcode, p.termsignal, expect_success) println(output) @@ -63,7 +63,7 @@ function run_test((test_name, test_expected_outputs, expect_success), libblas_na # Expect to see the path to `libblastrampoline` within the output, # since we have `LBT_VERBOSE=1` and at startup, it announces its own path: - if startswith(libblas_name, "blastrampoline") + if startswith(libblas_name, "blastrampoline") && expect_success lbt_libdir = first(libdirs) @test occursin(lbt_libdir, output) end @@ -131,12 +131,25 @@ lbt_dir = joinpath(lbt_dir, binlib) @testset "LBT -> OpenBLAS_jll ($(openblas_interface))" begin libdirs = unique(vcat(lbt_dir, OpenBLAS_jll.LIBPATH_list..., CompilerSupportLibraries_jll.LIBPATH_list...)) run_all_tests(blastrampoline_link_name(), libdirs, openblas_interface, OpenBLAS_jll.libopenblas_path) + + # Test that setting bad `LBT_FORCE_*` values actually breaks things + withenv("LBT_FORCE_RETSTYLE" => "ARGUMENT") do + zdotc_fail = ("zdotc_test", [], false) + run_test(zdotc_fail, blastrampoline_link_name(), libdirs, openblas_interface, OpenBLAS_jll.libopenblas_path) + end end # And again, but this time with OpenBLAS32_jll @testset "LBT -> OpenBLAS32_jll (LP64)" begin libdirs = unique(vcat(lbt_dir, OpenBLAS32_jll.LIBPATH_list..., CompilerSupportLibraries_jll.LIBPATH_list...)) run_all_tests(blastrampoline_link_name(), libdirs, :LP64, OpenBLAS32_jll.libopenblas_path) + + # Test that setting bad `LBT_FORCE_*` values actually breaks things + withenv("LBT_FORCE_INTERFACE" => "ILP64") do + # `max_idx: 2` is incorrect, it's what happens when ILP64 data is given to an LP64 backend + isamax_fail = ("isamax_test", ["max_idx: 2"], true) + run_test(isamax_fail, blastrampoline_link_name(), libdirs, :ILP64, OpenBLAS32_jll.libopenblas_path) + end end # Test against MKL_jll using `libmkl_rt`, which is :LP64 by default From c953e91be7d73483e640f028881eb0d563149e60 Mon Sep 17 00:00:00 2001 From: Elliot Saba Date: Wed, 12 Jun 2024 10:47:14 -0700 Subject: [PATCH 2/5] Add `COMPLEX_RETSTYLE_FNDA` for Windows x64 Windows x64 automatically forces return values onto the stack if they are larger than 64 bits wide [0]. This causes return values from e.g. `zdotc` to be pushed onto a secret first argument, but not the return values from e.g. `cdotc`. To address this, we add a new complex return style, "Float Normal, Double Argument", to specify that `complex float`-returning functions use the normal return style, whereas `complex double`-returning functions use the argument return style. This should fix https://github.com/JuliaLinearAlgebra/BLISBLAS.jl/issues/15 [0] https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170 --- src/autodetection.c | 78 ++++++++++++++++++++----- src/cblas_adapters.c | 4 +- src/complex_return_style_adapters.c | 16 ++--- src/libblastrampoline.c | 41 +++++++------ src/libblastrampoline.h | 5 ++ src/libblastrampoline_complex_retdata.h | 55 ++++++++++++++--- test/direct.jl | 2 +- test/utils.jl | 1 + 8 files changed, 151 insertions(+), 51 deletions(-) diff --git a/src/autodetection.c b/src/autodetection.c index b339881..2fed694 100644 --- a/src/autodetection.c +++ b/src/autodetection.c @@ -214,6 +214,9 @@ int32_t autodetect_complex_return_style(void * handle, const char * suffix) { if (env_lowercase_match("LBT_FORCE_RETSTYLE", "argument")) { return LBT_COMPLEX_RETSTYLE_ARGUMENT; } + if (env_lowercase_match("LBT_FORCE_RETSTYLE", "fnda")) { + return LBT_COMPLEX_RETSTYLE_FNDA; + } char symbol_name[MAX_SYMBOL_LEN]; build_symbol_name(symbol_name, "zdotc_", suffix); @@ -222,37 +225,84 @@ int32_t autodetect_complex_return_style(void * handle, const char * suffix) { return LBT_COMPLEX_RETSTYLE_UNKNOWN; } + build_symbol_name(symbol_name, "cdotc_", suffix); + void * cdotc_addr = lookup_symbol(handle, symbol_name); + if (cdotc_addr == NULL) { + return LBT_COMPLEX_RETSTYLE_UNKNOWN; + } + // Typecast to function pointer for easier usage below double complex (*zdotc_normal)( int64_t *, double complex *, int64_t *, double complex *, int64_t *) = zdotc_addr; void (*zdotc_retarg)(double complex *, int64_t *, double complex *, int64_t *, double complex *, int64_t *) = zdotc_addr; + // Typecast to function pointer for easier usage below + float complex (*cdotc_normal)( int64_t *, float complex *, int64_t *, float complex *, int64_t *) = cdotc_addr; + void (*cdotc_retarg)(float complex *, int64_t *, float complex *, int64_t *, float complex *, int64_t *) = cdotc_addr; + /* * First, check to see if `zdotc` zeros out the first argument if all arguments are zero. * Supposedly, most well-behaved implementations will return `0 + 0*I` if the length of * the inputs is zero; so if it is using a "return argument", that's a good way to find out. * - * We detect this by setting `retval` to an initial value of `0.0 + 1.0*I`. This has the - * added benefit of being interpretable as `0` if looked at as an `int{32,64}_t *`, which - * makes this invocation safe across the full normal-return/argument-return vs. lp64/ilp64 - * compatibility square. + * We detect this by setting `retval` to an initial value of `-1` typecast to a complex + * value. The floating-point values are unimportant as they will be written to, but if + * it is interpreted as an `int{32,64}_t`, it will be a negative value (which is not + * allowed and should end the routine immediately). This makes this invocation safe + * across the full normal/argument, lp64/ilp64, cdotc/zdotc compatibility cube. */ - double complex retval = 0.0 + 1.0*I; + double complex retval_double = 0.0 + 1.0*I; int64_t zero = 0; - double complex zeroc = 0.0 + 0.0*I; - zdotc_retarg(&retval, &zero, &zeroc, &zero, &zeroc, &zero); + double complex zeroc_double = 0.0 + 0.0*I; + zdotc_retarg(&retval_double, &zero, &zeroc_double, &zero, &zeroc_double, &zero); - if (creal(retval) == 0.0 && cimag(retval) == 0.0) { - return LBT_COMPLEX_RETSTYLE_ARGUMENT; + /* + * Next, do the same with `cdotc`, in order to detect situations where the ABI is + * automatically inserting an extra argument to return 128-bit-wide values. + * We call this `FNDA` for "Float Normal, Double Argument" style. + */ + int64_t neg1 = -1; + float complex retval_float = *(complex float *)(&neg1); + float complex zeroc_float = 0.0f + 0.0f*I; + cdotc_retarg(&retval_float, &zero, &zeroc_float, &zero, &zeroc_float, &zero); + + if (creal(retval_double) == 0.0 && cimag(retval_double) == 0.0) { + // If the double values were reset, and the float values were also, + // this is easy, we're just always argument-style: + if (creal(retval_float) == 0.0f && cimag(retval_float) == 0.0f) { + return LBT_COMPLEX_RETSTYLE_ARGUMENT; + } + + // If the float values were not, let's try the normal return style: + retval_float = 0.0f + 1.0f*I; + retval_float = cdotc_normal(&zero, &zeroc_float, &zero, &zeroc_float, &zero); + + + // If this works, we are in FNDA style (currently only observed on Windows x64) + if (creal(retval_float) == 0.0f && cimag(retval_float) == 0.0f) { + return LBT_COMPLEX_RETSTYLE_FNDA; + } + + // Otherwise, cdotc is throwing a fit and we don't know what's up. + return LBT_COMPLEX_RETSTYLE_UNKNOWN; } - // If it was _not_ reset, let's hazard a guess that we're dealing with a normal return style: - retval = 0.0 + 1.0*I; - retval = zdotc_normal(&zero, &zeroc, &zero, &zeroc, &zero); - if (creal(retval) == 0.0 && cimag(retval) == 0.0) { + // If our double values were _not_ reset, let's hazard a guess that + // we're dealing with a normal return style and test both types again: + retval_double = 0.0 + 1.0*I; + retval_double = zdotc_normal(&zero, &zeroc_double, &zero, &zeroc_double, &zero); + retval_float = 0.0f + 1.0f*I; + retval_float = cdotc_normal(&zero, &zeroc_float, &zero, &zeroc_float, &zero); + + + // We only test for both working; we don't have a retstyle for float + // being argument style and double being normal style. + if ((creal(retval_double) == 0.0 && cimag(retval_double) == 0.0) && + (creal(retval_float) == 0.0f && cimag(retval_float) == 0.0f)) { return LBT_COMPLEX_RETSTYLE_NORMAL; } - // If that was not reset either, we have no idea what's going on. + // If we get here, zdotc and cdotc are being uncooperative and we + // do not appreciate it at all, not we don't my precious. return LBT_COMPLEX_RETSTYLE_UNKNOWN; } #endif // COMPLEX_RETSTYLE_AUTODETECTION diff --git a/src/cblas_adapters.c b/src/cblas_adapters.c index 98afc63..42af7f0 100644 --- a/src/cblas_adapters.c +++ b/src/cblas_adapters.c @@ -73,8 +73,8 @@ void lbt_cblas_cdotc_sub(const int32_t N, } extern float complex cdotc_64_(const int64_t *, - const float complex *, const int64_t *, - const float complex *, const int64_t *); + const float complex *, const int64_t *, + const float complex *, const int64_t *); void lbt_cblas_cdotc_sub64_(const int64_t N, const float complex *X, const int64_t incX, const float complex *Y, const int64_t incY, diff --git a/src/complex_return_style_adapters.c b/src/complex_return_style_adapters.c index 7b20c14..e5f94f6 100644 --- a/src/complex_return_style_adapters.c +++ b/src/complex_return_style_adapters.c @@ -72,8 +72,8 @@ extern void (*cmplxret_cdotc__addr)(float complex * z, const float complex *, const int32_t *, const float complex *, const int32_t *); float complex cmplxret_cdotc_(const int32_t * N, - const float complex *X, const int32_t * incX, - const float complex *Y, const int32_t * incY) + const float complex *X, const int32_t * incX, + const float complex *Y, const int32_t * incY) { float complex c; cmplxret_cdotc__addr(&c, N, X, incX, Y, incY); @@ -85,8 +85,8 @@ extern void (*cmplxret_cdotc_64__addr)(float complex * z, const float complex *, const int64_t *, const float complex *, const int64_t *); float complex cmplxret_cdotc_64_(const int64_t * N, - const float complex *X, const int64_t * incX, - const float complex *Y, const int64_t * incY) + const float complex *X, const int64_t * incX, + const float complex *Y, const int64_t * incY) { float complex c; cmplxret_cdotc_64__addr(&c, N, X, incX, Y, incY); @@ -100,8 +100,8 @@ extern void (*cmplxret_cdotu__addr)(float complex * z, const float complex *, const int32_t *, const float complex *, const int32_t *); float complex cmplxret_cdotu_(const int32_t * N, - const float complex *X, const int32_t * incX, - const float complex *Y, const int32_t * incY) + const float complex *X, const int32_t * incX, + const float complex *Y, const int32_t * incY) { float complex c; cmplxret_cdotu__addr(&c, N, X, incX, Y, incY); @@ -113,8 +113,8 @@ extern void (*cmplxret_cdotu_64__addr)(float complex * z, const float complex *, const int64_t *, const float complex *, const int64_t *); float complex cmplxret_cdotu_64_(const int64_t * N, - const float complex *X, const int64_t * incX, - const float complex *Y, const int64_t * incY) + const float complex *X, const int64_t * incX, + const float complex *Y, const int64_t * incY) { float complex c; cmplxret_cdotu_64__addr(&c, N, X, incX, Y, incY); diff --git a/src/libblastrampoline.c b/src/libblastrampoline.c index 0a559b8..d253387 100644 --- a/src/libblastrampoline.c +++ b/src/libblastrampoline.c @@ -70,26 +70,29 @@ int32_t set_forward_by_index(int32_t symbol_idx, const void * addr, int32_t inte } #ifdef COMPLEX_RETSTYLE_AUTODETECTION - if (complex_retstyle == LBT_COMPLEX_RETSTYLE_ARGUMENT) { - // Check to see if this symbol is one of the complex-returning functions - for (int complex_symbol_idx=0; cmplxret_func_idxs[complex_symbol_idx] != -1; ++complex_symbol_idx) { - // Skip any symbols that aren't ours - if (cmplxret_func_idxs[complex_symbol_idx] != symbol_idx) - continue; - - // Report to the user that we're cblas-wrapping this one - if (verbose) { - char exported_name[MAX_SYMBOL_LEN]; - build_symbol_name(exported_name, exported_func_names[symbol_idx], interface == LBT_INTERFACE_ILP64 ? "64_" : ""); - printf(" - [%04d] complex(%s)\n", symbol_idx, exported_name); - } + for (int array_idx=0; array_idx < sizeof(cmplxret_func_idxs)/sizeof(int *); ++array_idx) { + if ((complex_retstyle == LBT_COMPLEX_RETSTYLE_ARGUMENT) || + ((complex_retstyle == LBT_COMPLEX_RETSTYLE_FNDA) && array_idx == 1)) { + // Check to see if this symbol is one of the complex-returning functions + for (int complex_symbol_idx=0; cmplxret_func_idxs[array_idx][complex_symbol_idx] != -1; ++complex_symbol_idx) { + // Skip any symbols that aren't ours + if (cmplxret_func_idxs[array_idx][complex_symbol_idx] != symbol_idx) + continue; + + // Report to the user that we're cmplxret-wrapping this one + if (verbose) { + char exported_name[MAX_SYMBOL_LEN]; + build_symbol_name(exported_name, exported_func_names[symbol_idx], interface == LBT_INTERFACE_ILP64 ? "64_" : ""); + printf(" - [%04d] complex(%s)\n", symbol_idx, exported_name); + } - if (interface == LBT_INTERFACE_LP64) { - (*cmplxret_func32_addrs[complex_symbol_idx]) = (*exported_func32_addrs[symbol_idx]); - (*exported_func32_addrs[symbol_idx]) = cmplxret32_func_wrappers[complex_symbol_idx]; - } else { - (*cmplxret_func64_addrs[complex_symbol_idx]) = (*exported_func64_addrs[symbol_idx]); - (*exported_func64_addrs[symbol_idx]) = cmplxret64_func_wrappers[complex_symbol_idx]; + if (interface == LBT_INTERFACE_LP64) { + (*cmplxret_func32_addrs[array_idx][complex_symbol_idx]) = (*exported_func32_addrs[symbol_idx]); + (*exported_func32_addrs[symbol_idx]) = cmplxret_func32_wrappers[array_idx][complex_symbol_idx]; + } else { + (*cmplxret_func64_addrs[array_idx][complex_symbol_idx]) = (*exported_func64_addrs[symbol_idx]); + (*exported_func64_addrs[symbol_idx]) = cmplxret_func64_wrappers[array_idx][complex_symbol_idx]; + } } } } diff --git a/src/libblastrampoline.h b/src/libblastrampoline.h index a000a80..250e82b 100644 --- a/src/libblastrampoline.h +++ b/src/libblastrampoline.h @@ -85,8 +85,13 @@ typedef struct { // Possible values for `retstyle` in `lbt_library_info_t` // These describe whether a library is using "normal" return value passing (e.g. through // the `XMM{0,1}` registers on x86_64, or the `ST{0,1}` floating-point registers on i686) +// This is further complicated by the fact that on certain platforms (such as Windows x64 +// this is dependent on the size of the value being returned, e.g. a complex64 value will +// be returned through registers, but a complex128 value will not. We therefore have a +// special value that denotes this situation) #define LBT_COMPLEX_RETSTYLE_NORMAL 0 #define LBT_COMPLEX_RETSTYLE_ARGUMENT 1 +#define LBT_COMPLEX_RETSTYLE_FNDA 2 // "Float Normal, Double Argument" #define LBT_COMPLEX_RETSTYLE_UNKNOWN -1 // Possible values for `cblas` in `lbt_library_info_t` diff --git a/src/libblastrampoline_complex_retdata.h b/src/libblastrampoline_complex_retdata.h index ea9ecc9..7ff38b4 100644 --- a/src/libblastrampoline_complex_retdata.h +++ b/src/libblastrampoline_complex_retdata.h @@ -11,19 +11,35 @@ COMPLEX128_FUNCS(XX_64) // Build mapping from cmplxret-index to `_addr` instance #define XX(name, index) &cmplxret_##name##_addr, #define XX_64(name, index) &cmplxret_##name##64__addr, -const void ** cmplxret_func32_addrs[] = { +const void ** cmplx64ret_func32_addrs[] = { COMPLEX64_FUNCS(XX) + NULL +}; +const void ** cmplx128ret_func32_addrs[] = { COMPLEX128_FUNCS(XX) NULL }; -const void ** cmplxret_func64_addrs[] = { +const void ** cmplx64ret_func64_addrs[] = { COMPLEX64_FUNCS(XX_64) + NULL +}; +const void ** cmplx128ret_func64_addrs[] = { COMPLEX128_FUNCS(XX_64) NULL }; #undef XX #undef XX_64 +const void *** cmplxret_func32_addrs[] = { + cmplx64ret_func32_addrs, + cmplx128ret_func32_addrs +}; +const void *** cmplxret_func64_addrs[] = { + cmplx64ret_func64_addrs, + cmplx128ret_func64_addrs +}; + + // Forward-declare some functions #define XX(name, index) extern const void * cmplxret_##name ; @@ -40,24 +56,49 @@ COMPLEX128_FUNCS(XX_64) // locations, allowing a cblas index -> function lookup #define XX(name, index) &cmplxret_##name, #define XX_64(name, index) &cmplxret_##name##64_, -const void ** cmplxret32_func_wrappers[] = { +const void ** cmplx64ret_func32_wrappers[] = { COMPLEX64_FUNCS(XX) + NULL +}; +const void ** cmplx128ret_func32_wrappers[] = { COMPLEX128_FUNCS(XX) NULL }; -const void ** cmplxret64_func_wrappers[] = { +const void ** cmplx64ret_func64_wrappers[] = { COMPLEX64_FUNCS(XX_64) + NULL +}; +const void ** cmplx128ret_func64_wrappers[] = { COMPLEX128_FUNCS(XX_64) NULL }; #undef XX #undef XX_64 -// Finally, an array that maps cblas index -> exported symbol index +const void *** cmplxret_func32_wrappers[] = { + cmplx64ret_func32_wrappers, + cmplx128ret_func32_wrappers +}; +const void *** cmplxret_func64_wrappers[] = { + cmplx64ret_func64_wrappers, + cmplx128ret_func64_wrappers +}; + + + +// Finally, an array that maps cmplxret index -> exported symbol index #define XX(name, index) index, -const int cmplxret_func_idxs[] = { +const int cmplx64ret_func_idxs[] = { COMPLEX64_FUNCS(XX) + -1 +}; +const int cmplx128ret_func_idxs[] = { COMPLEX128_FUNCS(XX) -1 }; -#undef XX \ No newline at end of file +#undef XX + +const int * cmplxret_func_idxs[] = { + cmplx64ret_func_idxs, + cmplx128ret_func_idxs +}; diff --git a/test/direct.jl b/test/direct.jl index 0bd4fe8..e1b19be 100644 --- a/test/direct.jl +++ b/test/direct.jl @@ -71,7 +71,7 @@ lbt_handle = dlopen("$(lbt_prefix)/$(binlib)/lib$(lbt_link_name).$(shlib_ext)", @test libs[1].f2c == LBT_F2C_PLAIN if Sys.ARCH ∈ (:x86_64, :aarch64) if Sys.iswindows() - @test libs[1].complex_retstyle == LBT_COMPLEX_RETSTYLE_ARGUMENT + @test libs[1].complex_retstyle == LBT_COMPLEX_RETSTYLE_FNDA else @test libs[1].complex_retstyle == LBT_COMPLEX_RETSTYLE_NORMAL end diff --git a/test/utils.jl b/test/utils.jl index 48f4580..63a4b89 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -147,6 +147,7 @@ const LBT_INTERFACE_ILP64 = 64 const LBT_F2C_PLAIN = 0 const LBT_COMPLEX_RETSTYLE_NORMAL = 0 const LBT_COMPLEX_RETSTYLE_ARGUMENT = 1 +const LBT_COMPLEX_RETSTYLE_FNDA = 2 const LBT_COMPLEX_RETSTYLE_UNKNOWN = -1 const LBT_CBLAS_CONFORMANT = 0 const LBT_CBLAS_DIVERGENT = 1 From 52aeef8d4265e05696147537a8e43c56a228060a Mon Sep 17 00:00:00 2001 From: Elliot Saba Date: Wed, 12 Jun 2024 12:10:36 -0700 Subject: [PATCH 3/5] Move to `cdotc` instead of `zdotc` to avoid Windows x64 confusion Our new `LBT_FORCE_RETSTYLE => ARGUMENT` doesn't work so well when it's already `ARGUMENT` on Windows x64. --- test/cdotc_test/Makefile | 15 +++++++++++++ .../zdotc_test.c => cdotc_test/cdotc_test.c} | 12 +++++----- test/runtests.jl | 22 +++++++++++-------- test/zdotc_test/Makefile | 15 ------------- 4 files changed, 34 insertions(+), 30 deletions(-) create mode 100644 test/cdotc_test/Makefile rename test/{zdotc_test/zdotc_test.c => cdotc_test/cdotc_test.c} (64%) delete mode 100644 test/zdotc_test/Makefile diff --git a/test/cdotc_test/Makefile b/test/cdotc_test/Makefile new file mode 100644 index 0000000..881af3c --- /dev/null +++ b/test/cdotc_test/Makefile @@ -0,0 +1,15 @@ +include ../../src/Make.inc + +all: $(prefix)/cdotc_test$(EXE) + +$(prefix): + @mkdir -p $@ + +$(prefix)/cdotc_test$(EXE): cdotc_test.c | $(prefix) + @$(CC) -o $@ $(CFLAGS) $^ $(LDFLAGS) + +clean: + @rm -f $(prefix)/cdotc_test$(EXE) + +run: $(prefix)/cdotc_test$(EXE) + @$(prefix)/cdotc_test$(EXE) diff --git a/test/zdotc_test/zdotc_test.c b/test/cdotc_test/cdotc_test.c similarity index 64% rename from test/zdotc_test/zdotc_test.c rename to test/cdotc_test/cdotc_test.c index 832d5a1..43507ca 100644 --- a/test/zdotc_test/zdotc_test.c +++ b/test/cdotc_test/cdotc_test.c @@ -10,13 +10,13 @@ typedef int64_t blasint; typedef int32_t blasint; #endif -extern void MANGLE(cblas_zdotc_sub)(blasint, double complex *, blasint, double complex *, blasint, double complex *); -extern double complex MANGLE(zdotc_)(blasint *, double complex *, blasint *, double complex *, blasint *); +extern void MANGLE(cblas_cdotc_sub)(blasint, float complex *, blasint, float complex *, blasint, float complex *); +extern float complex MANGLE(cdotc_)(blasint *, float complex *, blasint *, float complex *, blasint *); #define N 2 int main() { - double complex A[N], B[N]; + float complex A[N], B[N]; // Initialize `A` with known values (transposed into FORTRAN ordering) A[0] = 3.1 + 1.4*I; @@ -30,13 +30,13 @@ int main() blasint len = N; blasint inca = 1; blasint incb = 1; - complex double C; - MANGLE(cblas_zdotc_sub)(len, &A[0], inca, &B[0], incb, &C); + complex float C; + MANGLE(cblas_cdotc_sub)(len, &A[0], inca, &B[0], incb, &C); // Print out C printf("C (cblas) is: (%8.4f, %8.4f)\n", creal(C), cimag(C)); // Do the same thing, but with the FORTRAN interface - C = MANGLE(zdotc_)(&len, &A[0], &inca, &B[0], &incb); + C = MANGLE(cdotc_)(&len, &A[0], &inca, &B[0], &incb); printf("C (fortran) is: (%8.4f, %8.4f)\n", creal(C), cimag(C)); } diff --git a/test/runtests.jl b/test/runtests.jl index e8bf37e..4ee136b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -94,14 +94,14 @@ dpstrf = ("dpstrf_test", ("diag(A): 2.2601 1.8067 1.6970 0.4121", sgesv = ("sgesv_test", ("||b||^2 is: 3.0000",), true) sgesv_failure = ("sgesv_test", ("Error: no BLAS/LAPACK library loaded!",), false) sdot = ("sdot_test", ("C is: 1.9900",), true) -zdotc = ("zdotc_test", ( +cdotc = ("cdotc_test", ( "C (cblas) is: ( 1.4700, 3.8300)", "C (fortran) is: ( 1.4700, 3.8300)", ), true) # Helper function to run all the tests with the given arguments # Does not include `dgemmt` because that's MKL-only -function run_all_tests(args...; tests = [dgemm, dpstrf, sgesv, sdot, zdotc]) +function run_all_tests(args...; tests = [dgemm, dpstrf, sgesv, sdot, cdotc]) for test in tests run_test(test, args...) end @@ -133,10 +133,14 @@ lbt_dir = joinpath(lbt_dir, binlib) run_all_tests(blastrampoline_link_name(), libdirs, openblas_interface, OpenBLAS_jll.libopenblas_path) # Test that setting bad `LBT_FORCE_*` values actually breaks things + # This can be somewhat unpredictable (segfaulting sometimes, returning zero other times) + # so it's hard to test on CI, so we comment it out for now. + #= withenv("LBT_FORCE_RETSTYLE" => "ARGUMENT") do - zdotc_fail = ("zdotc_test", [], false) - run_test(zdotc_fail, blastrampoline_link_name(), libdirs, openblas_interface, OpenBLAS_jll.libopenblas_path) + cdotc_fail = ("cdotc_test", cdotc[2], false) + run_test(cdotc_fail, blastrampoline_link_name(), libdirs, openblas_interface, OpenBLAS_jll.libopenblas_path) end + =# end # And again, but this time with OpenBLAS32_jll @@ -156,7 +160,7 @@ end if MKL_jll.is_available() @testset "LBT -> MKL_jll (LP64)" begin libdirs = unique(vcat(lbt_dir, MKL_jll.LIBPATH_list..., CompilerSupportLibraries_jll.LIBPATH_list...)) - run_all_tests(blastrampoline_link_name(), libdirs, :LP64, MKL_jll.libmkl_rt_path; tests = [dgemm, dgemmt, dpstrf, sgesv, sdot, zdotc]) + run_all_tests(blastrampoline_link_name(), libdirs, :LP64, MKL_jll.libmkl_rt_path; tests = [dgemm, dgemmt, dpstrf, sgesv, sdot, cdotc]) end # Test that we can set MKL's interface via an environment variable to select ILP64, and LBT detects it properly @@ -164,7 +168,7 @@ if MKL_jll.is_available() @testset "LBT -> MKL_jll (ILP64, via env)" begin withenv("MKL_INTERFACE_LAYER" => "ILP64") do libdirs = unique(vcat(lbt_dir, MKL_jll.LIBPATH_list..., CompilerSupportLibraries_jll.LIBPATH_list...)) - run_all_tests(blastrampoline_link_name(), libdirs, :ILP64, MKL_jll.libmkl_rt_path; tests = [dgemm, dgemmt, dpstrf, sgesv, sdot, zdotc]) + run_all_tests(blastrampoline_link_name(), libdirs, :ILP64, MKL_jll.libmkl_rt_path; tests = [dgemm, dgemmt, dpstrf, sgesv, sdot, cdotc]) end end end @@ -177,7 +181,7 @@ veclib_blas_path = "/System/Library/Frameworks/Accelerate.framework/Versions/A/F if dlopen_e(veclib_blas_path) != C_NULL # Test that we can run BLAS-only tests without LAPACK loaded (`sgesv` test requires LAPACK symbols) @testset "LBT -> vecLib/libBLAS" begin - run_all_tests(blastrampoline_link_name(), [lbt_dir], :LP64, veclib_blas_path; tests=[dgemm, sdot, zdotc]) + run_all_tests(blastrampoline_link_name(), [lbt_dir], :LP64, veclib_blas_path; tests=[dgemm, sdot, cdotc]) end # With LAPACK as well, run all tests except `dgemmt` @@ -190,14 +194,14 @@ if dlopen_e(veclib_blas_path) != C_NULL if dlsym_e(veclib_lapack_handle, "dpotrf\$NEWLAPACK\$ILP64") != C_NULL @testset "LBT -> vecLib/libBLAS (ILP64)" begin veclib_blas_path_ilp64 = "$(veclib_blas_path)!\x1a\$NEWLAPACK\$ILP64" - run_all_tests(blastrampoline_link_name(), [lbt_dir], :ILP64, veclib_blas_path_ilp64; tests=[dgemm, sdot, zdotc]) + run_all_tests(blastrampoline_link_name(), [lbt_dir], :ILP64, veclib_blas_path_ilp64; tests=[dgemm, sdot, cdotc]) end @testset "LBT -> vecLib/libLAPACK (ILP64)" begin veclib_lapack_path_ilp64 = "$(veclib_lapack_path)!\x1a\$NEWLAPACK\$ILP64" @warn("dpstrf test broken on new LAPACK in Accelerate") dpstrf_broken = (dpstrf[1], "diag(A): 2.2601 1.7140 0.6206 1.1878", true) - run_all_tests(blastrampoline_link_name(), [lbt_dir], :ILP64, veclib_lapack_path_ilp64; tests=[dgemm, dpstrf_broken, sgesv, sdot, zdotc]) + run_all_tests(blastrampoline_link_name(), [lbt_dir], :ILP64, veclib_lapack_path_ilp64; tests=[dgemm, dpstrf_broken, sgesv, sdot, cdotc]) end end end diff --git a/test/zdotc_test/Makefile b/test/zdotc_test/Makefile deleted file mode 100644 index ba0b2d6..0000000 --- a/test/zdotc_test/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -include ../../src/Make.inc - -all: $(prefix)/zdotc_test$(EXE) - -$(prefix): - @mkdir -p $@ - -$(prefix)/zdotc_test$(EXE): zdotc_test.c | $(prefix) - @$(CC) -o $@ $(CFLAGS) $^ $(LDFLAGS) - -clean: - @rm -f $(prefix)/zdotc_test$(EXE) - -run: $(prefix)/zdotc_test$(EXE) - @$(prefix)/zdotc_test$(EXE) From 237afdff7db681dbb51fca9a09d7a4dd4f389e99 Mon Sep 17 00:00:00 2001 From: Elliot Saba Date: Wed, 12 Jun 2024 12:25:24 -0700 Subject: [PATCH 4/5] Update `.gitignore` --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index b00df40..c89602e 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,5 @@ # Build and output src/build/ -src/prefix/ \ No newline at end of file +src/prefix/ +test/Manifest.toml From 11b29ac46f29394c22b4d4dc595ebcf5755873fa Mon Sep 17 00:00:00 2001 From: Elliot Saba Date: Wed, 12 Jun 2024 14:59:59 -0700 Subject: [PATCH 5/5] Allow for overriding workarounds even when autodetection is unavailable If a user wants to use MKL on i686, we unfortunately need some workarounds for certain function calls. Until we come up with an autodetection strategy that works on i686 as well as x86_64, we can make use of the new environment variable-based overrides to make things work. Let's test that on i686 on CI. --- src/Makefile | 8 -------- src/autodetection.c | 16 +++++++++------- src/libblastrampoline.c | 33 +++++++++++--------------------- src/libblastrampoline_internal.h | 7 ------- test/runtests.jl | 17 +++++++++++----- 5 files changed, 32 insertions(+), 49 deletions(-) diff --git a/src/Makefile b/src/Makefile index 859acf6..c4eb16a 100644 --- a/src/Makefile +++ b/src/Makefile @@ -25,17 +25,9 @@ MAIN_OBJS += win_utils.o endif # If we're on an architecture that supports f2c autodetection, compile that in! -ifeq ($(F2C_AUTODETECTION),1) MAIN_OBJS += f2c_adapters.o -endif - -ifeq ($(CBLAS_DIVERGENCE_AUTODETECTION),1) MAIN_OBJS += cblas_adapters.o -endif - -ifeq ($(COMPLEX_RETSTYLE_AUTODETECTION),1) MAIN_OBJS += complex_return_style_adapters.o -endif # Place the `.o` files into `$(builddir)` MAIN_OBJS := $(addprefix $(builddir)/,$(MAIN_OBJS)) diff --git a/src/autodetection.c b/src/autodetection.c index 2fed694..1d6f59a 100644 --- a/src/autodetection.c +++ b/src/autodetection.c @@ -206,7 +206,6 @@ int32_t autodetect_interface(void * handle, const char * suffix) { return LBT_INTERFACE_UNKNOWN; } -#ifdef COMPLEX_RETSTYLE_AUTODETECTION int32_t autodetect_complex_return_style(void * handle, const char * suffix) { if (env_lowercase_match("LBT_FORCE_RETSTYLE", "normal")) { return LBT_COMPLEX_RETSTYLE_NORMAL; @@ -217,6 +216,8 @@ int32_t autodetect_complex_return_style(void * handle, const char * suffix) { if (env_lowercase_match("LBT_FORCE_RETSTYLE", "fnda")) { return LBT_COMPLEX_RETSTYLE_FNDA; } + +#ifdef COMPLEX_RETSTYLE_AUTODETECTION char symbol_name[MAX_SYMBOL_LEN]; build_symbol_name(symbol_name, "zdotc_", suffix); @@ -300,14 +301,13 @@ int32_t autodetect_complex_return_style(void * handle, const char * suffix) { (creal(retval_float) == 0.0f && cimag(retval_float) == 0.0f)) { return LBT_COMPLEX_RETSTYLE_NORMAL; } +#endif // COMPLEX_RETSTYLE_AUTODETECTION // If we get here, zdotc and cdotc are being uncooperative and we // do not appreciate it at all, not we don't my precious. return LBT_COMPLEX_RETSTYLE_UNKNOWN; } -#endif // COMPLEX_RETSTYLE_AUTODETECTION -#ifdef F2C_AUTODETECTION int32_t autodetect_f2c(void * handle, const char * suffix) { if (env_lowercase_match("LBT_FORCE_F2C", "plain")) { return LBT_F2C_PLAIN; @@ -315,7 +315,7 @@ int32_t autodetect_f2c(void * handle, const char * suffix) { if (env_lowercase_match("LBT_FORCE_F2C", "required")) { return LBT_F2C_REQUIRED; } - +#ifdef F2C_AUTODETECTION char symbol_name[MAX_SYMBOL_LEN]; // Attempt BLAS `sdot()` test @@ -346,12 +346,12 @@ int32_t autodetect_f2c(void * handle, const char * suffix) { // It's an f2c style calling convention return LBT_F2C_REQUIRED; } +#endif // F2C_AUTODETECTION + // We have no idea what happened; nothing works and everything is broken return LBT_F2C_UNKNOWN; } -#endif // F2C_AUTODETECTION -#ifdef CBLAS_DIVERGENCE_AUTODETECTION int32_t autodetect_cblas_divergence(void * handle, const char * suffix) { if (env_lowercase_match("LBT_FORCE_CBLAS", "conformant")) { return LBT_CBLAS_CONFORMANT; @@ -360,6 +360,7 @@ int32_t autodetect_cblas_divergence(void * handle, const char * suffix) { return LBT_CBLAS_DIVERGENT; } +#ifdef CBLAS_DIVERGENCE_AUTODETECTION char symbol_name[MAX_SYMBOL_LEN]; build_symbol_name(symbol_name, "zdotc_", suffix); @@ -382,7 +383,8 @@ int32_t autodetect_cblas_divergence(void * handle, const char * suffix) { return LBT_CBLAS_DIVERGENT; } } +#endif // CBLAS_DIVERGENCE_AUTODETECTION + // If we can't even find `zdotc_64`, we don't know what this is. return LBT_CBLAS_UNKNOWN; } -#endif // CBLAS_DIVERGENCE_AUTODETECTION diff --git a/src/libblastrampoline.c b/src/libblastrampoline.c index d253387..c4337d3 100644 --- a/src/libblastrampoline.c +++ b/src/libblastrampoline.c @@ -1,15 +1,8 @@ #include "libblastrampoline_internal.h" #include "libblastrampoline_trampdata.h" - -#ifdef COMPLEX_RETSTYLE_AUTODETECTION #include "libblastrampoline_complex_retdata.h" -#endif -#ifdef F2C_AUTODETECTION #include "libblastrampoline_f2cdata.h" -#endif -#ifdef CBLAS_DIVERGENCE_AUTODETECTION #include "libblastrampoline_cblasdata.h" -#endif // Sentinel to tell us if we've got a deepbindless workaround active or not #define DEEPBINDLESS_INTERFACE_LP64_LOADED 0x01 @@ -69,7 +62,6 @@ int32_t set_forward_by_index(int32_t symbol_idx, const void * addr, int32_t inte } } -#ifdef COMPLEX_RETSTYLE_AUTODETECTION for (int array_idx=0; array_idx < sizeof(cmplxret_func_idxs)/sizeof(int *); ++array_idx) { if ((complex_retstyle == LBT_COMPLEX_RETSTYLE_ARGUMENT) || ((complex_retstyle == LBT_COMPLEX_RETSTYLE_FNDA) && array_idx == 1)) { @@ -96,7 +88,6 @@ int32_t set_forward_by_index(int32_t symbol_idx, const void * addr, int32_t inte } } } -#endif // COMPLEX_RETSTYLE_AUTODETECTION #ifdef F2C_AUTODETECTION if (f2c == LBT_F2C_REQUIRED) { @@ -224,11 +215,12 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v // Next, let's figure out what the complex return style is: int complex_retstyle = LBT_COMPLEX_RETSTYLE_UNKNOWN; -#ifdef COMPLEX_RETSTYLE_AUTODETECTION complex_retstyle = autodetect_complex_return_style(handle, lib_suffix); if (complex_retstyle == LBT_COMPLEX_RETSTYLE_UNKNOWN) { - fprintf(stderr, "Unable to autodetect complex return style of \"%s\"\n", libname); - return 0; + #ifdef COMPLEX_RETSTYLE_AUTODETECTION + fprintf(stderr, "Unable to autodetect complex return style of \"%s\"\n", libname); + return 0; + #endif // COMPLEX_RETSTYLE_AUTODETECTION } if (verbose) { if (complex_retstyle == LBT_COMPLEX_RETSTYLE_NORMAL) { @@ -238,16 +230,16 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v printf(" -> Autodetected argument-passing complex return style\n"); } } -#endif // COMPLEX_RETSTYLE_AUTODETECTION int f2c = LBT_F2C_PLAIN; -#ifdef F2C_AUTODETECTION // Next, we need to probe to see if this is an f2c-style calling convention library // The only major example of this that we know of is Accelerate on macOS f2c = autodetect_f2c(handle, lib_suffix); if (f2c == LBT_F2C_UNKNOWN) { - fprintf(stderr, "Unable to autodetect calling convention of \"%s\"\n", libname); - return 0; + #ifdef F2C_AUTODETECTION + fprintf(stderr, "Unable to autodetect f2c calling convention of \"%s\"\n", libname); + return 0; + #endif // F2C_AUTODETECTION } if (verbose) { if (f2c == LBT_F2C_REQUIRED) { @@ -257,10 +249,8 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v printf(" -> Autodetected gfortran calling convention\n"); } } -#endif // F2C_AUTODETECTION int cblas = LBT_CBLAS_UNKNOWN; -#ifdef CBLAS_DIVERGENCE_AUTODETECTION // Next, we need to probe to see if this is MKL v2022 with missing ILP64-suffixed // CBLAS symbols, but only if it's an ILP64 library. if (interface == LBT_INTERFACE_ILP64) { @@ -274,7 +264,9 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v printf(" -> Autodetected CBLAS-divergent library!\n"); break; case LBT_CBLAS_UNKNOWN: - printf(" -> CBLAS not found\n"); + #ifdef CBLAS_DIVERGENCE_AUTODETECTION + printf(" -> CBLAS not found/autodetection unavailable\n"); + #endif // CBLAS_DIVERGENCE_AUTODETECTION break; default: printf(" -> ERROR: Impossible CBLAS detection result: %d\n", cblas); @@ -283,7 +275,6 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v } } } -#endif // CBLAS_DIVERGENCE_AUTODETECTION /* * Now, if we are opening a 64-bit library with 32-bit names (e.g. suffix == ""), @@ -367,7 +358,6 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v } } -#ifdef CBLAS_DIVERGENCE_AUTODETECTION // If we're loading a divergent CBLAS library, we need to scan through all // CBLAS symbols, and forward them to wrappers which will convert them to // the FORTRAN equivalents. @@ -390,7 +380,6 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v } } } -#endif // CBLAS_DIVERGENCE_AUTODETECTION record_library_load(libname, handle, lib_suffix, &forwards[0], interface, complex_retstyle, f2c, cblas); if (verbose) { diff --git a/src/libblastrampoline_internal.h b/src/libblastrampoline_internal.h index 19c46a9..e6ca762 100644 --- a/src/libblastrampoline_internal.h +++ b/src/libblastrampoline_internal.h @@ -87,16 +87,9 @@ const char * autodetect_symbol_suffix(void * handle, const char * suffix_hint); int32_t autodetect_blas_interface(void * isamax_addr); int32_t autodetect_lapack_interface(void * dpotrf_addr); int32_t autodetect_interface(void * handle, const char * suffix); -#ifdef COMPLEX_RETSTYLE_AUTODETECTION int32_t autodetect_complex_return_style(void * handle, const char * suffix); -#endif - -#ifdef F2C_AUTODETECTION int32_t autodetect_f2c(void * handle, const char * suffix); -#endif -#ifdef CBLAS_DIVERGENCE_AUTODETECTION int32_t autodetect_cblas_divergence(void * handle, const char * suffix); -#endif // Functions in deepbindless_surrogates.c uint8_t push_fake_lsame(); diff --git a/test/runtests.jl b/test/runtests.jl index 4ee136b..3a5d882 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,7 +4,7 @@ using Pkg, Artifacts, Base.BinaryPlatforms, Libdl, Test include("utils.jl") # Compile `dgemm_test.c` and `sgesv_test.c` against the given BLAS/LAPACK -function run_test((test_name, test_expected_outputs, expect_success), libblas_name, libdirs, interface, backing_libs) +function run_test((test_name, test_expected_outputs, expect_success), libblas_name, libdirs, interface, backing_libs; extra_env = Dict()) # We need to configure this C build a bit cflags = String[ "-g", @@ -50,6 +50,7 @@ function run_test((test_name, test_expected_outputs, expect_success), libblas_na "LBT_DEFAULT_LIBS" => backing_libs, "LBT_STRICT" => 1, "LBT_VERBOSE" => 1, + pairs(extra_env)..., ) cmd = `$(dir)/$(test_name)` p, output = capture_output(addenv(cmd, env)) @@ -101,9 +102,9 @@ cdotc = ("cdotc_test", ( # Helper function to run all the tests with the given arguments # Does not include `dgemmt` because that's MKL-only -function run_all_tests(args...; tests = [dgemm, dpstrf, sgesv, sdot, cdotc]) +function run_all_tests(args...; tests = [dgemm, dpstrf, sgesv, sdot, cdotc], kwargs...) for test in tests - run_test(test, args...) + run_test(test, args...; kwargs...) end end @@ -158,9 +159,15 @@ end # Test against MKL_jll using `libmkl_rt`, which is :LP64 by default if MKL_jll.is_available() + # On i686, we can't do complex return style autodetection, so we manually set it, + # knowing that MKL is argument-style. + extra_env = Dict{String,String}() + if Sys.ARCH == :i686 + extra_env["LBT_FORCE_RETSTYLE"] = "ARGUMENT" + end @testset "LBT -> MKL_jll (LP64)" begin libdirs = unique(vcat(lbt_dir, MKL_jll.LIBPATH_list..., CompilerSupportLibraries_jll.LIBPATH_list...)) - run_all_tests(blastrampoline_link_name(), libdirs, :LP64, MKL_jll.libmkl_rt_path; tests = [dgemm, dgemmt, dpstrf, sgesv, sdot, cdotc]) + run_all_tests(blastrampoline_link_name(), libdirs, :LP64, MKL_jll.libmkl_rt_path; tests = [dgemm, dgemmt, dpstrf, sgesv, sdot, cdotc], extra_env) end # Test that we can set MKL's interface via an environment variable to select ILP64, and LBT detects it properly @@ -168,7 +175,7 @@ if MKL_jll.is_available() @testset "LBT -> MKL_jll (ILP64, via env)" begin withenv("MKL_INTERFACE_LAYER" => "ILP64") do libdirs = unique(vcat(lbt_dir, MKL_jll.LIBPATH_list..., CompilerSupportLibraries_jll.LIBPATH_list...)) - run_all_tests(blastrampoline_link_name(), libdirs, :ILP64, MKL_jll.libmkl_rt_path; tests = [dgemm, dgemmt, dpstrf, sgesv, sdot, cdotc]) + run_all_tests(blastrampoline_link_name(), libdirs, :ILP64, MKL_jll.libmkl_rt_path; tests = [dgemm, dgemmt, dpstrf, sgesv, sdot, cdotc], extra_env) end end end