JuliaLinearAlgebra · staticfloat · Jun 13, 2024 · Jun 12, 2024 · Jun 12, 2024 · Jun 12, 2024
diff --git a/.gitignore b/.gitignore
@@ -7,4 +7,5 @@
 
 # Build and output
 src/build/
-src/prefix/
+src/prefix/
+test/Manifest.toml
diff --git a/src/Makefile b/src/Makefile
@@ -15,7 +15,7 @@ maintarget=$(word 1,$(TARGET_LIBRARIES))
 all: $(maintarget)
 
 # Objects we'll build
-MAIN_OBJS := libblastrampoline.o dl_utils.o config.o \
+MAIN_OBJS := libblastrampoline.o dl_utils.o env_utils.o config.o \
 			 autodetection.o \
 			 threading.o deepbindless.o trampolines/trampolines_$(ARCH).o
 
@@ -25,17 +25,9 @@ MAIN_OBJS += win_utils.o
 endif
 
 # If we're on an architecture that supports f2c autodetection, compile that in!
-ifeq ($(F2C_AUTODETECTION),1)
 MAIN_OBJS += f2c_adapters.o
-endif
-
-ifeq ($(CBLAS_DIVERGENCE_AUTODETECTION),1)
 MAIN_OBJS += cblas_adapters.o
-endif
-
-ifeq ($(COMPLEX_RETSTYLE_AUTODETECTION),1)
 MAIN_OBJS += complex_return_style_adapters.o
-endif
 
 # Place the `.o` files into `$(builddir)`
 MAIN_OBJS := $(addprefix $(builddir)/,$(MAIN_OBJS))

diff --git a/src/autodetection.c b/src/autodetection.c
@@ -96,6 +96,12 @@ const char * autodetect_symbol_suffix(void * handle, const char * suffix_hint) {
  * incorrect `N` to cause it to change its return value based on how it is interpreting arugments.
  */
 int32_t autodetect_blas_interface(void * isamax_addr) {
+    if (env_lowercase_match("LBT_FORCE_INTERFACE", "ilp64")) {
+        return LBT_INTERFACE_ILP64;
+    }
+    if (env_lowercase_match("LBT_FORCE_INTERFACE", "lp64")) {
+        return LBT_INTERFACE_LP64;
+    }
     // Typecast to function pointer for easier usage below
     int64_t (*isamax)(int64_t *, float *, int64_t *) = isamax_addr;
 
@@ -145,6 +151,12 @@ int32_t autodetect_blas_interface(void * isamax_addr) {
  * and determine if the internal pointer dereferences were 32-bit or 64-bit.
  */
 int32_t autodetect_lapack_interface(void * dpotrf_addr) {
+    if (env_lowercase_match("LBT_FORCE_INTERFACE", "ilp64")) {
+        return LBT_INTERFACE_ILP64;
+    }
+    if (env_lowercase_match("LBT_FORCE_INTERFACE", "lp64")) {
+        return LBT_INTERFACE_LP64;
+    }
     // Typecast to function pointer for easier usage below
     void (*dpotrf)(char *, int64_t *, double *, int64_t *, int64_t *) = dpotrf_addr;
 
@@ -194,8 +206,18 @@ int32_t autodetect_interface(void * handle, const char * suffix) {
     return LBT_INTERFACE_UNKNOWN;
 }
 
-#ifdef COMPLEX_RETSTYLE_AUTODETECTION
 int32_t autodetect_complex_return_style(void * handle, const char * suffix) {
+    if (env_lowercase_match("LBT_FORCE_RETSTYLE", "normal")) {
+        return LBT_COMPLEX_RETSTYLE_NORMAL;
+    }
+    if (env_lowercase_match("LBT_FORCE_RETSTYLE", "argument")) {
+        return LBT_COMPLEX_RETSTYLE_ARGUMENT;
+    }
+    if (env_lowercase_match("LBT_FORCE_RETSTYLE", "fnda")) {
+        return LBT_COMPLEX_RETSTYLE_FNDA;
+    }
+
+#ifdef COMPLEX_RETSTYLE_AUTODETECTION
     char symbol_name[MAX_SYMBOL_LEN];
 
     build_symbol_name(symbol_name, "zdotc_", suffix);
@@ -204,43 +226,96 @@ int32_t autodetect_complex_return_style(void * handle, const char * suffix) {
         return LBT_COMPLEX_RETSTYLE_UNKNOWN;
     }
 
+    build_symbol_name(symbol_name, "cdotc_", suffix);
+    void * cdotc_addr = lookup_symbol(handle, symbol_name);
+    if (cdotc_addr == NULL) {
+        return LBT_COMPLEX_RETSTYLE_UNKNOWN;
+    }
+
     // Typecast to function pointer for easier usage below
     double complex (*zdotc_normal)(                  int64_t *, double complex *, int64_t *, double complex *, int64_t *) = zdotc_addr;
     void           (*zdotc_retarg)(double complex *, int64_t *, double complex *, int64_t *, double complex *, int64_t *) = zdotc_addr;
 
+    // Typecast to function pointer for easier usage below
+    float complex (*cdotc_normal)(                 int64_t *, float complex *, int64_t *, float complex *, int64_t *) = cdotc_addr;
+    void          (*cdotc_retarg)(float complex *, int64_t *, float complex *, int64_t *, float complex *, int64_t *) = cdotc_addr;
+
     /*
      * First, check to see if `zdotc` zeros out the first argument if all arguments are zero.
      * Supposedly, most well-behaved implementations will return `0 + 0*I` if the length of
      * the inputs is zero; so if it is using a "return argument", that's a good way to find out.
      * 
-     * We detect this by setting `retval` to an initial value of `0.0 + 1.0*I`.  This has the
-     * added benefit of being interpretable as `0` if looked at as an `int{32,64}_t *`, which
-     * makes this invocation safe across the full normal-return/argument-return vs. lp64/ilp64
-     * compatibility square.
+     * We detect this by setting `retval` to an initial value of `-1` typecast to a complex
+     * value.  The floating-point values are unimportant as they will be written to, but if
+     * it is interpreted as an `int{32,64}_t`, it will be a negative value (which is not
+     * allowed and should end the routine immediately).  This makes this invocation safe
+     * across the full normal/argument, lp64/ilp64, cdotc/zdotc compatibility cube.
      */
-    double complex retval = 0.0 + 1.0*I;
+    double complex retval_double = 0.0 + 1.0*I;
     int64_t zero = 0;
-    double complex zeroc = 0.0 + 0.0*I;
-    zdotc_retarg(&retval, &zero, &zeroc, &zero, &zeroc, &zero);
+    double complex zeroc_double = 0.0 + 0.0*I;
+    zdotc_retarg(&retval_double, &zero, &zeroc_double, &zero, &zeroc_double, &zero);
 
-    if (creal(retval) == 0.0 && cimag(retval) == 0.0) {
-        return LBT_COMPLEX_RETSTYLE_ARGUMENT;
+    /*
+     * Next, do the same with `cdotc`, in order to detect situations where the ABI is
+     * automatically inserting an extra argument to return 128-bit-wide values.
+     * We call this `FNDA` for "Float Normal, Double Argument" style.
+     */
+    int64_t neg1 = -1;
+    float complex retval_float = *(complex float *)(&neg1);
+    float complex zeroc_float = 0.0f + 0.0f*I;
+    cdotc_retarg(&retval_float, &zero, &zeroc_float, &zero, &zeroc_float, &zero);
+
+    if (creal(retval_double) == 0.0 && cimag(retval_double) == 0.0) {
+        // If the double values were reset, and the float values were also,
+        // this is easy, we're just always argument-style:
+        if (creal(retval_float) == 0.0f && cimag(retval_float) == 0.0f) {
+            return LBT_COMPLEX_RETSTYLE_ARGUMENT;
+        }
+
+        // If the float values were not, let's try the normal return style:
+        retval_float = 0.0f + 1.0f*I;
+        retval_float = cdotc_normal(&zero, &zeroc_float, &zero, &zeroc_float, &zero);
+
+
+        // If this works, we are in FNDA style (currently only observed on Windows x64)
+        if (creal(retval_float) == 0.0f && cimag(retval_float) == 0.0f) {
+            return LBT_COMPLEX_RETSTYLE_FNDA;
+        }
+
+        // Otherwise, cdotc is throwing a fit and we don't know what's up.
+        return LBT_COMPLEX_RETSTYLE_UNKNOWN;
     }
 
-    // If it was _not_ reset, let's hazard a guess that we're dealing with a normal return style:
-    retval = 0.0 + 1.0*I;
-    retval = zdotc_normal(&zero, &zeroc, &zero, &zeroc, &zero);
-    if (creal(retval) == 0.0 && cimag(retval) == 0.0) {
+    // If our double values were _not_ reset, let's hazard a guess that
+    // we're dealing with a normal return style and test both types again:
+    retval_double = 0.0 + 1.0*I;
+    retval_double = zdotc_normal(&zero, &zeroc_double, &zero, &zeroc_double, &zero);
+    retval_float = 0.0f + 1.0f*I;
+    retval_float = cdotc_normal(&zero, &zeroc_float, &zero, &zeroc_float, &zero);
+
+
+    // We only test for both working; we don't have a retstyle for float
+    // being argument style and double being normal style.
+    if ((creal(retval_double) == 0.0 && cimag(retval_double) == 0.0) &&
+        (creal(retval_float) == 0.0f && cimag(retval_float) == 0.0f)) {
         return LBT_COMPLEX_RETSTYLE_NORMAL;
     }
+#endif // COMPLEX_RETSTYLE_AUTODETECTION
 
-    // If that was not reset either, we have no idea what's going on.
+    // If we get here, zdotc and cdotc are being uncooperative and we
+    // do not appreciate it at all, not we don't my precious.
     return LBT_COMPLEX_RETSTYLE_UNKNOWN;
 }
-#endif // COMPLEX_RETSTYLE_AUTODETECTION
 
-#ifdef F2C_AUTODETECTION
 int32_t autodetect_f2c(void * handle, const char * suffix) {
+    if (env_lowercase_match("LBT_FORCE_F2C", "plain")) {
+        return LBT_F2C_PLAIN;
+    }
+    if (env_lowercase_match("LBT_FORCE_F2C", "required")) {
+        return LBT_F2C_REQUIRED;
+    }
+#ifdef F2C_AUTODETECTION
     char symbol_name[MAX_SYMBOL_LEN];
 
     // Attempt BLAS `sdot()` test
@@ -271,13 +346,21 @@ int32_t autodetect_f2c(void * handle, const char * suffix) {
         // It's an f2c style calling convention
         return LBT_F2C_REQUIRED;
     }
+#endif // F2C_AUTODETECTION
+
     // We have no idea what happened; nothing works and everything is broken
     return LBT_F2C_UNKNOWN;
 }
-#endif // F2C_AUTODETECTION
 
-#ifdef CBLAS_DIVERGENCE_AUTODETECTION
 int32_t autodetect_cblas_divergence(void * handle, const char * suffix) {
+    if (env_lowercase_match("LBT_FORCE_CBLAS", "conformant")) {
+        return LBT_CBLAS_CONFORMANT;
+    }
+    if (env_lowercase_match("LBT_FORCE_CBLAS", "divergent")) {
+        return LBT_CBLAS_DIVERGENT;
+    }
+
+#ifdef CBLAS_DIVERGENCE_AUTODETECTION
     char symbol_name[MAX_SYMBOL_LEN];
 
     build_symbol_name(symbol_name, "zdotc_", suffix);
@@ -300,7 +383,8 @@ int32_t autodetect_cblas_divergence(void * handle, const char * suffix) {
             return LBT_CBLAS_DIVERGENT;
         }
     }
+#endif // CBLAS_DIVERGENCE_AUTODETECTION
+
     // If we can't even find `zdotc_64`, we don't know what this is.
     return LBT_CBLAS_UNKNOWN;
 }
-#endif // CBLAS_DIVERGENCE_AUTODETECTION
diff --git a/src/cblas_adapters.c b/src/cblas_adapters.c
@@ -73,8 +73,8 @@ void lbt_cblas_cdotc_sub(const int32_t N,
 }
 
 extern float complex cdotc_64_(const int64_t *,
-                                const float complex *, const int64_t *,
-                                const float complex *, const int64_t *);
+                               const float complex *, const int64_t *,
+                               const float complex *, const int64_t *);
 void lbt_cblas_cdotc_sub64_(const int64_t N,
                             const float complex *X, const int64_t incX,
                             const float complex *Y, const int64_t incY,

diff --git a/src/complex_return_style_adapters.c b/src/complex_return_style_adapters.c
@@ -72,8 +72,8 @@ extern void (*cmplxret_cdotc__addr)(float complex * z,
                                     const float complex *, const int32_t *,
                                     const float complex *, const int32_t *);
 float complex cmplxret_cdotc_(const int32_t * N,
-                               const float complex *X, const int32_t * incX,
-                               const float complex *Y, const int32_t * incY)
+                              const float complex *X, const int32_t * incX,
+                              const float complex *Y, const int32_t * incY)
 {
    float complex c;
    cmplxret_cdotc__addr(&c, N, X, incX, Y, incY);
@@ -85,8 +85,8 @@ extern void (*cmplxret_cdotc_64__addr)(float complex * z,
                                        const float complex *, const int64_t *,
                                        const float complex *, const int64_t *);
 float complex cmplxret_cdotc_64_(const int64_t * N,
-                                  const float complex *X, const int64_t * incX,
-                                  const float complex *Y, const int64_t * incY)
+                                 const float complex *X, const int64_t * incX,
+                                 const float complex *Y, const int64_t * incY)
 {
    float complex c;
    cmplxret_cdotc_64__addr(&c, N, X, incX, Y, incY);
@@ -100,8 +100,8 @@ extern void (*cmplxret_cdotu__addr)(float complex * z,
                                     const float complex *, const int32_t *,
                                     const float complex *, const int32_t *);
 float complex cmplxret_cdotu_(const int32_t * N,
-                               const float complex *X, const int32_t * incX,
-                               const float complex *Y, const int32_t * incY)
+                              const float complex *X, const int32_t * incX,
+                              const float complex *Y, const int32_t * incY)
 {
    float complex c;
    cmplxret_cdotu__addr(&c, N, X, incX, Y, incY);
@@ -113,8 +113,8 @@ extern void (*cmplxret_cdotu_64__addr)(float complex * z,
                                        const float complex *, const int64_t *,
                                        const float complex *, const int64_t *);
 float complex cmplxret_cdotu_64_(const int64_t * N,
-                                  const float complex *X, const int64_t * incX,
-                                  const float complex *Y, const int64_t * incY)
+                                 const float complex *X, const int64_t * incX,
+                                 const float complex *Y, const int64_t * incY)
 {
    float complex c;
    cmplxret_cdotu_64__addr(&c, N, X, incX, Y, incY);

diff --git a/src/env_utils.c b/src/env_utils.c
@@ -0,0 +1,62 @@
+#include "libblastrampoline_internal.h"
+#include <ctype.h>
+
+const char * env_lowercase(const char * env_name) {
+    // Get environment value, if it's not set, return false
+    char * env_value = getenv(env_name);
+    if (env_value == NULL) {
+        return NULL;
+    }
+
+    // If it is set, convert to lowercase.
+    env_value = strdup(env_value);
+    for (size_t idx=0; idx<strlen(env_value); ++idx) {
+        env_value[idx] = tolower(env_value[idx]);
+    }
+    return env_value;
+}
+
+
+uint8_t env_lowercase_match(const char * env_name, const char * value) {
+    const char * env_value = env_lowercase(env_name);
+    if (env_value == NULL) {
+        return 0;
+    }
+
+    int ret = strcmp(env_value, value) == 0;
+    free((void *)env_value);
+    return ret;
+}
+
+uint8_t env_lowercase_match_any(const char * env_name, uint32_t num_values, ...) {
+    va_list args;
+    va_start(args, num_values);
+
+    // Get environment value
+    const char * env_value = env_lowercase(env_name);
+    if (env_value == NULL) {
+        return 0;
+    }
+
+    // Search through our varargs for a match
+    for (uint32_t idx=0; idx<num_values; idx++ ) {
+        const char *value = va_arg(args, const char *);
+        if (strcmp(env_value, value) == 0) {
+            free((void *)env_value);
+            return 1;
+        }
+    }
+    free((void *)env_value);
+    return 0;
+}
+
+// Check to see if `env_name` matches any "boolean"-like 
+uint8_t env_match_bool(const char * env_name, uint8_t default_value) {
+    if (env_lowercase_match_any(env_name, 3, "0", "false", "no")) {
+        return 0;
+    }
+    if (env_lowercase_match_any(env_name, 3, "1", "true", "yes")) {
+        return 1;
+    }
+    return default_value;
+}