Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Version 5.10.0 #129

Merged
merged 5 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@

# Build and output
src/build/
src/prefix/
src/prefix/
test/Manifest.toml
10 changes: 1 addition & 9 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ maintarget=$(word 1,$(TARGET_LIBRARIES))
all: $(maintarget)

# Objects we'll build
MAIN_OBJS := libblastrampoline.o dl_utils.o config.o \
MAIN_OBJS := libblastrampoline.o dl_utils.o env_utils.o config.o \
autodetection.o \
threading.o deepbindless.o trampolines/trampolines_$(ARCH).o

Expand All @@ -25,17 +25,9 @@ MAIN_OBJS += win_utils.o
endif

# If we're on an architecture that supports f2c autodetection, compile that in!
ifeq ($(F2C_AUTODETECTION),1)
MAIN_OBJS += f2c_adapters.o
endif

ifeq ($(CBLAS_DIVERGENCE_AUTODETECTION),1)
MAIN_OBJS += cblas_adapters.o
endif

ifeq ($(COMPLEX_RETSTYLE_AUTODETECTION),1)
MAIN_OBJS += complex_return_style_adapters.o
endif

# Place the `.o` files into `$(builddir)`
MAIN_OBJS := $(addprefix $(builddir)/,$(MAIN_OBJS))
Expand Down
124 changes: 104 additions & 20 deletions src/autodetection.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,12 @@ const char * autodetect_symbol_suffix(void * handle, const char * suffix_hint) {
* incorrect `N` to cause it to change its return value based on how it is interpreting arugments.
*/
int32_t autodetect_blas_interface(void * isamax_addr) {
if (env_lowercase_match("LBT_FORCE_INTERFACE", "ilp64")) {
return LBT_INTERFACE_ILP64;
}
if (env_lowercase_match("LBT_FORCE_INTERFACE", "lp64")) {
return LBT_INTERFACE_LP64;
}
// Typecast to function pointer for easier usage below
int64_t (*isamax)(int64_t *, float *, int64_t *) = isamax_addr;

Expand Down Expand Up @@ -145,6 +151,12 @@ int32_t autodetect_blas_interface(void * isamax_addr) {
* and determine if the internal pointer dereferences were 32-bit or 64-bit.
*/
int32_t autodetect_lapack_interface(void * dpotrf_addr) {
if (env_lowercase_match("LBT_FORCE_INTERFACE", "ilp64")) {
return LBT_INTERFACE_ILP64;
}
if (env_lowercase_match("LBT_FORCE_INTERFACE", "lp64")) {
return LBT_INTERFACE_LP64;
}
// Typecast to function pointer for easier usage below
void (*dpotrf)(char *, int64_t *, double *, int64_t *, int64_t *) = dpotrf_addr;

Expand Down Expand Up @@ -194,8 +206,18 @@ int32_t autodetect_interface(void * handle, const char * suffix) {
return LBT_INTERFACE_UNKNOWN;
}

#ifdef COMPLEX_RETSTYLE_AUTODETECTION
int32_t autodetect_complex_return_style(void * handle, const char * suffix) {
if (env_lowercase_match("LBT_FORCE_RETSTYLE", "normal")) {
return LBT_COMPLEX_RETSTYLE_NORMAL;
}
if (env_lowercase_match("LBT_FORCE_RETSTYLE", "argument")) {
return LBT_COMPLEX_RETSTYLE_ARGUMENT;
}
if (env_lowercase_match("LBT_FORCE_RETSTYLE", "fnda")) {
return LBT_COMPLEX_RETSTYLE_FNDA;
}

#ifdef COMPLEX_RETSTYLE_AUTODETECTION
char symbol_name[MAX_SYMBOL_LEN];

build_symbol_name(symbol_name, "zdotc_", suffix);
Expand All @@ -204,43 +226,96 @@ int32_t autodetect_complex_return_style(void * handle, const char * suffix) {
return LBT_COMPLEX_RETSTYLE_UNKNOWN;
}

build_symbol_name(symbol_name, "cdotc_", suffix);
void * cdotc_addr = lookup_symbol(handle, symbol_name);
if (cdotc_addr == NULL) {
return LBT_COMPLEX_RETSTYLE_UNKNOWN;
}

// Typecast to function pointer for easier usage below
double complex (*zdotc_normal)( int64_t *, double complex *, int64_t *, double complex *, int64_t *) = zdotc_addr;
void (*zdotc_retarg)(double complex *, int64_t *, double complex *, int64_t *, double complex *, int64_t *) = zdotc_addr;

// Typecast to function pointer for easier usage below
float complex (*cdotc_normal)( int64_t *, float complex *, int64_t *, float complex *, int64_t *) = cdotc_addr;
void (*cdotc_retarg)(float complex *, int64_t *, float complex *, int64_t *, float complex *, int64_t *) = cdotc_addr;

/*
* First, check to see if `zdotc` zeros out the first argument if all arguments are zero.
* Supposedly, most well-behaved implementations will return `0 + 0*I` if the length of
* the inputs is zero; so if it is using a "return argument", that's a good way to find out.
*
* We detect this by setting `retval` to an initial value of `0.0 + 1.0*I`. This has the
* added benefit of being interpretable as `0` if looked at as an `int{32,64}_t *`, which
* makes this invocation safe across the full normal-return/argument-return vs. lp64/ilp64
* compatibility square.
* We detect this by setting `retval` to an initial value of `-1` typecast to a complex
* value. The floating-point values are unimportant as they will be written to, but if
* it is interpreted as an `int{32,64}_t`, it will be a negative value (which is not
* allowed and should end the routine immediately). This makes this invocation safe
* across the full normal/argument, lp64/ilp64, cdotc/zdotc compatibility cube.
*/
double complex retval = 0.0 + 1.0*I;
double complex retval_double = 0.0 + 1.0*I;
int64_t zero = 0;
double complex zeroc = 0.0 + 0.0*I;
zdotc_retarg(&retval, &zero, &zeroc, &zero, &zeroc, &zero);
double complex zeroc_double = 0.0 + 0.0*I;
zdotc_retarg(&retval_double, &zero, &zeroc_double, &zero, &zeroc_double, &zero);

if (creal(retval) == 0.0 && cimag(retval) == 0.0) {
return LBT_COMPLEX_RETSTYLE_ARGUMENT;
/*
* Next, do the same with `cdotc`, in order to detect situations where the ABI is
* automatically inserting an extra argument to return 128-bit-wide values.
* We call this `FNDA` for "Float Normal, Double Argument" style.
*/
int64_t neg1 = -1;
float complex retval_float = *(complex float *)(&neg1);
float complex zeroc_float = 0.0f + 0.0f*I;
cdotc_retarg(&retval_float, &zero, &zeroc_float, &zero, &zeroc_float, &zero);

if (creal(retval_double) == 0.0 && cimag(retval_double) == 0.0) {
// If the double values were reset, and the float values were also,
// this is easy, we're just always argument-style:
if (creal(retval_float) == 0.0f && cimag(retval_float) == 0.0f) {
return LBT_COMPLEX_RETSTYLE_ARGUMENT;
}

// If the float values were not, let's try the normal return style:
retval_float = 0.0f + 1.0f*I;
retval_float = cdotc_normal(&zero, &zeroc_float, &zero, &zeroc_float, &zero);


// If this works, we are in FNDA style (currently only observed on Windows x64)
if (creal(retval_float) == 0.0f && cimag(retval_float) == 0.0f) {
return LBT_COMPLEX_RETSTYLE_FNDA;
}

// Otherwise, cdotc is throwing a fit and we don't know what's up.
return LBT_COMPLEX_RETSTYLE_UNKNOWN;
}

// If it was _not_ reset, let's hazard a guess that we're dealing with a normal return style:
retval = 0.0 + 1.0*I;
retval = zdotc_normal(&zero, &zeroc, &zero, &zeroc, &zero);
if (creal(retval) == 0.0 && cimag(retval) == 0.0) {
// If our double values were _not_ reset, let's hazard a guess that
// we're dealing with a normal return style and test both types again:
retval_double = 0.0 + 1.0*I;
retval_double = zdotc_normal(&zero, &zeroc_double, &zero, &zeroc_double, &zero);
retval_float = 0.0f + 1.0f*I;
retval_float = cdotc_normal(&zero, &zeroc_float, &zero, &zeroc_float, &zero);


// We only test for both working; we don't have a retstyle for float
// being argument style and double being normal style.
if ((creal(retval_double) == 0.0 && cimag(retval_double) == 0.0) &&
(creal(retval_float) == 0.0f && cimag(retval_float) == 0.0f)) {
return LBT_COMPLEX_RETSTYLE_NORMAL;
}
#endif // COMPLEX_RETSTYLE_AUTODETECTION

// If that was not reset either, we have no idea what's going on.
// If we get here, zdotc and cdotc are being uncooperative and we
// do not appreciate it at all, not we don't my precious.
return LBT_COMPLEX_RETSTYLE_UNKNOWN;
}
#endif // COMPLEX_RETSTYLE_AUTODETECTION

#ifdef F2C_AUTODETECTION
int32_t autodetect_f2c(void * handle, const char * suffix) {
if (env_lowercase_match("LBT_FORCE_F2C", "plain")) {
return LBT_F2C_PLAIN;
}
if (env_lowercase_match("LBT_FORCE_F2C", "required")) {
return LBT_F2C_REQUIRED;
}
#ifdef F2C_AUTODETECTION
char symbol_name[MAX_SYMBOL_LEN];

// Attempt BLAS `sdot()` test
Expand Down Expand Up @@ -271,13 +346,21 @@ int32_t autodetect_f2c(void * handle, const char * suffix) {
// It's an f2c style calling convention
return LBT_F2C_REQUIRED;
}
#endif // F2C_AUTODETECTION

// We have no idea what happened; nothing works and everything is broken
return LBT_F2C_UNKNOWN;
}
#endif // F2C_AUTODETECTION

#ifdef CBLAS_DIVERGENCE_AUTODETECTION
int32_t autodetect_cblas_divergence(void * handle, const char * suffix) {
if (env_lowercase_match("LBT_FORCE_CBLAS", "conformant")) {
return LBT_CBLAS_CONFORMANT;
}
if (env_lowercase_match("LBT_FORCE_CBLAS", "divergent")) {
return LBT_CBLAS_DIVERGENT;
}

#ifdef CBLAS_DIVERGENCE_AUTODETECTION
char symbol_name[MAX_SYMBOL_LEN];

build_symbol_name(symbol_name, "zdotc_", suffix);
Expand All @@ -300,7 +383,8 @@ int32_t autodetect_cblas_divergence(void * handle, const char * suffix) {
return LBT_CBLAS_DIVERGENT;
}
}
#endif // CBLAS_DIVERGENCE_AUTODETECTION

// If we can't even find `zdotc_64`, we don't know what this is.
return LBT_CBLAS_UNKNOWN;
}
#endif // CBLAS_DIVERGENCE_AUTODETECTION
4 changes: 2 additions & 2 deletions src/cblas_adapters.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ void lbt_cblas_cdotc_sub(const int32_t N,
}

extern float complex cdotc_64_(const int64_t *,
const float complex *, const int64_t *,
const float complex *, const int64_t *);
const float complex *, const int64_t *,
const float complex *, const int64_t *);
void lbt_cblas_cdotc_sub64_(const int64_t N,
const float complex *X, const int64_t incX,
const float complex *Y, const int64_t incY,
Expand Down
16 changes: 8 additions & 8 deletions src/complex_return_style_adapters.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ extern void (*cmplxret_cdotc__addr)(float complex * z,
const float complex *, const int32_t *,
const float complex *, const int32_t *);
float complex cmplxret_cdotc_(const int32_t * N,
const float complex *X, const int32_t * incX,
const float complex *Y, const int32_t * incY)
const float complex *X, const int32_t * incX,
const float complex *Y, const int32_t * incY)
{
float complex c;
cmplxret_cdotc__addr(&c, N, X, incX, Y, incY);
Expand All @@ -85,8 +85,8 @@ extern void (*cmplxret_cdotc_64__addr)(float complex * z,
const float complex *, const int64_t *,
const float complex *, const int64_t *);
float complex cmplxret_cdotc_64_(const int64_t * N,
const float complex *X, const int64_t * incX,
const float complex *Y, const int64_t * incY)
const float complex *X, const int64_t * incX,
const float complex *Y, const int64_t * incY)
{
float complex c;
cmplxret_cdotc_64__addr(&c, N, X, incX, Y, incY);
Expand All @@ -100,8 +100,8 @@ extern void (*cmplxret_cdotu__addr)(float complex * z,
const float complex *, const int32_t *,
const float complex *, const int32_t *);
float complex cmplxret_cdotu_(const int32_t * N,
const float complex *X, const int32_t * incX,
const float complex *Y, const int32_t * incY)
const float complex *X, const int32_t * incX,
const float complex *Y, const int32_t * incY)
{
float complex c;
cmplxret_cdotu__addr(&c, N, X, incX, Y, incY);
Expand All @@ -113,8 +113,8 @@ extern void (*cmplxret_cdotu_64__addr)(float complex * z,
const float complex *, const int64_t *,
const float complex *, const int64_t *);
float complex cmplxret_cdotu_64_(const int64_t * N,
const float complex *X, const int64_t * incX,
const float complex *Y, const int64_t * incY)
const float complex *X, const int64_t * incX,
const float complex *Y, const int64_t * incY)
{
float complex c;
cmplxret_cdotu_64__addr(&c, N, X, incX, Y, incY);
Expand Down
62 changes: 62 additions & 0 deletions src/env_utils.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#include "libblastrampoline_internal.h"
#include <ctype.h>

const char * env_lowercase(const char * env_name) {
// Get environment value, if it's not set, return false
char * env_value = getenv(env_name);
if (env_value == NULL) {
return NULL;
}

// If it is set, convert to lowercase.
env_value = strdup(env_value);
for (size_t idx=0; idx<strlen(env_value); ++idx) {
env_value[idx] = tolower(env_value[idx]);
}
return env_value;
}


uint8_t env_lowercase_match(const char * env_name, const char * value) {
const char * env_value = env_lowercase(env_name);
if (env_value == NULL) {
return 0;
}

int ret = strcmp(env_value, value) == 0;
free((void *)env_value);
return ret;
}

uint8_t env_lowercase_match_any(const char * env_name, uint32_t num_values, ...) {
va_list args;
va_start(args, num_values);

// Get environment value
const char * env_value = env_lowercase(env_name);
if (env_value == NULL) {
return 0;
}

// Search through our varargs for a match
for (uint32_t idx=0; idx<num_values; idx++ ) {
const char *value = va_arg(args, const char *);
if (strcmp(env_value, value) == 0) {
free((void *)env_value);
return 1;
}
}
free((void *)env_value);
return 0;
}

// Check to see if `env_name` matches any "boolean"-like
uint8_t env_match_bool(const char * env_name, uint8_t default_value) {
if (env_lowercase_match_any(env_name, 3, "0", "false", "no")) {
return 0;
}
if (env_lowercase_match_any(env_name, 3, "1", "true", "yes")) {
return 1;
}
return default_value;
}
Loading