-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
e18440b
commit 16c5143
Showing
8 changed files
with
200 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
cat << EOF > /home/phaedrus/Forge/GH/Qompass/WaveRunner/NVIDIA/OpenBLAS/README.md | ||
|
||
# OpenBLAS: High-Performance Basic Linear Algebra Subprograms | ||
|
||
OpenBLAS is an optimized implementation of BLAS (Basic Linear Algebra Subprograms) based on the GotoBLAS2 1.13 BSD version. It provides a set of low-level routines for performing common linear algebra operations such as vector addition, scalar multiplication, dot products, linear combinations, and matrix multiplication. | ||
|
||
## Why OpenBLAS Matters for GenAI Workloads | ||
|
||
1. Performance: OpenBLAS is highly optimized for various CPU architectures, providing near-optimal performance for linear algebra operations crucial in machine learning and AI algorithms. | ||
|
||
2. Foundation for AI Frameworks: Many popular AI and machine learning frameworks (e.g., TensorFlow, PyTorch) rely on BLAS libraries like OpenBLAS for their core computations. | ||
|
||
3. Efficient Matrix Operations: GenAI models, particularly large language models, heavily depend on matrix multiplications and other linear algebra operations. OpenBLAS accelerates these computations, significantly reducing training and inference times. | ||
|
||
4. Hardware Optimization: This build is optimized for the Intel Haswell architecture, ensuring excellent performance on compatible systems. | ||
|
||
5. Multi-threading Support: With OpenMP support, OpenBLAS can efficiently utilize multi-core processors, further speeding up AI workloads. | ||
|
||
6. Memory Efficiency: OpenBLAS implements algorithms that make efficient use of CPU caches, crucial for handling the large matrices involved in GenAI models. | ||
|
||
By using OpenBLAS, developers and researchers can significantly accelerate their GenAI workflows, from data preprocessing to model training and inference, ultimately enabling faster iteration and more complex models. | ||
|
||
|
||
# Qompass OpenBLAS Release | ||
|
||
Version: 0.3.28.dev, compiled on an Arch Linux machine with an x86_64 processor | ||
|
||
## Build Instructions | ||
|
||
1. Clone the OpenBLAS repository: | ||
\`\`\` | ||
git clone https://github.com/xianyi/OpenBLAS.git | ||
cd OpenBLAS | ||
\`\`\` | ||
|
||
2. Build OpenBLAS: | ||
\`\`\` | ||
make TARGET=HASWELL USE_OPENMP=1 | ||
\`\`\` | ||
|
||
3. Install OpenBLAS: | ||
\`\`\` | ||
sudo make PREFIX=/usr/local install | ||
\`\`\` | ||
|
||
## Notes | ||
|
||
- This build was optimized for the Intel Haswell architecture. | ||
- OpenMP support is enabled for multi-threading. | ||
- The library is installed in /usr/local/lib and headers in /usr/local/include. | ||
|
||
EOF | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
libopenblas_haswellp-r0.3.28.dev.a |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
libopenblas_haswellp-r0.3.28.dev.so |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
libopenblas_haswellp-r0.3.28.dev.so |
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
/* Include guard: the matching #endif is the last line of this header. */
#ifndef OPENBLAS_CONFIG_H | ||
#define OPENBLAS_CONFIG_H | ||
/* Configuration macros, all carrying the OPENBLAS_ prefix. Later sections of
   this header test OPENBLAS___64BIT__, OPENBLAS_NEEDBUNDERSCORE and
   OPENBLAS_OS_LINUX; the remaining macros are not referenced again here. */
#define OPENBLAS_OS_LINUX 1 | ||
#define OPENBLAS_ARCH_X86_64 1 | ||
#define OPENBLAS_C_GCC 1 | ||
#define OPENBLAS___64BIT__ 1 | ||
#define OPENBLAS_HAVE_C11 1 | ||
#define OPENBLAS_PTHREAD_CREATE_FUNC pthread_create | ||
#define OPENBLAS_BUNDERSCORE _ | ||
#define OPENBLAS_NEEDBUNDERSCORE 1 | ||
#define OPENBLAS_HASWELL | ||
/* Cache and DTB parameters. NOTE(review): sizes are presumably in bytes and
   DTB presumably refers to the data TLB -- confirm against the generator. */
#define OPENBLAS_L1_DATA_SIZE 32768 | ||
#define OPENBLAS_L1_DATA_LINESIZE 64 | ||
#define OPENBLAS_L2_SIZE 262144 | ||
#define OPENBLAS_L2_LINESIZE 64 | ||
#define OPENBLAS_DTB_DEFAULT_ENTRIES 64 | ||
#define OPENBLAS_DTB_SIZE 4096 | ||
/* Feature flags: defined without a value, so only their presence matters. */
#define OPENBLAS_HAVE_CMOV | ||
#define OPENBLAS_HAVE_MMX | ||
#define OPENBLAS_HAVE_SSE | ||
#define OPENBLAS_HAVE_SSE2 | ||
#define OPENBLAS_HAVE_SSE3 | ||
#define OPENBLAS_HAVE_SSSE3 | ||
#define OPENBLAS_HAVE_SSE4_1 | ||
#define OPENBLAS_HAVE_SSE4_2 | ||
#define OPENBLAS_HAVE_AVX | ||
#define OPENBLAS_HAVE_AVX2 | ||
#define OPENBLAS_HAVE_FMA3 | ||
#define OPENBLAS_FMA3 | ||
#define OPENBLAS_CORE_HASWELL | ||
#define OPENBLAS_CHAR_CORENAME "HASWELL" | ||
/* Per-precision local buffer sizes (S/D/C/Z prefixes). NOTE(review): units
   and consumers are not visible in this header -- confirm before changing. */
#define OPENBLAS_SLOCAL_BUFFER_SIZE 20480 | ||
#define OPENBLAS_DLOCAL_BUFFER_SIZE 32768 | ||
#define OPENBLAS_CLOCAL_BUFFER_SIZE 16384 | ||
#define OPENBLAS_ZLOCAL_BUFFER_SIZE 12288 | ||
#define OPENBLAS_GEMM_MULTITHREAD_THRESHOLD 4 | ||
/* Version string; note the leading and trailing space inside the literal. */
#define OPENBLAS_VERSION " OpenBLAS 0.3.28.dev " | ||
/*This is only for "make install" target.*/ | ||
|
||
/* When any of the WinNT, Cygwin or Interix OS macros is defined, mark the
   build as using the Windows ABI and as targeting Windows. None of those three
   macros is defined in this header, so this section is inactive unless they
   are supplied from elsewhere. */
#if defined(OPENBLAS_OS_WINNT) || defined(OPENBLAS_OS_CYGWIN_NT) || defined(OPENBLAS_OS_INTERIX) | ||
#define OPENBLAS_WINDOWS_ABI | ||
#define OPENBLAS_OS_WINDOWS | ||
|
||
/* If a DOUBLE macro already exists, define DOUBLE_DEFINED and remove DOUBLE.
   DOUBLE_DEFINED expands to the token DOUBLE, not to DOUBLE's former
   replacement text, because DOUBLE is undefined right after.
   NOTE(review): presumably avoids a clash with a platform header's DOUBLE --
   confirm. */
#ifdef DOUBLE | ||
#define DOUBLE_DEFINED DOUBLE | ||
#undef DOUBLE | ||
#endif | ||
#endif | ||
|
||
/* BLASFUNC(FUNC) yields the symbol name for routine FUNC: FUNC with a trailing
   underscore pasted on when OPENBLAS_NEEDBUNDERSCORE is defined, FUNC unchanged
   otherwise. This header defines OPENBLAS_NEEDBUNDERSCORE, so the underscore
   form is the one selected here. */
#ifdef OPENBLAS_NEEDBUNDERSCORE | ||
#define BLASFUNC(FUNC) FUNC##_ | ||
#else | ||
#define BLASFUNC(FUNC) FUNC | ||
#endif | ||
|
||
/* Select the representation of xdouble:
     - OPENBLAS_QUAD_PRECISION: a struct holding two unsigned long words;
     - OPENBLAS_EXPRECISION:    a macro for long double;
     - otherwise:               a macro for double.
   Neither selector macro is defined in this header, so xdouble is double
   unless one of them is defined elsewhere. Note that xdouble is a typedef in
   the first case and a preprocessor macro in the other two. */
#ifdef OPENBLAS_QUAD_PRECISION | ||
typedef struct { | ||
unsigned long x[2]; | ||
} xdouble; | ||
#elif defined OPENBLAS_EXPRECISION | ||
#define xdouble long double | ||
#else | ||
#define xdouble double | ||
#endif | ||
|
||
/* BLASLONG / BLASULONG: signed and unsigned "long" integer types of the
   library. On 64-bit Windows they are long long / unsigned long long; on every
   other configuration they are long / unsigned long.
   NOTE(review): presumably because long stays 32 bits wide on 64-bit Windows
   -- confirm. */
#if defined(OPENBLAS_OS_WINDOWS) && defined(OPENBLAS___64BIT__) | ||
typedef long long BLASLONG; | ||
typedef unsigned long long BLASULONG; | ||
#else | ||
typedef long BLASLONG; | ||
typedef unsigned long BLASULONG; | ||
#endif | ||
|
||
/* Unless BFLOAT16 is already defined, include <stdint.h> and declare bfloat16
   as a uint16_t, i.e. a plain 16-bit unsigned storage type. */
#ifndef BFLOAT16 | ||
#include <stdint.h> | ||
typedef uint16_t bfloat16; | ||
#endif | ||
|
||
/* blasint is BLASLONG when OPENBLAS_USE64BITINT is defined and int otherwise.
   OPENBLAS_USE64BITINT is not defined in this header, so blasint is int unless
   the macro is defined elsewhere. */
#ifdef OPENBLAS_USE64BITINT | ||
typedef BLASLONG blasint; | ||
#else | ||
typedef int blasint; | ||
#endif | ||
|
||
/* FLOATRET selection:
     - XDOUBLE or DOUBLE defined: FLOATRET is FLOAT (FLOAT itself is not
       defined in this header and must come from elsewhere);
     - otherwise, NEED_F2CCONV defined: FLOATRET is double;
     - otherwise: FLOATRET is float.
   NOTE(review): presumably the return type of real-valued routines, with the
   NEED_F2CCONV case covering an f2c-style calling convention -- confirm. */
#if defined(XDOUBLE) || defined(DOUBLE) | ||
#define FLOATRET FLOAT | ||
#else | ||
#ifdef NEED_F2CCONV | ||
#define FLOATRET double | ||
#else | ||
#define FLOATRET float | ||
#endif | ||
#endif | ||
|
||
/* Inclusion of a standard header file is needed for definition of __STDC_* | ||
predefined macros with some compilers (e.g. GCC 4.7 on Linux). This occurs | ||
as a side effect of including either <features.h> or <stdc-predef.h>. */ | ||
#include <stdio.h> | ||
|
||
/* C99 supports complex floating numbers natively, which GCC also offers as an | ||
extension since version 3.0. If neither are available, use a compatible | ||
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ | ||
/* The native branch is taken when a complex-capable compiler is detected
   (__STDC_IEC_559_COMPLEX__, C99 or later, or GCC >= 3 outside C++), the user
   has not defined FORCE_OPENBLAS_COMPLEX_STRUCT, and the compiler is not
   MSVC. */
#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ | ||
(__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) && !defined(_MSC_VER) | ||
/* Native branch: OPENBLAS_COMPLEX_C99 is defined and the openblas_complex_*
   types are _Complex types. <complex.h> is included only when not compiling
   as C++. */
#define OPENBLAS_COMPLEX_C99 | ||
#ifndef __cplusplus | ||
#include <complex.h> | ||
#endif | ||
typedef float _Complex openblas_complex_float; | ||
typedef double _Complex openblas_complex_double; | ||
typedef xdouble _Complex openblas_complex_xdouble; | ||
/* Constructors build the value as real + imag * _Complex_I. */
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I)) | ||
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I)) | ||
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I)) | ||
/* Accessors: all three precisions go through creal()/cimag(). */
#define openblas_complex_float_real(z) (creal(z)) | ||
#define openblas_complex_float_imag(z) (cimag(z)) | ||
#define openblas_complex_double_real(z) (creal(z)) | ||
#define openblas_complex_double_imag(z) (cimag(z)) | ||
#define openblas_complex_xdouble_real(z) (creal(z)) | ||
#define openblas_complex_xdouble_imag(z) (cimag(z)) | ||
#else | ||
/* Fallback branch: OPENBLAS_COMPLEX_STRUCT is defined and each complex type is
   a struct with members real and imag, in that order. */
#define OPENBLAS_COMPLEX_STRUCT | ||
typedef struct { float real, imag; } openblas_complex_float; | ||
typedef struct { double real, imag; } openblas_complex_double; | ||
typedef struct { xdouble real, imag; } openblas_complex_xdouble; | ||
/* Constructors expand to a brace-enclosed initializer list, so in this branch
   they can only appear where an initializer is accepted. */
#define openblas_make_complex_float(real, imag) {(real), (imag)} | ||
#define openblas_make_complex_double(real, imag) {(real), (imag)} | ||
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)} | ||
/* Accessors read the struct members directly. */
#define openblas_complex_float_real(z) ((z).real) | ||
#define openblas_complex_float_imag(z) ((z).imag) | ||
#define openblas_complex_double_real(z) ((z).real) | ||
#define openblas_complex_double_imag(z) ((z).imag) | ||
#define openblas_complex_xdouble_real(z) ((z).real) | ||
#define openblas_complex_xdouble_imag(z) ((z).imag) | ||
#endif | ||
|
||
/* Inclusion of Linux-specific header is needed for definition of cpu_set_t. */ | ||
/* Active in this build because OPENBLAS_OS_LINUX is defined at the top of this
   header. _GNU_SOURCE is defined only if the including file has not already
   defined it.
   NOTE(review): <stdint.h> and <stdio.h> may already have been included
   earlier in this header; feature-test macros generally have to be defined
   before the first system header is included -- confirm that defining
   _GNU_SOURCE this late has the intended effect. */
#ifdef OPENBLAS_OS_LINUX | ||
#ifndef _GNU_SOURCE | ||
#define _GNU_SOURCE | ||
#endif | ||
#include <sched.h> | ||
#endif | ||
#endif /* OPENBLAS_CONFIG_H */ |