Skip to content

Commit

Permalink
Add OpenBLAS 0.3.28.dev release
Browse files Browse the repository at this point in the history
  • Loading branch information
PhaedrusFlow committed Aug 16, 2024
1 parent e18440b commit 16c5143
Show file tree
Hide file tree
Showing 8 changed files with 200 additions and 0 deletions.
Binary file added NVIDIA/OpenBLAS-0.3.28.dev.tar.gz
Binary file not shown.
53 changes: 53 additions & 0 deletions NVIDIA/OpenBLAS/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# OpenBLAS: High-Performance Basic Linear Algebra Subprograms

OpenBLAS is an optimized implementation of BLAS (Basic Linear Algebra Subprograms) based on GotoBLAS2 1.13 BSD version. It provides a set of low-level routines for performing common linear algebra operations such as vector addition, scalar multiplication, dot products, linear combinations, and matrix multiplication.

## Why OpenBLAS Matters for GenAI Workloads

1. Performance: OpenBLAS is highly optimized for various CPU architectures, providing near-optimal performance for linear algebra operations crucial in machine learning and AI algorithms.

2. Foundation for AI Frameworks: Many popular AI and machine learning frameworks (e.g., TensorFlow, PyTorch) rely on BLAS libraries like OpenBLAS for their core computations.

3. Efficient Matrix Operations: GenAI models, particularly large language models, heavily depend on matrix multiplications and other linear algebra operations. OpenBLAS accelerates these computations, significantly reducing training and inference times.

4. Hardware Optimization: This build is optimized for the Intel Haswell architecture, ensuring excellent performance on compatible systems.

5. Multi-threading Support: With OpenMP support, OpenBLAS can efficiently utilize multi-core processors, further speeding up AI workloads.

6. Memory Efficiency: OpenBLAS implements algorithms that make efficient use of CPU caches, crucial for handling the large matrices involved in GenAI models.

By using OpenBLAS, developers and researchers can significantly accelerate their GenAI workflows, from data preprocessing to model training and inference, ultimately enabling faster iteration and more complex models.


# Qompass OpenBLAS Release

Version: 0.3.28.dev, compiled on an Arch Linux machine with an x86_64 processor.

## Build Instructions

1. Clone the OpenBLAS repository:
```
git clone https://github.com/xianyi/OpenBLAS.git
cd OpenBLAS
```

2. Build OpenBLAS:
```
make TARGET=HASWELL USE_OPENMP=1
```

3. Install OpenBLAS:
```
sudo make PREFIX=/usr/local install
```

## Notes

- This build was optimized for the Intel Haswell architecture.
- OpenMP support is enabled for multi-threading.
- The library is installed in /usr/local/lib and the headers in /usr/local/include.

1 change: 1 addition & 0 deletions NVIDIA/OpenBLAS/libopenblas.a
1 change: 1 addition & 0 deletions NVIDIA/OpenBLAS/libopenblas.so
1 change: 1 addition & 0 deletions NVIDIA/OpenBLAS/libopenblas.so.0
Binary file not shown.
Binary file not shown.
144 changes: 144 additions & 0 deletions NVIDIA/OpenBLAS/openblas_config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#ifndef OPENBLAS_CONFIG_H
#define OPENBLAS_CONFIG_H
/*
 * Build-time constants recorded for this particular OpenBLAS binary.
 * Every value below is fixed by the build; none of them is meant to be
 * overridden by code that includes this header.
 */

/* Target platform and toolchain flags. */
#define OPENBLAS_OS_LINUX                   1
#define OPENBLAS_ARCH_X86_64                1
#define OPENBLAS_C_GCC                      1
#define OPENBLAS___64BIT__                  1
#define OPENBLAS_HAVE_C11                   1
#define OPENBLAS_PTHREAD_CREATE_FUNC        pthread_create

/* Symbol naming: BLASFUNC() further down in this header appends a trailing
   underscore to routine names whenever OPENBLAS_NEEDBUNDERSCORE is defined. */
#define OPENBLAS_BUNDERSCORE                _
#define OPENBLAS_NEEDBUNDERSCORE            1

/* CPU core the library was configured for. */
#define OPENBLAS_HASWELL
#define OPENBLAS_CORE_HASWELL
#define OPENBLAS_CHAR_CORENAME              "HASWELL"

/* Cache and TLB parameters of that core.
   NOTE(review): the sizes are presumably expressed in bytes - confirm. */
#define OPENBLAS_L1_DATA_SIZE               32768
#define OPENBLAS_L1_DATA_LINESIZE           64
#define OPENBLAS_L2_SIZE                    262144
#define OPENBLAS_L2_LINESIZE                64
#define OPENBLAS_DTB_DEFAULT_ENTRIES        64
#define OPENBLAS_DTB_SIZE                   4096

/* Instruction-set extension flags recorded for the target. */
#define OPENBLAS_HAVE_CMOV
#define OPENBLAS_HAVE_MMX
#define OPENBLAS_HAVE_SSE
#define OPENBLAS_HAVE_SSE2
#define OPENBLAS_HAVE_SSE3
#define OPENBLAS_HAVE_SSSE3
#define OPENBLAS_HAVE_SSE4_1
#define OPENBLAS_HAVE_SSE4_2
#define OPENBLAS_HAVE_AVX
#define OPENBLAS_HAVE_AVX2
#define OPENBLAS_HAVE_FMA3
#define OPENBLAS_FMA3

/* Local buffer sizes, one per S/D/C/Z routine family.
   NOTE(review): presumably single, double, complex and double-complex
   precision respectively - confirm against the OpenBLAS sources. */
#define OPENBLAS_SLOCAL_BUFFER_SIZE         20480
#define OPENBLAS_DLOCAL_BUFFER_SIZE         32768
#define OPENBLAS_CLOCAL_BUFFER_SIZE         16384
#define OPENBLAS_ZLOCAL_BUFFER_SIZE         12288

/* GEMM multithreading threshold recorded by the build. */
#define OPENBLAS_GEMM_MULTITHREAD_THRESHOLD 4

/* Human-readable version banner (leading and trailing blanks are part of it). */
#define OPENBLAS_VERSION                    " OpenBLAS 0.3.28.dev "

/* This is only for "make install" target. */

/*
 * Windows-family targets (native NT, Cygwin, Interix) are folded into two
 * common flags: OPENBLAS_WINDOWS_ABI and OPENBLAS_OS_WINDOWS.
 */
#if defined(OPENBLAS_OS_WINNT) || defined(OPENBLAS_OS_CYGWIN_NT) || defined(OPENBLAS_OS_INTERIX)
#  define OPENBLAS_WINDOWS_ABI
#  define OPENBLAS_OS_WINDOWS

   /* On those targets a DOUBLE macro supplied by the includer is removed.
      DOUBLE_DEFINED is left behind as a marker; because DOUBLE is undefined
      right afterwards, DOUBLE_DEFINED expands to the bare token DOUBLE rather
      than to DOUBLE's previous replacement text.
      NOTE(review): presumably this avoids a clash with a platform header -
      confirm. */
#  if defined(DOUBLE)
#    define DOUBLE_DEFINED DOUBLE
#    undef DOUBLE
#  endif
#endif

/*
 * BLASFUNC(FUNC) spells the symbol name of a BLAS routine: the plain name
 * when OPENBLAS_NEEDBUNDERSCORE is not defined, otherwise the name with a
 * trailing underscore pasted on (e.g. BLASFUNC(dgemm) -> dgemm_).
 */
#ifndef OPENBLAS_NEEDBUNDERSCORE
#define BLASFUNC(FUNC) FUNC
#else
#define BLASFUNC(FUNC) FUNC##_
#endif

/*
 * xdouble: the extended-precision scalar type.
 *   OPENBLAS_QUAD_PRECISION -> a struct wrapping two unsigned long words
 *   OPENBLAS_EXPRECISION    -> long double
 *   neither                 -> double
 * In the last two cases xdouble is a macro, not a typedef.
 */
#if defined(OPENBLAS_QUAD_PRECISION)
typedef struct {
  unsigned long x[2];
} xdouble;
#elif defined(OPENBLAS_EXPRECISION)
#  define xdouble long double
#else
#  define xdouble double
#endif

/*
 * BLASLONG / BLASULONG: signed and unsigned "long" integer types of the
 * library. They map to long / unsigned long everywhere except on 64-bit
 * Windows targets, where long long / unsigned long long are used instead
 * (presumably because long is narrower than a pointer there - confirm).
 */
#if !defined(OPENBLAS_OS_WINDOWS) || !defined(OPENBLAS___64BIT__)
typedef long BLASLONG;
typedef unsigned long BLASULONG;
#else
typedef long long BLASLONG;
typedef unsigned long long BLASULONG;
#endif

/*
 * bfloat16: declared as a plain 16-bit unsigned integer, unless the includer
 * has defined the BFLOAT16 macro, in which case nothing is declared here.
 */
#if !defined(BFLOAT16)
#  include <stdint.h>
typedef uint16_t bfloat16;
#endif

/*
 * blasint: the integer type of the BLAS interface. It is a plain int unless
 * OPENBLAS_USE64BITINT is defined, in which case it follows BLASLONG.
 */
#ifndef OPENBLAS_USE64BITINT
typedef int blasint;
#else
typedef BLASLONG blasint;
#endif

/*
 * FLOATRET: return type used for routines yielding a real scalar.
 *   XDOUBLE or DOUBLE defined -> FLOAT (a macro this header does not define)
 *   NEED_F2CCONV defined      -> double
 *   otherwise                 -> float
 */
#if defined(XDOUBLE) || defined(DOUBLE)
#  define FLOATRET FLOAT
#elif defined(NEED_F2CCONV)
#  define FLOATRET double
#else
#  define FLOATRET float
#endif

/* Inclusion of a standard header file is needed for definition of __STDC_*
predefined macros with some compilers (e.g. GCC 4.7 on Linux). This occurs
as a side effect of including either <features.h> or <stdc-predef.h>. */
#include <stdio.h>

/*
 * Complex number support.
 *
 * Native `_Complex` types are used (and OPENBLAS_COMPLEX_C99 is defined) when
 * the compiler is not MSVC, FORCE_OPENBLAS_COMPLEX_STRUCT is not defined, and
 * at least one of the following holds:
 *   - __STDC_IEC_559_COMPLEX__ is defined,
 *   - __STDC_VERSION__ reports C99 or later,
 *   - the compiler is GCC 3 or newer and is not compiling C++.
 * Otherwise OPENBLAS_COMPLEX_STRUCT is defined and each complex type falls
 * back to a { real, imag } structure (see Clause 6.2.5.13 of the C99
 * standard for the layout this mirrors).
 *
 * Both variants expose the same helper macros:
 *   openblas_make_complex_<T>(real, imag)  build a value from its two parts
 *   openblas_complex_<T>_real(z)           read the real part
 *   openblas_complex_<T>_imag(z)           read the imaginary part
 * In the structure variant the make macro expands to a brace-enclosed
 * initializer list.
 */
#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
      (__GNUC__ >= 3 && !defined(__cplusplus))) && \
     !defined(FORCE_OPENBLAS_COMPLEX_STRUCT)) && !defined(_MSC_VER)

#  define OPENBLAS_COMPLEX_C99
   /* <complex.h> is pulled in for C only; C++ translation units skip it. */
#  if !defined(__cplusplus)
#    include <complex.h>
#  endif

typedef float   _Complex openblas_complex_float;
typedef double  _Complex openblas_complex_double;
typedef xdouble _Complex openblas_complex_xdouble;

#  define openblas_make_complex_float(real, imag)   ((real) + ((imag) * _Complex_I))
#  define openblas_make_complex_double(real, imag)  ((real) + ((imag) * _Complex_I))
#  define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))

#  define openblas_complex_float_real(z)   (creal(z))
#  define openblas_complex_float_imag(z)   (cimag(z))
#  define openblas_complex_double_real(z)  (creal(z))
#  define openblas_complex_double_imag(z)  (cimag(z))
#  define openblas_complex_xdouble_real(z) (creal(z))
#  define openblas_complex_xdouble_imag(z) (cimag(z))

#else

#  define OPENBLAS_COMPLEX_STRUCT

typedef struct { float   real, imag; } openblas_complex_float;
typedef struct { double  real, imag; } openblas_complex_double;
typedef struct { xdouble real, imag; } openblas_complex_xdouble;

#  define openblas_make_complex_float(real, imag)   {(real), (imag)}
#  define openblas_make_complex_double(real, imag)  {(real), (imag)}
#  define openblas_make_complex_xdouble(real, imag) {(real), (imag)}

#  define openblas_complex_float_real(z)   ((z).real)
#  define openblas_complex_float_imag(z)   ((z).imag)
#  define openblas_complex_double_real(z)  ((z).real)
#  define openblas_complex_double_imag(z)  ((z).imag)
#  define openblas_complex_xdouble_real(z) ((z).real)
#  define openblas_complex_xdouble_imag(z) ((z).imag)

#endif

/* Inclusion of Linux-specific header is needed for definition of cpu_set_t.
   On Linux builds, _GNU_SOURCE is defined here (unless the including
   translation unit has already defined it) immediately before <sched.h> is
   included.
   NOTE(review): this header may already have included <stdint.h> and
   <stdio.h> by this point, so with glibc the feature-test macro is probably
   set too late to affect what <sched.h> exposes unless the includer defined
   _GNU_SOURCE before its first system header - confirm. */
#ifdef OPENBLAS_OS_LINUX
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <sched.h>
#endif
#endif /* OPENBLAS_CONFIG_H */

0 comments on commit 16c5143

Please sign in to comment.