Add OpenBLAS 0.3.28.dev release

qompassai · Aug 16, 2024 · 16c5143 · 16c5143
1 parent e18440b
commit 16c5143
Show file tree

Hide file tree

Showing 8 changed files with 200 additions and 0 deletions.
diff --git a/NVIDIA/OpenBLAS-0.3.28.dev.tar.gz b/NVIDIA/OpenBLAS-0.3.28.dev.tar.gz
diff --git a/NVIDIA/OpenBLAS/README.md b/NVIDIA/OpenBLAS/README.md
@@ -0,0 +1,53 @@
+
+
+# OpenBLAS: High-Performance Basic Linear Algebra Subprograms
+
+OpenBLAS is an optimized implementation of BLAS (Basic Linear Algebra Subprograms) based on GotoBLAS2 1.13 BSD version. It provides a set of low-level routines for performing common linear algebra operations such as vector addition, scalar multiplication, dot products, linear combinations, and matrix multiplication.
+
+## Why OpenBLAS Matters for GenAI Workloads
+
+1. Performance: OpenBLAS is highly optimized for various CPU architectures, providing near-optimal performance for linear algebra operations crucial in machine learning and AI algorithms.
+
+2. Foundation for AI Frameworks: Many popular AI and machine learning frameworks (e.g., TensorFlow, PyTorch) rely on BLAS libraries like OpenBLAS for their core computations.
+
+3. Efficient Matrix Operations: GenAI models, particularly large language models, heavily depend on matrix multiplications and other linear algebra operations. OpenBLAS accelerates these computations, significantly reducing training and inference times.
+
+4. Hardware Optimization: This build is optimized for Intel Haswell architecture, ensuring excellent performance on compatible systems.
+
+5. Multi-threading Support: With OpenMP support, OpenBLAS can efficiently utilize multi-core processors, further speeding up AI workloads.
+
+6. Memory Efficiency: OpenBLAS implements algorithms that make efficient use of CPU caches, crucial for handling the large matrices involved in GenAI models.
+
+By using OpenBLAS, developers and researchers can significantly accelerate their GenAI workflows, from data preprocessing to model training and inference, ultimately enabling faster iteration and more complex models.
+
+
+# Qompass OpenBLAS Release
+
+Version: 0.3.28.dev, compiled on an Arch Linux machine with an x86_64 processor
+
+## Build Instructions
+
+1. Clone the OpenBLAS repository:
+   ```
+   git clone https://github.com/xianyi/OpenBLAS.git
+   cd OpenBLAS
+   ```
+
+2. Build OpenBLAS:
+   ```
+   make TARGET=HASWELL USE_OPENMP=1
+   ```
+
+3. Install OpenBLAS:
+   ```
+   sudo make PREFIX=/usr/local install
+   ```
+
+## Notes
+
+- This build was optimized for Intel Haswell architecture.
+- OpenMP support is enabled for multi-threading.
+- The library is installed in /usr/local/lib and headers in /usr/local/include.
+
+
+
diff --git a/NVIDIA/OpenBLAS/libopenblas.a b/NVIDIA/OpenBLAS/libopenblas.a
@@ -0,0 +1 @@
+libopenblas_haswellp-r0.3.28.dev.a
diff --git a/NVIDIA/OpenBLAS/libopenblas.so b/NVIDIA/OpenBLAS/libopenblas.so
@@ -0,0 +1 @@
+libopenblas_haswellp-r0.3.28.dev.so
diff --git a/NVIDIA/OpenBLAS/libopenblas.so.0 b/NVIDIA/OpenBLAS/libopenblas.so.0
@@ -0,0 +1 @@
+libopenblas_haswellp-r0.3.28.dev.so
diff --git a/NVIDIA/OpenBLAS/libopenblas_haswellp-r0.3.28.dev.a b/NVIDIA/OpenBLAS/libopenblas_haswellp-r0.3.28.dev.a
diff --git a/NVIDIA/OpenBLAS/libopenblas_haswellp-r0.3.28.dev.so b/NVIDIA/OpenBLAS/libopenblas_haswellp-r0.3.28.dev.so
diff --git a/NVIDIA/OpenBLAS/openblas_config.h b/NVIDIA/OpenBLAS/openblas_config.h
@@ -0,0 +1,144 @@
+#ifndef OPENBLAS_CONFIG_H /* include guard; closed by the final #endif of this header */
+#define OPENBLAS_CONFIG_H
+#define OPENBLAS_OS_LINUX 1 /* tested later in this header to decide whether <sched.h> is included */
+#define OPENBLAS_ARCH_X86_64 1
+#define OPENBLAS_C_GCC 1
+#define OPENBLAS___64BIT__ 1 /* tested together with OPENBLAS_OS_WINDOWS when BLASLONG is selected */
+#define OPENBLAS_HAVE_C11 1
+#define OPENBLAS_PTHREAD_CREATE_FUNC pthread_create
+#define OPENBLAS_BUNDERSCORE _ /* NOTE(review): BLASFUNC in this header hard-codes the `_` suffix and does not use this macro */
+#define OPENBLAS_NEEDBUNDERSCORE 1 /* tested with #ifdef to select the BLASFUNC variant */
+#define OPENBLAS_HASWELL /* defined with an empty replacement list: only its presence can be tested */
+#define OPENBLAS_L1_DATA_SIZE 32768 /* cache/TLB parameters follow; units are not stated here (presumably bytes) - TODO confirm */
+#define OPENBLAS_L1_DATA_LINESIZE 64
+#define OPENBLAS_L2_SIZE 262144
+#define OPENBLAS_L2_LINESIZE 64
+#define OPENBLAS_DTB_DEFAULT_ENTRIES 64
+#define OPENBLAS_DTB_SIZE 4096
+#define OPENBLAS_HAVE_CMOV /* presence-only feature flags follow, all defined empty */
+#define OPENBLAS_HAVE_MMX 
+#define OPENBLAS_HAVE_SSE 
+#define OPENBLAS_HAVE_SSE2 
+#define OPENBLAS_HAVE_SSE3 
+#define OPENBLAS_HAVE_SSSE3 
+#define OPENBLAS_HAVE_SSE4_1 
+#define OPENBLAS_HAVE_SSE4_2 
+#define OPENBLAS_HAVE_AVX 
+#define OPENBLAS_HAVE_AVX2 
+#define OPENBLAS_HAVE_FMA3 
+#define OPENBLAS_FMA3 
+#define OPENBLAS_CORE_HASWELL 
+#define OPENBLAS_CHAR_CORENAME "HASWELL" /* core name as a string literal */
+#define OPENBLAS_SLOCAL_BUFFER_SIZE 20480 /* four *LOCAL_BUFFER_SIZE values; presumably one per precision, units not stated - TODO confirm */
+#define OPENBLAS_DLOCAL_BUFFER_SIZE 32768
+#define OPENBLAS_CLOCAL_BUFFER_SIZE 16384
+#define OPENBLAS_ZLOCAL_BUFFER_SIZE 12288
+#define OPENBLAS_GEMM_MULTITHREAD_THRESHOLD 4 /* NOTE(review): meaning of this threshold is not visible in this header */
+#define OPENBLAS_VERSION " OpenBLAS 0.3.28.dev " /* note the leading and trailing space inside the literal */
+/* This is only for the "make install" target. */
+
+#if defined(OPENBLAS_OS_WINNT) || defined(OPENBLAS_OS_CYGWIN_NT) || defined(OPENBLAS_OS_INTERIX) /* any one of these three OS flags enables the Windows settings below */
+#define OPENBLAS_WINDOWS_ABI
+#define OPENBLAS_OS_WINDOWS /* tested later in this header when the width of BLASLONG is chosen */
+
+#ifdef DOUBLE /* a DOUBLE macro defined by the includer is removed here */
+#define DOUBLE_DEFINED DOUBLE /* expands to the bare token DOUBLE, not to DOUBLE's former replacement text, because DOUBLE is #undef'd on the next line */
+#undef  DOUBLE
+#endif
+#endif
+
+#ifdef OPENBLAS_NEEDBUNDERSCORE /* BLASFUNC(name) maps a routine name to its symbol name */
+#define BLASFUNC(FUNC) FUNC##_ /* token-pastes a trailing underscore: BLASFUNC(dgemm) -> dgemm_ */
+#else
+#define BLASFUNC(FUNC) FUNC /* name is used unchanged */
+#endif
+
+#ifdef OPENBLAS_QUAD_PRECISION /* xdouble is chosen from three alternatives depending on the precision flags */
+typedef struct {
+  unsigned long x[2]; /* two unsigned long words; their interpretation is not specified in this header */
+}  xdouble;
+#elif defined OPENBLAS_EXPRECISION
+#define xdouble long double /* a macro rather than a typedef in this branch and in the default branch */
+#else
+#define xdouble double /* default when neither precision flag is defined */
+#endif
+
+#if defined(OPENBLAS_OS_WINDOWS) && defined(OPENBLAS___64BIT__) /* 64-bit Windows builds use long long; presumably because long is 32 bits there - TODO confirm */
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG; /* every other configuration uses plain long / unsigned long */
+typedef unsigned long BLASULONG;
+#endif
+
+#ifndef BFLOAT16 /* NOTE(review): BFLOAT16 is never defined in this header; presumably supplied by the including code - confirm */
+#include <stdint.h>
+typedef uint16_t bfloat16; /* bfloat16 is an alias for the 16-bit unsigned integer type */
+#endif
+
+#ifdef OPENBLAS_USE64BITINT /* blasint follows BLASLONG when this flag is defined, otherwise it is plain int */
+typedef BLASLONG blasint;
+#else
+typedef int blasint; /* this header does not define OPENBLAS_USE64BITINT itself */
+#endif
+
+#if defined(XDOUBLE) || defined(DOUBLE) /* FLOATRET: selected from XDOUBLE / DOUBLE / NEED_F2CCONV, none of which is defined in this header */
+#define FLOATRET	FLOAT /* NOTE(review): FLOAT is not defined in this header; it must come from the including code */
+#else
+#ifdef NEED_F2CCONV
+#define FLOATRET	double /* presumably the f2c convention of returning single-precision results as double - TODO confirm */
+#else
+#define FLOATRET	float
+#endif
+#endif
+
+/* Inclusion of a standard header file is needed for definition of __STDC_*
+   predefined macros with some compilers (e.g. GCC 4.7 on Linux).  This occurs
+   as a side effect of including either <features.h> or <stdc-predef.h>.
+   The __STDC_* macros are tested by the complex-type selection that follows. */
+#include <stdio.h> /* included for that side effect; no stdio declaration is used in this header */
+
+/* C99 supports complex floating numbers natively, which GCC also offers as an
+   extension since version 3.0.  If neither are available, use a compatible
+   structure as fallback (see Clause 6.2.5.13 of the C99 standard).
+   Both branches define the same set of names: the three openblas_complex_*
+   types, the openblas_make_complex_*() constructors and the *_real()/*_imag()
+   accessors.  Defining FORCE_OPENBLAS_COMPLEX_STRUCT, or compiling with
+   _MSC_VER defined, always selects the structure fallback. */
+#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
+      (__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) && !defined(_MSC_VER)
+  #define OPENBLAS_COMPLEX_C99 /* marker: native _Complex types are in use */
+#ifndef __cplusplus /* NOTE(review): for C++ this header does not itself declare _Complex_I, creal or cimag used by the macros below */
+  #include <complex.h>
+#endif
+  typedef float _Complex openblas_complex_float;
+  typedef double _Complex openblas_complex_double;
+  typedef xdouble _Complex openblas_complex_xdouble;
+  #define openblas_make_complex_float(real, imag)    ((real) + ((imag) * _Complex_I)) /* arithmetic expression: real + imag*i */
+  #define openblas_make_complex_double(real, imag)   ((real) + ((imag) * _Complex_I))
+  #define openblas_make_complex_xdouble(real, imag)  ((real) + ((imag) * _Complex_I))
+  #define openblas_complex_float_real(z)             (creal(z)) /* NOTE(review): all three precisions call creal()/cimag(), the double variants - confirm this is intended for float and xdouble */
+  #define openblas_complex_float_imag(z)             (cimag(z))
+  #define openblas_complex_double_real(z)            (creal(z))
+  #define openblas_complex_double_imag(z)            (cimag(z))
+  #define openblas_complex_xdouble_real(z)           (creal(z))
+  #define openblas_complex_xdouble_imag(z)           (cimag(z))
+#else
+  #define OPENBLAS_COMPLEX_STRUCT /* marker: the two-member struct fallback is in use */
+  typedef struct { float real, imag; } openblas_complex_float;
+  typedef struct { double real, imag; } openblas_complex_double;
+  typedef struct { xdouble real, imag; } openblas_complex_xdouble;
+  #define openblas_make_complex_float(real, imag)    {(real), (imag)} /* brace initializer list, so usable only where an initializer is allowed */
+  #define openblas_make_complex_double(real, imag)   {(real), (imag)}
+  #define openblas_make_complex_xdouble(real, imag)  {(real), (imag)}
+  #define openblas_complex_float_real(z)             ((z).real) /* accessors are plain member reads in this branch */
+  #define openblas_complex_float_imag(z)             ((z).imag)
+  #define openblas_complex_double_real(z)            ((z).real)
+  #define openblas_complex_double_imag(z)            ((z).imag)
+  #define openblas_complex_xdouble_real(z)           ((z).real)
+  #define openblas_complex_xdouble_imag(z)           ((z).imag)
+#endif
+
+/* Inclusion of Linux-specific header is needed for definition of cpu_set_t.
+   NOTE(review): _GNU_SOURCE is defined here only after <stdio.h> has already
+   been included earlier in this header; feature-test macros normally have to
+   be defined before the first system header to take effect - confirm that
+   <sched.h> still exposes everything callers need. */
+#ifdef OPENBLAS_OS_LINUX
+#ifndef _GNU_SOURCE
+ #define _GNU_SOURCE
+#endif
+#include <sched.h>
+#endif
+#endif /* OPENBLAS_CONFIG_H */