Merge branch 'master' into numpy_deprecated_dtypes
jaycedowell committed Aug 22, 2023
2 parents a6f2338 + 5611735, commit 72b99db
Showing 78 changed files with 7,968 additions and 1,012 deletions.
61 changes: 46 additions & 15 deletions .github/workflows/main.yml
@@ -1,13 +1,14 @@
---
name: "Build and Test"
on: [push, pull_request]
"on": [push, pull_request]
jobs:
pre_build:
runs-on: ubuntu-latest
outputs:
should_skip: ${{ steps.skip_check.outputs.should_skip }}
steps:
- id: skip_check
uses: fkirc/skip-duplicate-actions@v3.4.1
uses: fkirc/skip-duplicate-actions@v5
with:
concurrent_skipping: 'same_content'
skip_after_successful_duplicate: 'true'
@@ -19,14 +20,16 @@ jobs:
strategy:
matrix:
os: [self-hosted, ubuntu-latest, macos-latest]
python-version: ['2.7', '3.6', '3.8']
exclude:
python-version: ['3.8', '3.10']
include:
- os: ubuntu-20.04
python-version: '3.6'
- os: macos-latest
python-version: 2.7
python-version: '3.6'
fail-fast: false
steps:
- name: "Software Install - Ubuntu"
if: ${{ matrix.os == 'ubuntu-latest' || matrix.os == 'self-hosted' }}
if: ${{ matrix.os == 'ubuntu-latest' || matrix.os == 'ubuntu-20.04' || matrix.os == 'self-hosted' }}
run: |
sudo apt-get update && \
sudo apt-get install -y \
@@ -48,10 +51,9 @@
gawk \
gnu-sed \
pkg-config
- uses: actions/setup-python@v2
- uses: actions/setup-python@v4.3.0
with:
python-version: ${{ matrix.python-version }}
- uses: actions/checkout@v2
- name: "Software Install - Python"
run: python -m pip install \
setuptools \
@@ -62,12 +64,25 @@
pint \
graphviz \
ctypesgen==1.0.2 \
pylint \
coverage
- name: "Software Install - Python, part 2"
if: ${{ matrix.os == 'self-hosted' && matrix.python-version != '2.7' }}
if: ${{ matrix.os == 'self-hosted' }}
# Setting CPLUS_INCLUDE_PATH helps pycuda find the right
# Python header files <pyconfig.h> to use with its embedded
# subset of Boost.
env:
CPLUS_INCLUDE_PATH: "${{ env.pythonLocation }}/include/python\
${{ matrix.python-version }}"
run: python -m pip install \
cupy \
pycuda
cupy-cuda12x \
pycuda \
numba \
jupyterlab \
jupyter_client \
nbformat \
nbconvert
- uses: actions/checkout@v3
- name: "Build and Install"
run: |
./configure
@@ -81,16 +96,32 @@
cd test
bash ./download_test_data.sh
python -c "from bifrost import telemetry; telemetry.disable()"
coverage run --source=bifrost.ring,bifrost,bifrost.pipeline -m unittest discover
coverage run --source=bifrost.ring,bifrost,bifrost.pipeline \
-m unittest discover
coverage xml
- name: "Test, part 2"
if: ${{ matrix.os == 'self-hosted' }}
env:
LD_LIBRARY_PATH: /usr/local/lib:${{ env.LD_LIBRARY_PATH }}
run: |
cd testbench
python generate_test_data.py
coverage run --source=bifrost.ring,bifrost,bifrost.pipeline test_file_read_write.py
coverage run --source=bifrost.ring,bifrost,bifrost.pipeline test_fft.py
coverage run --source=bifrost.ring,bifrost,bifrost.pipeline your_first_block.py
python download_breakthrough_listen_data.py -y
coverage run --source=bifrost.ring,bifrost,bifrost.pipeline test_guppi.py
coverage run --source=bifrost.ring,bifrost,bifrost.pipeline test_guppi_reader.py
coverage run --source=bifrost.ring,bifrost,bifrost.pipeline test_fdmt.py ./testdata/pulsars/blc0_guppi_57407_61054_PSR_J1840%2B5640_0004.fil
coverage xml
- name: "Upload Coverage"
env:
UNITTEST_OS: ${{ matrix.os }}
UNITTEST_PY: ${{ matrix.python-version }}
UNITTEST_OS: ${{ matrix.os }}
UNITTEST_PY: ${{ matrix.python-version }}
if: ${{ matrix.os == 'self-hosted' && matrix.python-version == '3.8' }}
uses: codecov/codecov-action@v2
with:
directory: ./test/
files: ./test/coverage.xml, ./testbench/coverage.xml
env_vars: UNITTEST_OS,UNITTEST_PY
fail_ci_if_error: false
verbose: true
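The CPLUS_INCLUDE_PATH trick in the workflow above can be reproduced outside of GitHub Actions, where `env.pythonLocation` is not set; a minimal shell sketch, assuming the headers are located via `sysconfig` (the lookup shown here is illustrative and not part of this commit):

```sh
# Point the C++ preprocessor at the active interpreter's header
# directory so pycuda's embedded Boost subset can find <pyconfig.h>.
export CPLUS_INCLUDE_PATH="$(python -c 'import sysconfig; print(sysconfig.get_paths()["include"])')"
python -m pip install pycuda
```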
3 changes: 3 additions & 0 deletions CHANGELOG
@@ -5,6 +5,9 @@
* Added horizontal scrolling for long command names to like_bmon.py
* Use std::filesystem where possible for file and directory management
* Fixed a problem in bifrost.ndarray.copy with arrays that are not C contiguous
* Added set_stream and get_stream to bifrost.device to help control which CUDA stream is used
* Added bifrost.device.ExternalStream as a context manager to help with mixing Bifrost and cupy/pycuda
* Fixed a problem calling bifrost.reduce on a slice of an array

0.10.0
* Switched over to an autotools-based build system
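The stream-control entries in the CHANGELOG above suggest usage along these lines; a minimal Python sketch, where the exact signatures are assumptions based only on the entry wording (in particular, that `ExternalStream` accepts a cupy stream object):

```python
# Sketch only: the ExternalStream and get_stream names come from the
# CHANGELOG; accepting a cupy stream object is an assumption.
import cupy as cp
import bifrost.device as bf_device

stream = cp.cuda.Stream(non_blocking=True)
with bf_device.ExternalStream(stream):
    # Bifrost work issued here is intended to share cupy's stream,
    # keeping Bifrost and cupy kernels ordered relative to each other.
    pass
print(bf_device.get_stream())  # the stream Bifrost currently targets
```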
5 changes: 5 additions & 0 deletions README.md
@@ -9,6 +9,11 @@ A stream processing framework for high-throughput applications.
### [![Paper](https://img.shields.io/badge/arXiv-1708.00720-blue.svg)](https://arxiv.org/abs/1708.00720)

### [Bifrost Documentation](http://ledatelescope.github.io/bifrost/)

See also the [Bifrost tutorial notebooks](tutorial/), which can be run
on Google Colab or any Jupyter environment where Bifrost is installed
(and a GPU is available).

### [Bifrost Roadmap](ROADMAP.md)

## A Simple Pipeline
9 changes: 8 additions & 1 deletion ROADMAP.md
@@ -11,16 +11,23 @@ stated, the items on this page have not yet been developed.
* Harmonic summing, folding
* Calibration and imaging algorithms
* Gridding/degridding, compressive sensing, CLEAN
* IO (source/sink) blocks for additional astronomy/audio/generic file formats
* I/O (source/sink) blocks for additional astronomy/audio/generic file formats

## Pipeline features

* Method of sending data between different servers
* Remote control mechanisms
* Pipeline status and performance monitoring
* Streaming data visualisation

## Backend features

* Improved packet capture/transmission framework
* Support for InfiniBand verbs
* CPU backends for existing CUDA-only algorithms
* Support for inter-process shared memory rings
* Optimisations for low-latency applications

## Platform and dependency updates

* Python 2.x will no longer be supported after the end of 2022.
149 changes: 124 additions & 25 deletions config/cuda.m4
@@ -22,6 +22,9 @@ AC_DEFUN([AX_CHECK_CUDA],
AC_SUBST([CUDA_HAVE_CXX11], [0])
AC_SUBST([GPU_MIN_ARCH], [0])
AC_SUBST([GPU_MAX_ARCH], [0])
AC_SUBST([GPU_SHAREDMEM], [0])
AC_SUBST([GPU_PASCAL_MANAGEDMEM], [0])
AC_SUBST([GPU_EXP_PINNED_ALLOC], [1])
if test "$enable_cuda" != "no"; then
AC_SUBST([HAVE_CUDA], [1])
@@ -31,39 +34,31 @@ AC_DEFUN([AX_CHECK_CUDA],
fi
if test "$HAVE_CUDA" = "1"; then
AC_MSG_CHECKING([for a working CUDA installation])
AC_MSG_CHECKING([for a working CUDA 10+ installation])
CXXFLAGS_save="$CXXFLAGS"
LDFLAGS_save="$LDFLAGS"
LIBS_save="$LIBS"
ac_compile='$NVCC -c $NVCCFLAGS conftest.$ac_ext >&5'
AC_COMPILE_IFELSE([
LDFLAGS="-L$CUDA_HOME/lib64 -L$CUDA_HOME/lib"
LIBS="$LIBS -lcuda -lcudart"
ac_link='$NVCC -o conftest$ac_exeext $NVCCFLAGS $LDFLAGS $LIBS conftest.$ac_ext >&5'
AC_LINK_IFELSE([
AC_LANG_PROGRAM([[
#include <cuda.h>
#include <cuda_runtime.h>]],
[[cudaMalloc(0, 0);]])],
[],
[AC_SUBST([HAVE_CUDA], [0])])
if test "$HAVE_CUDA" = "1"; then
LDFLAGS="-L$CUDA_HOME/lib64 -L$CUDA_HOME/lib"
LIBS="$LIBS -lcuda -lcudart"
ac_link='$NVCC -o conftest$ac_exeext $NVCCFLAGS $LDFLAGS $LIBS conftest.$ac_ext >&5'
AC_LINK_IFELSE([
AC_LANG_PROGRAM([[
#include <cuda.h>
#include <cuda_runtime.h>]],
[[cudaMalloc(0, 0);]])],
[CUDA_VERSION=$( ${NVCC} --version | ${GREP} -Po -e "release.*," | cut -d, -f1 | cut -d\ -f2 )
AC_MSG_RESULT(yes - v$CUDA_VERSION)],
[AC_MSG_RESULT(no)
AC_SUBST([HAVE_CUDA], [0])])
else
AC_MSG_RESULT(no)
AC_SUBST([HAVE_CUDA], [0])
fi
[CUDA_VERSION=$( ${NVCC} --version | ${GREP} -Po -e "release.*," | cut -d, -f1 | cut -d\ -f2 )
CUDA_MAJOR=$( echo "${CUDA_VERSION}" | cut -d. -f1 )
if test "${CUDA_MAJOR}" -ge 10; then
AC_MSG_RESULT(yes - v$CUDA_VERSION)
else
AC_MSG_RESULT(no - found v$CUDA_VERSION)
fi],
[AC_MSG_RESULT(no - build failure)
AC_SUBST([HAVE_CUDA], [0])])
CXXFLAGS="$CXXFLAGS_save"
LDFLAGS="$LDFLAGS_save"
@@ -104,6 +99,33 @@ AC_DEFUN([AX_CHECK_CUDA],
[with_nvcc_flags='-O3 -Xcompiler "-Wall"'])
AC_SUBST(NVCCFLAGS, $with_nvcc_flags)
AC_ARG_WITH([stream_model],
[AS_HELP_STRING([--with-stream-model],
[CUDA default stream model to use: 'legacy' or 'per-thread' (default='per-thread')])],
[],
[with_stream_model='per-thread'])
if test "$HAVE_CUDA" = "1"; then
AC_MSG_CHECKING([for different CUDA default stream models])
dsm_supported=$( ${NVCC} -h | ${GREP} -Po -e "--default-stream" )
if test "$dsm_supported" = "--default-stream"; then
if test "$with_stream_model" = "per-thread"; then
NVCCFLAGS="$NVCCFLAGS -default-stream per-thread"
AC_MSG_RESULT([yes, using 'per-thread'])
else
if test "$with_stream_model" = "legacy"; then
NVCCFLAGS="$NVCCFLAGS -default-stream legacy"
AC_MSG_RESULT([yes, using 'legacy'])
else
AC_MSG_ERROR(Invalid CUDA stream model: '$with_stream_model')
fi
fi
else
AC_MSG_RESULT([no, only the 'legacy' stream model is supported])
fi
fi
if test "$HAVE_CUDA" = "1"; then
CPPFLAGS="$CPPFLAGS -DBF_CUDA_ENABLED=1"
CXXFLAGS="$CXXFLAGS -DBF_CUDA_ENABLED=1"
@@ -202,6 +224,62 @@ AC_DEFUN([AX_CHECK_CUDA],
ar_max_valid=$(echo $ar_valid | ${SED} -e 's/.* //g;' )
AC_SUBST([GPU_MAX_ARCH], [$ar_max_valid])
AC_ARG_WITH([shared_mem],
[AS_HELP_STRING([--with-shared-mem=N],
[default GPU shared memory per block in bytes (default=detect)])],
[],
[with_shared_mem='auto'])
if test "$with_gpu_archs" = "auto"; then
AC_MSG_CHECKING([for minimum shared memory per block])
CXXFLAGS_save="$CXXFLAGS"
LDFLAGS_save="$LDFLAGS"
LIBS_save="$LIBS"
LDFLAGS="-L$CUDA_HOME/lib64 -L$CUDA_HOME/lib"
LIBS="-lcuda -lcudart"
ac_run='$NVCC -o conftest$ac_ext $LDFLAGS $LIBS conftest.$ac_ext>&5'
AC_RUN_IFELSE([
AC_LANG_PROGRAM([[
#include <cuda.h>
#include <cuda_runtime.h>
#include <iostream>
#include <fstream>
#include <set>]],
[[
std::set<int> smem;
int smemSize;
int deviceCount = 0;
cudaGetDeviceCount(&deviceCount);
if( deviceCount == 0 ) {
return 1;
}
for(int dev=0; dev<deviceCount; dev++) {
cudaSetDevice(dev);
cudaDeviceGetAttribute(&smemSize, cudaDevAttrMaxSharedMemoryPerBlock, dev);
if( smem.count(smemSize) == 0 ) {
smem.insert(smemSize);
}
}
std::ofstream fh;
fh.open("confsmem.out");
if( smem.empty() ) {
fh << 0;
} else {
fh << *smem.begin();
}
fh.close();]])],
[AC_SUBST([GPU_SHAREDMEM], [`cat confsmem.out`])
AC_MSG_RESULT([$GPU_SHAREDMEM B])],
[AC_MSG_ERROR(failed to determine a value)])
CXXFLAGS="$CXXFLAGS_save"
LDFLAGS="$LDFLAGS_save"
LIBS="$LIBS_save"
else
AC_SUBST([GPU_SHAREDMEM], [$with_shared_mem])
fi
AC_MSG_CHECKING([for Pascal-style CUDA managed memory])
cm_invalid=$( echo $GPU_ARCHS | ${SED} -e 's/\b[[1-5]][[0-9]]\b/PRE/g;' )
if ! echo $cm_invalid | ${GREP} -q PRE; then
@@ -211,7 +289,28 @@ AC_DEFUN([AX_CHECK_CUDA],
AC_SUBST([GPU_PASCAL_MANAGEDMEM], [0])
AC_MSG_RESULT([no])
fi
else
AC_SUBST([GPU_PASCAL_MANAGEDMEM], [0])
AC_MSG_CHECKING([for thrust pinned allocator support])
CXXFLAGS_save="$CXXFLAGS"
LDFLAGS_save="$LDFLAGS"
LIBS_save="$LIBS"
LDFLAGS="-L$CUDA_HOME/lib64 -L$CUDA_HOME/lib"
LIBS="-lcuda -lcudart"
ac_run='$NVCC -o conftest$ac_ext $LDFLAGS $LIBS conftest.$ac_ext>&5'
AC_RUN_IFELSE([
AC_LANG_PROGRAM([[
#include <cuda.h>
#include <cuda_runtime.h>
#include <thrust/system/cuda/memory.h>]],
[[]])],
[AC_SUBST([GPU_EXP_PINNED_ALLOC], [0])
AC_MSG_RESULT([full])],
[AC_SUBST([GPU_EXP_PINNED_ALLOC], [1])
AC_MSG_RESULT([experimental])])
CXXFLAGS="$CXXFLAGS_save"
LDFLAGS="$LDFLAGS_save"
LIBS="$LIBS_save"
fi
])
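The options added to cuda.m4 above surface as configure flags; an illustrative invocation, with values chosen only for demonstration (omitting --with-shared-mem leaves the auto-detection path active):

```sh
# Flag names come from the AS_HELP_STRINGs in the diff; values are examples.
./configure --with-stream-model=legacy --with-shared-mem=49152
```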
