Merge branch 'strides' into queue

* strides: Update notebooks Add support for F-ordered array in the simple fft interface (C2C and R2C). Fox use of opencl_platform in tests Fix tests using F-strided arrays. Allow to select the backend also for the basic test, and add the option to select the opencl platform. Check also for OSError in fftp.py. Should fix #21 Also display the platforma and device name along vkfft error check Non-C-contiguous arrays: add systematic test, fix DCT check, upodate some doc and outputs Add support for non C-ordered arrays (F-order or different types of strides). Supports both C2C and R2C (as long as the fast axis is transformed). Adresses #20 Update changelog More elegant re-ordering array during accuracy test, when necessary During accuracy tests, make sure the array after an R2C transform still has the fastest axis along the last dimension (X) - since this is not the case after a 2D numpy rfftn transform. Fixes #19
vincefn · Jan 6, 2023 · f8c1bde · f8c1bde
2 parents b81f9bc + c0d333b
commit f8c1bde
Show file tree

Hide file tree

Showing 12 changed files with 808 additions and 168 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -1,3 +1,11 @@
+Version 2023.1 (2021-XX-XX)
+-----------------------------
+* Fix R2C tests when using numpy (scipy unavailable) [#19]
+* Add support for F-ordered arrays (C2C and R2C)
+* Allow selection of backend for non-systemetic pvkfft-test
+* Add parameter for the OpenCL platform in pyvkfft-test
+* Fix simple fft interface import when only pycuda is used
+
 Version 2022.1.1 (2022-02-14)
 -----------------------------
 * Correct the dtype of the returned array for fft.rfftn() and fft.irfftn()

diff --git a/README.rst b/README.rst
@@ -79,14 +79,16 @@ Features
 - array can be have more dimensions than the FFT (batch transforms).
 - arbitrary array size, using Bluestein algorithm for prime numbers>13 (note that in this case
   the performance can be significantly lower, up to ~4x, depending on the transform size,
-  see example performance plot below)
+  see example performance plot below). Now also uses Rader's FFT algorithm for primes from
+  17 up to max shared memory length (~10000, see VkFFT's doc for details)
 - transform along a given list of axes - this requires that after collapsing
   non-transformed axes, the last transformed axis is at most along the 3rd dimension,
   e.g. the following axes are allowed: (-2,-3), (-1,-3), (-1,-4), (-4,-5),...
   but not (-2, -4), (-1, -3, -4) or (-2, -3, -4).
   This is not allowed for R2C transforms.
 - normalisation=0 (array L2 norm * array size on each transform) and 1 (the backward
   transform divides the L2 norm by the array size, so FFT*iFFT restores the original array)
+- Support for C (default) and F-ordered arrays, for C2C and R2C transforms
 - unit tests for all transforms: see test sub-directory. Note that these take a **long**
   time to finish due to the exhaustive number of sub-tests.
 - Note that out-of-place C2R transform currently destroys the complex array for FFT dimensions >=2

diff --git a/examples/pyvkfft-tests-CUDA.ipynb b/examples/pyvkfft-tests-CUDA.ipynb
@@ -68,9 +68,14 @@
     "\n",
     "import pyvkfft.cuda\n",
     "from pyvkfft.cuda import VkFFTApp\n",
+    "from pyvkfft.fft import fftn as vkfftn, ifftn as vkifftn, rfftn as vkrfftn, irfftn as vkirfftn\n",
+    "from pyvkfft.accuracy import l2, li\n",
     "import matplotlib.pyplot as plt\n",
     "from matplotlib.colors import LogNorm\n",
-    "from scipy.misc import ascent\n",
+    "try:\n",
+    "    from scipy.datasets import ascent\n",
+    "except:\n",
+    "    from scipy.misc import ascent\n",
     "import numpy as np\n",
     "from scipy.fft import fftn, ifftn, fftshift, rfftn, irfftn, dctn, idctn\n",
     "import timeit\n"
@@ -489,6 +494,311 @@
     "    print()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Complex<->Complex, inplace, F-ordering"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "d0 = np.asarray(ascent().astype(np.complex64), order='F')\n",
+    "for axes in [None, (-1,), (-2,)]:\n",
+    "    d = to_gpu(d0)\n",
+    "    app = VkFFTApp(d.shape, d.dtype, strides=d.strides, axes=axes)\n",
+    "\n",
+    "    plt.figure(figsize=(15, 5))\n",
+    "    plt.subplot(121)\n",
+    "    plt.imshow(np.fft.fftshift(abs(np.fft.fftn(d0, axes=axes))), norm=LogNorm())\n",
+    "    plt.colorbar()\n",
+    "\n",
+    "    d = app.fft(d)\n",
+    "\n",
+    "    plt.subplot(122)\n",
+    "    plt.imshow(np.fft.fftshift(abs(d.get())), norm=LogNorm())\n",
+    "    plt.colorbar()\n",
+    "\n",
+    "    d = app.ifft(d)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Complex<->Complex, out-of-place, F-ordering"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "d0 = np.asarray(ascent().astype(np.complex64), order='F')\n",
+    "axes = (-1,)\n",
+    "d1 = to_gpu(d0)\n",
+    "d2 = gpu_empty_like(d1)\n",
+    "app = VkFFTApp(d1.shape, d1.dtype, strides=d1.strides, axes=axes, inplace=False)\n",
+    "\n",
+    "plt.figure(figsize=(15, 5))\n",
+    "plt.subplot(121)\n",
+    "plt.imshow(np.fft.fftshift(abs(np.fft.fftn(d0, axes=axes))), norm=LogNorm())\n",
+    "plt.colorbar()\n",
+    "\n",
+    "print((abs(d.get()) ** 2).sum())\n",
+    "d2 = app.fft(d1, d2)\n",
+    "\n",
+    "plt.subplot(122)\n",
+    "plt.imshow(np.fft.fftshift(abs(d2.get())), norm=LogNorm())\n",
+    "plt.colorbar()\n",
+    "\n",
+    "d1 = app.ifft(d2, d1)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Complex<->Complex, inplace, F-ordering, simple FFT interface"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "order = 'F'\n",
+    "for axes in [None, (-1,), (-2,)]:\n",
+    "    d0 = np.asarray(ascent().astype(np.complex64), order=order)\n",
+    "    d = to_gpu(d0)\n",
+    "\n",
+    "    plt.figure(figsize=(15, 5))\n",
+    "    plt.subplot(121)\n",
+    "    dn = np.fft.fftshift(abs(np.fft.fftn(d0, axes=axes)))\n",
+    "    plt.imshow(dn, norm=LogNorm())\n",
+    "    plt.colorbar()\n",
+    "\n",
+    "    d = vkfftn(d, d, axes=axes)\n",
+    "\n",
+    "    plt.subplot(122)\n",
+    "    plt.imshow(np.fft.fftshift(abs(d.get())), norm=LogNorm())\n",
+    "    plt.colorbar()\n",
+    "\n",
+    "    d = vkifftn(d, d, axes=axes)\n",
+    "    \n",
+    "    # Test accuracy on random array\n",
+    "    shape, dtype = d0.shape, d0.dtype\n",
+    "    d0 = (np.random.uniform(-0.5, 0.5, shape) + 1j * np.random.uniform(-0.5, 0.5, shape)).astype(dtype)\n",
+    "    d0 = np.asarray(d0, order=order)\n",
+    "    d = to_gpu(d0)\n",
+    "    d = vkfftn(d, d, axes=axes)\n",
+    "    dn = np.fft.fftn(d0, axes=axes)\n",
+    "    print(l2(dn, d.get()), li(dn, d.get()))\n",
+    "    d = to_gpu(d0)\n",
+    "    d = vkifftn(d, d, axes=axes)\n",
+    "    dn = np.fft.ifftn(d0, axes=axes)\n",
+    "    print(l2(dn, d.get()), li(dn, d.get()))\n",
+    "    print()\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Complex<->Complex, out-of-place, F-ordering, simple FFT interface\n",
+    "The destination array is automatically allocated"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "order = 'F'\n",
+    "for axes in [None, (-1,), (-2,)]:\n",
+    "    d0 = np.asarray(ascent().astype(np.complex64), order=order)\n",
+    "    d = to_gpu(d0)\n",
+    "\n",
+    "    plt.figure(figsize=(15, 5))\n",
+    "    plt.subplot(121)\n",
+    "    dn = np.fft.fftshift(abs(np.fft.fftn(d0, axes=axes)))\n",
+    "    plt.imshow(dn, norm=LogNorm())\n",
+    "    plt.colorbar()\n",
+    "\n",
+    "    d = vkfftn(d, axes=axes)\n",
+    "\n",
+    "    plt.subplot(122)\n",
+    "    plt.imshow(np.fft.fftshift(abs(d.get())), norm=LogNorm())\n",
+    "    plt.colorbar()\n",
+    "\n",
+    "    d = vkifftn(d, axes=axes)\n",
+    "    \n",
+    "    # Test accuracy on random array\n",
+    "    shape, dtype = d0.shape, d0.dtype\n",
+    "    d0 = (np.random.uniform(-0.5, 0.5, shape) + 1j * np.random.uniform(-0.5, 0.5, shape)).astype(dtype)\n",
+    "    d0 = np.asarray(d0, order=order)\n",
+    "    d = to_gpu(d0)\n",
+    "    d = vkfftn(d, axes=axes)\n",
+    "    dn = np.fft.fftn(d0, axes=axes)\n",
+    "    print(l2(dn, d.get()), li(dn, d.get()))\n",
+    "    d = to_gpu(d0)\n",
+    "    d = vkifftn(d, axes=axes)\n",
+    "    dn = np.fft.ifftn(d0, axes=axes)\n",
+    "    print(l2(dn, d.get()), li(dn, d.get()))\n",
+    "    print()\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Complex<->Complex, out-of-place pre-allocated, F-ordering, simple FFT interface\n",
+    "The destination array is pre-allocated here"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "order = 'F'\n",
+    "for axes in [None, (-1,), (-2,)]:\n",
+    "    d0 = np.asarray(ascent().astype(np.complex64), order=order)\n",
+    "    d = to_gpu(d0)\n",
+    "    dest = gpu_empty_like(d)\n",
+    "\n",
+    "    plt.figure(figsize=(15, 5))\n",
+    "    plt.subplot(121)\n",
+    "    dn = np.fft.fftshift(abs(np.fft.fftn(d0, axes=axes)))\n",
+    "    plt.imshow(dn, norm=LogNorm())\n",
+    "    plt.colorbar()\n",
+    "\n",
+    "    dest = vkfftn(d, dest, axes=axes)\n",
+    "\n",
+    "    plt.subplot(122)\n",
+    "    plt.imshow(np.fft.fftshift(abs(dest.get())), norm=LogNorm())\n",
+    "    plt.colorbar()\n",
+    "\n",
+    "    dest = vkifftn(d, axes=axes)\n",
+    "    \n",
+    "    # Test accuracy on random array\n",
+    "    shape, dtype = d0.shape, d0.dtype\n",
+    "    d0 = (np.random.uniform(-0.5, 0.5, shape) + 1j * np.random.uniform(-0.5, 0.5, shape)).astype(dtype)\n",
+    "    d0 = np.asarray(d0, order=order)\n",
+    "    \n",
+    "    d = to_gpu(d0)\n",
+    "    dest = vkfftn(d, dest, axes=axes)\n",
+    "    dn = np.fft.fftn(d0, axes=axes)\n",
+    "    print(l2(dn, dest.get()), li(dn, dest.get()))\n",
+    "    \n",
+    "    d = to_gpu(d0)\n",
+    "    dest = vkifftn(d, dest, axes=axes)\n",
+    "    dn = np.fft.ifftn(d0, axes=axes)\n",
+    "    print(l2(dn, dest.get()), li(dn, dest.get()))\n",
+    "    print()\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Real<->Complex (half-Hermitian) inplace transform, F-ordered, simple fft interface"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# single precision, R2C inplace, # F order\n",
+    "order = 'F'\n",
+    "d0 = np.zeros((514, 512), dtype=np.float32)\n",
+    "d0[:512] = ascent()\n",
+    "d0 = np.asarray(d0, order=order)\n",
+    "\n",
+    "d = to_gpu(d0)\n",
+    "\n",
+    "d = vkrfftn(d, d)\n",
+    "\n",
+    "dn = rfftn(d0[:-2], axes=(-1,-2))  # Need to force the Hermitian axis as the fast one\n",
+    "print(dn.shape)\n",
+    "print(np.allclose(d.get(), dn, rtol=1e-6, atol=dn.max() * 1e-6))\n",
+    "\n",
+    "plt.figure(figsize=(15,5), dpi=100)\n",
+    "plt.subplot(121)\n",
+    "plt.imshow(fftshift(abs(d.get()), axes=0), norm=LogNorm(vmin=100))\n",
+    "plt.colorbar()\n",
+    "plt.title(\"R->C\")\n",
+    "plt.subplot(122)\n",
+    "plt.imshow(fftshift(abs(dn), axes=0), norm=LogNorm(vmin=100))\n",
+    "plt.colorbar()\n",
+    "plt.title(\"R->C (numpy)\")\n",
+    "\n",
+    "d = vkirfftn(d, d)\n",
+    "print(np.allclose(d.get()[:-2], d0[:-2], rtol=1e-6, atol = 255e-6))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "d.strides , dn.strides"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Real<->Complex (half-Hermitian) out-of-place, F-ordered, simple fft interface"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# single precision, R2C inplace, # F order\n",
+    "order = 'F'\n",
+    "d0 = ascent().astype(np.float32)\n",
+    "d0 = np.asarray(d0, order=order)\n",
+    "\n",
+    "d = to_gpu(d0)\n",
+    "\n",
+    "d = vkrfftn(d)\n",
+    "\n",
+    "dn = rfftn(d0, axes=(-1,-2))  # Need to force the Hermitian axis as the fast one\n",
+    "print(dn.shape, d.shape)\n",
+    "print(np.allclose(d.get(), dn, rtol=1e-6, atol=dn.max() * 1e-6))\n",
+    "\n",
+    "plt.figure(figsize=(15,5), dpi=100)\n",
+    "plt.subplot(121)\n",
+    "plt.imshow(fftshift(abs(d.get()), axes=0), norm=LogNorm(vmin=100))\n",
+    "plt.colorbar()\n",
+    "plt.title(\"R->C\")\n",
+    "plt.subplot(122)\n",
+    "plt.imshow(fftshift(abs(dn), axes=0), norm=LogNorm(vmin=100))\n",
+    "plt.colorbar()\n",
+    "plt.title(\"R->C (numpy)\")\n",
+    "\n",
+    "d = vkirfftn(d)\n",
+    "print(np.allclose(d.get(), d0, rtol=1e-6, atol = 255e-6))\n"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -499,9 +809,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "py39-pyvkfft-test-conda",
    "language": "python",
-   "name": "python3"
+   "name": "py39-pyvkfft-test-conda"
   },
   "language_info": {
    "codemirror_mode": {
@@ -513,7 +823,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.6"
+   "version": "3.9.15"
   }
  },
  "nbformat": 4,