From 396e54f1753c4c31a0171ac19f5ada352e21cc3d Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Fri, 14 Feb 2025 14:13:59 +0100 Subject: [PATCH] replace `healpy` with `cdshealpix` (#64) * experimentation on `healpix-geo` * replace the custom neighbours search with `cdshealpix` * adapt the neighbours search tests * skip the "ring" scheme kernel tests * replace `nside2resol` with a reimplementation * reuse the `compute_ring` function from the `numpy` implementation * use `grid_info` to convert points to cell ids * fix imports * replace `healpy` with `cdshealpix` and `healpix-geo` (`healpix-geo` may be a temporary dependency, we only need it to get the new functionality in `cds-healpix-rust`) * make sure multi-dimensional cell ids still work * remove a obsolete TODO * depend on `cdshealpix` and `healpix-geo` * remove the direct dependency on `numba` * reflect the new deps in the env files * use the released version of `xdggs` * enable testing on windows (`cdshealpix` supports it, and so does `healpix-geo`) * run CI for `python=3.13` * re-enable the ring scheme tests * install `xdggs` and `cdshealpix` from PyPI (`cdshealpix` is not available for macos arm on conda-forge, yet) * don't test on `python=3.12` * trim the environment a bit * install base matplotlib * don't try to install `conda` * install `xdggs` from conda-forge * Revert "install `xdggs` from conda-forge" This reverts commit cb00dfa3fdd18fd6ba42ba915bebcce6c48ec5b8. * new location of `sphinx`' intersphinx file --- .github/workflows/ci.yaml | 10 +- ci/requirements/docs.yaml | 4 +- ci/requirements/environment.yaml | 12 +- docs/conf.py | 2 +- experimentation/neighbours.ipynb | 90 +++++++- healpix_convolution/distances.py | 33 +-- healpix_convolution/kernels/gaussian.py | 9 +- healpix_convolution/neighbours.py | 194 ++---------------- healpix_convolution/plotting.py | 4 +- healpix_convolution/tests/test_distances.py | 1 + healpix_convolution/tests/test_neighbours.py | 47 +++-- .../xarray/kernels/gaussian.py | 13 +- pyproject.toml | 4 +- 13 files changed, 162 insertions(+), 261 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index aa2751e..fb8d247 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -51,8 +51,8 @@ jobs: strategy: fail-fast: false matrix: - os: ["ubuntu-latest", "macos-latest"] # no windows, because healpy doesn't support it - python-version: ["3.11", "3.12"] + os: ["ubuntu-latest", "macos-latest", "windows-latest"] + python-version: ["3.11", "3.13"] steps: - uses: actions/checkout@v4 @@ -68,17 +68,11 @@ jobs: cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{matrix.python-version}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" create-args: >- python=${{matrix.python-version}} - conda - name: Install healpix-convolution run: | python -m pip install --no-deps -e . - - name: Version info - run: | - conda info -a - conda list - - name: Import healpix-convolution run: | python -c 'import healpix_convolution' diff --git a/ci/requirements/docs.yaml b/ci/requirements/docs.yaml index a35d627..79f8d45 100644 --- a/ci/requirements/docs.yaml +++ b/ci/requirements/docs.yaml @@ -9,10 +9,9 @@ dependencies: - sphinx-remove-toctrees - jupyter-sphinx - ipython - - numba - numpy - dask - - healpy>=1.17 + - cdshealpix - geopandas - pandas - sparse>=0.15 @@ -26,4 +25,5 @@ dependencies: - cartopy_offlinedata - pip - pip: + - healpix-geo - -e ../.. diff --git a/ci/requirements/environment.yaml b/ci/requirements/environment.yaml index 0a2941b..d2b2d3e 100644 --- a/ci/requirements/environment.yaml +++ b/ci/requirements/environment.yaml @@ -7,18 +7,14 @@ dependencies: - pytest-reportlog - pytest-cov - hypothesis - - numba - numpy - dask - - healpy>=1.17 - - geopandas - - pandas - sparse - xarray - - folium - - shapely - opt_einsum - - matplotlib + - matplotlib-base - pip - pip: - - git+https://github.com/xarray-contrib/xdggs + - xdggs + - cdshealpix + - healpix-geo diff --git a/docs/conf.py b/docs/conf.py index 79788c2..64696ff 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -95,7 +95,7 @@ intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), - "sphinx": ("https://www.sphinx-doc.org/en/stable/", None), + "sphinx": ("https://www.sphinx-doc.org/en/master/", None), "numpy": ("https://numpy.org/doc/stable", None), "xarray": ("https://docs.xarray.dev/en/latest", None), "sparse": ("https://sparse.pydata.org/en/latest", None), diff --git a/experimentation/neighbours.ipynb b/experimentation/neighbours.ipynb index ea50e79..2583fef 100644 --- a/experimentation/neighbours.ipynb +++ b/experimentation/neighbours.ipynb @@ -8,6 +8,7 @@ "outputs": [], "source": [ "import dask.array as da\n", + "import healpix_geo\n", "import numpy as np\n", "\n", "import healpix_convolution.neighbours as nb" @@ -19,6 +20,87 @@ "id": "1", "metadata": {}, "outputs": [], + "source": [ + "level = 10\n", + "ring = 4\n", + "cell_ids = np.arange(12 * 4**level)\n", + "cell_ids" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "n_bytes = 8\n", + "12 * 4**level * (2 * ring + 1) ** 2 * n_bytes / 1e9" + ] + }, + { + "cell_type": "raw", + "id": "3", + "metadata": {}, + "source": [ + "%load_ext pyinstrument" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "hp_neighbours = nb.neighbours(\n", + " cell_ids, resolution=level, indexing_scheme=\"nested\", ring=ring\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "cds_neighbours = healpix_geo.nested.neighbours_in_kth_ring(cell_ids, level, ring=ring)" + ] + }, + { + "cell_type": "raw", + "id": "6", + "metadata": {}, + "source": [ + "cds_counts = (cds_neighbours == -1).sum(axis=1)\n", + "hp_counts = (hp_neighbours == -1).sum(axis=1)\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "fig, ax = plt.subplots()\n", + "ax.plot(cds_counts)\n", + "ax.plot(hp_counts)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "np.testing.assert_equal(np.sort(cds_neighbours, axis=1), np.sort(hp_neighbours, axis=1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], "source": [ "from distributed import Client\n", "\n", @@ -29,7 +111,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2", + "id": "9", "metadata": {}, "outputs": [], "source": [ @@ -41,7 +123,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3", + "id": "10", "metadata": {}, "outputs": [], "source": [ @@ -55,7 +137,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -66,7 +148,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5", + "id": "12", "metadata": {}, "outputs": [], "source": [ diff --git a/healpix_convolution/distances.py b/healpix_convolution/distances.py index c433b14..474b2b2 100644 --- a/healpix_convolution/distances.py +++ b/healpix_convolution/distances.py @@ -1,4 +1,3 @@ -import healpy as hp import numpy as np try: @@ -10,9 +9,18 @@ dask_array_type = () -def cell_ids2vectors(cell_ids, nside, nest): - flattened = cell_ids.flatten() - vecs = np.stack(hp.pix2vec(nside, flattened, nest=nest), axis=-1) +def spherical_to_euclidean(lon, lat): + x = np.cos(lon) * np.cos(lat) + y = np.sin(lon) * np.cos(lat) + z = np.sin(lat) + + return x, y, z + + +def cell_ids2vectors(cell_ids, grid_info): + lon, lat = grid_info.cell_ids2geographic(cell_ids.flatten()) + vecs = np.stack(spherical_to_euclidean(np.deg2rad(lon), np.deg2rad(lat)), axis=-1) + return np.reshape(vecs, cell_ids.shape + (3,)) @@ -26,12 +34,11 @@ def angle_between_vectors(a, b, axis): return np.arccos(argument) -def _distances(a, b, axis, nside, nest): - vec_a = cell_ids2vectors(a, nside, nest) +def _distances(a, b, axis, grid_info): + vec_a = cell_ids2vectors(a, grid_info) - # TODO: contains `-1`, which `pix2vec` doesn't like mask = b != -1 - vec_b_ = cell_ids2vectors(np.where(mask, b, 0), nside, nest) + vec_b_ = cell_ids2vectors(np.where(mask, b, 0), grid_info) vec_b = np.where(mask[..., None], vec_b_, np.nan) return angle_between_vectors(vec_a, vec_b, axis=axis) @@ -58,20 +65,14 @@ def angular_distances(neighbours, *, grid_info, axis=None): if axis is None: axis = -1 - nest = grid_info.nest - nside = grid_info.nside - if isinstance(neighbours, dask_array_type): return da.map_blocks( _distances, neighbours[:, :1], neighbours, axis=axis, - nside=nside, - nest=nest, + grid_info=grid_info, chunks=neighbours.chunks, ) else: - return _distances( - neighbours[:, :1], neighbours, axis=axis, nside=nside, nest=nest - ) + return _distances(neighbours[:, :1], neighbours, axis=axis, grid_info=grid_info) diff --git a/healpix_convolution/kernels/gaussian.py b/healpix_convolution/kernels/gaussian.py index 739d820..30f1bcc 100644 --- a/healpix_convolution/kernels/gaussian.py +++ b/healpix_convolution/kernels/gaussian.py @@ -1,4 +1,3 @@ -import healpy as hp import numpy as np import xdggs @@ -7,11 +6,15 @@ from healpix_convolution.neighbours import neighbours +def healpix_resolution(level): + return 2 * np.pi / np.sqrt(12 * 4**level) + + def compute_ring(resolution, sigma, truncate, kernel_size): if kernel_size is not None: - ring = int(kernel_size / 2) + ring = int(kernel_size // 2) else: - cell_distance = hp.nside2resol(2**resolution, arcmin=False) + cell_distance = healpix_resolution(resolution) ring = int((truncate * sigma / cell_distance) // 2) return ring if ring >= 1 else 1 diff --git a/healpix_convolution/neighbours.py b/healpix_convolution/neighbours.py index 5d22d00..7472549 100644 --- a/healpix_convolution/neighbours.py +++ b/healpix_convolution/neighbours.py @@ -1,7 +1,6 @@ -import healpy as hp -import numba -import numpy as np -from numba import int8, int16, int32, int64 +from functools import partial + +import healpix_geo try: import dask.array as da @@ -11,173 +10,6 @@ dask_array_type = () da = None -face_neighbours = np.array( - [ - [8, 9, 10, 11, -1, -1, -1, -1, 10, 11, 8, 9], - [5, 6, 7, 4, 8, 9, 10, 11, 9, 10, 11, 8], - [-1, -1, -1, -1, 5, 6, 7, 4, -1, -1, -1, -1], - [4, 5, 6, 7, 11, 8, 9, 10, 11, 8, 9, 10], - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], - [1, 2, 3, 0, 0, 1, 2, 3, 5, 6, 7, 4], - [-1, -1, -1, -1, 7, 4, 5, 6, -1, -1, -1, -1], - [3, 0, 1, 2, 3, 0, 1, 2, 4, 5, 6, 7], - [2, 3, 0, 1, -1, -1, -1, -1, 0, 1, 2, 3], - ], - dtype="int8", -) -swap_arrays = np.array( - [ - [0, 0, 3], - [0, 0, 6], - [0, 0, 0], - [0, 0, 5], - [0, 0, 0], - [5, 0, 0], - [0, 0, 0], - [6, 0, 0], - [3, 0, 0], - ], - dtype="uint8", -) - - -def cell_ids2xyf(cell_ids, *, nside, indexing_scheme): - nest_values = {"nested": True, "ring": False} - nest = nest_values.get(indexing_scheme) - if nest is None: - raise ValueError(f"unsupported indexing scheme: {indexing_scheme}") - - return hp.pixelfunc.pix2xyf(nside, cell_ids, nest=nest) - - -def xyf2cell_ids(x, y, face, *, nside, indexing_scheme): - nest_values = {"nested": True, "ring": False} - nest = nest_values.get(indexing_scheme) - if nest is None: - raise ValueError(f"unsupported indexing scheme: {indexing_scheme}") - - return hp.pixelfunc.xyf2pix(nside, x, y, face, nest=nest) - - -def generate_offsets(ring): - steps = [(0, 1), (1, 0), (0, -1), (-1, 0), (0, 1)] - cx = 0 - cy = 0 - - for c in range(ring + 1): - # 1: go c steps to the left and yield - # 2: turn right - # 3: go 1 step at a time while yielding, until we reach the maximum distance - # 4: repeat 2 and 3 until we turned 4 times - # 5: go up until just before we reached the first point - x = cx - c - y = cy - 0 - - yield x, y - - if c == 0: - continue - - for index, (sx, sy) in enumerate(steps): - if index == 0: - n_steps = c - elif index == 4: - n_steps = c - 1 - else: - n_steps = 2 * c - - for _ in range(n_steps): - x = x + sx - y = y + sy - - yield (x, y) - - -@numba.njit -def adjust_xyf(cx, cy, cf, nside): - if (cx >= 0 and cx < nside) and (cy >= 0 and cy < nside): - return cx, cy, cf - - nbnum = 4 - if cx < 0: - cx = cx + nside - nbnum -= 1 - elif cx >= nside: - cx = cx - nside - nbnum += 1 - - if cy < 0: - cy = cy + nside - nbnum -= 3 - elif cy >= nside: - cy = cy - nside - nbnum += 3 - - nf = face_neighbours[nbnum][cf] - if nf < 0: - # invalid pixel - return -1, -1, -1 - - bits = swap_arrays[nbnum][cf >> 2] - if bits & 1: - nx = nside - cx - 1 - else: - nx = cx - if bits & 2: - ny = nside - cy - 1 - else: - ny = cy - - if bits & 4: - nx, ny = ny, nx - - return nx, ny, nf - - -@numba.guvectorize( - [ - (int32, int32, int32, int32, int8[:, :], int32[:], int32[:], int32[:]), - (int64, int64, int64, int32, int8[:, :], int64[:], int64[:], int64[:]), - (int32, int32, int32, int32, int16[:, :], int32[:], int32[:], int32[:]), - (int64, int64, int64, int32, int16[:, :], int64[:], int64[:], int64[:]), - ], - "(),(),(),(),(n,m)->(n),(n),(n)", -) -def _neighbours_numba(x, y, f, nside, offsets, nx, ny, nf): - for index, (x_offset, y_offset) in enumerate(offsets): - cx, cy, cf = adjust_xyf(x + x_offset, y + y_offset, f, nside) - - nx[index] = cx - ny[index] = cy - nf[index] = cf - - -def _neighbours(cell_ids, *, offsets, nside, indexing_scheme): - x, y, face = cell_ids2xyf(cell_ids, nside=nside, indexing_scheme=indexing_scheme) - neighbour_x, neighbour_y, neighbour_face = _neighbours_numba( - x, y, face, nside, offsets - ) - - n_ = xyf2cell_ids( - neighbour_x, - neighbour_y, - neighbour_face, - nside=nside, - indexing_scheme=indexing_scheme, - ) - return np.where(neighbour_face == -1, -1, n_) - - -def minimum_dtype(value): - if value < np.iinfo("int8").max: - return "int8" - elif value < np.iinfo("int16").max: - return "int16" - elif value < np.iinfo("int32").max: - return "int32" - elif value < np.iinfo("int64").max: - return "int64" - def neighbours(cell_ids, *, grid_info, ring=1): """determine the neighbours within the nth ring around the center pixel @@ -201,24 +33,22 @@ def neighbours(cell_ids, *, grid_info, ring=1): "rings containing more than the neighbouring base pixels are not supported" ) - offsets = np.asarray(list(generate_offsets(ring=ring)), dtype=minimum_dtype(ring)) + if grid_info.indexing_scheme == "nested": + neighbours_disk = healpix_geo.nested.neighbours_disk + elif grid_info.indexing_scheme == "ring": + neighbours_disk = healpix_geo.ring.neighbours_disk + else: + raise ValueError(f"unsupported indexing scheme: '{grid_info.indexing_scheme}'") + f = partial(neighbours_disk, depth=grid_info.level, ring=ring) if isinstance(cell_ids, dask_array_type): n_neighbours = (2 * ring + 1) ** 2 return da.map_blocks( - _neighbours, + f, cell_ids, - offsets=offsets, - nside=nside, - indexing_scheme=grid_info.indexing_scheme, new_axis=1, chunks=cell_ids.chunks + (n_neighbours,), dtype=cell_ids.dtype, ) else: - return _neighbours( - cell_ids, - offsets=offsets, - nside=nside, - indexing_scheme=grid_info.indexing_scheme, - ) + return f(cell_ids) diff --git a/healpix_convolution/plotting.py b/healpix_convolution/plotting.py index f2e4598..d6fd578 100644 --- a/healpix_convolution/plotting.py +++ b/healpix_convolution/plotting.py @@ -1,6 +1,5 @@ import cartopy.crs as ccrs import cartopy.feature -import healpy as hp import numpy as np @@ -17,13 +16,12 @@ def plot_healpix( **kwargs, ): nside = grid_info.nside - nest = grid_info.nest ysize = xsize // 2 full_lat = np.linspace(-90, 90, ysize) full_lon = np.linspace(-180, 180, xsize) grid_lat, grid_lon = np.meshgrid(full_lat, full_lon) - pix = hp.ang2pix(nside, grid_lon, grid_lat, lonlat=True, nest=nest) + pix = grid_info.geographic2cell_ids(lon=grid_lon, lat=grid_lat) full_map = np.full((12 * nside**2,), fill_value=np.nan) full_map[cell_ids] = data diff --git a/healpix_convolution/tests/test_distances.py b/healpix_convolution/tests/test_distances.py index e739d49..0b2857c 100644 --- a/healpix_convolution/tests/test_distances.py +++ b/healpix_convolution/tests/test_distances.py @@ -20,6 +20,7 @@ def test_angle_between_vectors(): ( (np.array([[1, 0, 2, 3]]), np.array([[0, 0.25637566, 0.3699723, 0.25574741]])), (np.array([[2, 71, 8]]), np.array([[0, 0.25637566, 0.25574741]])), + (np.array([[2, 71], [2, 8]]), np.array([[0, 0.25637566], [0, 0.25574741]])), ), ) def test_angular_distances_numpy(neighbours, expected): diff --git a/healpix_convolution/tests/test_neighbours.py b/healpix_convolution/tests/test_neighbours.py index d546a42..8d2b19f 100644 --- a/healpix_convolution/tests/test_neighbours.py +++ b/healpix_convolution/tests/test_neighbours.py @@ -1,11 +1,11 @@ -import healpy as hp +import cdshealpix import hypothesis.strategies as st import numpy as np import pytest from hypothesis import given from xdggs import HealpixInfo -from healpix_convolution.neighbours import generate_offsets, neighbours +from healpix_convolution.neighbours import neighbours try: import dask.array as da @@ -18,15 +18,13 @@ requires_dask = pytest.mark.skipif(not has_dask, reason="needs dask.array") -@given(st.integers(min_value=0, max_value=200)) -def test_generate_offsets(ring): - kernel_size = 2 * ring + 1 - kernel = np.zeros(shape=(kernel_size, kernel_size)) +def neighbours_ring(ipix, depth): + as_nested = cdshealpix.from_ring(ipix, depth) + neighbours = cdshealpix.nested.neighbours(as_nested, depth) - for x, y in generate_offsets(ring): - kernel[x + ring, y + ring] = 1 - - assert np.sum(kernel) == kernel_size**2 + mask = neighbours != -1 + as_ring = cdshealpix.to_ring(np.where(mask, neighbours, 0), depth) + return np.where(mask, as_ring.astype("int64"), -1) @pytest.mark.parametrize("resolution", [1, 2, 4, 6]) @@ -38,29 +36,36 @@ def test_neighbours_ring1_manual(resolution, indexing_scheme, dask): else: xp = np + if indexing_scheme == "nested": + reference_neighbours = cdshealpix.nested.neighbours + elif indexing_scheme == "ring": + reference_neighbours = neighbours_ring + grid_info = HealpixInfo(level=resolution, indexing_scheme=indexing_scheme) cell_ids = xp.arange(12 * 4**resolution) actual = neighbours(cell_ids, grid_info=grid_info, ring=1) - nside = grid_info.nside - nest = grid_info.nest - expected = hp.get_all_neighbours(nside, np.asarray(cell_ids), nest=nest).T - - actual_ = np.asarray(actual[:, 1:]) + expected = reference_neighbours(np.asarray(cell_ids), depth=grid_info.level) - np.testing.assert_equal(actual_, expected) + np.testing.assert_equal( + np.sort(np.asarray(actual), axis=1), np.sort(expected, axis=1) + ) -@given(st.integers(min_value=1, max_value=7), st.sampled_from(["ring", "nested"])) +@given(st.integers(min_value=1, max_value=7), st.sampled_from(["nested", "ring"])) def test_neighbours_ring1(resolution, indexing_scheme): + if indexing_scheme == "nested": + reference_neighbours = cdshealpix.nested.neighbours + else: + reference_neighbours = neighbours_ring + cell_ids = np.arange(12 * 4**resolution) grid_info = HealpixInfo(level=resolution, indexing_scheme=indexing_scheme) actual = neighbours(cell_ids, grid_info=grid_info, ring=1) - nside = grid_info.nside - nest = grid_info.nest - expected = hp.get_all_neighbours(nside, cell_ids, nest=nest).T - np.testing.assert_equal(actual[:, 1:], expected) + expected = reference_neighbours(cell_ids, depth=grid_info.level) + + np.testing.assert_equal(np.sort(actual, axis=1), np.sort(expected, axis=1)) diff --git a/healpix_convolution/xarray/kernels/gaussian.py b/healpix_convolution/xarray/kernels/gaussian.py index a9810ef..d38720e 100644 --- a/healpix_convolution/xarray/kernels/gaussian.py +++ b/healpix_convolution/xarray/kernels/gaussian.py @@ -2,16 +2,7 @@ import xdggs # noqa: F401 from healpix_convolution.kernels import gaussian - - -def compute_ring(grid_info, sigma, kernel_size, truncate): - if kernel_size is not None: - return int(kernel_size // 2) - else: - import healpy as hp - - cell_distance = hp.nside2resol(2**grid_info.level, arcmin=False) - return int((truncate * sigma / cell_distance) // 2) +from healpix_convolution.kernels.gaussian import compute_ring def gaussian_kernel( @@ -63,7 +54,7 @@ def gaussian_kernel( "method": "continuous", "sigma": sigma, "ring": compute_ring( - kernel_size=kernel_size, grid_info=grid_info, truncate=truncate, sigma=sigma + grid_info.level, kernel_size=kernel_size, truncate=truncate, sigma=sigma ), } | size_param diff --git a/pyproject.toml b/pyproject.toml index 350244d..5782c71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,10 +4,10 @@ requires-python = ">= 3.11" license = {text = "Apache-2.0"} dependencies = [ "numpy", - "healpy", + "cdshealpix", + "healpix-geo", "xdggs", "sparse", - "numba", "opt_einsum", ] dynamic = ["version"]