Skip to content

Commit

Permalink
Wrap binstats (#1652)
Browse files Browse the repository at this point in the history
*Wrap gmtbinstats function
*Add test_binstats.py
*Add binstats to API index
*Add remote files "@capitals.gmt" to cached files

Co-authored-by: Wei Ji <23487320+weiji14@users.noreply.github.com>
Co-authored-by: Dongdong Tian <seisman.info@gmail.com>
Co-authored-by: Max Jones <meghanj@alum.mit.edu>
  • Loading branch information
4 people authored Jun 13, 2022
1 parent f605b68 commit 40ab2d9
Show file tree
Hide file tree
Showing 6 changed files with 175 additions and 0 deletions.
1 change: 1 addition & 0 deletions doc/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ Operations on tabular data
.. autosummary::
:toctree: generated

binstats
blockmean
blockmedian
blockmode
Expand Down
1 change: 1 addition & 0 deletions pygmt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from pygmt.session_management import begin as _begin
from pygmt.session_management import end as _end
from pygmt.src import (
binstats,
blockmean,
blockmedian,
blockmode,
Expand Down
1 change: 1 addition & 0 deletions pygmt/helpers/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ def download_test_data():
"@earth_age_01d_g",
"@S90W180.earth_age_05m_g.nc", # Specific grid for 05m test
# Other cache files
"@capitals.gmt",
"@earth_relief_20m_holes.grd",
"@EGM96_to_36.txt",
"@MaunaLoa_CO2.txt",
Expand Down
1 change: 1 addition & 0 deletions pygmt/src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# pylint: disable=import-outside-toplevel

from pygmt.src.basemap import basemap
from pygmt.src.binstats import binstats
from pygmt.src.blockm import blockmean, blockmedian, blockmode
from pygmt.src.coast import coast
from pygmt.src.colorbar import colorbar
Expand Down
124 changes: 124 additions & 0 deletions pygmt/src/binstats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
"""
binstats - Bin spatial data and determine statistics per bin
"""
from pygmt.clib import Session
from pygmt.helpers import (
GMTTempFile,
build_arg_string,
fmt_docstring,
kwargs_to_strings,
use_alias,
)
from pygmt.io import load_dataarray


@fmt_docstring
@use_alias(
C="statistic",
E="empty",
G="outgrid",
I="spacing",
N="normalize",
R="region",
S="search_radius",
V="verbose",
W="weight",
a="aspatial",
b="binary",
h="header",
i="incols",
r="registration",
)
@kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma")
def binstats(data, **kwargs):
r"""
Bin spatial data and determine statistics per bin.
Reads arbitrarily located (x,y[,z][,w]) points
(2-4 columns) from ``data`` and for each
node in the specified grid layout determines which points are
within the given radius. These point are then used in the
calculation of the specified statistic. The results may be
presented as is or may be normalized by the circle area to
perhaps give density estimates.
Full option list at :gmt-docs:`gmtbinstats.html`
{aliases}
Parameters
----------
data : str or {table-like}
A file name of an ASCII data table or a 2D
{table-classes}.
outgrid : str or None
The name of the output netCDF file with extension .nc to store the grid
in.
statistic : str
**a**\|\ **d**\|\ **g**\|\ **i**\|\ **l**\|\ **L**\|\ **m**\|\ **n**\
\|\ **o**\|\ **p**\|\ **q**\ [*quant*]\|\ **r**\|\ **s**\|\ **u**\
\|\ **U**\|\ **z**.
Choose the statistic that will be computed per node based on the
points that are within *radius* distance of the node. Select one of:
- **a** for mean (average)
- **d** for median absolute deviation (MAD)
- **g** for full (max-min) range
- **i** for 25-75% interquartile range
- **l** for minimum (low)
- **L** for minimum of positive values only
- **m** for median
- **n** the number of values
- **o** for LMS scale
- **p** for mode (maximum likelihood)
- **q** for selected quantile (append desired quantile in
0-100% range [50])
- **r** for the r.m.s.
- **s** for standard deviation
- **u** for maximum (upper)
- **U** for maximum of negative values only
- **z** for the sum
empty : float or int
Set the value assigned to empty nodes [Default is NaN].
normalize : bool
Normalize the resulting grid values by the area represented by the
search *radius* [no normalization].
search_radius : float or str
Sets the *search_radius* that determines which data points are
considered close to a node. Append the distance unit.
Not compatible with ``tiling``.
weight : str
Input data have an extra column containing observation point weight.
If weights are given then weighted statistical quantities will be
computed while the count will be the sum of the weights instead of
number of points. If the weights are actually uncertainties
(one sigma) then append **+s** and weight = 1/sigma.
{I}
{R}
{V}
{a}
{b}
{h}
{i}
{r}
Returns
-------
ret: xarray.DataArray or None
Return type depends on whether the ``outgrid`` parameter is set:
- :class:`xarray.DataArray` if ``outgrid`` is not set
- None if ``outgrid`` is set (grid output will be stored in file set by
``outgrid``)
"""
with GMTTempFile(suffix=".nc") as tmpfile:
with Session() as lib:
file_context = lib.virtualfile_from_data(check_kind="vector", data=data)
with file_context as infile:
if (outgrid := kwargs.get("G")) is None:
kwargs["G"] = outgrid = tmpfile.name # output to tmpfile
lib.call_module(
module="binstats", args=build_arg_string(kwargs, infile=infile)
)

return load_dataarray(outgrid) if outgrid == tmpfile.name else None
47 changes: 47 additions & 0 deletions pygmt/tests/test_binstats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""
Tests for binstats.
"""
import os

import numpy.testing as npt
from pygmt import binstats
from pygmt.helpers import GMTTempFile


def test_binstats_outgrid():
"""
Test binstats with a set outgrid.
"""
with GMTTempFile(suffix=".nc") as tmpfile:
result = binstats(
data="@capitals.gmt",
outgrid=tmpfile.name,
spacing=5,
statistic="z",
search_radius="1000k",
aspatial="2=population",
region="g",
)
assert result is None # return value is None
assert os.path.exists(path=tmpfile.name) # check that outgrid exists


def test_binstats_no_outgrid():
"""
Test binstats with no set outgrid.
"""
temp_grid = binstats(
data="@capitals.gmt",
spacing=5,
statistic="z",
search_radius="1000k",
aspatial="2=population",
region="g",
)
assert temp_grid.dims == ("y", "x")
assert temp_grid.gmt.gtype == 0 # Cartesian grid
assert temp_grid.gmt.registration == 0 # Gridline registration
npt.assert_allclose(temp_grid.max(), 35971536)
npt.assert_allclose(temp_grid.min(), 53)
npt.assert_allclose(temp_grid.median(), 1232714.5)
npt.assert_allclose(temp_grid.mean(), 4227489)

0 comments on commit 40ab2d9

Please sign in to comment.