Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wrap binstats #1652

Merged
merged 35 commits into main from wrap/binstats
Jun 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
e3d1249
initial commit
willschlitzer Dec 13, 2021
3879019
update docstring
willschlitzer Dec 13, 2021
560b586
fix name
willschlitzer Dec 13, 2021
26c4c27
add binstats to index
willschlitzer Dec 13, 2021
bb34578
add tests for test_binstats.py
willschlitzer Dec 14, 2021
0a6b216
Merge branch 'main' into wrap/binstats
willschlitzer Dec 14, 2021
59b9a45
run make format
willschlitzer Dec 14, 2021
00c9db0
Apply suggestions from code review
willschlitzer Apr 19, 2022
29e87ec
Merge branch 'main' into wrap/binstats
willschlitzer Apr 20, 2022
ba672ab
Apply suggestions from code review
willschlitzer May 6, 2022
5d9537c
Apply suggestions from code review
willschlitzer May 7, 2022
086a20c
change "compute" to "statistic"
willschlitzer May 10, 2022
5ea19a6
change data docstring
willschlitzer May 10, 2022
0413231
run make format
willschlitzer May 10, 2022
97ed2ea
Apply suggestions from code review
willschlitzer May 23, 2022
8c8c4aa
Merge branch 'main' into wrap/binstats
willschlitzer May 23, 2022
6e9b979
add normalize docstring
willschlitzer May 23, 2022
b364177
add "search_radius" docstring
willschlitzer May 23, 2022
ae300b2
add tiling docstring
willschlitzer May 23, 2022
8ca872f
add weight docstring
willschlitzer May 23, 2022
11870ae
fix error
willschlitzer May 23, 2022
05c5ffa
Apply suggestions from code review
willschlitzer May 24, 2022
39d2e99
add docstring for empty
willschlitzer May 24, 2022
9f88a9e
Update pygmt/src/binstats.py
willschlitzer May 25, 2022
999a44f
Merge branch 'main' into wrap/binstats
willschlitzer May 25, 2022
5ec4e42
Apply suggestions from code review
willschlitzer May 31, 2022
1ce7d19
Apply suggestions from code review
willschlitzer Jun 8, 2022
0cdd7ec
Merge branch 'main' into wrap/binstats
willschlitzer Jun 8, 2022
c0c5f70
Update pygmt/tests/test_binstats.py
willschlitzer Jun 8, 2022
2b7d363
Apply suggestions from code review
willschlitzer Jun 13, 2022
aca5cf3
Merge branch 'main' into wrap/binstats
willschlitzer Jun 13, 2022
d9e26d9
add "capitals.gmt" to pygmt.helpers.testing.download_test_data
willschlitzer Jun 13, 2022
adacd0f
update cache_data.yaml to cache data on a pull request
willschlitzer Jun 13, 2022
4a16770
line length fix
willschlitzer Jun 13, 2022
8001729
comment out "pull_request" in cache_data.yaml
willschlitzer Jun 13, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ Operations on tabular data
.. autosummary::
:toctree: generated

binstats
blockmean
blockmedian
blockmode
Expand Down
1 change: 1 addition & 0 deletions pygmt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from pygmt.session_management import begin as _begin
from pygmt.session_management import end as _end
from pygmt.src import (
binstats,
blockmean,
blockmedian,
blockmode,
Expand Down
1 change: 1 addition & 0 deletions pygmt/helpers/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ def download_test_data():
"@earth_age_01d_g",
"@S90W180.earth_age_05m_g.nc", # Specific grid for 05m test
# Other cache files
"@capitals.gmt",
"@earth_relief_20m_holes.grd",
"@EGM96_to_36.txt",
"@MaunaLoa_CO2.txt",
Expand Down
1 change: 1 addition & 0 deletions pygmt/src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# pylint: disable=import-outside-toplevel

from pygmt.src.basemap import basemap
from pygmt.src.binstats import binstats
from pygmt.src.blockm import blockmean, blockmedian, blockmode
from pygmt.src.coast import coast
from pygmt.src.colorbar import colorbar
Expand Down
124 changes: 124 additions & 0 deletions pygmt/src/binstats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
"""
binstats - Bin spatial data and determine statistics per bin
"""
from pygmt.clib import Session
from pygmt.helpers import (
GMTTempFile,
build_arg_string,
fmt_docstring,
kwargs_to_strings,
use_alias,
)
from pygmt.io import load_dataarray


@fmt_docstring
@use_alias(
    C="statistic",
    E="empty",
    G="outgrid",
    I="spacing",
    N="normalize",
    R="region",
    S="search_radius",
    V="verbose",
    W="weight",
    a="aspatial",
    b="binary",
    h="header",
    i="incols",
    r="registration",
)
@kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma")
def binstats(data, **kwargs):
    r"""
    Bin spatial data and determine statistics per bin.

    Reads arbitrarily located (x,y[,z][,w]) points (2-4 columns) from
    ``data``. For each node of the specified grid layout, the points lying
    within the given radius are determined, and these points are then used
    to calculate the specified statistic. The results may be presented as
    is, or may be normalized by the circle area to perhaps give density
    estimates.

    Full option list at :gmt-docs:`gmtbinstats.html`

    {aliases}

    Parameters
    ----------
    data : str or {table-like}
        A file name of an ASCII data table or a 2D
        {table-classes}.
    outgrid : str or None
        The name of the output netCDF file with extension .nc to store the grid
        in.
    statistic : str
        **a**\|\ **d**\|\ **g**\|\ **i**\|\ **l**\|\ **L**\|\ **m**\|\ **n**\
        \|\ **o**\|\ **p**\|\ **q**\ [*quant*]\|\ **r**\|\ **s**\|\ **u**\
        \|\ **U**\|\ **z**.
        Choose the statistic that will be computed per node based on the
        points that are within *radius* distance of the node. Select one of:

        - **a** for mean (average)
        - **d** for median absolute deviation (MAD)
        - **g** for full (max-min) range
        - **i** for 25-75% interquartile range
        - **l** for minimum (low)
        - **L** for minimum of positive values only
        - **m** for median
        - **n** the number of values
        - **o** for LMS scale
        - **p** for mode (maximum likelihood)
        - **q** for selected quantile (append desired quantile in
          0-100% range [50])
        - **r** for the r.m.s.
        - **s** for standard deviation
        - **u** for maximum (upper)
        - **U** for maximum of negative values only
        - **z** for the sum
    empty : float or int
        Set the value assigned to empty nodes [Default is NaN].
    normalize : bool
        Normalize the resulting grid values by the area represented by the
        search *radius* [no normalization].
    search_radius : float or str
        Sets the *search_radius* that determines which data points are
        considered close to a node. Append the distance unit.
        Not compatible with ``tiling``.
    weight : str
        Input data have an extra column containing observation point weight.
        If weights are given then weighted statistical quantities will be
        computed while the count will be the sum of the weights instead of
        number of points. If the weights are actually uncertainties
        (one sigma) then append **+s** and weight = 1/sigma.
    {I}
    {R}
    {V}
    {a}
    {b}
    {h}
    {i}
    {r}

    Returns
    -------
    ret: xarray.DataArray or None
        Return type depends on whether the ``outgrid`` parameter is set:

        - :class:`xarray.DataArray` if ``outgrid`` is not set
        - None if ``outgrid`` is set (grid output will be stored in file set by
          ``outgrid``)
    """
    with GMTTempFile(suffix=".nc") as tmpfile:
        with Session() as lib:
            with lib.virtualfile_from_data(check_kind="vector", data=data) as infile:
                outgrid = kwargs.get("G")
                if outgrid is None:
                    # No output file was requested: have GMT write the grid
                    # to the temporary file so that it can be loaded below.
                    outgrid = tmpfile.name
                    kwargs["G"] = outgrid
                arg_str = build_arg_string(kwargs, infile=infile)
                lib.call_module(module="binstats", args=arg_str)

        # A caller-supplied outgrid already holds the result on disk.
        if outgrid != tmpfile.name:
            return None
        return load_dataarray(outgrid)
47 changes: 47 additions & 0 deletions pygmt/tests/test_binstats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""
Tests for binstats.
"""
import os

import numpy.testing as npt
from pygmt import binstats
from pygmt.helpers import GMTTempFile


def test_binstats_outgrid():
    """
    Check that binstats returns None and writes the grid file when an outgrid
    is given.
    """
    with GMTTempFile(suffix=".nc") as tmpfile:
        options = {
            "data": "@capitals.gmt",
            "outgrid": tmpfile.name,
            "spacing": 5,
            "statistic": "z",
            "search_radius": "1000k",
            "aspatial": "2=population",
            "region": "g",
        }
        result = binstats(**options)
        # With an explicit outgrid nothing is returned ...
        assert result is None
        # ... and the grid must be present at the requested path.
        assert os.path.exists(path=tmpfile.name)


def test_binstats_no_outgrid():
    """
    Check the grid returned by binstats when no outgrid is given.
    """
    # "@capitals.gmt" is one of the cache files listed in
    # pygmt.helpers.testing.download_test_data.
    options = {
        "data": "@capitals.gmt",
        "spacing": 5,
        "statistic": "z",
        "search_radius": "1000k",
        "aspatial": "2=population",
        "region": "g",
    }
    grid = binstats(**options)

    assert grid.dims == ("y", "x")
    assert grid.gmt.gtype == 0  # Cartesian grid
    assert grid.gmt.registration == 0  # Gridline registration

    # Summary statistics of the returned grid, checked in a fixed order.
    expected_stats = (
        ("max", 35971536),
        ("min", 53),
        ("median", 1232714.5),
        ("mean", 4227489),
    )
    for reducer, expected in expected_stats:
        npt.assert_allclose(getattr(grid, reducer)(), expected)