From a39215a6a3c8820b89089eab6c78b8f72bd65961 Mon Sep 17 00:00:00 2001
From: Leonardo Uieda
Date: Tue, 14 Dec 2021 13:13:05 +0000
Subject: [PATCH] Replace pylint with more flake8 plugins (#337)

Following fatiando/harmonica#285, replace pylint with a bunch of new flake8 plugins. They cover most of the checks that were performed before, add a few new ones, and are less troublesome to maintain and silence when necessary.

Fixes the formatting issues raised after applying the changes: wrapping comments and docstrings in tutorials/examples to 79 characters, silencing a few issues caused by scikit-learn style code, and reordering imports to satisfy isort.

I also combined the CI checks into only 2: "format" and "style". If formatting is broken, we don't need to know which tool is complaining; just run make format. And style is now only flake8 anyway.

---
 .flake8 | 10 -
 .github/workflows/style.yml | 60 +---
 .pylintrc | 425 ---------------------------
 Makefile | 30 +-
 data/examples/baja_bathymetry.py | 12 +-
 data/examples/california_gps.py | 14 +-
 data/examples/checkerboard.py | 1 +
 data/examples/rio_magnetic.py | 23 +-
 data/examples/texas-wind.py | 13 +-
 doc/conf.py | 8 +-
 env/requirements-style.txt | 14 +-
 environment.yml | 22 +-
 examples/blockkfold.py | 5 +-
 examples/blockreduce.py | 3 +-
 examples/blockreduce_weights.py | 22 +-
 examples/blockreduce_weights_mean.py | 52 ++--
 examples/convex_hull_mask.py | 5 +-
 examples/distance_mask.py | 28 +-
 examples/project_grid.py | 9 +-
 examples/scipygridder.py | 49 +--
 examples/spline.py | 47 +--
 examples/spline_cv.py | 28 +-
 examples/spline_weights.py | 42 +--
 examples/train_test_split.py | 3 +-
 examples/trend.py | 14 +-
 examples/vector_trend.py | 35 +--
 examples/vector_uncoupled.py | 27 +-
 license_notice.py | 4 +-
 pyproject.toml | 4 +
 setup.cfg | 36 +++
 setup.py | 3 +-
 tutorials/chain.py | 85 +++---
 tutorials/decimation.py | 57 ++--
 tutorials/grid_coordinates.py | 134 ++++-----
 tutorials/model_evaluation.py | 137 ++++-----
 tutorials/model_selection.py | 97 +++---
 tutorials/overview.py | 117 ++++----
 tutorials/projections.py | 91 +++---
 tutorials/trends.py | 32 +-
 tutorials/vectors.py | 122 ++++----
 tutorials/weights.py | 132 +++++----
 verde/__init__.py | 42 ++-
 verde/_version.py | 2 -
 verde/base/__init__.py | 5 +-
 verde/base/base_classes.py | 31 +-
 verde/base/least_squares.py | 2 +-
 verde/base/utils.py | 4 +-
 verde/blockreduce.py | 10 +-
 verde/coordinates.py | 12 +-
 verde/datasets/__init__.py | 13 +-
 verde/datasets/sample_data.py | 5 +-
 verde/datasets/synthetic.py | 1 -
 verde/distances.py | 2 +-
 verde/io.py | 2 +-
 verde/mask.py | 6 +-
 verde/model_selection.py | 21 +-
 verde/projections.py | 6 +-
 verde/scipygridder.py | 2 +-
 verde/spline.py | 8 +-
 verde/tests/test_base.py | 19 +-
 verde/tests/test_blockreduce.py | 5 +-
 verde/tests/test_chain.py | 6 +-
 verde/tests/test_coordinates.py | 4 +-
 verde/tests/test_datasets.py | 13 +-
 verde/tests/test_distances.py | 2 +-
 verde/tests/test_io.py | 7 +-
 verde/tests/test_mask.py | 4 +-
 verde/tests/test_minimal.py | 12 +-
 verde/tests/test_model_selection.py | 10 +-
 verde/tests/test_projections.py | 6 +-
 verde/tests/test_scipy.py | 4 +-
 verde/tests/test_spline.py | 6 +-
 verde/tests/test_trend.py | 3 +-
 verde/tests/test_utils.py | 17 +-
 verde/tests/test_vector.py | 9 +-
 verde/utils.py | 10 +-
 verde/vector.py | 8 +-
 77 files changed, 989 insertions(+), 1382 deletions(-)
 delete mode 100644 .flake8
 delete mode 100644 .pylintrc
 create mode 100644 pyproject.toml
 create mode 100644 setup.cfg

diff --git a/.flake8 b/.flake8
deleted file mode 100644
index 
22fda3640..000000000 --- a/.flake8 +++ /dev/null @@ -1,10 +0,0 @@ -[flake8] -ignore = E203, E266, E501, W503, F401, E741 -max-line-length = 88 -max-complexity = 10 -max-doc-length = 79 -exclude = - verde/_version_generated.py -per-file-ignores = - examples/*.py:W505 - data/examples/*.py:W505 diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index 0f500e640..aa09e31f9 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -4,7 +4,7 @@ # token stolen if the Action is compromised. See the comments and links here: # https://github.com/pypa/gh-action-pypi-publish/issues/27 # -name: code-style +name: checks # Only build PRs and the main branch. Pushes to branches will only be built # when a PR is opened. @@ -16,8 +16,7 @@ on: ############################################################################### jobs: - black: - name: black [format] + format: runs-on: ubuntu-latest steps: - name: Checkout @@ -28,7 +27,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v2 with: - python-version: "3.8" + python-version: "3.10" - name: Install requirements run: python -m pip install -r env/requirements-style.txt @@ -37,10 +36,9 @@ jobs: run: python -m pip freeze - name: Check code format - run: make black-check + run: make black-check isort-check license-check - flake8: - name: flake8 [style] + style: runs-on: ubuntu-latest steps: - name: Checkout @@ -51,7 +49,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v2 with: - python-version: "3.8" + python-version: "3.10" - name: Install requirements run: python -m pip install -r env/requirements-style.txt @@ -61,49 +59,3 @@ jobs: - name: Check code style run: make flake8 - - pylint: - name: pylint [style] - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v2 - with: - persist-credentials: false - - - name: Setup Python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - - name: Install requirements - run: python -m pip install -r env/requirements-style.txt - - - name: List installed packages - run: python -m pip freeze - - - name: Linting - run: make lint - - license: - name: license [format] - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v2 - with: - persist-credentials: false - - - name: Setup Python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - - name: Install requirements - run: python -m pip install -r env/requirements-style.txt - - - name: List installed packages - run: python -m pip freeze - - - name: Check license notice on all source files - run: make license-check diff --git a/.pylintrc b/.pylintrc deleted file mode 100644 index bc6ee1e4a..000000000 --- a/.pylintrc +++ /dev/null @@ -1,425 +0,0 @@ -[MASTER] - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code -extension-pkg-whitelist=numpy - -# Add files or directories to the blacklist. They should be base names, not -# paths. -ignore=CVS - -# Add files or directories matching the regex patterns to the blacklist. The -# regex matches against base names, not paths. -ignore-patterns= - -# Python code to execute, usually for sys.path manipulation such as -# pygtk.require(). -#init-hook= - -# Use multiple processes to speed up Pylint. -jobs=1 - -# List of plugins (as comma separated values of python modules names) to load, -# usually to register additional checkers. 
-load-plugins= - -# Pickle collected data for later comparisons. -persistent=yes - -# Specify a configuration file. -#rcfile= - -# Allow loading of arbitrary C extensions. Extensions are imported into the -# active Python interpreter and may run arbitrary code. -unsafe-load-any-extension=no - - -[MESSAGES CONTROL] - -# Only show warnings with the listed confidence levels. Leave empty to show -# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED -confidence= - -# Disable the message, report, category or checker with the given id(s). You -# can either give multiple identifiers separated by comma (,) or put this -# option multiple times (only on the command line, not in the configuration -# file where it should appear only once).You can also use "--disable=all" to -# disable everything first and then reenable specific checks. For example, if -# you want to run only the similarities checker, you can use "--disable=all -# --enable=similarities". If you want to run only the classes checker, but have -# no Warning level messages displayed, use"--disable=all --enable=classes -# --disable=W" -disable=print-statement,parameter-unpacking,unpacking-in-except,old-raise-syntax,backtick,long-suffix,old-ne-operator,old-octal-literal,raw-checker-failed,bad-inline-option,locally-disabled,locally-enabled,file-ignored,suppressed-message,deprecated-pragma,apply-builtin,basestring-builtin,buffer-builtin,cmp-builtin,coerce-builtin,execfile-builtin,file-builtin,long-builtin,raw_input-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,no-absolute-import,old-division,dict-iter-method,dict-view-method,next-method-called,metaclass-assignment,indexing-exception,raising-string,reload-builtin,oct-method,hex-method,nonzero-method,cmp-method,input-builtin,round-builtin,intern-builtin,unichr-builtin,map-builtin-not-iterating,zip-builtin-not-iterating,range-builtin-not-iterating,filter-builtin-not-iterating,using-cmp-argument,eq-without-hash,div-method,idiv-method,rdiv-method,exception-message-attribute,invalid-str-codec,sys-max-int,bad-python3-import,deprecated-string-function,deprecated-str-translate-call,attribute-defined-outside-init,similarities,bad-continuation,import-error,assignment-from-no-return - -# Enable the message, report, category or checker with the given id(s). You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -enable= - - -[REPORTS] - -# Python expression which should return a note less than 10 (10 is the highest -# note). You have access to the variables errors warning, statement which -# respectively contain the number of errors / warnings messages and the total -# number of statements analyzed. This is used by the global evaluation report -# (RP0004). -evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) - -# Template used to display messages. This is a python new-style format string -# used to format the message information. See doc for all details -#msg-template= - -# Set the output format. Available formats are text, parseable, colorized, json -# and msvs (visual studio).You can also give a reporter class, eg -# mypackage.mymodule.MyReporterClass. 
-output-format=text - -# Tells whether to display a full report or only the messages -reports=no - -# Activate the evaluation score. -score=yes - - -[REFACTORING] - -# Maximum number of nested blocks for function / method body -max-nested-blocks=5 - - -[FORMAT] - -# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. -expected-line-ending-format= - -# Regexp for a line that is allowed to be longer than the limit. -ignore-long-lines=^\s*(# )??$ - -# Number of spaces of indent required inside a hanging or continued line. -indent-after-paren=4 - -# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 -# tab). -indent-string=' ' - -# Maximum number of characters on a single line. -max-line-length=100 - -# Maximum number of lines in a module -max-module-lines=2000 - -# List of optional constructs for which whitespace checking is disabled. `dict- -# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. -# `trailing-comma` allows a space between comma and closing bracket: (a, ). -# `empty-line` allows space-only lines. -no-space-check=trailing-comma,dict-separator - -# Allow the body of a class to be on the same line as the declaration if body -# contains single statement. -single-line-class-stmt=no - -# Allow the body of an if to be on the same line as the test if there is no -# else. -single-line-if-stmt=no - - -[VARIABLES] - -# List of additional names supposed to be defined in builtins. Remember that -# you should avoid to define new builtins when possible. -additional-builtins= - -# Tells whether unused global variables should be treated as a violation. -allow-global-unused-variables=yes - -# List of strings which can identify a callback function by name. A callback -# name must start or end with one of those strings. -callbacks=cb_,_cb - -# A regular expression matching the name of dummy variables (i.e. expectedly -# not used). -dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ - -# Argument names that match this expression will be ignored. Default to name -# with leading underscore -ignored-argument-names=_.*|^ignored_|^unused_ - -# Tells whether we should check for unused import in __init__ files. -init-import=no - -# List of qualified module names which can have objects that can redefine -# builtins. -redefining-builtins-modules=six.moves,future.builtins - - -[SIMILARITIES] - -# Ignore comments when computing similarities. -ignore-comments=yes - -# Ignore docstrings when computing similarities. -ignore-docstrings=yes - -# Ignore imports when computing similarities. -ignore-imports=yes - -# Minimum lines number of a similarity. -min-similarity-lines=3 - - -[LOGGING] - -# Logging modules to check that the string format arguments are in logging -# function parameter format -logging-modules=logging - - -[SPELLING] - -# Spelling dictionary name. Available dictionaries: none. To make it working -# install python-enchant package. -spelling-dict= - -# List of comma separated words that should not be checked. -spelling-ignore-words= - -# A path to a file that contains private dictionary; one word per line. -spelling-private-dict-file= - -# Tells whether to store unknown words to indicated private dictionary in -# --spelling-private-dict-file option instead of raising a message. 
-spelling-store-unknown-words=no - - -[BASIC] - -# Naming hint for argument names -argument-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Regular expression matching correct argument names -argument-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Naming hint for attribute names -attr-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Regular expression matching correct attribute names -attr-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Bad variable names which should always be refused, separated by a comma -bad-names=foo,bar,baz,toto,tutu,tata - -# Naming hint for class attribute names -class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ - -# Regular expression matching correct class attribute names -class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ - -# Naming hint for class names -class-name-hint=[A-Z_][a-zA-Z0-9]+$ - -# Regular expression matching correct class names -class-rgx=[A-Z_][a-zA-Z0-9]+$ - -# Naming hint for constant names -const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ - -# Regular expression matching correct constant names -const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ - -# Minimum line length for functions/classes that require docstrings, shorter -# ones are exempt. -docstring-min-length=-1 - -# Naming hint for function names -function-name-hint=(([a-z][a-z0-9_]{2,50})|(_[a-z0-9_]*))$ - -# Regular expression matching correct function names -function-rgx=(([a-z][a-z0-9_]{2,50})|(_[a-z0-9_]*))$ - -# Good variable names which should always be accepted, separated by a comma -good-names=i,j,k,w,e,s,n,x,y,z,cv,ax - -# Include a hint for the correct naming format with invalid-name -include-naming-hint=no - -# Naming hint for inline iteration names -inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ - -# Regular expression matching correct inline iteration names -inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ - -# Naming hint for method names -method-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Regular expression matching correct method names -method-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Naming hint for module names -module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ - -# Regular expression matching correct module names -module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ - -# Colon-delimited sets of names that determine each other's naming style when -# the name regexes allow several styles. -name-group= - -# Regular expression which should only match function or class names that do -# not require a docstring. -no-docstring-rgx=^_ - -# List of decorators that produce properties, such as abc.abstractproperty. Add -# to this list to register other decorators that produce valid properties. -property-classes=abc.abstractproperty - -# Naming hint for variable names -variable-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Regular expression matching correct variable names -variable-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - - -[TYPECHECK] - -# List of decorators that produce context managers, such as -# contextlib.contextmanager. Add to this list to register other decorators that -# produce valid context managers. -contextmanager-decorators=contextlib.contextmanager - -# List of members which are set dynamically and missed by pylint inference -# system, and so shouldn't trigger E1101 when accessed. Python regular -# expressions are accepted. -generated-members= - -# Tells whether missing members accessed in mixin class should be ignored. A -# mixin class is detected if its name ends with "mixin" (case insensitive). 
-ignore-mixin-members=yes - -# This flag controls whether pylint should warn about no-member and similar -# checks whenever an opaque object is returned when inferring. The inference -# can return multiple potential results while evaluating a Python object, but -# some branches might not be evaluated, which results in partial inference. In -# that case, it might be useful to still emit no-member and other checks for -# the rest of the inferred objects. -ignore-on-opaque-inference=yes - -# List of class names for which member attributes should not be checked (useful -# for classes with dynamically set attributes). This supports the use of -# qualified names. -ignored-classes=optparse.Values,thread._local,_thread._localy - -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis. It -# supports qualified module names, as well as Unix pattern matching. -ignored-modules=numpy - -# Show a hint with possible names when a member name was not found. The aspect -# of finding the hint is based on edit distance. -missing-member-hint=yes - -# The minimum edit distance a name should have in order to be considered a -# similar match for a missing member name. -missing-member-hint-distance=1 - -# The total number of similar names that should be taken in consideration when -# showing a hint for a missing member. -missing-member-max-choices=1 - - -[MISCELLANEOUS] - -# List of note tags to take in consideration, separated by a comma. -notes=FIXME,XXX,TODO - - -[DESIGN] - -# Maximum number of arguments for function / method -max-args=10 - -# Maximum number of attributes for a class (see R0902). -max-attributes=10 - -# Maximum number of boolean expressions in a if statement -max-bool-expr=5 - -# Maximum number of branch for function / method body -max-branches=12 - -# Maximum number of locals for function / method body -max-locals=15 - -# Maximum number of parents for a class (see R0901). -max-parents=7 - -# Maximum number of public methods for a class (see R0904). -max-public-methods=20 - -# Maximum number of return / yield for function / method body -max-returns=6 - -# Maximum number of statements in function / method body -max-statements=50 - -# Minimum number of public methods for a class (see R0903). -min-public-methods=2 - - -[IMPORTS] - -# Allow wildcard imports from modules that define __all__. -allow-wildcard-with-all=no - -# Analyse import fallback blocks. This can be used to support both Python 2 and -# 3 compatible code, which means that the block might have code that exists -# only in one or another interpreter, leading to false positives when analysed. -analyse-fallback-blocks=no - -# Deprecated modules which should not be used, separated by a comma -deprecated-modules=optparse,tkinter.tix - -# Create a graph of external dependencies in the given file (report RP0402 must -# not be disabled) -ext-import-graph= - -# Create a graph of every (i.e. internal and external) dependencies in the -# given file (report RP0402 must not be disabled) -import-graph= - -# Create a graph of internal dependencies in the given file (report RP0402 must -# not be disabled) -int-import-graph= - -# Force import order to recognize a module as part of the standard -# compatibility libraries. -known-standard-library= - -# Force import order to recognize a module as part of a third party library. 
-known-third-party=enchant - - -[CLASSES] - -# List of method names used to declare (i.e. assign) instance attributes. -defining-attr-methods=__init__,__new__,setUp - -# List of member names, which should be excluded from the protected access -# warning. -exclude-protected=_asdict,_fields,_replace,_source,_make - -# List of valid names for the first argument in a class method. -valid-classmethod-first-arg=cls - -# List of valid names for the first argument in a metaclass class method. -valid-metaclass-classmethod-first-arg=mcs - - -[EXCEPTIONS] - -# Exceptions that will emit a warning when being caught. Defaults to -# "Exception" -overgeneral-exceptions=Exception diff --git a/Makefile b/Makefile index ab2d43cf9..ebb27d260 100644 --- a/Makefile +++ b/Makefile @@ -2,18 +2,15 @@ PROJECT=verde TESTDIR=tmp-test-dir-with-unique-name PYTEST_ARGS=--cov-config=../.coveragerc --cov-report=term-missing --cov=$(PROJECT) --doctest-modules -v --pyargs -LINT_FILES=setup.py $(PROJECT) license_notice.py -BLACK_FILES=setup.py $(PROJECT) examples data/examples doc/conf.py tutorials license_notice.py -FLAKE8_FILES=setup.py $(PROJECT) examples data/examples doc/conf.py license_notice.py +CHECK_STYLE=setup.py $(PROJECT) examples data/examples doc/conf.py tutorials license_notice.py help: @echo "Commands:" @echo "" @echo " install install in editable mode" @echo " test run the test suite (including doctests) and report coverage" - @echo " format run black to automatically format the code" - @echo " check run code style and quality checks (black and flake8)" - @echo " lint run pylint for a deeper (and slower) quality check" + @echo " format automatically format the code" + @echo " check run code style and quality checks" @echo " clean clean up build and generated files" @echo "" @@ -27,13 +24,15 @@ test: cp $(TESTDIR)/.coverage* . rm -rvf $(TESTDIR) -format: license - black $(BLACK_FILES) +format: license isort black -check: black-check flake8 license-check +check: black-check isort-check license-check flake8 + +black: + black $(CHECK_STYLE) black-check: - black --check $(BLACK_FILES) + black --check $(CHECK_STYLE) license: python license_notice.py @@ -41,14 +40,19 @@ license: license-check: python license_notice.py --check +isort: + isort $(CHECK_STYLE) + +isort-check: + isort --check $(CHECK_STYLE) + flake8: - flake8 $(FLAKE8_FILES) + flake8 $(CHECK_STYLE) -lint: - pylint --jobs=0 $(LINT_FILES) clean: find . -name "*.pyc" -exec rm -v {} \; find . -name ".coverage.*" -exec rm -v {} \; + find . -name "*.orig" -exec rm -v {} \; rm -rvf build dist MANIFEST *.egg-info __pycache__ .coverage .cache .pytest_cache rm -rvf $(TESTDIR) dask-worker-space diff --git a/data/examples/baja_bathymetry.py b/data/examples/baja_bathymetry.py index f423245e2..15e8e8d10 100644 --- a/data/examples/baja_bathymetry.py +++ b/data/examples/baja_bathymetry.py @@ -13,10 +13,10 @@ `__ for their tutorials and gallery. The data is downloaded to a local directory if it's not there already. """ -import matplotlib.pyplot as plt import cartopy.crs as ccrs -import verde as vd +import matplotlib.pyplot as plt +import verde as vd # The data are in a pandas.DataFrame data = vd.datasets.fetch_baja_bathymetry() @@ -26,8 +26,9 @@ plt.figure(figsize=(7, 6)) ax = plt.axes(projection=ccrs.Mercator()) ax.set_title("Bathymetry from Baja California") -# Plot the bathymetry as colored circles. Cartopy requires setting the projection of the -# original data through the transform argument. Use PlateCarree for geographic data. 
+# Plot the bathymetry as colored circles. Cartopy requires setting the +# projection of the original data through the transform argument. Use +# PlateCarree for geographic data. plt.scatter( data.longitude, data.latitude, @@ -36,6 +37,7 @@ transform=ccrs.PlateCarree(), ) plt.colorbar().set_label("meters") -# Use an utility function to add tick labels and land and ocean features to the map. +# Use an utility function to add tick labels and land and ocean features to the +# map. vd.datasets.setup_baja_bathymetry_map(ax) plt.show() diff --git a/data/examples/california_gps.py b/data/examples/california_gps.py index 30ce96ab8..a344e9d82 100644 --- a/data/examples/california_gps.py +++ b/data/examples/california_gps.py @@ -8,16 +8,16 @@ GPS velocities from California ============================== -We provide sample 3-component GPS velocity data from the West coast of the U.S. The data -were cut from EarthScope Plate Boundary Observatory data provided by UNAVCO. The -velocities are in the North American tectonic plate reference system (NAM08). The -velocities and their associated standard deviations are in meters/year. +We provide sample 3-component GPS velocity data from the West coast of the U.S. +The data were cut from EarthScope Plate Boundary Observatory data provided by +UNAVCO. The velocities are in the North American tectonic plate reference +system (NAM08). The velocities and their associated standard deviations are in +meters/year. """ -import matplotlib.pyplot as plt import cartopy.crs as ccrs -import numpy as np -import verde as vd +import matplotlib.pyplot as plt +import verde as vd # The data are in a pandas.DataFrame data = vd.datasets.fetch_california_gps() diff --git a/data/examples/checkerboard.py b/data/examples/checkerboard.py index fac2dcaa1..9a551adf6 100644 --- a/data/examples/checkerboard.py +++ b/data/examples/checkerboard.py @@ -13,6 +13,7 @@ gridders have: predict, grid, scatter, and profile. """ import matplotlib.pyplot as plt + import verde as vd # Instantiate the data generator class and fit it to set the data region. diff --git a/data/examples/rio_magnetic.py b/data/examples/rio_magnetic.py index ab1cce61b..64ea40647 100644 --- a/data/examples/rio_magnetic.py +++ b/data/examples/rio_magnetic.py @@ -15,14 +15,15 @@ :func:`verde.datasets.setup_rio_magnetic_map`). Please use another dataset instead. -We provide sample total-field magnetic anomaly data from an airborne survey of Rio de -Janeiro, Brazil, from the 1970s. The data are made available by the Geological Survey of -Brazil (CPRM) through their `GEOSGB portal `__. See the -documentation for :func:`verde.datasets.fetch_rio_magnetic` for more details. +We provide sample total-field magnetic anomaly data from an airborne survey of +Rio de Janeiro, Brazil, from the 1970s. The data are made available by the +Geological Survey of Brazil (CPRM) through their `GEOSGB portal +`__. See the documentation for +:func:`verde.datasets.fetch_rio_magnetic` for more details. """ -import matplotlib.pyplot as plt import cartopy.crs as ccrs -import numpy as np +import matplotlib.pyplot as plt + import verde as vd # The data are in a pandas.DataFrame @@ -34,12 +35,12 @@ plt.figure(figsize=(7, 5)) ax = plt.axes(projection=ccrs.Mercator()) ax.set_title("Total-field Magnetic Anomaly of Rio de Janeiro") -# Since the data is diverging (going from negative to positive) we need to center our -# colorbar on 0. To do this, we calculate the maximum absolute value of the data to set -# vmin and vmax. 
+# Since the data is diverging (going from negative to positive) we need to +# center our colorbar on 0. To do this, we calculate the maximum absolute value +# of the data to set vmin and vmax. maxabs = vd.maxabs(data.total_field_anomaly_nt) -# Cartopy requires setting the projection of the original data through the transform -# argument. Use PlateCarree for geographic data. +# Cartopy requires setting the projection of the original data through the +# transform argument. Use PlateCarree for geographic data. plt.scatter( data.longitude, data.latitude, diff --git a/data/examples/texas-wind.py b/data/examples/texas-wind.py index 02c2f2d7a..06755feec 100644 --- a/data/examples/texas-wind.py +++ b/data/examples/texas-wind.py @@ -8,14 +8,14 @@ Wind speed data from Texas ========================== -This is average wind speed and air temperature for data for the state of Texas, USA, on -February 26 2018. The original data was downloaded from `Iowa State University -`__. +This is average wind speed and air temperature for data for the state of Texas, +USA, on February 26 2018. The original data was downloaded from `Iowa State +University `__. """ -import matplotlib.pyplot as plt import cartopy.crs as ccrs -import verde as vd +import matplotlib.pyplot as plt +import verde as vd # The data are in a pandas.DataFrame data = vd.datasets.fetch_texas_wind() @@ -43,6 +43,7 @@ width=0.003, transform=ccrs.PlateCarree(), ) -# Use an utility function to add tick labels and land and ocean features to the map. +# Use an utility function to add tick labels and land and ocean features to the +# map. vd.datasets.setup_texas_wind_map(ax) plt.show() diff --git a/doc/conf.py b/doc/conf.py index 30a8d4407..bc6953dbf 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -5,12 +5,10 @@ # This code is part of the Fatiando a Terra project (https://www.fatiando.org) # # -*- coding: utf-8 -*- -import sys -import os import datetime -import sphinx_rtd_theme -import sphinx_gallery + from sphinx_gallery.sorting import FileNameSortKey + from verde import __version__ extensions = [ @@ -84,7 +82,7 @@ # General information about the project year = datetime.date.today().year project = "Verde" -copyright = "2017-{}, The Verde Developers".format(year) +copyright = "2017-{}, The Verde Developers".format(year) # noqa: A001 version = __version__ # These enable substitutions using |variable| in the rst files diff --git a/env/requirements-style.txt b/env/requirements-style.txt index 96c4dd81b..28e39b9ed 100644 --- a/env/requirements-style.txt +++ b/env/requirements-style.txt @@ -1,4 +1,12 @@ -black>=20.8b1 -pylint==2.4.* -flake8 +black pathspec +isort +flake8 +flake8-bugbear +flake8-builtins +flake8-functions +flake8-mutable +flake8-rst-docstrings +flake8-simplify +flake8-unused-arguments +pep8-naming diff --git a/environment.yml b/environment.yml index f10dcef86..d1ed7c70f 100644 --- a/environment.yml +++ b/environment.yml @@ -3,7 +3,7 @@ channels: - conda-forge - defaults dependencies: - - python==3.7 + - python==3.9 - pip # Build - setuptools_scm>=6.2 @@ -21,10 +21,6 @@ dependencies: # Optional - pykdtree - numba - # Style - - black>=20.8b1 - - pylint==2.4.* - - flake8 # Test - matplotlib - cartopy>=0.18 @@ -37,3 +33,19 @@ dependencies: - sphinx_rtd_theme==0.4.3 - sphinx-gallery - pyproj + # Style + - black + - pathspec + - isort + - flake8 + - flake8-bugbear + - flake8-builtins + - flake8-functions + - flake8-mutable + - flake8-rst-docstrings + - flake8-simplify + - pep8-naming + - pip: + # Install flake8-unused-arguments through pip + # (not 
available through conda yet) + - flake8-unused-arguments diff --git a/examples/blockkfold.py b/examples/blockkfold.py index 69d327f36..5616eef5d 100644 --- a/examples/blockkfold.py +++ b/examples/blockkfold.py @@ -30,9 +30,10 @@ k-fold iteration, with and without balancing. Notice that the unbalanced folds have very different numbers of data points. """ -import numpy as np -import matplotlib.pyplot as plt import cartopy.crs as ccrs +import matplotlib.pyplot as plt +import numpy as np + import verde as vd # Let's split the Baja California shipborne bathymetry data diff --git a/examples/blockreduce.py b/examples/blockreduce.py index 0f8dd2546..cca8b80d2 100644 --- a/examples/blockreduce.py +++ b/examples/blockreduce.py @@ -15,9 +15,10 @@ data in blocks. For non-smooth data, like bathymetry, a blocked median filter is a good choice. """ -import matplotlib.pyplot as plt import cartopy.crs as ccrs +import matplotlib.pyplot as plt import numpy as np + import verde as vd # We'll test this on the Baja California shipborne bathymetry data diff --git a/examples/blockreduce_weights.py b/examples/blockreduce_weights.py index eb6400c45..91fee915b 100644 --- a/examples/blockreduce_weights.py +++ b/examples/blockreduce_weights.py @@ -8,16 +8,18 @@ Using weights in blocked reduction ================================== -Sometimes data has outliers or less reliable points that might skew a blocked mean or -even a median. If the reduction function can take a ``weights`` argument, like -``numpy.average``, you can pass in weights to :class:`verde.BlockReduce` to lower the -influence of the offending data points. However, :class:`verde.BlockReduce` can't -produce weights for the blocked data (for use by a gridder, for example). If you want to -produced blocked weights as well, use :class:`verde.BlockMean`. +Sometimes data has outliers or less reliable points that might skew a blocked +mean or even a median. If the reduction function can take a ``weights`` +argument, like ``numpy.average``, you can pass in weights to +:class:`verde.BlockReduce` to lower the influence of the offending data points. +However, :class:`verde.BlockReduce` can't produce weights for the blocked data +(for use by a gridder, for example). If you want to produced blocked weights as +well, use :class:`verde.BlockMean`. """ -import matplotlib.pyplot as plt import cartopy.crs as ccrs +import matplotlib.pyplot as plt import numpy as np + import verde as vd # We'll test this on the California vertical GPS velocity data @@ -28,11 +30,13 @@ data.velocity_up[outliers] += 0.08 print("Index of outliers:", outliers) -# Create an array of weights and set the weights for the outliers to a very low value +# Create an array of weights and set the weights for the outliers to a very low +# value weights = np.ones_like(data.velocity_up) weights[outliers] = 1e-5 -# Now we can block average the points with and without weights to compare the outputs. +# Now we can block average the points with and without weights to compare the +# outputs. 
reducer = vd.BlockReduce(reduction=np.average, spacing=30 / 60, center_coordinates=True) coordinates, no_weights = reducer.filter( (data.longitude, data.latitude), data.velocity_up diff --git a/examples/blockreduce_weights_mean.py b/examples/blockreduce_weights_mean.py index 23ba1cfc2..93f30eabf 100644 --- a/examples/blockreduce_weights_mean.py +++ b/examples/blockreduce_weights_mean.py @@ -8,43 +8,47 @@ Using weights in blocked means ============================== -:class:`verde.BlockReduce` is not able to output weights because we need to make -assumptions about the reduction operation to know how to propagate uncertainties or -calculated weighted variances. That's why verde provides specialized reductions like -:class:`verde.BlockMean`, which can calculate weights from input data in three ways: +:class:`verde.BlockReduce` is not able to output weights because we need to +make assumptions about the reduction operation to know how to propagate +uncertainties or calculated weighted variances. That's why verde provides +specialized reductions like :class:`verde.BlockMean`, which can calculate +weights from input data in three ways: -1. Using the variance of the data. This is the only possible option when no weights are - provided. -2. Using the uncertainty of the weighted mean propagated from the uncertainties in the - data. In this case, we assume that the weights are ``1/uncertainty**2``. +1. Using the variance of the data. This is the only possible option when no + weights are provided. +2. Using the uncertainty of the weighted mean propagated from the uncertainties + in the data. In this case, we assume that the weights are + ``1/uncertainty**2``. 3. Using the weighted variance of the data. -Using the propagated uncertainties may be more adequate if your data is smooth in each -block (low variance) but have very different uncertainties. The propagation preserves a -low weight for data that have large uncertainties but don't vary much inside the block. +Using the propagated uncertainties may be more adequate if your data is smooth +in each block (low variance) but have very different uncertainties. The +propagation preserves a low weight for data that have large uncertainties but +don't vary much inside the block. -The weighted variance should be used when the data vary a lot in each block (high -variance) but have very similar uncertainties. The variance will be large when there is -a lot of variability in the data that isn't due to the uncertainties. This is also the -best choice if your data weights aren't ``1/uncertainty**2``. +The weighted variance should be used when the data vary a lot in each block +(high variance) but have very similar uncertainties. The variance will be large +when there is a lot of variability in the data that isn't due to the +uncertainties. This is also the best choice if your data weights aren't +``1/uncertainty**2``. 
""" -import matplotlib.pyplot as plt -from matplotlib.colors import PowerNorm, LogNorm import cartopy.crs as ccrs -import numpy as np +import matplotlib.pyplot as plt +from matplotlib.colors import LogNorm, PowerNorm + import verde as vd -# We'll test this on the California vertical GPS velocity data because it comes with the -# uncertainties +# We'll test this on the California vertical GPS velocity data because it comes +# with the uncertainties data = vd.datasets.fetch_california_gps() coordinates = (data.longitude, data.latitude) -# We'll calculate the mean on large blocks to show the effect of the different weighting -# schemes +# We'll calculate the mean on large blocks to show the effect of the different +# weighting schemes spacing = 30 / 60 # It's important that the weights are given as 1/sigma**2 for the uncertainty -# propagation. In this case, you should not use verde.variance_to_weights because it -# would normalize the weights. +# propagation. In this case, you should not use verde.variance_to_weights +# because it would normalize the weights. weights = 1 / data.std_up ** 2 reducer = vd.BlockMean(spacing, center_coordinates=True) # First produce the weighted variance weights diff --git a/examples/convex_hull_mask.py b/examples/convex_hull_mask.py index 4b9d9329d..94ebd599d 100644 --- a/examples/convex_hull_mask.py +++ b/examples/convex_hull_mask.py @@ -13,10 +13,11 @@ Function :func:`verde.convexhull_mask` allows us to set grid points that fall outside of the convex hull of the data points to NaN or some other value. """ -import matplotlib.pyplot as plt import cartopy.crs as ccrs -import pyproj +import matplotlib.pyplot as plt import numpy as np +import pyproj + import verde as vd # The Baja California bathymetry dataset has big gaps on land. We want to mask diff --git a/examples/distance_mask.py b/examples/distance_mask.py index 9d4e9ce15..2ba28696a 100644 --- a/examples/distance_mask.py +++ b/examples/distance_mask.py @@ -8,18 +8,20 @@ Mask grid points by distance ============================ -Sometimes, data points are unevenly distributed. In such cases, we might not want to -have interpolated grid points that are too far from any data point. Function -:func:`verde.distance_mask` allows us to set such points to NaN or some other value. +Sometimes, data points are unevenly distributed. In such cases, we might not +want to have interpolated grid points that are too far from any data point. +Function :func:`verde.distance_mask` allows us to set such points to NaN or +some other value. """ -import matplotlib.pyplot as plt import cartopy.crs as ccrs -import pyproj +import matplotlib.pyplot as plt import numpy as np +import pyproj + import verde as vd -# The Baja California bathymetry dataset has big gaps on land. We want to mask these -# gaps on a dummy grid that we'll generate over the region. +# The Baja California bathymetry dataset has big gaps on land. We want to mask +# these gaps on a dummy grid that we'll generate over the region. data = vd.datasets.fetch_baja_bathymetry() region = vd.get_region((data.longitude, data.latitude)) @@ -27,12 +29,12 @@ spacing = 10 / 60 coordinates = vd.grid_coordinates(region, spacing=spacing) -# Generate a mask for points that are more than 2 grid spacings away from any data -# point. The mask is True for points that are within the maximum distance. Distance -# calculations in the mask are Cartesian only. We can provide a projection function to -# convert the coordinates before distances are calculated (Mercator in this case). 
In -# this case, the maximum distance is also Cartesian and must be converted from degrees -# to meters. +# Generate a mask for points that are more than 2 grid spacings away from any +# data point. The mask is True for points that are within the maximum distance. +# Distance calculations in the mask are Cartesian only. We can provide a +# projection function to convert the coordinates before distances are +# calculated (Mercator in this case). In this case, the maximum distance is +# also Cartesian and must be converted from degrees to meters. mask = vd.distance_mask( (data.longitude, data.latitude), maxdist=spacing * 2 * 111e3, diff --git a/examples/project_grid.py b/examples/project_grid.py index ef5edd2fb..badacd276 100644 --- a/examples/project_grid.py +++ b/examples/project_grid.py @@ -36,11 +36,10 @@ `pyresample `__ instead. """ -import numpy as np import matplotlib.pyplot as plt import pyproj -import verde as vd +import verde as vd # We'll use synthetic data near the South pole to highlight the effects of the # projection. EPSG 3031 is a South Polar Stereographic projection. @@ -63,9 +62,9 @@ print("Geographic grid:") print(data) -# Do the projection while setting the output grid spacing (in projected meters). Set -# the coordinates names to x and y since they aren't really "northing" or -# "easting". +# Do the projection while setting the output grid spacing (in projected +# meters). Set the coordinates names to x and y since they aren't really +# "northing" or "easting". polar_data = vd.project_grid( data.checkerboard, projection, spacing=0.5 * 1e5, dims=("y", "x") ) diff --git a/examples/scipygridder.py b/examples/scipygridder.py index dee69cf49..9f6755e91 100644 --- a/examples/scipygridder.py +++ b/examples/scipygridder.py @@ -9,31 +9,33 @@ =================== Scipy offers a range of interpolation methods in :mod:`scipy.interpolate` and 3 -specifically for 2D data (linear, nearest neighbors, and bicubic). Verde offers an -interface for these 3 scipy interpolators in :class:`verde.ScipyGridder`. +specifically for 2D data (linear, nearest neighbors, and bicubic). Verde offers +an interface for these 3 scipy interpolators in :class:`verde.ScipyGridder`. -All of these interpolations work on Cartesian data, so if we want to grid geographic -data (like our Baja California bathymetry) we need to project them into a Cartesian -system. We'll use `pyproj `__ to calculate a Mercator -projection for the data. +All of these interpolations work on Cartesian data, so if we want to grid +geographic data (like our Baja California bathymetry) we need to project them +into a Cartesian system. We'll use `pyproj +`__ to calculate a Mercator projection for +the data. For convenience, Verde still allows us to make geographic grids by passing the -``projection`` argument to :meth:`verde.ScipyGridder.grid` and the like. When doing so, -the grid will be generated using geographic coordinates which will be projected prior to -interpolation. +``projection`` argument to :meth:`verde.ScipyGridder.grid` and the like. When +doing so, the grid will be generated using geographic coordinates which will be +projected prior to interpolation. 
""" -import matplotlib.pyplot as plt import cartopy.crs as ccrs -import pyproj +import matplotlib.pyplot as plt import numpy as np +import pyproj + import verde as vd # We'll test this on the Baja California shipborne bathymetry data data = vd.datasets.fetch_baja_bathymetry() -# Before gridding, we need to decimate the data to avoid aliasing because of the -# oversampling along the ship tracks. We'll use a blocked median with 5 arc-minute -# blocks. +# Before gridding, we need to decimate the data to avoid aliasing because of +# the oversampling along the ship tracks. We'll use a blocked median with 5 +# arc-minute blocks. spacing = 5 / 60 reducer = vd.BlockReduce(reduction=np.median, spacing=spacing) coordinates, bathymetry = reducer.filter( @@ -53,12 +55,13 @@ region = vd.get_region((data.longitude, data.latitude)) print("Data region:", region) -# The 'grid' method can still make a geographic grid if we pass in a projection function -# that converts lon, lat into the easting, northing coordinates that we used in 'fit'. -# This can be any function that takes lon, lat and returns x, y. In our case, it'll be -# the 'projection' variable that we created above. We'll also set the names of the grid -# dimensions and the name the data variable in our grid (the default would be 'scalars', -# which isn't very informative). +# The 'grid' method can still make a geographic grid if we pass in a projection +# function that converts lon, lat into the easting, northing coordinates that +# we used in 'fit'. This can be any function that takes lon, lat and returns x, +# y. In our case, it'll be the 'projection' variable that we created above. +# We'll also set the names of the grid dimensions and the name the data +# variable in our grid (the default would be 'scalars', which isn't very +# informative). grid = grd.grid( region=region, spacing=spacing, @@ -69,9 +72,9 @@ print("Generated geographic grid:") print(grid) -# Cartopy requires setting the coordinate reference system (CRS) of the original data -# through the transform argument. Their docs say to use PlateCarree to represent -# geographic data. +# Cartopy requires setting the coordinate reference system (CRS) of the +# original data through the transform argument. Their docs say to use +# PlateCarree to represent geographic data. crs = ccrs.PlateCarree() plt.figure(figsize=(7, 6)) diff --git a/examples/spline.py b/examples/spline.py index 26b6ab136..ab16cee83 100644 --- a/examples/spline.py +++ b/examples/spline.py @@ -8,23 +8,25 @@ Gridding with splines ===================== -Biharmonic spline interpolation is based on estimating vertical forces acting on an -elastic sheet that yield deformations in the sheet equal to the observed data. The -results are similar to using :class:`verde.ScipyGridder` with ``method='cubic'`` but -the interpolation is usually a bit slower. However, the advantage of using -:class:`verde.Spline` is that we can assign weights to the data and do model evaluation. +Biharmonic spline interpolation is based on estimating vertical forces acting +on an elastic sheet that yield deformations in the sheet equal to the observed +data. The results are similar to using :class:`verde.ScipyGridder` with +``method='cubic'`` but the interpolation is usually a bit slower. However, the +advantage of using :class:`verde.Spline` is that we can assign weights to the +data and do model evaluation. .. note:: Scoring on a single split of the data can be highly dependent on the - ``random_state``. 
See :ref:`model_selection` for more information and a better - approach. + ``random_state``. See :ref:`model_selection` for more information and a + better approach. """ -import matplotlib.pyplot as plt import cartopy.crs as ccrs -import pyproj +import matplotlib.pyplot as plt import numpy as np +import pyproj + import verde as vd # We'll test this on the air temperature data from Texas @@ -38,9 +40,10 @@ # The output grid spacing will 15 arc-minutes spacing = 15 / 60 -# Now we can chain a blocked mean and spline together. The Spline can be regularized -# by setting the damping coefficient (should be positive). It's also a good idea to set -# the minimum distance to the average data spacing to avoid singularities in the spline. +# Now we can chain a blocked mean and spline together. The Spline can be +# regularized by setting the damping coefficient (should be positive). It's +# also a good idea to set the minimum distance to the average data spacing to +# avoid singularities in the spline. chain = vd.Chain( [ ("mean", vd.BlockReduce(np.mean, spacing=spacing * 111e3)), @@ -49,9 +52,9 @@ ) print(chain) -# We can evaluate model performance by splitting the data into a training and testing -# set. We'll use the training set to grid the data and the testing set to validate our -# spline model. +# We can evaluate model performance by splitting the data into a training and +# testing set. We'll use the training set to grid the data and the testing set +# to validate our spline model. train, test = vd.train_test_split( projection(*coordinates), data.air_temperature_c, random_state=0 ) @@ -59,14 +62,15 @@ # Fit the model on the training set chain.fit(*train) -# And calculate an R^2 score coefficient on the testing set. The best possible score -# (perfect prediction) is 1. This can tell us how good our spline is at predicting data -# that was not in the input dataset. +# And calculate an R^2 score coefficient on the testing set. The best possible +# score (perfect prediction) is 1. This can tell us how good our spline is at +# predicting data that was not in the input dataset. score = chain.score(*test) print("\nScore: {:.3f}".format(score)) -# Now we can create a geographic grid of air temperature by providing a projection -# function to the grid method and mask points that are too far from the observations +# Now we can create a geographic grid of air temperature by providing a +# projection function to the grid method and mask points that are too far from +# the observations grid_full = chain.grid( region=region, spacing=spacing, @@ -88,6 +92,7 @@ ax=ax, cmap="plasma", transform=ccrs.PlateCarree(), add_colorbar=False ) plt.colorbar(tmp).set_label("Air temperature (C)") -# Use an utility function to add tick labels and land and ocean features to the map. +# Use an utility function to add tick labels and land and ocean features to the +# map. vd.datasets.setup_texas_wind_map(ax, region=region) plt.show() diff --git a/examples/spline_cv.py b/examples/spline_cv.py index 5779e9910..81b20b955 100644 --- a/examples/spline_cv.py +++ b/examples/spline_cv.py @@ -13,10 +13,10 @@ 1. ``mindist``: the minimum distance between forces and data points 2. ``damping``: the regularization parameter controlling smoothness -These parameters can be determined through cross-validation (see :ref:`model_selection`) -automatically using :class:`verde.SplineCV`. It is very similar to :class:`verde.Spline` -but takes a set of parameter values instead of only one value. 
When calling -:meth:`verde.SplineCV.fit`, the class will: +These parameters can be determined through cross-validation (see +:ref:`model_selection`) automatically using :class:`verde.SplineCV`. It is very +similar to :class:`verde.Spline` but takes a set of parameter values instead of +only one value. When calling :meth:`verde.SplineCV.fit`, the class will: 1. Create a spline for each combination of the input parameter sets 2. Calculate the cross-validation score for each spline using @@ -24,10 +24,10 @@ 3. Pick the spline with the highest score """ -import matplotlib.pyplot as plt import cartopy.crs as ccrs +import matplotlib.pyplot as plt import pyproj -import numpy as np + import verde as vd # We'll test this on the air temperature data from Texas @@ -41,13 +41,13 @@ # The output grid spacing will 15 arc-minutes spacing = 15 / 60 -# This spline will automatically perform cross-validation and search for the optimal -# parameter configuration. +# This spline will automatically perform cross-validation and search for the +# optimal parameter configuration. spline = vd.SplineCV(dampings=(1e-5, 1e-3, 1e-1), mindists=(10e3, 50e3, 100e3)) # Fit the model on the data. Under the hood, the class will perform K-fold -# cross-validation for each the 3*3=9 parameter combinations and pick the one with the -# highest R² score. +# cross-validation for each the 3*3=9 parameter combinations and pick the one +# with the highest R² score. spline.fit(projection(*coordinates), data.air_temperature_c) # We can show the best R² score obtained in the cross-validation @@ -58,8 +58,9 @@ print(" mindist:", spline.mindist_) print(" damping:", spline.damping_) -# Now we can create a geographic grid of air temperature by providing a projection -# function to the grid method and mask points that are too far from the observations +# Now we can create a geographic grid of air temperature by providing a +# projection function to the grid method and mask points that are too far from +# the observations grid_full = spline.grid( region=region, spacing=spacing, @@ -80,6 +81,7 @@ ax=ax, cmap="plasma", transform=ccrs.PlateCarree(), add_colorbar=False ) plt.colorbar(tmp).set_label("Air temperature (C)") -# Use an utility function to add tick labels and land and ocean features to the map. +# Use an utility function to add tick labels and land and ocean features to the +# map. vd.datasets.setup_texas_wind_map(ax, region=region) plt.show() diff --git a/examples/spline_weights.py b/examples/spline_weights.py index 19902c625..3090da7d5 100644 --- a/examples/spline_weights.py +++ b/examples/spline_weights.py @@ -9,29 +9,29 @@ ================================= An advantage of using the Green's functions based :class:`verde.Spline` over -:class:`verde.ScipyGridder` is that you can assign weights to the data to incorporate -the data uncertainties or variance into the gridding. -In this example, we'll see how to combine :class:`verde.BlockMean` to decimate the data +:class:`verde.ScipyGridder` is that you can assign weights to the data to +incorporate the data uncertainties or variance into the gridding. In this +example, we'll see how to combine :class:`verde.BlockMean` to decimate the data and use weights based on the data uncertainty during gridding. 
""" -import matplotlib.pyplot as plt -from matplotlib.colors import PowerNorm import cartopy.crs as ccrs +import matplotlib.pyplot as plt import pyproj -import numpy as np +from matplotlib.colors import PowerNorm + import verde as vd -# We'll test this on the California vertical GPS velocity data because it comes with the -# uncertainties +# We'll test this on the California vertical GPS velocity data because it comes +# with the uncertainties data = vd.datasets.fetch_california_gps() coordinates = (data.longitude.values, data.latitude.values) # Use a Mercator projection for our Cartesian gridder projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean()) -# Now we can chain a block weighted mean and weighted spline together. We'll use -# uncertainty propagation to calculate the new weights from block mean because our data -# vary smoothly but have different uncertainties. +# Now we can chain a block weighted mean and weighted spline together. We'll +# use uncertainty propagation to calculate the new weights from block mean +# because our data vary smoothly but have different uncertainties. spacing = 5 / 60 # 5 arc-minutes chain = vd.Chain( [ @@ -41,9 +41,9 @@ ) print(chain) -# Split the data into a training and testing set. We'll use the training set to grid the -# data and the testing set to validate our spline model. Weights need to -# 1/uncertainty**2 for the error propagation in BlockMean to work. +# Split the data into a training and testing set. We'll use the training set to +# grid the data and the testing set to validate our spline model. Weights need +# to 1/uncertainty**2 for the error propagation in BlockMean to work. train, test = vd.train_test_split( projection(*coordinates), data.velocity_up, @@ -52,14 +52,14 @@ ) # Fit the model on the training set chain.fit(*train) -# And calculate an R^2 score coefficient on the testing set. The best possible score -# (perfect prediction) is 1. This can tell us how good our spline is at predicting data -# that was not in the input dataset. +# And calculate an R^2 score coefficient on the testing set. The best possible +# score (perfect prediction) is 1. This can tell us how good our spline is at +# predicting data that was not in the input dataset. score = chain.score(*test) print("\nScore: {:.3f}".format(score)) -# Create a grid of the vertical velocity and mask it to only show points close to the -# actual data. +# Create a grid of the vertical velocity and mask it to only show points close +# to the actual data. region = vd.get_region(coordinates) grid_full = chain.grid( region=region, @@ -79,8 +79,8 @@ # Plot the data uncertainties ax = axes[0] ax.set_title("Data uncertainty") -# Plot the uncertainties in mm/yr and using a power law for the color scale to highlight -# the smaller values +# Plot the uncertainties in mm/yr and using a power law for the color scale to +# highlight the smaller values pc = ax.scatter( *coordinates, c=data.std_up * 1000, diff --git a/examples/train_test_split.py b/examples/train_test_split.py index c74e04e96..771575084 100644 --- a/examples/train_test_split.py +++ b/examples/train_test_split.py @@ -29,8 +29,9 @@ This example compares splitting our sample dataset using both methods. 
""" -import matplotlib.pyplot as plt import cartopy.crs as ccrs +import matplotlib.pyplot as plt + import verde as vd # Let's split the Baja California shipborne bathymetry data diff --git a/examples/trend.py b/examples/trend.py index 81c6bf69a..3692fdf1d 100644 --- a/examples/trend.py +++ b/examples/trend.py @@ -8,15 +8,15 @@ Polynomial trend ================ -Verde offers the :class:`verde.Trend` class to fit a 2D polynomial trend to your data. -This can be useful for isolating a regional component of your data, for example, which -is a common operation for gravity and magnetic data. Let's look at how we can use Verde -to remove the clear trend from our Texas temperature dataset -(:func:`verde.datasets.fetch_texas_wind`). +Verde offers the :class:`verde.Trend` class to fit a 2D polynomial trend to +your data. This can be useful for isolating a regional component of your data, +for example, which is a common operation for gravity and magnetic data. Let's +look at how we can use Verde to remove the clear trend from our Texas +temperature dataset (:func:`verde.datasets.fetch_texas_wind`). """ -import numpy as np -import matplotlib.pyplot as plt import cartopy.crs as ccrs +import matplotlib.pyplot as plt + import verde as vd # Load the Texas wind and temperature data as a pandas.DataFrame diff --git a/examples/vector_trend.py b/examples/vector_trend.py index fc3e7f172..db8544dc5 100644 --- a/examples/vector_trend.py +++ b/examples/vector_trend.py @@ -8,27 +8,28 @@ Trends in vector data ===================== -Verde provides the :class:`verde.Trend` class to estimate a polynomial trend and the -:class:`verde.Vector` class to apply any combination of estimators to each component -of vector data, like GPS velocities. You can access each component as a separate -(fitted) :class:`verde.Trend` instance or operate on all vector components directly -using using :meth:`verde.Vector.predict`, :meth:`verde.Vector.grid`, etc, or -chaining it with a vector interpolator using :class:`verde.Chain`. +Verde provides the :class:`verde.Trend` class to estimate a polynomial trend +and the :class:`verde.Vector` class to apply any combination of estimators to +each component of vector data, like GPS velocities. You can access each +component as a separate (fitted) :class:`verde.Trend` instance or operate on +all vector components directly using using :meth:`verde.Vector.predict`, +:meth:`verde.Vector.grid`, etc, or chaining it with a vector interpolator using +:class:`verde.Chain`. """ -import matplotlib.pyplot as plt import cartopy.crs as ccrs -import numpy as np -import verde as vd +import matplotlib.pyplot as plt +import verde as vd -# Fetch the GPS data from the U.S. West coast. The data has a strong trend toward the -# North-West because of the relative movement along the San Andreas Fault System. +# Fetch the GPS data from the U.S. West coast. The data has a strong trend +# toward the North-West because of the relative movement along the San Andreas +# Fault System. data = vd.datasets.fetch_california_gps() -# We'll fit a degree 2 trend on both the East and North components and weight the data -# using the inverse of the variance of each component. -# Note: Never use [Trend(...)]*2 as an argument to Vector. This creates references -# to the same Trend instance and will mess up the fitting. +# We'll fit a degree 2 trend on both the East and North components and weight +# the data using the inverse of the variance of each component. Note: Never use +# [Trend(...)]*2 as an argument to Vector. 
This creates references to the same +# Trend instance and will mess up the fitting. trend = vd.Vector([vd.Trend(degree=2) for i in range(2)]) weights = vd.variance_to_weights((data.std_east ** 2, data.std_north ** 2)) trend.fit( @@ -38,8 +39,8 @@ ) print("Vector trend estimator:", trend) -# The separate Trend objects for each component can be accessed through the 'components' -# attribute. You could grid them individually if you wanted. +# The separate Trend objects for each component can be accessed through the +# 'components' attribute. You could grid them individually if you wanted. print("East component trend:", trend.components[0]) print("East trend coefficients:", trend.components[0].coef_) print("North component trend:", trend.components[1]) diff --git a/examples/vector_uncoupled.py b/examples/vector_uncoupled.py index 04297ec01..089b8bd67 100644 --- a/examples/vector_uncoupled.py +++ b/examples/vector_uncoupled.py @@ -16,12 +16,12 @@ :class:`verde.Spline`, and :class:`verde.Chain` to create a full processing pipeline. """ -import matplotlib.pyplot as plt import cartopy.crs as ccrs +import matplotlib.pyplot as plt import numpy as np import pyproj -import verde as vd +import verde as vd # Fetch the wind speed data from Texas. data = vd.datasets.fetch_texas_wind() @@ -33,8 +33,9 @@ # Use a Mercator projection because Spline is a Cartesian gridder projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean()) -# Split the data into a training and testing set. We'll fit the gridder on the training -# set and use the testing set to evaluate how well the gridder is performing. +# Split the data into a training and testing set. We'll fit the gridder on the +# training set and use the testing set to evaluate how well the gridder is +# performing. train, test = vd.train_test_split( projection(*coordinates), (data.wind_speed_east_knots, data.wind_speed_north_knots), @@ -44,9 +45,10 @@ # We'll make a 20 arc-minute grid spacing = 20 / 60 -# Chain together a blocked mean to avoid aliasing, a polynomial trend (Spline usually -# requires de-trended data), and finally a Spline for each component. Notice that -# BlockReduce can work on multicomponent data without the use of Vector. +# Chain together a blocked mean to avoid aliasing, a polynomial trend (Spline +# usually requires de-trended data), and finally a Spline for each component. +# Notice that BlockReduce can work on multicomponent data without the use of +# Vector. chain = vd.Chain( [ ("mean", vd.BlockReduce(np.mean, spacing * 111e3)), @@ -61,13 +63,13 @@ # Fit on the training data chain.fit(*train) -# And score on the testing data. The best possible score is 1, meaning a perfect -# prediction of the test data. +# And score on the testing data. The best possible score is 1, meaning a +# perfect prediction of the test data. score = chain.score(*test) print("Cross-validation R^2 score: {:.2f}".format(score)) -# Interpolate the wind speed onto a regular geographic grid and mask the data that are -# outside of the convex hull of the data points. +# Interpolate the wind speed onto a regular geographic grid and mask the data +# that are outside of the convex hull of the data points. grid_full = chain.grid( region=region, spacing=spacing, @@ -103,6 +105,7 @@ ) ax.quiverkey(tmp, 0.17, 0.23, 5, label="5 knots", coordinates="figure") ax.legend(loc="lower left") -# Use an utility function to add tick labels and land and ocean features to the map. +# Use a utility function to add tick labels and land and ocean features to the +# map.
vd.datasets.setup_texas_wind_map(ax) plt.show() diff --git a/license_notice.py b/license_notice.py index 509366c8f..1a2fc15b7 100644 --- a/license_notice.py +++ b/license_notice.py @@ -8,10 +8,10 @@ Add license notice to every source file if not present """ import sys -from pathlib import Path from argparse import ArgumentParser -from pathspec import PathSpec +from pathlib import Path +from pathspec import PathSpec PROJECT = "verde" YEAR = "2017" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..b625b8570 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,4 @@ +# Make sure isort and Black are compatible +[tool.isort] +profile = "black" +multi_line_output = 3 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 000000000..dbe93b1bb --- /dev/null +++ b/setup.cfg @@ -0,0 +1,36 @@ +[flake8] +max-line-length = 88 +max-doc-length = 79 +ignore = + # Too many leading '#' for block comment + E266, + # Line too long (82 > 79 characters) + E501, + # Do not use variables named 'I', 'O', or 'l' + E741, + # Line break before binary operator (conflicts with black) + W503, +exclude = + .git, + __pycache__, + .ipynb_checkpoints, +per-file-ignores = + # disable unused-imports errors on __init__.py + __init__.py: F401 + +# Configure flake8-rst-docstrings # ------------------------------- # Add some roles used in our docstrings +rst-roles = + class, + func, + mod, + meth, + ref, +# Ignore "Unknown target name" raised on citations +extend-ignore = RST306 + +# Configure flake8-functions # -------------------------- # Allow a max of 10 arguments per function +max-parameters-amount = 10 diff --git a/setup.py b/setup.py index 6150bd0db..0c23ca84f 100644 --- a/setup.py +++ b/setup.py @@ -9,8 +9,7 @@ Uses setuptools-scm to manage version numbers using git tags. """ -from setuptools import setup, find_packages - +from setuptools import find_packages, setup NAME = "verde" FULLNAME = "Verde" diff --git a/tutorials/chain.py b/tutorials/chain.py index 307f56d61..8e4635668 100644 --- a/tutorials/chain.py +++ b/tutorials/chain.py @@ -15,34 +15,37 @@ #. Fit a spline to the residual of the trend #. Grid using the spline and restore the trend -The :class:`verde.Chain` class allows us to created gridders that perform multiple -operations on data. Each step in the chain filters the input and passes the result along -to the next step. For gridders and trend estimators, filtering means fitting the model -and passing along the residuals (input data minus predicted data). When predicting data, -the predictions of each step are added together. - -Other operations, like :class:`verde.BlockReduce` and :class:`verde.BlockMean` change -the input data values and the coordinates but don't impact the predictions because they -don't implement the :meth:`~verde.base.BaseGridder.predict` method. +The :class:`verde.Chain` class allows us to create gridders that perform +multiple operations on data. Each step in the chain filters the input and +passes the result along to the next step. For gridders and trend estimators, +filtering means fitting the model and passing along the residuals (input data +minus predicted data). When predicting data, the predictions of each step are +added together. + +Other operations, like :class:`verde.BlockReduce` and :class:`verde.BlockMean` +change the input data values and the coordinates but don't impact the +predictions because they don't implement the +:meth:`~verde.base.BaseGridder.predict` method. ..
note:: The :class:`~verde.Chain` class was inspired by the - :class:`sklearn.pipeline.Pipeline` class, which doesn't serve our purposes because - it only affects the feature matrix, not what we would call *data* (the target - vector). + :class:`sklearn.pipeline.Pipeline` class, which doesn't serve our purposes + because it only affects the feature matrix, not what we would call *data* + (the target vector). For example, let's create a pipeline to grid our sample bathymetry data. """ -import numpy as np -import matplotlib.pyplot as plt import cartopy.crs as ccrs +import matplotlib.pyplot as plt +import numpy as np import pyproj + import verde as vd data = vd.datasets.fetch_baja_bathymetry() region = vd.get_region((data.longitude, data.latitude)) -# The desired grid spacing in degrees (converted to meters using 1 degree approx. 111km) +# The desired grid spacing in degrees spacing = 10 / 60 # Use Mercator projection because Spline is a Cartesian gridder projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean()) @@ -62,9 +65,9 @@ vd.datasets.setup_baja_bathymetry_map(ax) plt.show() -######################################################################################## -# We'll create a chain that applies a blocked median to the data, fits a polynomial -# trend, and then fits a standard gridder to the trend residuals. +############################################################################### +# We'll create a chain that applies a blocked median to the data, fits a +# polynomial trend, and then fits a standard gridder to the trend residuals. chain = vd.Chain( [ @@ -75,8 +78,9 @@ ) print(chain) -######################################################################################## -# Calling :meth:`verde.Chain.fit` will automatically run the data through the chain: +############################################################################### +# Calling :meth:`verde.Chain.fit` will automatically run the data through the +# chain: # # #. Apply the blocked median to the input data # #. Fit a trend to the blocked data and output the residuals @@ -84,14 +88,16 @@ chain.fit(proj_coords, data.bathymetry_m) -######################################################################################## -# Now that the data has been through the chain, calling :meth:`verde.Chain.predict` will -# sum the results of every step in the chain that has a ``predict`` method. In our case, -# that will be only the :class:`~verde.Trend` and :class:`~verde.Spline`. +############################################################################### +# Now that the data has been through the chain, calling +# :meth:`verde.Chain.predict` will sum the results of every step in the chain +# that has a ``predict`` method. In our case, that will be only the +# :class:`~verde.Trend` and :class:`~verde.Spline`. # -# We can verify the quality of the fit by inspecting a histogram of the residuals with -# respect to the original data. Remember that our spline and trend were fit on decimated -# data, not the original data, so the fit won't be perfect. +# We can verify the quality of the fit by inspecting a histogram of the +# residuals with respect to the original data. Remember that our spline and +# trend were fit on decimated data, not the original data, so the fit won't be +# perfect. 
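# As a quick sanity check of the "added together" behaviour described above
# (a minimal sketch: it assumes the ``chain`` and ``proj_coords`` defined
# earlier in this tutorial and uses the ``named_steps`` attribute shown
# further below), the chain's prediction should match the sum of the
# predictions of the two steps that implement ``predict``:
step_sum = chain.named_steps["trend"].predict(proj_coords) + chain.named_steps[
    "spline"
].predict(proj_coords)
print("Sum of steps matches chain:", np.allclose(step_sum, chain.predict(proj_coords)))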
residuals = data.bathymetry_m - chain.predict(proj_coords) @@ -102,10 +108,10 @@ plt.xlim(-1500, 1500) plt.show() -######################################################################################## -# Likewise, :meth:`verde.Chain.grid` creates a grid of the combined trend and spline -# predictions. This is equivalent to a *remove-compute-restore* procedure that should be -# familiar to the geodesists among us. +############################################################################### +# Likewise, :meth:`verde.Chain.grid` creates a grid of the combined trend and +# spline predictions. This is equivalent to a *remove-compute-restore* +# procedure that should be familiar to the geodesists among us. grid = chain.grid( region=region, @@ -116,7 +122,7 @@ ) print(grid) -######################################################################################## +############################################################################### # Finally, we can plot the resulting grid: plt.figure(figsize=(7, 6)) @@ -129,17 +135,18 @@ vd.datasets.setup_baja_bathymetry_map(ax) plt.show() -######################################################################################## -# Each component of the chain can be accessed separately using the ``named_steps`` -# attribute. It's a dictionary with keys and values matching the inputs given to the -# :class:`~verde.Chain`. +############################################################################### +# Each component of the chain can be accessed separately using the +# ``named_steps`` attribute. It's a dictionary with keys and values matching +# the inputs given to the :class:`~verde.Chain`. print(chain.named_steps["trend"]) print(chain.named_steps["spline"]) -######################################################################################## -# All gridders and estimators in the chain have been fitted and can be used to generate -# grids and predictions. For example, we can get a grid of the estimated trend: +############################################################################### +# All gridders and estimators in the chain have been fitted and can be used to +# generate grids and predictions. For example, we can get a grid of the +# estimated trend: grid_trend = chain.named_steps["trend"].grid( region=region, diff --git a/tutorials/decimation.py b/tutorials/decimation.py index b0674c6c3..da478efd6 100644 --- a/tutorials/decimation.py +++ b/tutorials/decimation.py @@ -8,16 +8,19 @@ Data Decimation =============== -Often times, raw spatial data can be highly oversampled in a direction. In these cases, -we need to decimate the data before interpolation to avoid aliasing effects. +Often times, raw spatial data can be highly oversampled in a direction. In +these cases, we need to decimate the data before interpolation to avoid +aliasing effects. 
""" -import matplotlib.pyplot as plt import cartopy.crs as ccrs +import matplotlib.pyplot as plt +import numpy as np + import verde as vd -######################################################################################## -# For example, our sample shipborne bathymetry data has a higher sampling frequency -# along the tracks than between tracks: +############################################################################### +# For example, our sample shipborne bathymetry data has a higher sampling +# frequency along the tracks than between tracks: # Load the data as a pandas.DataFrame data = vd.datasets.fetch_baja_bathymetry() @@ -32,25 +35,25 @@ vd.datasets.setup_baja_bathymetry_map(ax) plt.show() -######################################################################################## +############################################################################### # Class :class:`verde.BlockReduce` can be used to apply a reduction/aggregation -# operation (mean, median, standard deviation, etc) to the data in regular blocks. All -# data inside each block will be replaced by their aggregated value. -# :class:`~verde.BlockReduce` takes an aggregation function as input. It can be any -# function that receives a numpy array as input and returns a single scalar value. The -# :func:`numpy.mean` or :func:`numpy.median` functions are usually what we want. -import numpy as np - -######################################################################################## -# Blocked means and medians are good ways to decimate data for interpolation. Let's use -# a blocked median on our data to decimate it to our desired grid interval of 5 -# arc-minutes. The reason for using a median over a mean is because bathymetry data can -# vary abruptly and a mean would smooth the data too much. For data varies more -# smoothly (like gravity and magnetic data), a mean would be a better option. +# operation (mean, median, standard deviation, etc) to the data in regular +# blocks. All data inside each block will be replaced by their aggregated +# value. :class:`~verde.BlockReduce` takes an aggregation function as input. It +# can be any function that receives a numpy array as input and returns a single +# scalar value. The :func:`numpy.mean` or :func:`numpy.median` functions are +# usually what we want. +# +# Blocked means and medians are good ways to decimate data for interpolation. +# Let's use a blocked median on our data to decimate it to our desired grid +# interval of 5 arc-minutes. The reason for using a median over a mean is +# because bathymetry data can vary abruptly and a mean would smooth the data +# too much. For data varies more smoothly (like gravity and magnetic data), a +# mean would be a better option. reducer = vd.BlockReduce(reduction=np.median, spacing=5 / 60) print(reducer) -######################################################################################## +############################################################################### # Use the :meth:`~verde.BlockReduce.filter` method to apply the reduction: coordinates, bathymetry = reducer.filter( coordinates=(data.longitude, data.latitude), data=data.bathymetry_m @@ -65,11 +68,11 @@ plt.show() -######################################################################################## -# By default, the coordinates of the decimated data are obtained by applying the same -# reduction operation to the coordinates of the original data. 
Alternatively, we can -# tell :class:`~verde.BlockReduce` to return the coordinates of the center of each -# block: +############################################################################### +# By default, the coordinates of the decimated data are obtained by applying +# the same reduction operation to the coordinates of the original data. +# Alternatively, we can tell :class:`~verde.BlockReduce` to return the +# coordinates of the center of each block: reducer_center = vd.BlockReduce( reduction=np.median, spacing=5 / 60, center_coordinates=True ) @@ -85,5 +88,5 @@ vd.datasets.setup_baja_bathymetry_map(ax) plt.show() -######################################################################################## +############################################################################### # Now the data are ready for interpolation. diff --git a/tutorials/grid_coordinates.py b/tutorials/grid_coordinates.py index 6136f8e03..0835be5ba 100644 --- a/tutorials/grid_coordinates.py +++ b/tutorials/grid_coordinates.py @@ -10,25 +10,23 @@ Grid Coordinates ==================== -Creating the coordinates for regular grids in Verde is done using the -:func:`verde.grid_coordinates` function. It creates a set of regularly spaced points in -both the west-east and south-north directions, i.e. a two-dimensional spatial grid. These -points are then used by the Verde gridders to interpolate between data points. As such, all -`.grid` methods (like :meth:`verde.Spline.grid`) take as input the configuration parameters -for :func:`verde.grid_coordinates`. The grid can be specified either by the number of points -in each dimension (the shape) or by the grid node spacing. - - +Creating the coordinates for regular grids in Verde is done using the +:func:`verde.grid_coordinates` function. It creates a set of regularly spaced +points in both the west-east and south-north directions, i.e. a two-dimensional +spatial grid. These points are then used by the Verde gridders to interpolate +between data points. As such, all `.grid` methods (like +:meth:`verde.Spline.grid`) take as input the configuration parameters for +:func:`verde.grid_coordinates`. The grid can be specified either by the number +of points in each dimension (the shape) or by the grid node spacing. """ -import numpy as np import matplotlib.pyplot as plt + import verde as vd -from matplotlib.patches import Rectangle -######################################################################################## -# First let's create a region that is 1000 units west-east and 1000 units south-north, -# and we will set an initial spacing to 100 units. +############################################################################### +# First let's create a region that is 1000 units west-east and 1000 units +# south-north, and we will set an initial spacing to 100 units. spacing = 100 west, east, south, north = 0, 1000, 0, 1000 @@ -37,14 +35,14 @@ # create the grid coordinates easting, northing = vd.grid_coordinates(region=region, spacing=spacing) -######################################################################################## -# We can check the dimensions of the grid coordinates. The region is 1000 units and the -# spacing is 100 units, so the shape of the segments is 10x10. However, the number of -# grid nodes in this case is one more than the number of segments. So our grid -# coordinates have a shape of 11x11. +############################################################################### +# We can check the dimensions of the grid coordinates. 
The region is 1000 units +# and the spacing is 100 units, so the shape of the segments is 10x10. However, +# the number of grid nodes in this case is one more than the number of +# segments. So our grid coordinates have a shape of 11x11. print(easting.shape, northing.shape) -######################################################################################## +############################################################################### # Let's define two functions to visualize the region bounds and grid points @@ -80,7 +78,7 @@ def plot_grid(ax, coordinates, linestyles="dotted", region=None, pad=50, **kwarg plt.ylim(padded[2:]) -######################################################################################## +############################################################################### # Visualize our region and grid coordinates using our functions plt.figure(figsize=(6, 6)) @@ -101,18 +99,19 @@ def plot_grid(ax, coordinates, linestyles="dotted", region=None, pad=50, **kwarg plt.legend(loc="upper center", bbox_to_anchor=(0.5, 1.15)) plt.show() -######################################################################################## +############################################################################### # Adjusting region boundaries when creating the grid coordinates # -------------------------------------------------------------- # -# Now let's change our spacing to 300 units. Because the range of the west-east and -# south-north boundaries are not multiples of 300, we must choose to change either: +# Now let's change our spacing to 300 units. Because the range of the west-east +# and south-north boundaries are not multiples of 300, we must choose to change +# either: # # - the boundaries of the region in order to fit the spacing, or # - the spacing in order to fit the region boundaries. # -# We could tell :func:`verde.grid_coordinates` to adjust the region boundaries by -# passing ``adjust="region"``. +# We could tell :func:`verde.grid_coordinates` to adjust the region boundaries +# by passing ``adjust="region"``. spacing = 300 region_easting, region_northing = vd.grid_coordinates( @@ -120,46 +119,45 @@ def plot_grid(ax, coordinates, linestyles="dotted", region=None, pad=50, **kwarg ) print(region_easting.shape, region_northing.shape) -######################################################################################## +############################################################################### # With the spacing set at 300 units and a 4 by 4 grid of regular dimensions, -# :func:`verde.grid_coordinates` calculates the spatial location of each -# grid point and adjusts the region so that the maximum northing and maximum -# easting values are divisible by the spacing. In this example, both the easting and -# northing have 3 segments (4 nodes) that are each 300 units long, meaning the easting -# and northing span from 0 to 900. Both dimensions are divisible -# by 300. +# :func:`verde.grid_coordinates` calculates the spatial location of each grid +# point and adjusts the region so that the maximum northing and maximum easting +# values are divisible by the spacing. In this example, both the easting and +# northing have 3 segments (4 nodes) that are each 300 units long, meaning the +# easting and northing span from 0 to 900. Both dimensions are divisible by +# 300. 
print(region_easting) print(region_northing) -###################################################################################### +############################################################################### # By default, if ``adjust`` is not assigned to ``"region"`` or ``"spacing"``, # then :func:`verde.grid_coordinates` will adjust the spacing. With the adjust -# parameter set to ``spacing`` :func:`verde.grid_coordinates` creates grid nodes -# in a similar manner as when it adjusts the region. However, it doesn't readjust -# the region so that it is divisble by the spacing before creating the grid. -# This means the grid will have the same number of grid points no matter if -# the adjust parameter is set to ``region`` or ``spacing``. - -######################################################################################## +# parameter set to ``spacing``, :func:`verde.grid_coordinates` creates grid +# nodes in a similar manner as when it adjusts the region. However, it doesn't +# readjust the region so that it is divisible by the spacing before creating the +# grid. This means the grid will have the same number of grid points no matter +# if the adjust parameter is set to ``region`` or ``spacing``. +# # Adjusting spacing when creating the grid # ---------------------------------------- # -# Now let's adjust the spacing of the grid points by passing ``adjust="spacing"`` -# to :func:`verde.grid_coordinates`. +# Now let's adjust the spacing of the grid points by passing +# ``adjust="spacing"`` to :func:`verde.grid_coordinates`. spacing_easting, spacing_northing = vd.grid_coordinates( region=region, spacing=spacing, adjust="spacing" ) print(spacing_easting.shape, spacing_northing.shape) -###################################################################################### +############################################################################### # However, the regular spacing between the grid points is no longer 300 units. print(spacing_easting) print(spacing_northing) -###################################################################################### +############################################################################### # Visualize the different adjustments # ----------------------------------- # @@ -196,19 +194,19 @@ plt.legend(loc="upper center", bbox_to_anchor=(0.5, 1.18)) plt.show() -###################################################################################### +############################################################################### # Pixel Registration # ------------------ # # Pixel registration locates the grid points in the middle of the grid segments # rather than in the corner of each grid node. # -# First, let's take our 1000x1000 region and use the 100 unit spacing from the first -# example and set the ``pixel_register`` parameter to ``True``. Without pixel -# registration our grid should have dimensions of 11x11. With pixel registration we -# expect the dimensions of the grid to be the dimensions of the non-registered grid -# minus one, or equal to the number of segments between the grid points in the -# non-registered grid (10x10). +# First, let's take our 1000x1000 region and use the 100 unit spacing from the +# first example and set the ``pixel_register`` parameter to ``True``. Without +# pixel registration our grid should have dimensions of 11x11.
With pixel +# registration we expect the dimensions of the grid to be the dimensions of the +# non-registered grid minus one, or equal to the number of segments between the +# grid points in the non-registered grid (10x10). spacing = 100 pixel_easting, pixel_northing = vd.grid_coordinates( @@ -216,30 +214,31 @@ def plot_grid(ax, coordinates, linestyles="dotted", region=None, pad=50, **kwarg ) print(pixel_easting.shape, pixel_northing.shape) -###################################################################################### +############################################################################### # And we can check the coordinates for the grid points with pixel registration. print(pixel_easting) print(pixel_northing) -###################################################################################### +############################################################################### # If we set ``pixel_register`` to ``False`` the function will return the grid -# coordinates of the nodes instead of pixel centers, resulting in an extra point in each direction. +# coordinates of the nodes instead of pixel centers, resulting in an extra +# point in each direction. easting, northing = vd.grid_coordinates( region=region, spacing=spacing, pixel_register=False ) print(easting.shape, northing.shape) -###################################################################################### +############################################################################### # Again we can check the coordinates for grid points with spacing adjustment. print(easting) print(northing) -###################################################################################### -# Lastly, we can visualize the pixel-registered grid points to see where they fall -# within the original region bounds. +############################################################################### +# Lastly, we can visualize the pixel-registered grid points to see where they +# fall within the original region bounds. plt.figure(figsize=(6, 6)) ax = plt.subplot(111) @@ -270,15 +269,16 @@ def plot_grid(ax, coordinates, linestyles="dotted", region=None, pad=50, **kwarg plt.legend(loc="upper center", bbox_to_anchor=(0.5, 1.18)) plt.show() -###################################################################################### +############################################################################### # Extra Coordinates # ----------------- # -# In some cases, you might need an additional coordinate such as a height or a time -# that is associated with your coordinate grid. The ``extra_coords`` parameter -# in :func:`verde.grid_coordinates` creates an extra coordinate array that is the same -# shape as the coordinate grid, but contains a constant value. For example, let's -# add a constant height of 1000 units and time of 1 to our coordinate grid. +# In some cases, you might need an additional coordinate such as a height or a +# time that is associated with your coordinate grid. The ``extra_coords`` +# parameter in :func:`verde.grid_coordinates` creates an extra coordinate array +# that is the same shape as the coordinate grid, but contains a constant value. +# For example, let's add a constant height of 1000 units and time of 1 to our +# coordinate grid. 
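# A warm-up before the height/time example below (a minimal sketch; it assumes
# a one-element list is accepted the same way as the two-element list used
# next): the extra coordinate comes back as an array with the same shape as
# the grid, filled with the constant value.
sketch_east, sketch_north, sketch_height = vd.grid_coordinates(
    region=region, spacing=spacing, extra_coords=[500]
)
print(sketch_height.shape == sketch_east.shape, bool((sketch_height == 500).all()))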
easting, northing, height, time = vd.grid_coordinates( region=region, spacing=spacing, extra_coords=[1000, 1] @@ -286,12 +286,12 @@ def plot_grid(ax, coordinates, linestyles="dotted", region=None, pad=50, **kwarg print(easting.shape, northing.shape, height.shape, time.shape) -######################################################################################## +############################################################################### # And we can print the height array to verify that it is correct print(height) -######################################################################################## +############################################################################### # And we can print the time array as well print(time) diff --git a/tutorials/model_evaluation.py b/tutorials/model_evaluation.py index db6c3c125..cfec907c1 100644 --- a/tutorials/model_evaluation.py +++ b/tutorials/model_evaluation.py @@ -10,21 +10,24 @@ Evaluating Performance ====================== -The Green's functions based interpolations in Verde are all linear regressions under the -hood. This means that we can use some of the same tactics from -:mod:`sklearn.model_selection` to evaluate our interpolator's performance. Once we have -a quantified measure of the quality of a given fitted gridder, we can use it to tune the -gridder's parameters, like ``damping`` for a :class:`~verde.Spline` (see -:ref:`model_selection`). - -Verde provides adaptations of common scikit-learn tools to work better with spatial -data. Let's use these tools to evaluate the performance of a :class:`~verde.Spline` on -our sample air temperature data. +The Green's functions based interpolations in Verde are all linear regressions +under the hood. This means that we can use some of the same tactics from +:mod:`sklearn.model_selection` to evaluate our interpolator's performance. Once +we have a quantified measure of the quality of a given fitted gridder, we can +use it to tune the gridder's parameters, like ``damping`` for a +:class:`~verde.Spline` (see :ref:`model_selection`). + +Verde provides adaptations of common scikit-learn tools to work better with +spatial data. Let's use these tools to evaluate the performance of a +:class:`~verde.Spline` on our sample air temperature data. """ -import numpy as np -import matplotlib.pyplot as plt import cartopy.crs as ccrs +import dask +import matplotlib.pyplot as plt +import numpy as np import pyproj +from sklearn.model_selection import ShuffleSplit + import verde as vd data = vd.datasets.fetch_texas_wind() @@ -37,40 +40,40 @@ # For this data, we'll generate a grid with 15 arc-minute spacing spacing = 15 / 60 -######################################################################################## +############################################################################### # Splitting the data # ------------------ # -# We can't evaluate a gridder on the data that went into fitting it. The true test of a -# model is if it can correctly predict data that it hasn't seen before. scikit-learn has -# the :func:`sklearn.model_selection.train_test_split` function to separate a dataset -# into two parts: one for fitting the model (called *training* data) and a separate one -# for evaluating the model (called *testing* data). Using it with spatial data would -# involve some tedious array conversions so Verde implements -# :func:`verde.train_test_split` which does the same thing but takes coordinates and -# data arrays instead. 
+# We can't evaluate a gridder on the data that went into fitting it. The true +# test of a model is if it can correctly predict data that it hasn't seen +# before. scikit-learn has the :func:`sklearn.model_selection.train_test_split` +# function to separate a dataset into two parts: one for fitting the model +# (called *training* data) and a separate one for evaluating the model (called +# *testing* data). Using it with spatial data would involve some tedious array +# conversions so Verde implements :func:`verde.train_test_split` which does the +# same thing but takes coordinates and data arrays instead. # -# The split is done randomly so we specify a seed for the random number generator to -# guarantee that we'll get the same result every time we run this example. You probably -# don't want to do that for real data. We'll keep 30% of the data to use for testing -# (``test_size=0.3``). +# The split is done randomly so we specify a seed for the random number +# generator to guarantee that we'll get the same result every time we run this +# example. You probably don't want to do that for real data. We'll keep 30% of +# the data to use for testing (``test_size=0.3``). train, test = vd.train_test_split( proj_coords, data.air_temperature_c, test_size=0.3, random_state=0 ) -######################################################################################## -# The returned ``train`` and ``test`` variables are tuples containing coordinates, data, -# and (optionally) weights arrays. Since we're not using weights, the third element of -# the tuple will be ``None``: +############################################################################### +# The returned ``train`` and ``test`` variables are tuples containing +# coordinates, data, and (optionally) weights arrays. Since we're not using +# weights, the third element of the tuple will be ``None``: print(train) -######################################################################################## +############################################################################### # print(test) -######################################################################################## +############################################################################### # Let's plot these two datasets with different colors: plt.figure(figsize=(8, 6)) @@ -82,16 +85,17 @@ ax.set_aspect("equal") plt.show() -######################################################################################## -# We can pass the training dataset to the :meth:`~verde.base.BaseGridder.fit` method of -# most gridders using Python's argument expansion using the ``*`` symbol. +############################################################################### +# We can pass the training dataset to the :meth:`~verde.base.BaseGridder.fit` +# method of most gridders using Python's argument expansion using the ``*`` +# symbol. spline = vd.Spline() spline.fit(*train) -######################################################################################## -# Let's plot the gridded result to see what it looks like. First, we'll create a -# geographic grid: +############################################################################### +# Let's plot the gridded result to see what it looks like. 
First, we'll create +# a geographic grid: grid = spline.grid( region=region, spacing=spacing, @@ -101,9 +105,9 @@ ) print(grid) -######################################################################################## -# Then, we'll mask out grid points that are too far from any given data point and plot -# the grid: +############################################################################### +# Then, we'll mask out grid points that are too far from any given data point +# and plot the grid: mask = vd.distance_mask( (data.longitude, data.latitude), maxdist=3 * spacing * 111e3, @@ -127,28 +131,28 @@ vd.datasets.setup_texas_wind_map(ax) plt.show() -######################################################################################## +############################################################################### # Scoring # -------- # -# Gridders in Verde implement the :meth:`~verde.base.BaseGridder.score` method that -# calculates the `R² coefficient of determination -# `__ -# for a given comparison dataset (``test`` in our case). The R² score is at most 1, -# meaning a perfect prediction, but has no lower bound. +# Gridders in Verde implement the :meth:`~verde.base.BaseGridder.score` method +# that calculates the `R² coefficient of determination +# `__ for a given +# comparison dataset (``test`` in our case). The R² score is at most 1, meaning +# a perfect prediction, but has no lower bound. score = spline.score(*test) print("R² score:", score) -######################################################################################## -# That's a good score meaning that our gridder is able to accurately predict data that -# wasn't used in the gridding algorithm. +############################################################################### +# That's a good score, meaning that our gridder is able to accurately predict +# data that wasn't used in the gridding algorithm. # # .. caution:: # -# Once caveat for this score is that it is highly dependent on the particular split -# that we made. Changing the random number generator seed in -# :func:`verde.train_test_split` will result in a different score. +# One caveat for this score is that it is highly dependent on the +# particular split that we made. Changing the random number generator seed +# in :func:`verde.train_test_split` will result in a different score. # Use 1 as a seed instead of 0 train_other, test_other = vd.train_test_split( @@ -157,28 +161,27 @@ print("R² score with seed 1:", vd.Spline().fit(*train_other).score(*test_other)) -######################################################################################## +############################################################################### # Cross-validation # ---------------- # # A more robust way of scoring the gridders is to use function -# :func:`verde.cross_val_score`, which (by default) uses a `k-fold cross-validation +# :func:`verde.cross_val_score`, which uses a `k-fold +# cross-validation # `__ -# by default. It will split the data *k* times and return the score on each *fold*. We -# can then take a mean of these scores. +# by default. It will split the data *k* times and return the score on each +# *fold*. We can then take a mean of these scores.
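# Verde also ships spatially blocked splitters that can be passed as ``cv``
# (the plain k-fold run just below shows the default behaviour). This is a
# minimal sketch that assumes :class:`verde.BlockKFold` accepts a ``spacing``
# in projected meters, as in the blockkfold example that ships with Verde;
# check the installed version for the exact signature. Blocked splits tend to
# give less optimistic scores for spatially autocorrelated data.
blocked_scores = vd.cross_val_score(
    vd.Spline(),
    proj_coords,
    data.air_temperature_c,
    cv=vd.BlockKFold(spacing=1 * 111e3),
)
print("Blocked k-fold scores:", blocked_scores)
print("Mean blocked score:", np.mean(blocked_scores))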
scores = vd.cross_val_score(vd.Spline(), proj_coords, data.air_temperature_c) print("k-fold scores:", scores) print("Mean score:", np.mean(scores)) -######################################################################################## +############################################################################### # You can also use most cross-validation splitter classes from -# :mod:`sklearn.model_selection` by specifying the ``cv`` argument. For example, if we -# want to shuffle then split the data *n* times +# :mod:`sklearn.model_selection` by specifying the ``cv`` argument. For +# example, if we want to shuffle then split the data *n* times # (:class:`sklearn.model_selection.ShuffleSplit`): -from sklearn.model_selection import ShuffleSplit - shuffle = ShuffleSplit(n_splits=10, test_size=0.3, random_state=0) scores = vd.cross_val_score( @@ -187,7 +190,7 @@ print("shuffle scores:", scores) print("Mean score:", np.mean(scores)) -######################################################################################## +############################################################################### # Parallel cross-validation # ------------------------- # @@ -203,18 +206,16 @@ ) print("Delayed k-fold scores:", scores) -######################################################################################## +############################################################################### # In this case, the scores haven't actually been computed yet (hence the # "delayed" term). Instead, Verde scheduled the operations with Dask. Since we # are interested only in the mean score, we can schedule the mean as well using # :func:`dask.delayed`: -import dask - mean_score = dask.delayed(np.mean)(scores) print("Delayed mean:", mean_score) -######################################################################################## +############################################################################### # To run the scheduled computations and get the mean score, use # :func:`dask.compute` or ``.compute()``. Dask will automatically execute # things in parallel. @@ -222,14 +223,14 @@ mean_score = mean_score.compute() print("Mean score:", mean_score) -######################################################################################## +############################################################################### # .. note:: # # Dask will run many ``fit`` operations in parallel, which can be memory # intensive. Make sure you have enough RAM to run multiple fits. # -######################################################################################## +############################################################################### # Improving the score # ------------------- # diff --git a/tutorials/model_selection.py b/tutorials/model_selection.py index 39e8e86e3..041a947a3 100644 --- a/tutorials/model_selection.py +++ b/tutorials/model_selection.py @@ -10,18 +10,22 @@ Model Selection =============== -In :ref:`model_evaluation`, we saw how to check the performance of an interpolator using -cross-validation. We found that the default parameters for :class:`verde.Spline` are not -good for predicting our sample air temperature data. Now, let's see how we can tune the -:class:`~verde.Spline` to improve the cross-validation performance. - -Once again, we'll start by importing the required packages and loading our sample data. +In :ref:`model_evaluation`, we saw how to check the performance of an +interpolator using cross-validation. 
We found that the default parameters for +:class:`verde.Spline` are not good for predicting our sample air temperature +data. Now, let's see how we can tune the :class:`~verde.Spline` to improve the +cross-validation performance. + +Once again, we'll start by importing the required packages and loading our +sample data. """ -import numpy as np -import matplotlib.pyplot as plt -import cartopy.crs as ccrs import itertools + +import cartopy.crs as ccrs +import matplotlib.pyplot as plt +import numpy as np import pyproj + import verde as vd data = vd.datasets.fetch_texas_wind() @@ -31,12 +35,12 @@ proj_coords = projection(data.longitude.values, data.latitude.values) region = vd.get_region((data.longitude, data.latitude)) -# The desired grid spacing in degrees (converted to meters using 1 degree approx. 111km) +# The desired grid spacing in degrees spacing = 15 / 60 -######################################################################################## -# Before we begin tuning, let's reiterate what the results were with the default -# parameters. +############################################################################### +# Before we begin tuning, let's reiterate what the results were with the +# default parameters. spline_default = vd.Spline() score_default = np.mean( @@ -46,17 +50,18 @@ print("R² with defaults:", score_default) -######################################################################################## +############################################################################### # Tuning # ------ # -# :class:`~verde.Spline` has many parameters that can be set to modify the final result. -# Mainly the ``damping`` regularization parameter and the ``mindist`` "fudge factor" -# which smooths the solution. Would changing the default values give us a better score? +# :class:`~verde.Spline` has many parameters that can be set to modify the +# final result. Mainly the ``damping`` regularization parameter and the +# ``mindist`` "fudge factor" which smooths the solution. Would changing the +# default values give us a better score? # # We can answer these questions by changing the values in our ``spline`` and -# re-evaluating the model score repeatedly for different values of these parameters. -# Let's test the following combinations: +# re-evaluating the model score repeatedly for different values of these +# parameters. Let's test the following combinations: dampings = [None, 1e-4, 1e-3, 1e-2] mindists = [5e3, 10e3, 50e3, 100e3] @@ -69,8 +74,9 @@ print("Number of combinations:", len(parameter_sets)) print("Combinations:", parameter_sets) -######################################################################################## -# Now we can loop over the combinations and collect the scores for each parameter set. +############################################################################### +# Now we can loop over the combinations and collect the scores for each +# parameter set. spline = vd.Spline() scores = [] @@ -80,7 +86,7 @@ scores.append(score) print(scores) -######################################################################################## +############################################################################### # The largest score will yield the best parameter combination. 
best = np.argmax(scores) @@ -88,44 +94,45 @@ print("Score with defaults:", score_default) print("Best parameters:", parameter_sets[best]) -######################################################################################## +############################################################################### # **That is a nice improvement over our previous score!** # -# This type of tuning is important and should always be performed when using a new -# gridder or a new dataset. However, the above implementation requires a lot of -# coding. Fortunately, Verde provides convenience classes that perform the -# cross-validation and tuning automatically when fitting a dataset. +# This type of tuning is important and should always be performed when using a +# new gridder or a new dataset. However, the above implementation requires a +# lot of coding. Fortunately, Verde provides convenience classes that perform +# the cross-validation and tuning automatically when fitting a dataset. -######################################################################################## +############################################################################### # Cross-validated gridders # ------------------------ # # The :class:`verde.SplineCV` class provides a cross-validated version of -# :class:`verde.Spline`. It has almost the same interface but does all of the above -# automatically when fitting a dataset. The only difference is that you must provide a -# list of ``damping`` and ``mindist`` parameters to try instead of only a single value: +# :class:`verde.Spline`. It has almost the same interface but does all of the +# above automatically when fitting a dataset. The only difference is that you +# must provide a list of ``damping`` and ``mindist`` parameters to try instead +# of only a single value: spline = vd.SplineCV( dampings=dampings, mindists=mindists, ) -######################################################################################## +############################################################################### # Calling :meth:`~verde.SplineCV.fit` will run a grid search over all parameter # combinations to find the one that maximizes the cross-validation score. 
spline.fit(proj_coords, data.air_temperature_c) -######################################################################################## -# The estimated best damping and mindist, as well as the cross-validation -# scores, are stored in class attributes: +############################################################################### +# The estimated best ``damping`` and ``mindist``, as well as the +# cross-validation scores, are stored in class attributes: print("Highest score:", spline.scores_.max()) print("Best damping:", spline.damping_) print("Best mindist:", spline.mindist_) -######################################################################################## +############################################################################### # The cross-validated gridder can be used like any other gridder (including in # :class:`verde.Chain` and :class:`verde.Vector`): @@ -138,14 +145,14 @@ ) print(grid) -######################################################################################## +############################################################################### # Like :func:`verde.cross_val_score`, :class:`~verde.SplineCV` can also run the # grid search in parallel using `Dask `__ by specifying the # ``delayed`` attribute: spline = vd.SplineCV(dampings=dampings, mindists=mindists, delayed=True) -######################################################################################## +############################################################################### # Unlike :func:`verde.cross_val_score`, calling :meth:`~verde.SplineCV.fit` # does **not** result in :func:`dask.delayed` objects. The full grid search is # executed and the optimal parameters are found immediately. @@ -155,20 +162,20 @@ print("Best damping:", spline.damping_) print("Best mindist:", spline.mindist_) -######################################################################################## +############################################################################### # The one caveat is that the ``scores_`` attribute will be a list of # :func:`dask.delayed` objects instead because the scores are only computed as # intermediate values in the scheduled computations. print("Delayed scores:", spline.scores_) -######################################################################################## +############################################################################### # Calling :func:`dask.compute` on the scores will calculate their values but # will unfortunately run the entire grid search again. So using # ``delayed=True`` is not recommended if you need the scores of each parameter # combination. -######################################################################################## +############################################################################### # The importance of tuning # ------------------------ # @@ -183,7 +190,7 @@ data_names="temperature", ) -######################################################################################## +############################################################################### # Let's plot our grids side-by-side: mask = vd.distance_mask( @@ -216,7 +223,7 @@ vd.datasets.setup_texas_wind_map(ax) plt.show() -######################################################################################## +############################################################################### # Notice that, for sparse data like these, **smoother models tend to be better -# predictors**.
This is a sign that you should probably not trust many of the short -# wavelength features that we get from the defaults. +# predictors**. This is a sign that you should probably not trust many of the +# short wavelength features that we get from the defaults. diff --git a/tutorials/overview.py b/tutorials/overview.py index cc51d0913..4b27755f9 100644 --- a/tutorials/overview.py +++ b/tutorials/overview.py @@ -10,56 +10,60 @@ Overview ======== -Verde provides classes and functions for processing spatial data, like bathymetry, GPS, -temperature, gravity, or anything else that is measured along a surface. -The main focus is on methods for gridding such data (interpolating on a regular grid). -You'll also find other analysis methods that are often used in combination with -gridding, like trend removal and blocked operations. +Verde provides classes and functions for processing spatial data, like +bathymetry, GPS, temperature, gravity, or anything else that is measured along +a surface. The main focus is on methods for gridding such data (interpolating +on a regular grid). You'll also find other analysis methods that are often used +in combination with gridding, like trend removal and blocked operations. Conventions ----------- Before we get started, here are a few of the conventions we use across Verde: -* Coordinates can be Cartesian or Geographic. We generally make no assumptions about - which one you're using. -* All functions and classes expect coordinates in the order: West-East and South-North. - This applies to the actual coordinate values, bounding regions, grid spacing, etc. - Exceptions to this rule are the ``dims`` and ``shape`` arguments. -* We don't use names like "x" and "y" to avoid ambiguity. Cartesian coordinates are - "easting" and "northing" and Geographic coordinates are "longitude" and "latitude". -* The term "region" means the bounding box of the data. It is ordered west, east, south, - north. +* Coordinates can be Cartesian or Geographic. We generally make no assumptions + about which one you're using. +* All functions and classes expect coordinates in the order: West-East and + South-North. This applies to the actual coordinate values, bounding regions, + grid spacing, etc. Exceptions to this rule are the ``dims`` and ``shape`` + arguments. +* We don't use names like "x" and "y" to avoid ambiguity. Cartesian coordinates + are "easting" and "northing" and Geographic coordinates are "longitude" and + "latitude". +* The term "region" means the bounding box of the data. It is ordered west, + east, south, north. The library ----------- -Most classes and functions are available through the :mod:`verde` top level package. -The only exceptions are the functions related to loading sample data, which are in -:mod:`verde.datasets`. Throughout the documentation we'll use ``vd`` as the alias for -:mod:`verde`. +Most classes and functions are available through the :mod:`verde` top level +package. The only exceptions are the functions related to loading sample data, +which are in :mod:`verde.datasets`. Throughout the documentation we'll use +``vd`` as the alias for :mod:`verde`. """ +import matplotlib.pyplot as plt + import verde as vd -######################################################################################## +############################################################################### # .. 
_gridder_interface: # # The gridder interface # --------------------- # -# All gridding and trend estimation classes in Verde share the same interface (they all -# inherit from :class:`verde.base.BaseGridder`). Since most gridders in Verde are linear -# models, we based our gridder interface on the `scikit-learn -# `__ estimator interface: they all implement a -# :meth:`~verde.base.BaseGridder.fit` method that estimates the model parameters based -# on data and a :meth:`~verde.base.BaseGridder.predict` method that calculates new data -# based on the estimated parameters. +# All gridding and trend estimation classes in Verde share the same interface +# (they all inherit from :class:`verde.base.BaseGridder`). Since most gridders +# in Verde are linear models, we based our gridder interface on the +# `scikit-learn `__ estimator interface: they all +# implement a :meth:`~verde.base.BaseGridder.fit` method that estimates the +# model parameters based on data and a :meth:`~verde.base.BaseGridder.predict` +# method that calculates new data based on the estimated parameters. # -# Unlike scikit-learn, our data model is not a feature matrix and a target vector (e.g., -# ``est.fit(X, y)``) but a tuple of coordinate arrays and a data vector (e.g., -# ``grd.fit((easting, northing), data)``). This makes more sense for spatial data and is -# common to all classes and functions in Verde. +# Unlike scikit-learn, our data model is not a feature matrix and a target +# vector (e.g., ``est.fit(X, y)``) but a tuple of coordinate arrays and a data +# vector (e.g., ``grd.fit((easting, northing), data)``). This makes more sense +# for spatial data and is common to all classes and functions in Verde. # # As an example, let's generate some synthetic data using # :class:`verde.datasets.CheckerBoard`: @@ -68,33 +72,31 @@ print(data.head()) -######################################################################################## -# The data are random points taken from a checkerboard function and returned to us in a -# :class:`pandas.DataFrame`: - -import matplotlib.pyplot as plt +############################################################################### +# The data are random points taken from a checkerboard function and returned to +# us in a :class:`pandas.DataFrame`: plt.figure() plt.scatter(data.easting, data.northing, c=data.scalars, cmap="RdBu_r") plt.colorbar() plt.show() -######################################################################################## -# Now we can use the bi-harmonic spline method [Sandwell1987]_ to fit this data. First, -# we create a new :class:`verde.Spline`: +############################################################################### +# Now we can use the bi-harmonic spline method [Sandwell1987]_ to fit this +# data. First, we create a new :class:`verde.Spline`: spline = vd.Spline() # Printing a gridder shows the class and all of it's configuration options. print(spline) -######################################################################################## -# Before we can use the spline, we need to fit it to our synthetic data. After that, we -# can use the spline to predict values anywhere: +############################################################################### +# Before we can use the spline, we need to fit it to our synthetic data. After +# that, we can use the spline to predict values anywhere: spline.fit((data.easting, data.northing), data.scalars) -# Generate coordinates for a regular grid with 100 m grid spacing (assuming coordinates -# are in meters). 
+# Generate coordinates for a regular grid with 100 m grid spacing (assuming +# coordinates are in meters). grid_coords = vd.grid_coordinates(region=(0, 5000, -5000, 0), spacing=100) gridded_scalars = spline.predict(grid_coords) @@ -103,15 +105,16 @@ plt.colorbar() plt.show() -######################################################################################## -# We can compare our predictions with the true values for the checkerboard function -# using the :meth:`~verde.Spline.score` method to calculate the `R² coefficient of -# determination `__. +############################################################################### +# We can compare our predictions with the true values for the checkerboard +# function using the :meth:`~verde.Spline.score` method to calculate the +# `R² coefficient of determination +# `__. true_values = vd.datasets.CheckerBoard().predict(grid_coords) print(spline.score(grid_coords, true_values)) -######################################################################################## +############################################################################### # Generating grids and profiles # ----------------------------- # @@ -122,10 +125,10 @@ grid = spline.grid(spacing=30) print(grid) -######################################################################################## -# :meth:`~verde.base.BaseGridder.grid` uses default names for the coordinates ("easting" -# and "northing") and data variables ("scalars"). You can overwrite these names by -# setting the ``dims`` and ``data_names`` arguments. +############################################################################### +# :meth:`~verde.base.BaseGridder.grid` uses default names for the coordinates +# ("easting" and "northing") and data variables ("scalars"). You can overwrite +# these names by setting the ``dims`` and ``data_names`` arguments. grid = spline.grid(spacing=30, dims=["latitude", "longitude"], data_names="gravity") print(grid) @@ -134,10 +137,10 @@ grid.gravity.plot.pcolormesh() plt.show() -######################################################################################## -# Gridders can also be used to interpolate data on a straight line between two points -# using the :meth:`~verde.base.BaseGridder.profile` method. The profile data are -# returned as a :class:`pandas.DataFrame`. +############################################################################### +# Gridders can also be used to interpolate data on a straight line between two +# points using the :meth:`~verde.base.BaseGridder.profile` method. The profile +# data are returned as a :class:`pandas.DataFrame`. prof = spline.profile(point1=(0, 0), point2=(5000, -5000), size=200) print(prof.head()) @@ -147,7 +150,7 @@ plt.show() -######################################################################################## +############################################################################### # Wrap up # ------- # diff --git a/tutorials/projections.py b/tutorials/projections.py index b9b4cc261..01ae7903b 100644 --- a/tutorials/projections.py +++ b/tutorials/projections.py @@ -8,37 +8,39 @@ Geographic Coordinates ====================== -Most gridders and processing methods in Verde operate under the assumption that the data -coordinates are Cartesian. To process data in geographic (longitude and latitude) -coordinates, we must first project them. There are different ways of doing this in -Python but most of them rely on the `PROJ library `__. 
We'll use -`pyproj `__ to access PROJ directly and handle the +Most gridders and processing methods in Verde operate under the assumption that +the data coordinates are Cartesian. To process data in geographic (longitude +and latitude) coordinates, we must first project them. There are different ways +of doing this in Python but most of them rely on the `PROJ library +`__. We'll use `pyproj +`__ to access PROJ directly and handle the projection operations. """ -import pyproj -import numpy as np -import matplotlib.pyplot as plt import cartopy.crs as ccrs +import matplotlib.pyplot as plt +import numpy as np +import pyproj + import verde as vd -######################################################################################## -# With pyproj, we can create functions that will project our coordinates to and from -# different coordinate systems. For our Baja California bathymetry data, we'll use a -# Mercator projection. +############################################################################### +# With pyproj, we can create functions that will project our coordinates to and +# from different coordinate systems. For our Baja California bathymetry data, +# we'll use a Mercator projection. data = vd.datasets.fetch_baja_bathymetry() -# We're choosing the latitude of true scale as the mean latitude of our dataset. +# We're choosing the latitude of true scale as the mean latitude of our dataset projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean()) -######################################################################################## -# The Proj object is a callable (meaning that it behaves like a function) that will take -# longitude and latitude and return easting and northing coordinates. +############################################################################### +# The Proj object is a callable (meaning that it behaves like a function) that +# will take longitude and latitude and return easting and northing coordinates. # pyproj doesn't play well with Pandas so we need to convert to numpy arrays proj_coords = projection(data.longitude.values, data.latitude.values) print(proj_coords) -######################################################################################## +############################################################################### # We can plot our projected coordinates using matplotlib. plt.figure(figsize=(7, 6)) @@ -51,28 +53,28 @@ plt.tight_layout() plt.show() -######################################################################################## +############################################################################### # Cartesian grids # --------------- # -# Now we can use :class:`verde.BlockReduce` and :class:`verde.Spline` on our projected -# coordinates. We'll specify the desired grid spacing as degrees and convert it to -# Cartesian using the 1 degree approx. 111 km rule-of-thumb. +# Now we can use :class:`verde.BlockReduce` and :class:`verde.Spline` on our +# projected coordinates. We'll specify the desired grid spacing as degrees and +# convert it to Cartesian using the 1 degree approx. 111 km rule-of-thumb. spacing = 10 / 60 reducer = vd.BlockReduce(np.median, spacing=spacing * 111e3) filter_coords, filter_bathy = reducer.filter(proj_coords, data.bathymetry_m) spline = vd.Spline().fit(filter_coords, filter_bathy) -######################################################################################## -# If we now call :meth:`verde.Spline.grid` we'll get back a grid evenly spaced in -# projected Cartesian coordinates. 
+############################################################################### +# If we now call :meth:`verde.Spline.grid` we'll get back a grid evenly spaced +# in projected Cartesian coordinates. grid = spline.grid(spacing=spacing * 111e3, data_names="bathymetry") print("Cartesian grid:") print(grid) -######################################################################################## -# We'll mask our grid using :func:`verde.distance_mask` to get rid of all the spurious -# solutions far away from the data points. +############################################################################### +# We'll mask our grid using :func:`verde.distance_mask` to get rid of all the +# spurious solutions far away from the data points. grid = vd.distance_mask(proj_coords, maxdist=30e3, grid=grid) plt.figure(figsize=(7, 6)) @@ -87,19 +89,20 @@ plt.show() -######################################################################################## +############################################################################### # Geographic grids # ---------------- # # The Cartesian grid that we generated won't be evenly spaced if we convert the -# coordinates back to geographic latitude and longitude. Verde gridders allow you to -# generate an evenly spaced grid in geographic coordinates through the ``projection`` -# argument of the :meth:`~verde.base.BaseGridder.grid` method. +# coordinates back to geographic latitude and longitude. Verde gridders allow +# you to generate an evenly spaced grid in geographic coordinates through the +# ``projection`` argument of the :meth:`~verde.base.BaseGridder.grid` method. # -# By providing a projection function (like our pyproj ``projection`` object), Verde will -# generate coordinates for a regular grid and then pass them through the projection -# function before predicting data values. This way, you can generate a grid in a -# coordinate system other than the one you used to fit the spline. +# By providing a projection function (like our pyproj ``projection`` object), +# Verde will generate coordinates for a regular grid and then pass them through +# the projection function before predicting data values. This way, you can +# generate a grid in a coordinate system other than the one you used to fit the +# spline. # Get the geographic bounding region of the data region = vd.get_region((data.longitude, data.latitude)) @@ -116,18 +119,18 @@ print("Geographic grid:") print(grid_geo) -######################################################################################## -# Notice that grid has longitude and latitude coordinates and slightly different number -# of points than the Cartesian grid. +############################################################################### +# Notice that grid has longitude and latitude coordinates and slightly +# different number of points than the Cartesian grid. # -# The :func:`verde.distance_mask` function also supports the ``projection`` argument and -# will project the coordinates before calculating distances. +# The :func:`verde.distance_mask` function also supports the ``projection`` +# argument and will project the coordinates before calculating distances. grid_geo = vd.distance_mask( (data.longitude, data.latitude), maxdist=30e3, grid=grid_geo, projection=projection ) -######################################################################################## +############################################################################### # Now we can use the Cartopy library to plot our geographic grid. 
plt.figure(figsize=(7, 6)) @@ -140,7 +143,7 @@ vd.datasets.setup_baja_bathymetry_map(ax, land=None) plt.show() -######################################################################################## +############################################################################### # Profiles # -------- # @@ -172,7 +175,7 @@ ) print(profile) -######################################################################################## +############################################################################### # Plot the profile location on our geographic grid from above. plt.figure(figsize=(7, 6)) @@ -188,7 +191,7 @@ vd.datasets.setup_baja_bathymetry_map(ax, land=None) plt.show() -######################################################################################## +############################################################################### # And finally plot the profile. plt.figure(figsize=(8, 3)) diff --git a/tutorials/trends.py b/tutorials/trends.py index b40c046ef..203ea3633 100644 --- a/tutorials/trends.py +++ b/tutorials/trends.py @@ -14,13 +14,15 @@ The :class:`verde.Trend` class fits a 2D polynomial trend of arbitrary degree to the data and can be used to remove it. """ -import matplotlib.pyplot as plt import cartopy.crs as ccrs +import matplotlib.pyplot as plt import numpy as np + import verde as vd -######################################################################################## -# Our sample air temperature data from Texas has a clear trend from land to the ocean: +############################################################################### +# Our sample air temperature data from Texas has a clear trend from land to the +# ocean: data = vd.datasets.fetch_texas_wind() coordinates = (data.longitude, data.latitude) @@ -39,14 +41,15 @@ vd.datasets.setup_texas_wind_map(ax) plt.show() -######################################################################################## +############################################################################### # We can estimate the polynomial coefficients for this trend: trend = vd.Trend(degree=1).fit(coordinates, data.air_temperature_c) print(trend.coef_) -######################################################################################## -# More importantly, we can predict the trend values and remove them from our data: +############################################################################### +# More importantly, we can predict the trend values and remove them from our +# data: trend_values = trend.predict(coordinates) residuals = data.air_temperature_c - trend_values @@ -85,16 +88,17 @@ vd.datasets.setup_texas_wind_map(ax) plt.show() -######################################################################################## -# The fitting, prediction, and residual calculation can all be done in a single step -# using the :meth:`~verde.Trend.filter` method: +############################################################################### +# The fitting, prediction, and residual calculation can all be done in a single +# step using the :meth:`~verde.Trend.filter` method: -# filter always outputs coordinates and weights as well, which we don't need and will -# ignore here. +# ``filter`` always outputs coordinates and weights as well, which we don't +# need and will ignore here. 
__, res_filter, __ = vd.Trend(degree=1).filter(coordinates, data.air_temperature_c) print(np.allclose(res_filter, residuals)) -######################################################################################## -# Additionally, :class:`verde.Trend` implements the :ref:`gridder interface ` -# and has the :meth:`~verde.Trend.grid` and :meth:`~verde.Trend.profile` methods. +############################################################################### +# Additionally, :class:`verde.Trend` implements the :ref:`gridder interface +# ` and has the :meth:`~verde.Trend.grid` and +# :meth:`~verde.Trend.profile` methods. diff --git a/tutorials/vectors.py b/tutorials/vectors.py index 333e18974..7d36088ab 100644 --- a/tutorials/vectors.py +++ b/tutorials/vectors.py @@ -8,13 +8,14 @@ Vector Data =========== -Some datasets have multiple vector components measured for each location, like the East -and West components of wind speed or GPS velocities. For example, let's look at our -sample GPS velocity data from the U.S. West coast. +Some datasets have multiple vector components measured for each location, like +the East and West components of wind speed or GPS velocities. For example, +let's look at our sample GPS velocity data from the U.S. West coast. """ -import matplotlib.pyplot as plt import cartopy.crs as ccrs +import matplotlib.pyplot as plt import pyproj + import verde as vd data = vd.datasets.fetch_california_gps() @@ -44,23 +45,24 @@ plt.show() -######################################################################################## -# Verde classes and functions are equipped to deal with vector data natively or through -# the use of the :class:`verde.Vector` class. Function and classes that can take vector -# data as input will accept tuples as the ``data`` and ``weights`` arguments. Each -# element of the tuple must be an array with the data values for a component of the -# vector data. As with ``coordinates``, **the order of components must be** -# ``(east_component, north_component, up_component)``. +############################################################################### +# Verde classes and functions are equipped to deal with vector data natively or +# through the use of the :class:`verde.Vector` class. Function and classes that +# can take vector data as input will accept tuples as the ``data`` and +# ``weights`` arguments. Each element of the tuple must be an array with the +# data values for a component of the vector data. As with ``coordinates``, +# **the order of components must be** ``(east_component, north_component, +# up_component)``. # # # Blocked reductions # ------------------ # -# Operations with :class:`verde.BlockReduce` and :class:`verde.BlockMean` can handle -# multi-component data automatically. The reduction operation is applied to each data -# component separately. The blocked data and weights will be returned in tuples as well -# following the same ordering as the inputs. This will work for an arbitrary number of -# components. +# Operations with :class:`verde.BlockReduce` and :class:`verde.BlockMean` can +# handle multi-component data automatically. The reduction operation is applied +# to each data component separately. The blocked data and weights will be +# returned in tuples as well following the same ordering as the inputs. This +# will work for an arbitrary number of components. 
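As a rough sketch of the tuple-in/tuple-out convention described above (a minimal example with synthetic coordinates and made-up velocity components standing in for the GPS data; the region, spacing, and values are arbitrary):

    import numpy as np
    import verde as vd

    # Synthetic scattered coordinates and two made-up vector components
    easting, northing = vd.scatter_points(
        region=(0, 10, -10, 0), size=500, random_state=0
    )
    vel_east = np.sin(easting)
    vel_north = np.cos(northing)

    # Any number of components can be passed as a tuple; the reduction is
    # applied to each component separately and returned in the same order.
    reducer = vd.BlockReduce(np.median, spacing=1)
    block_coords, (block_east, block_north) = reducer.filter(
        coordinates=(easting, northing), data=(vel_east, vel_north)
    )
    print(block_east.shape, block_north.shape)

The blocked-mean example that follows does the same thing on the projected GPS velocities, with uncertainty-based weights added in.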
# Use a blocked mean with uncertainty type weights reducer = vd.BlockMean(spacing=spacing * 111e3, uncertainty=True) @@ -71,9 +73,9 @@ ) print(len(block_data), len(block_weights)) -######################################################################################## -# We can convert the blocked coordinates back to longitude and latitude to plot with -# Cartopy. +############################################################################### +# We can convert the blocked coordinates back to longitude and latitude to plot +# with Cartopy. block_lon, block_lat = projection(*block_coords, inverse=True) @@ -94,29 +96,30 @@ vd.datasets.setup_california_gps_map(ax) plt.show() -######################################################################################## +############################################################################### # Trends # ------ # # Trends can't handle vector data automatically, so you can't pass -# ``data=(data.velocity_east, data.velocity_north)`` to :meth:`verde.Trend.fit`. To get -# around that, you can use the :class:`verde.Vector` class to create multi-component -# estimators and gridders from single component ones. +# ``data=(data.velocity_east, data.velocity_north)`` to +# :meth:`verde.Trend.fit`. To get around that, you can use the +# :class:`verde.Vector` class to create multi-component estimators and gridders +# from single component ones. # # :class:`~verde.Vector` takes an estimator/gridder for each data component and -# implements the :ref:`gridder interface ` for vector data, fitting -# each estimator/gridder given to a different component of the data. +# implements the :ref:`gridder interface ` for vector data, +# fitting each estimator/gridder given to a different component of the data. # -# For example, to fit a trend to our GPS velocities, we need to make a 2-component -# vector trend: +# For example, to fit a trend to our GPS velocities, we need to make a +# 2-component vector trend: trend = vd.Vector([vd.Trend(4), vd.Trend(1)]) print(trend) -######################################################################################## -# We can use the ``trend`` as if it were a regular :class:`verde.Trend` but passing in -# 2-component data to fit. This will fit each data component to a different -# :class:`verde.Trend`. +############################################################################### +# We can use the ``trend`` as if it were a regular :class:`verde.Trend` but +# passing in 2-component data to fit. This will fit each data component to a +# different :class:`verde.Trend`. trend.fit( coordinates=proj_coords, @@ -124,17 +127,17 @@ weights=(1 / data.std_east ** 2, 1 / data.std_north ** 2), ) -######################################################################################## +############################################################################### # Each estimator can be accessed through the ``components`` attribute: print(trend.components) print("East trend coefficients:", trend.components[0].coef_) print("North trend coefficients:", trend.components[1].coef_) -######################################################################################## -# When we call :meth:`verde.Vector.predict` or :meth:`verde.Vector.grid`, we'll get back -# predictions for two components instead of just one. Each prediction comes from a -# different :class:`verde.Trend`. 
+############################################################################### +# When we call :meth:`verde.Vector.predict` or :meth:`verde.Vector.grid`, we'll +# get back predictions for two components instead of just one. Each prediction +# comes from a different :class:`verde.Trend`. pred_east, pred_north = trend.predict(proj_coords) @@ -148,9 +151,9 @@ ax.set_xlabel("Velocity (m/yr)") plt.show() -######################################################################################## -# As expected, the residuals are higher for the North component because of the lower -# degree polynomial. +############################################################################### +# As expected, the residuals are higher for the North component because of the +# lower degree polynomial. # # Let's make geographic grids of these trends. @@ -164,10 +167,10 @@ ) print(grid) -######################################################################################## -# By default, the names of the data components in the :class:`xarray.Dataset` are -# ``east_component`` and ``north_component``. This can be customized using the -# ``data_names`` argument. +############################################################################### +# By default, the names of the data components in the :class:`xarray.Dataset` +# are ``east_component`` and ``north_component``. This can be customized using +# the ``data_names`` argument. # # Now we can map the trends. @@ -195,13 +198,13 @@ ax.coastlines(color="white") plt.show() -######################################################################################## +############################################################################### # Gridding # -------- # # You can use :class:`verde.Vector` to create multi-component gridders out of -# :class:`verde.Spline` the same way as we did for trends. In this case, each component -# is treated separately. +# :class:`verde.Spline` the same way as we did for trends. In this case, each +# component is treated separately. # # We can start by splitting the data into training and testing sets (see # :ref:`model_selection`). Notice that :func:`verde.train_test_split` work for @@ -214,14 +217,14 @@ random_state=1, ) -######################################################################################## +############################################################################### # Now we can make a 2-component spline. Since :class:`verde.Vector` implements -# ``fit``, ``predict``, and ``filter``, we can use it in a :class:`verde.Chain` to build -# a pipeline. +# ``fit``, ``predict``, and ``filter``, we can use it in a :class:`verde.Chain` +# to build a pipeline. # -# We need to use a bit of damping so that the weights can be taken into account. Splines -# without damping provide a perfect fit to the data and ignore the weights as a -# consequence. +# We need to use a bit of damping so that the weights can be taken into +# account. Splines without damping provide a perfect fit to the data and ignore +# the weights as a consequence. chain = vd.Chain( [ @@ -232,15 +235,16 @@ ) print(chain) -######################################################################################## +############################################################################### # # .. warning:: # -# Never generate the component gridders with ``[vd.Spline()]*2``. This will result -# in each component being a represented by **the same Spline object**, causing -# problems when trying to fit it to different components. 
+# Never generate the component gridders with ``[vd.Spline()]*2``. This will +# result in each component being a represented by **the same Spline +# object**, causing problems when trying to fit it to different components. # -# Fitting the spline and gridding is exactly the same as what we've done before. +# Fitting the spline and gridding is exactly the same as what we've done +# before. chain.fit(*train) @@ -255,7 +259,7 @@ ) print(grid) -######################################################################################## +############################################################################### # Mask out the points too far from data and plot the gridded vectors. grid = vd.distance_mask( @@ -281,7 +285,7 @@ vd.datasets.setup_california_gps_map(ax) plt.show() -######################################################################################## +############################################################################### # GPS/GNSS data # +++++++++++++ # diff --git a/tutorials/weights.py b/tutorials/weights.py index c683de40c..185c8339f 100644 --- a/tutorials/weights.py +++ b/tutorials/weights.py @@ -8,32 +8,37 @@ Using Weights ============= -One of the advantages of using a Green's functions approach to interpolation is that we -can easily weight the data to give each point more or less influence over the results. -This is a good way to not let data points with large uncertainties bias the -interpolation or the data decimation. +One of the advantages of using a Green's functions approach to interpolation is +that we can easily weight the data to give each point more or less influence +over the results. This is a good way to not let data points with large +uncertainties bias the interpolation or the data decimation. """ -# The weights vary a lot so it's better to plot them using a logarithmic color scale -from matplotlib.colors import LogNorm -import matplotlib.pyplot as plt import cartopy.crs as ccrs +import matplotlib.pyplot as plt import numpy as np +import pyproj + +# The weights vary a lot so it's better to plot them using a logarithmic color +# scale +from matplotlib.colors import LogNorm + import verde as vd -######################################################################################## +############################################################################### # We'll use some sample GPS vertical ground velocity which has some variable # uncertainties associated with each data point. The data are loaded as a # pandas.DataFrame: data = vd.datasets.fetch_california_gps() print(data.head()) -######################################################################################## -# Let's plot our data using Cartopy to see what the vertical velocities and their -# uncertainties look like. We'll make a function for this so we can reuse it later on. +############################################################################### +# Let's plot our data using Cartopy to see what the vertical velocities and +# their uncertainties look like. We'll make a function for this so we can reuse +# it later on. 
def plot_data(coordinates, velocity, weights, title_data, title_weights): - "Make two maps of our data, one with the data and one with the weights/uncertainty" + "Make two maps of our data, one with the data and one with the weights" fig, axes = plt.subplots( 1, 2, figsize=(9.5, 7), subplot_kw=dict(projection=ccrs.Mercator()) ) @@ -71,28 +76,29 @@ def plot_data(coordinates, velocity, weights, title_data, title_weights): "Uncertainty (m/yr)", ) -######################################################################################## +############################################################################### # Weights in data decimation # -------------------------- # -# :class:`~verde.BlockReduce` can't output weights for each data point because it -# doesn't know which reduction operation it's using. If you want to do a weighted -# interpolation, like :class:`verde.Spline`, :class:`~verde.BlockReduce` won't propagate -# the weights to the interpolation function. If your data are relatively smooth, you can -# use :class:`verde.BlockMean` instead to decimated data and produce weights. It can -# calculate different kinds of weights, depending on configuration options and what you -# give it as input. +# :class:`~verde.BlockReduce` can't output weights for each data point because +# it doesn't know which reduction operation it's using. If you want to do a +# weighted interpolation, like :class:`verde.Spline`, +# :class:`~verde.BlockReduce` won't propagate the weights to the interpolation +# function. If your data are relatively smooth, you can use +# :class:`verde.BlockMean` instead to decimated data and produce weights. It +# can calculate different kinds of weights, depending on configuration options +# and what you give it as input. # # Let's explore all of the possibilities. mean = vd.BlockMean(spacing=15 / 60) print(mean) -######################################################################################## +############################################################################### # Option 1: No input weights # ++++++++++++++++++++++++++ # -# In this case, we'll get a standard mean and the output weights will be 1 over the -# variance of the data in each block: +# In this case, we'll get a standard mean and the output weights will be 1 over +# the variance of the data in each block: # # .. math:: # @@ -102,13 +108,13 @@ def plot_data(coordinates, velocity, weights, title_data, title_weights): # \: , \qquad # w = \dfrac{1}{\sigma^2} # -# in which :math:`N` is the number of data points in the block, :math:`d_i` are the -# data values in the block, and the output values for the block are the mean data -# :math:`\bar{d}` and the weight :math:`w`. +# in which :math:`N` is the number of data points in the block, :math:`d_i` are +# the data values in the block, and the output values for the block are the +# mean data :math:`\bar{d}` and the weight :math:`w`. # -# Notice that data points that are more uncertain don't necessarily have smaller -# weights. Instead, the blocks that contain data with sharper variations end up having -# smaller weights, like the data points in the south. +# Notice that data points that are more uncertain don't necessarily have +# smaller weights. Instead, the blocks that contain data with sharper +# variations end up having smaller weights, like the data points in the south. 
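A tiny numeric sketch of the rule above, with invented values (``np.var`` is used purely for illustration, not as a statement of the exact variance definition verde applies): a block of smooth data gets a much larger output weight than a block with sharp variations, regardless of how uncertain the individual measurements are.

    import numpy as np

    # Two hypothetical blocks of data values
    smooth_block = np.array([1.0, 1.1, 0.9, 1.0])
    rough_block = np.array([1.0, 3.0, -1.5, 2.5])

    for name, block in [("smooth", smooth_block), ("rough", rough_block)]:
        # Output per block: the mean and a weight of 1 over the data variance
        print(name, "mean:", block.mean(), "weight:", 1 / block.var())

The ``mean.filter`` call below applies the same idea block by block to the real velocities.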
coordinates, velocity, weights = mean.filter( coordinates=(data.longitude, data.latitude), data=data.velocity_up ) @@ -121,14 +127,15 @@ def plot_data(coordinates, velocity, weights, title_data, title_weights): "Weights based on data variance", ) -######################################################################################## +############################################################################### # Option 2: Input weights are not related to the uncertainty of the data # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # # This is the case when data weights are chosen by the user, not based on the -# measurement uncertainty. For example, when you need to give less importance to a -# portion of the data and no uncertainties are available. The mean will be weighted and -# the output weights will be 1 over the weighted variance of the data in each block: +# measurement uncertainty. For example, when you need to give less importance +# to a portion of the data and no uncertainties are available. The mean will be +# weighted and the output weights will be 1 over the weighted variance of the +# data in each block: # # .. math:: # @@ -141,8 +148,9 @@ def plot_data(coordinates, velocity, weights, title_data, title_weights): # # in which :math:`w_i` are the input weights in the block. # -# The output will be similar to the one above but points with larger initial weights -# will have a smaller influence on the mean and also on the output weights. +# The output will be similar to the one above but points with larger initial +# weights will have a smaller influence on the mean and also on the output +# weights. # We'll use 1 over the squared data uncertainty as our input weights. data["weights"] = 1 / data.std_up ** 2 @@ -162,16 +170,17 @@ def plot_data(coordinates, velocity, weights, title_data, title_weights): "Weights based on weighted data variance", ) -######################################################################################## +############################################################################### # Option 3: Input weights are 1 over the data uncertainty squared # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # -# If input weights are 1 over the data uncertainty squared, we can use uncertainty -# propagation to calculate the uncertainty of the weighted mean and use it to define our -# output weights. Use option ``uncertainty=True`` to tell :class:`~verde.BlockMean` to -# calculate weights based on the propagated uncertainty of the data. The output weights -# will be 1 over the propagated uncertainty squared. In this case, the **input weights -# must not be normalized**. This is preferable if you know the uncertainty of the data. +# If input weights are 1 over the data uncertainty squared, we can use +# uncertainty propagation to calculate the uncertainty of the weighted mean and +# use it to define our output weights. Use option ``uncertainty=True`` to tell +# :class:`~verde.BlockMean` to calculate weights based on the propagated +# uncertainty of the data. The output weights will be 1 over the propagated +# uncertainty squared. In this case, the **input weights must not be +# normalized**. This is preferable if you know the uncertainty of the data. # # .. 
math:: # @@ -182,11 +191,12 @@ def plot_data(coordinates, velocity, weights, title_data, title_weights): # w = \dfrac{1}{\sigma_{\bar{d}^*}^2} # # in which :math:`\sigma_i` are the input data uncertainties in the block and -# :math:`\sigma_{\bar{d}^*}` is the propagated uncertainty of the weighted mean in the -# block. +# :math:`\sigma_{\bar{d}^*}` is the propagated uncertainty of the weighted mean +# in the block. # -# Notice that in this case the output weights reflect the input data uncertainties. Less -# weight is given to the data points that had larger uncertainties from the start. +# Notice that in this case the output weights reflect the input data +# uncertainties. Less weight is given to the data points that had larger +# uncertainties from the start. # Configure BlockMean to assume that the input weights are 1/uncertainty**2 mean = vd.BlockMean(spacing=15 / 60, uncertainty=True) @@ -205,7 +215,7 @@ def plot_data(coordinates, velocity, weights, title_data, title_weights): "Weights based on data uncertainty", ) -######################################################################################## +############################################################################### # # .. note:: # @@ -216,13 +226,13 @@ def plot_data(coordinates, velocity, weights, title_data, title_weights): # -------------------------- # # The Green's functions based interpolation classes in Verde, like -# :class:`~verde.Spline`, can take input weights if you want to give less importance to -# some data points. In our case, the points with larger uncertainties shouldn't have the -# same influence in our gridded solution as the points with lower uncertainties. +# :class:`~verde.Spline`, can take input weights if you want to give less +# importance to some data points. In our case, the points with larger +# uncertainties shouldn't have the same influence in our gridded solution as +# the points with lower uncertainties. # -# Let's setup a projection to grid our geographic data using the Cartesian spline -# gridder. -import pyproj +# Let's setup a projection to grid our geographic data using the Cartesian +# spline gridder. projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean()) proj_coords = projection(data.longitude.values, data.latitude.values) @@ -230,13 +240,13 @@ def plot_data(coordinates, velocity, weights, title_data, title_weights): region = vd.get_region(coordinates) spacing = 5 / 60 -######################################################################################## -# Now we can grid our data using a weighted spline. We'll use the block mean results -# with uncertainty based weights. +############################################################################### +# Now we can grid our data using a weighted spline. We'll use the block mean +# results with uncertainty based weights. # -# Note that the weighted spline solution will only work on a non-exact interpolation. So -# we'll need to use some damping regularization or not use the data locations for the -# point forces. Here, we'll apply a bit of damping. +# Note that the weighted spline solution will only work on a non-exact +# interpolation. So we'll need to use some damping regularization or not use +# the data locations for the point forces. Here, we'll apply a bit of damping. 
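To see why some damping is needed, here is a minimal sketch (synthetic points and made-up values; the damping value and the outlier are arbitrary choices, not from the tutorial data) contrasting an exact spline, which reproduces every point and therefore ignores the weights, with a lightly damped one that is free to misfit a point we barely trust:

    import numpy as np
    import verde as vd

    coords = vd.scatter_points(region=(0, 1000, 0, 1000), size=50, random_state=0)
    values = np.ones(50)
    weights = np.ones(50)
    values[0] = 100      # a wild outlier...
    weights[0] = 1e-10   # ...that we give almost no weight

    exact = vd.Spline(damping=None).fit(coords, values, weights=weights)
    damped = vd.Spline(damping=1e-4).fit(coords, values, weights=weights)

    # Per the note above, the exact fit reproduces the outlier while the
    # damped fit is free to (mostly) ignore the low-weight point.
    point = (coords[0][:1], coords[1][:1])
    print("exact:", exact.predict(point), "damped:", damped.predict(point))

The ``vd.Chain`` assembled next in the tutorial applies exactly this kind of light damping to the projected GPS data.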
spline = vd.Chain( [ # Convert the spacing to meters because Spline is a Cartesian gridder @@ -254,7 +264,7 @@ def plot_data(coordinates, velocity, weights, title_data, title_weights): # Avoid showing interpolation outside of the convex hull of the data points. grid = vd.convexhull_mask(coordinates, grid=grid, projection=projection) -######################################################################################## +############################################################################### # Calculate an unweighted spline as well for comparison. spline_unweighted = vd.Chain( [ @@ -273,7 +283,7 @@ def plot_data(coordinates, velocity, weights, title_data, title_weights): coordinates, grid=grid_unweighted, projection=projection ) -######################################################################################## +############################################################################### # Finally, plot the weighted and unweighted grids side by side. fig, axes = plt.subplots( 1, 2, figsize=(9.5, 7), subplot_kw=dict(projection=ccrs.Mercator()) diff --git a/verde/__init__.py b/verde/__init__.py index 2b47978a3..5da1f31d2 100644 --- a/verde/__init__.py +++ b/verde/__init__.py @@ -4,44 +4,38 @@ # # This code is part of the Fatiando a Terra project (https://www.fatiando.org) # -# pylint: disable=missing-docstring,import-outside-toplevel # Import functions/classes to make the public API -from ._version import __version__ from . import datasets +from ._version import __version__ +from .blockreduce import BlockMean, BlockReduce +from .chain import Chain from .coordinates import ( - scatter_points, - grid_coordinates, - inside, block_split, - rolling_window, expanding_window, - profile_coordinates, get_region, - pad_region, + grid_coordinates, + inside, longitude_continuity, + pad_region, + profile_coordinates, + rolling_window, + scatter_points, ) -from .mask import distance_mask, convexhull_mask -from .utils import ( - variance_to_weights, - maxabs, - grid_to_table, - make_xarray_grid, -) -from .io import load_surfer from .distances import median_distance -from .blockreduce import BlockReduce, BlockMean -from .scipygridder import ScipyGridder -from .trend import Trend -from .chain import Chain -from .spline import Spline, SplineCV +from .io import load_surfer +from .mask import convexhull_mask, distance_mask from .model_selection import ( + BlockKFold, + BlockShuffleSplit, cross_val_score, train_test_split, - BlockShuffleSplit, - BlockKFold, ) +from .projections import project_grid, project_region +from .scipygridder import ScipyGridder +from .spline import Spline, SplineCV +from .trend import Trend +from .utils import grid_to_table, make_xarray_grid, maxabs, variance_to_weights from .vector import Vector, VectorSpline2D -from .projections import project_region, project_grid def test(doctest=True, verbose=True, coverage=False, figures=True): diff --git a/verde/_version.py b/verde/_version.py index 82422c69c..cbfc4d2cc 100644 --- a/verde/_version.py +++ b/verde/_version.py @@ -4,7 +4,6 @@ # # This code is part of the Fatiando a Terra project (https://www.fatiando.org) # -# pylint: disable=no-name-in-module """ Get the automatically generated version information from setuptools_scm and format it nicely. @@ -13,6 +12,5 @@ # This file is generated automatically by setuptools_scm from . 
import _version_generated - # Add a "v" to the version number made by setuptools_scm __version__ = f"v{_version_generated.version}" diff --git a/verde/base/__init__.py b/verde/base/__init__.py index 27bfdf31e..53ace785d 100644 --- a/verde/base/__init__.py +++ b/verde/base/__init__.py @@ -4,7 +4,6 @@ # # This code is part of the Fatiando a Terra project (https://www.fatiando.org) # -# pylint: disable=missing-docstring -from .base_classes import BaseGridder, BaseBlockCrossValidator +from .base_classes import BaseBlockCrossValidator, BaseGridder from .least_squares import least_squares -from .utils import n_1d_arrays, check_fit_input +from .utils import check_fit_input, n_1d_arrays diff --git a/verde/base/base_classes.py b/verde/base/base_classes.py index cab8752f0..154b73360 100644 --- a/verde/base/base_classes.py +++ b/verde/base/base_classes.py @@ -7,25 +7,21 @@ """ Base classes for all gridders. """ +import warnings from abc import ABCMeta, abstractmethod -import warnings import pandas as pd from sklearn.base import BaseEstimator from sklearn.model_selection import BaseCrossValidator from ..coordinates import grid_coordinates, profile_coordinates, scatter_points -from .utils import check_data, check_data_names, score_estimator from ..utils import ( + check_meshgrid, + get_ndim_horizontal_coords, make_xarray_grid, meshgrid_from_1d, - get_ndim_horizontal_coords, - check_meshgrid, ) - - -# Pylint doesn't like X, y scikit-learn argument names. -# pylint: disable=invalid-name,unused-argument +from .utils import check_data, check_data_names, score_estimator class BaseBlockCrossValidator(BaseCrossValidator, metaclass=ABCMeta): @@ -58,7 +54,7 @@ def __init__( self.shape = shape self.n_splits = n_splits - def split(self, X, y=None, groups=None): + def split(self, X, y=None, groups=None): # noqa: N803 """ Generate indices to split data into training and test set. @@ -89,7 +85,7 @@ def split(self, X, y=None, groups=None): for train, test in super().split(X, y, groups): yield train, test - def get_n_splits(self, X=None, y=None, groups=None): + def get_n_splits(self, X=None, y=None, groups=None): # noqa: U100,N803 """ Returns the number of splitting iterations in the cross-validator @@ -110,7 +106,7 @@ def get_n_splits(self, X=None, y=None, groups=None): return self.n_splits @abstractmethod - def _iter_test_indices(self, X=None, y=None, groups=None): + def _iter_test_indices(self, X=None, y=None, groups=None): # noqa: U100,N803 """ Generates integer indices corresponding to test sets. @@ -136,9 +132,6 @@ def _iter_test_indices(self, X=None, y=None, groups=None): """ -# pylint: enable=invalid-name,unused-argument - - class BaseGridder(BaseEstimator): """ Base class for gridders. @@ -229,7 +222,7 @@ class BaseGridder(BaseEstimator): ("east_component", "north_component", "vertical_component"), ] - def predict(self, coordinates): + def predict(self, coordinates): # noqa: U100 """ Predict data on the given coordinate values. NOT IMPLEMENTED. @@ -249,7 +242,7 @@ def predict(self, coordinates): """ raise NotImplementedError() - def fit(self, coordinates, data, weights=None): + def fit(self, coordinates, data, weights=None): # noqa: U100 """ Fit the gridder to observed data. NOT IMPLEMENTED. @@ -277,7 +270,7 @@ def fit(self, coordinates, data, weights=None): """ raise NotImplementedError() - def filter(self, coordinates, data, weights=None): + def filter(self, coordinates, data, weights=None): # noqa: A003 """ Filter the data through the gridder and produce residuals. 
@@ -367,7 +360,7 @@ def grid( projection=None, coordinates=None, **kwargs, - ): # pylint: disable=too-many-locals + ): """ Interpolate the data onto a regular grid. @@ -813,5 +806,5 @@ def get_instance_region(instance, region): if region is None: if not hasattr(instance, "region_"): raise ValueError("No default region found. Argument must be supplied.") - region = getattr(instance, "region_") + region = instance.region_ return region diff --git a/verde/base/least_squares.py b/verde/base/least_squares.py index db464cecf..79bcca764 100644 --- a/verde/base/least_squares.py +++ b/verde/base/least_squares.py @@ -9,8 +9,8 @@ """ from warnings import warn -from sklearn.preprocessing import StandardScaler from sklearn.linear_model import LinearRegression, Ridge +from sklearn.preprocessing import StandardScaler def least_squares(jacobian, data, weights, damping=None, copy_jacobian=False): diff --git a/verde/base/utils.py b/verde/base/utils.py index ef736eaab..a9e66c9e7 100644 --- a/verde/base/utils.py +++ b/verde/base/utils.py @@ -80,11 +80,11 @@ class DummyEstimator: def __init__(self, predicted): self._predicted = predicted - def predict(self, *args, **kwargs): # pylint: disable=unused-argument + def predict(self, *args, **kwargs): # noqa: U100 "Return the stored predicted values" return self._predicted - def fit(self, *args, **kwards): # pylint: disable=unused-argument + def fit(self, *args, **kwards): # noqa: U100 "Does nothing. Just here to satisfy the API." return self diff --git a/verde/blockreduce.py b/verde/blockreduce.py index 7a796cc91..ea2bdfab7 100644 --- a/verde/blockreduce.py +++ b/verde/blockreduce.py @@ -11,8 +11,8 @@ import pandas as pd from sklearn.base import BaseEstimator -from .coordinates import block_split from .base import check_fit_input +from .coordinates import block_split from .utils import variance_to_weights @@ -33,7 +33,7 @@ def weighted_reduction(values): return weighted_reduction -class BlockReduce(BaseEstimator): # pylint: disable=too-few-public-methods +class BlockReduce(BaseEstimator): """ Apply a reduction/aggregation operation to the data in blocks/windows. @@ -114,7 +114,7 @@ def __init__( self.center_coordinates = center_coordinates self.drop_coords = drop_coords - def filter(self, coordinates, data, weights=None): + def filter(self, coordinates, data, weights=None): # noqa: A003 """ Apply the blocked aggregation to the given data. @@ -243,7 +243,7 @@ def _block_coordinates(self, coordinates, block_coordinates, labels): ) -class BlockMean(BlockReduce): # pylint: disable=too-few-public-methods +class BlockMean(BlockReduce): """ Apply a (weighted) mean to the data in blocks/windows. @@ -357,7 +357,7 @@ def __init__( ) self.uncertainty = uncertainty - def filter(self, coordinates, data, weights=None): + def filter(self, coordinates, data, weights=None): # noqa: A003 """ Apply the blocked mean to the given data. 
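The ``# noqa: A003`` markers added in the hunks above are check-specific suppressions: A003 presumably comes from flake8-builtins and flags a class attribute or method that shadows a Python builtin, which ``filter`` does by design in these classes. A minimal, hypothetical sketch of the pattern (the class and method names here are made up):

    class Pipeline:
        # "filter" shadows the Python builtin of the same name; the inline
        # comment silences only that one check (A003) on this one line, so
        # every other flake8 check still applies.
        def filter(self, data):  # noqa: A003
            "Pass the data through unchanged."
            return data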
diff --git a/verde/coordinates.py b/verde/coordinates.py index 07fca694e..0852fa5a5 100644 --- a/verde/coordinates.py +++ b/verde/coordinates.py @@ -12,7 +12,7 @@ import numpy as np from sklearn.utils import check_random_state -from .base.utils import n_1d_arrays, check_coordinates +from .base.utils import check_coordinates, n_1d_arrays from .utils import kdtree @@ -487,14 +487,14 @@ def spacing_to_shape(region, spacing, adjust): ) spacing = np.atleast_1d(spacing) - if len(spacing) == 1: - deast = dnorth = spacing[0] - elif len(spacing) == 2: - dnorth, deast = spacing - else: + if len(spacing) > 2: raise ValueError( "Only two values allowed for grid spacing: {}".format(str(spacing)) ) + elif len(spacing) == 1: + deast = dnorth = spacing[0] + elif len(spacing) == 2: + dnorth, deast = spacing w, e, s, n = region # Add 1 to get the number of nodes, not segments diff --git a/verde/datasets/__init__.py b/verde/datasets/__init__.py index dd7bc453f..61017a056 100644 --- a/verde/datasets/__init__.py +++ b/verde/datasets/__init__.py @@ -4,16 +4,15 @@ # # This code is part of the Fatiando a Terra project (https://www.fatiando.org) # -# pylint: disable=missing-docstring -from .synthetic import CheckerBoard from .sample_data import ( - locate, fetch_baja_bathymetry, - setup_baja_bathymetry_map, - fetch_rio_magnetic, - setup_rio_magnetic_map, fetch_california_gps, - setup_california_gps_map, + fetch_rio_magnetic, fetch_texas_wind, + locate, + setup_baja_bathymetry_map, + setup_california_gps_map, + setup_rio_magnetic_map, setup_texas_wind_map, ) +from .synthetic import CheckerBoard diff --git a/verde/datasets/sample_data.py b/verde/datasets/sample_data.py index 2e0199008..564e733df 100644 --- a/verde/datasets/sample_data.py +++ b/verde/datasets/sample_data.py @@ -9,20 +9,19 @@ """ import warnings -import pkg_resources import numpy as np import pandas as pd +import pkg_resources import pooch try: import cartopy.crs as ccrs - from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter + from cartopy.mpl.ticker import LatitudeFormatter, LongitudeFormatter except ImportError: pass from .._version import __version__ - REGISTRY = pooch.create( path=pooch.os_cache("verde"), base_url="https://github.com/fatiando/verde/raw/{version}/data/", diff --git a/verde/datasets/synthetic.py b/verde/datasets/synthetic.py index 1e89bfd48..763df0c33 100644 --- a/verde/datasets/synthetic.py +++ b/verde/datasets/synthetic.py @@ -4,7 +4,6 @@ # # This code is part of the Fatiando a Terra project (https://www.fatiando.org) # -# pylint: disable=abstract-method """ Generators of synthetic datasets. """ diff --git a/verde/distances.py b/verde/distances.py index 2576f55b0..f0e041eb8 100644 --- a/verde/distances.py +++ b/verde/distances.py @@ -9,8 +9,8 @@ """ import numpy as np -from .utils import kdtree from .base.utils import n_1d_arrays +from .utils import kdtree def median_distance(coordinates, k_nearest=1, projection=None): diff --git a/verde/io.py b/verde/io.py index c0438b470..ad2dcf9f2 100644 --- a/verde/io.py +++ b/verde/io.py @@ -51,7 +51,7 @@ def load_surfer(fname, dtype="float64"): # Only open a file if given a path instead of a file-like object ispath = not hasattr(fname, "readline") if ispath: - input_file = open(fname, "r") + input_file = open(fname, "r") # noqa: SIM115 else: input_file = fname try: diff --git a/verde/mask.py b/verde/mask.py index 3a09fe608..0f842e1b4 100644 --- a/verde/mask.py +++ b/verde/mask.py @@ -8,11 +8,9 @@ Mask grid points based on different criteria. 
""" import numpy as np +from scipy.spatial import Delaunay -# pylint doesn't pick up on this import for some reason -from scipy.spatial import Delaunay # pylint: disable=no-name-in-module - -from .base.utils import n_1d_arrays, check_coordinates +from .base.utils import check_coordinates, n_1d_arrays from .utils import kdtree diff --git a/verde/model_selection.py b/verde/model_selection.py index 55ac7a531..23bd24691 100644 --- a/verde/model_selection.py +++ b/verde/model_selection.py @@ -10,20 +10,16 @@ import warnings import numpy as np -from sklearn.model_selection import KFold, ShuffleSplit from sklearn.base import clone +from sklearn.model_selection import KFold, ShuffleSplit from sklearn.utils import check_random_state -from .base import check_fit_input, n_1d_arrays, BaseBlockCrossValidator +from .base import BaseBlockCrossValidator, check_fit_input, n_1d_arrays from .base.utils import score_estimator from .coordinates import block_split from .utils import dispatch, partition_by_sum -# Pylint doesn't like X, y scikit-learn argument names. -# pylint: disable=invalid-name,unused-argument - - class BlockShuffleSplit(BaseBlockCrossValidator): """ Random permutation of spatial blocks cross-validator. @@ -159,7 +155,7 @@ def __init__( self.random_state = random_state self.balancing = balancing - def _iter_test_indices(self, X=None, y=None, groups=None): + def _iter_test_indices(self, X=None, y=None, groups=None): # noqa: N803,U100 """ Generates integer indices corresponding to test sets. @@ -203,12 +199,7 @@ def _iter_test_indices(self, X=None, y=None, groups=None): for _ in range(self.n_splits): test_sets, balance = [], [] for _ in range(self.balancing): - # This is a false positive in pylint which is why the warning - # is disabled at the top of this file: - # https://github.com/PyCQA/pylint/issues/1830 - # pylint: disable=stop-iteration-return train_blocks, test_blocks = next(shuffle) - # pylint: enable=stop-iteration-return train_points = np.where(np.isin(labels, block_ids[train_blocks]))[0] test_points = np.where(np.isin(labels, block_ids[test_blocks]))[0] # The proportion of data points assigned to each group should @@ -372,7 +363,7 @@ def __init__( self.random_state = random_state self.balance = balance - def _iter_test_indices(self, X=None, y=None, groups=None): + def _iter_test_indices(self, X=None, y=None, groups=None): # noqa: N803,U100 """ Generates integer indices corresponding to test sets. @@ -431,9 +422,6 @@ def _iter_test_indices(self, X=None, y=None, groups=None): yield test_points -# pylint: enable=invalid-name,unused-argument - - def train_test_split( coordinates, data, weights=None, spacing=None, shape=None, **kwargs ): @@ -807,6 +795,7 @@ def select(arrays, index): Parameters ---------- arrays : tuple of arrays + The arrays to index index : array An array of indices to select from arrays. 
diff --git a/verde/projections.py b/verde/projections.py
index e4d3800da..ff6db9d8c 100644
--- a/verde/projections.py
+++ b/verde/projections.py
@@ -9,12 +9,12 @@
 """
 import numpy as np

-from .coordinates import grid_coordinates, get_region, shape_to_spacing, check_region
-from .utils import grid_to_table
-from .scipygridder import ScipyGridder
 from .blockreduce import BlockReduce
 from .chain import Chain
+from .coordinates import check_region, get_region, grid_coordinates, shape_to_spacing
 from .mask import convexhull_mask
+from .scipygridder import ScipyGridder
+from .utils import grid_to_table


 def project_region(region, projection):
diff --git a/verde/scipygridder.py b/verde/scipygridder.py
index 8d74768b5..2fcc1ca28 100644
--- a/verde/scipygridder.py
+++ b/verde/scipygridder.py
@@ -11,9 +11,9 @@

 import numpy as np
 from scipy.interpolate import (
+    CloughTocher2DInterpolator,
     LinearNDInterpolator,
     NearestNDInterpolator,
-    CloughTocher2DInterpolator,
 )
 from sklearn.utils.validation import check_is_fitted

diff --git a/verde/spline.py b/verde/spline.py
index 38b2c781e..5b25efeee 100644
--- a/verde/spline.py
+++ b/verde/spline.py
@@ -13,10 +13,10 @@
 import numpy as np
 from sklearn.utils.validation import check_is_fitted

-from .base import n_1d_arrays, BaseGridder, check_fit_input, least_squares
+from .base import BaseGridder, check_fit_input, least_squares, n_1d_arrays
 from .coordinates import get_region
-from .utils import dispatch, parse_engine
 from .model_selection import cross_val_score
+from .utils import dispatch, parse_engine

 try:
     import numba
@@ -540,7 +540,7 @@ def jacobian_numpy(east, north, force_east, force_north, mindist, jac):
 @jit(nopython=True, fastmath=True, parallel=True)
 def predict_numba(east, north, force_east, force_north, mindist, forces, result):
     "Calculate the predicted data using numba to speed things up."
-    for i in numba.prange(east.size):  # pylint: disable=not-an-iterable
+    for i in numba.prange(east.size):
         result[i] = 0
         for j in range(forces.size):
             green = GREENS_FUNC_JIT(
@@ -553,7 +553,7 @@ def predict_numba(east, north, force_east, force_north, mindist, forces, result)
 @jit(nopython=True, fastmath=True, parallel=True)
 def jacobian_numba(east, north, force_east, force_north, mindist, jac):
     "Calculate the Jacobian matrix using numba to speed things up."
-    for i in numba.prange(east.size):  # pylint: disable=not-an-iterable
+    for i in numba.prange(east.size):
         for j in range(force_east.size):
             jac[i, j] = GREENS_FUNC_JIT(
                 east[i] - force_east[j], north[i] - force_north[j], mindist
diff --git a/verde/tests/test_base.py b/verde/tests/test_base.py
index 51d339ef6..45fa26e0f 100644
--- a/verde/tests/test_base.py
+++ b/verde/tests/test_base.py
@@ -4,22 +4,22 @@
 #
 # This code is part of the Fatiando a Terra project (https://www.fatiando.org)
 #
-# pylint: disable=unused-argument,too-many-locals,protected-access
 """
 Test the base classes and their utility functions.
""" import warnings + import numpy as np import numpy.testing as npt import pytest -from ..base.least_squares import least_squares -from ..base.utils import check_fit_input, check_coordinates from ..base.base_classes import ( - BaseGridder, BaseBlockCrossValidator, + BaseGridder, get_instance_region, ) +from ..base.least_squares import least_squares +from ..base.utils import check_coordinates, check_fit_input from ..coordinates import grid_coordinates, scatter_points @@ -96,7 +96,7 @@ def __init__(self, degree=1): super().__init__() self.degree = degree - def fit(self, coordinates, data, weights=None): + def fit(self, coordinates, data, weights=None): # noqa: U100 "Fit an easting polynomial" ndata = data.size nparams = self.degree + 1 @@ -433,25 +433,18 @@ def test_check_fit_input_fails_weights(): check_fit_input(coords, (data, data), weights) -# Pylint doesn't like X, y scikit-learn argument names. -# pylint: disable=invalid-name,unused-argument - - class DummyCrossValidator(BaseBlockCrossValidator): """ Dummy class to test the base cross-validator. """ - def _iter_test_indices(self, X=None, y=None, groups=None): + def _iter_test_indices(self, X=None, y=None, groups=None): # noqa: U100,N803 """ Yields a list of indices for the entire X. """ yield list(range(X.shape[0])) -# pylint: enable=invalid-name,unused-argument - - def test_baseblockedcrossvalidator_n_splits(): "Make sure get_n_splits returns the correct value" cv = DummyCrossValidator(spacing=1, n_splits=14) diff --git a/verde/tests/test_blockreduce.py b/verde/tests/test_blockreduce.py index ee455f4ee..2e3442993 100644 --- a/verde/tests/test_blockreduce.py +++ b/verde/tests/test_blockreduce.py @@ -4,17 +4,16 @@ # # This code is part of the Fatiando a Terra project (https://www.fatiando.org) # -# pylint: disable=protected-access """ Test the grid math functions """ -import pandas as pd import numpy as np import numpy.testing as npt +import pandas as pd import pytest +from ..blockreduce import BlockMean, BlockReduce from ..coordinates import grid_coordinates, scatter_points -from ..blockreduce import BlockReduce, BlockMean def test_block_reduce(): diff --git a/verde/tests/test_chain.py b/verde/tests/test_chain.py index 3eecec6a7..fdf54463f 100644 --- a/verde/tests/test_chain.py +++ b/verde/tests/test_chain.py @@ -7,16 +7,16 @@ """ Test the Chain class """ -import numpy.testing as npt import numpy as np +import numpy.testing as npt -from ..datasets.synthetic import CheckerBoard from ..blockreduce import BlockReduce from ..chain import Chain +from ..coordinates import grid_coordinates +from ..datasets.synthetic import CheckerBoard from ..scipygridder import ScipyGridder from ..spline import Spline from ..trend import Trend -from ..coordinates import grid_coordinates from ..vector import Vector diff --git a/verde/tests/test_coordinates.py b/verde/tests/test_coordinates.py index 3625b17b0..38cfe7198 100644 --- a/verde/tests/test_coordinates.py +++ b/verde/tests/test_coordinates.py @@ -15,11 +15,11 @@ from ..coordinates import ( check_region, - spacing_to_shape, - profile_coordinates, grid_coordinates, longitude_continuity, + profile_coordinates, rolling_window, + spacing_to_shape, ) diff --git a/verde/tests/test_datasets.py b/verde/tests/test_datasets.py index eebe92f2c..71a1e6ddf 100644 --- a/verde/tests/test_datasets.py +++ b/verde/tests/test_datasets.py @@ -10,20 +10,19 @@ import os import warnings -import matplotlib.pyplot as plt import cartopy.crs as ccrs - +import matplotlib.pyplot as plt import pytest from ..datasets.sample_data 
import ( - locate, fetch_baja_bathymetry, - setup_baja_bathymetry_map, - fetch_rio_magnetic, - setup_rio_magnetic_map, fetch_california_gps, - setup_california_gps_map, + fetch_rio_magnetic, fetch_texas_wind, + locate, + setup_baja_bathymetry_map, + setup_california_gps_map, + setup_rio_magnetic_map, setup_texas_wind_map, ) diff --git a/verde/tests/test_distances.py b/verde/tests/test_distances.py index 5860dd10a..04f97af8d 100644 --- a/verde/tests/test_distances.py +++ b/verde/tests/test_distances.py @@ -10,8 +10,8 @@ import numpy as np import numpy.testing as npt -from ..distances import median_distance from ..coordinates import grid_coordinates +from ..distances import median_distance def test_distance_nearest(): diff --git a/verde/tests/test_io.py b/verde/tests/test_io.py index 3ee847140..4eff31741 100644 --- a/verde/tests/test_io.py +++ b/verde/tests/test_io.py @@ -4,19 +4,18 @@ # # This code is part of the Fatiando a Terra project (https://www.fatiando.org) # -# pylint: disable=redefined-outer-name """ Test the I/O functions. """ import os -from tempfile import NamedTemporaryFile from io import StringIO +from tempfile import NamedTemporaryFile -import pytest import numpy as np import numpy.testing as npt +import pytest -from ..io import load_surfer, _read_surfer_header, _check_surfer_integrity +from ..io import _check_surfer_integrity, _read_surfer_header, load_surfer @pytest.fixture diff --git a/verde/tests/test_mask.py b/verde/tests/test_mask.py index 7b383ac58..aa3bfda66 100644 --- a/verde/tests/test_mask.py +++ b/verde/tests/test_mask.py @@ -9,11 +9,11 @@ """ import numpy as np import numpy.testing as npt -import xarray as xr import pytest +import xarray as xr -from ..mask import distance_mask, convexhull_mask from ..coordinates import grid_coordinates +from ..mask import convexhull_mask, distance_mask def test_convexhull_mask(): diff --git a/verde/tests/test_minimal.py b/verde/tests/test_minimal.py index faefdabbb..d5f60ea92 100644 --- a/verde/tests/test_minimal.py +++ b/verde/tests/test_minimal.py @@ -10,15 +10,15 @@ """ import numpy.testing as npt -from ..datasets import fetch_california_gps -from ..spline import Spline -from ..vector import Vector -from ..trend import Trend -from ..chain import Chain -from ..model_selection import train_test_split from ..blockreduce import BlockMean +from ..chain import Chain from ..coordinates import get_region +from ..datasets import fetch_california_gps from ..mask import distance_mask +from ..model_selection import train_test_split +from ..spline import Spline +from ..trend import Trend +from ..vector import Vector def projection(longitude, latitude): diff --git a/verde/tests/test_model_selection.py b/verde/tests/test_model_selection.py index 66ad508fe..8c82915f1 100644 --- a/verde/tests/test_model_selection.py +++ b/verde/tests/test_model_selection.py @@ -9,15 +9,15 @@ """ import warnings -import pytest -from sklearn.model_selection import ShuffleSplit -from sklearn.metrics import get_scorer import numpy as np import numpy.testing as npt +import pytest from dask.distributed import Client +from sklearn.metrics import get_scorer +from sklearn.model_selection import ShuffleSplit -from .. import Vector, Trend, grid_coordinates, scatter_points -from ..model_selection import cross_val_score, BlockShuffleSplit, BlockKFold +from .. 
import Trend, Vector, grid_coordinates, scatter_points +from ..model_selection import BlockKFold, BlockShuffleSplit, cross_val_score @pytest.fixture(name="trend") diff --git a/verde/tests/test_projections.py b/verde/tests/test_projections.py index 546d6012b..43833e0e7 100644 --- a/verde/tests/test_projections.py +++ b/verde/tests/test_projections.py @@ -7,13 +7,13 @@ """ Test the projection functions. """ -import numpy.testing as npt import numpy as np -import xarray as xr +import numpy.testing as npt import pytest +import xarray as xr -from ..scipygridder import ScipyGridder from ..projections import project_grid +from ..scipygridder import ScipyGridder def projection(longitude, latitude): diff --git a/verde/tests/test_scipy.py b/verde/tests/test_scipy.py index 6c19c9aee..456982284 100644 --- a/verde/tests/test_scipy.py +++ b/verde/tests/test_scipy.py @@ -9,14 +9,14 @@ """ import warnings -import pandas as pd import numpy as np import numpy.testing as npt +import pandas as pd import pytest -from ..scipygridder import ScipyGridder from ..coordinates import grid_coordinates from ..datasets.synthetic import CheckerBoard +from ..scipygridder import ScipyGridder def test_scipy_gridder_same_points(): diff --git a/verde/tests/test_spline.py b/verde/tests/test_spline.py index 0da9b7c57..3e3f362cc 100644 --- a/verde/tests/test_spline.py +++ b/verde/tests/test_spline.py @@ -9,14 +9,14 @@ """ import warnings -import pytest import numpy as np import numpy.testing as npt -from sklearn.model_selection import ShuffleSplit +import pytest from dask.distributed import Client +from sklearn.model_selection import ShuffleSplit -from ..spline import Spline, SplineCV from ..datasets.synthetic import CheckerBoard +from ..spline import Spline, SplineCV from .utils import requires_numba diff --git a/verde/tests/test_trend.py b/verde/tests/test_trend.py index 31959a85a..46a150bd3 100644 --- a/verde/tests/test_trend.py +++ b/verde/tests/test_trend.py @@ -4,7 +4,6 @@ # # This code is part of the Fatiando a Terra project (https://www.fatiando.org) # -# pylint: disable=redefined-outer-name """ Test the trend estimators. """ @@ -12,8 +11,8 @@ import numpy.testing as npt import pytest -from ..trend import Trend, polynomial_power_combinations from ..coordinates import grid_coordinates +from ..trend import Trend, polynomial_power_combinations @pytest.fixture() diff --git a/verde/tests/test_utils.py b/verde/tests/test_utils.py index 6aa5f3a75..1a54e62ab 100644 --- a/verde/tests/test_utils.py +++ b/verde/tests/test_utils.py @@ -11,23 +11,23 @@ import numpy as np import numpy.testing as npt -import xarray as xr -from scipy.spatial import cKDTree # pylint: disable=no-name-in-module import pytest +import xarray as xr +from scipy.spatial import cKDTree +from .. import utils from ..coordinates import grid_coordinates, scatter_points from ..utils import ( - parse_engine, dummy_jit, - kdtree, + get_ndim_horizontal_coords, grid_to_table, - partition_by_sum, + kdtree, make_xarray_grid, - meshgrid_to_1d, meshgrid_from_1d, - get_ndim_horizontal_coords, + meshgrid_to_1d, + parse_engine, + partition_by_sum, ) -from .. 
import utils def test_parse_engine(): @@ -324,7 +324,6 @@ def test_check_ndim_easting_northing(): Test if check_ndim_easting_northing works as expected """ # Easting and northing as 1d arrays - # pylint: disable=unbalanced-tuple-unpacking easting, northing = scatter_points((-5, 5, 0, 4), 50, random_state=42) assert get_ndim_horizontal_coords(easting, northing) == 1 # Easting and northing as 2d arrays diff --git a/verde/tests/test_vector.py b/verde/tests/test_vector.py index 3a08c2bc4..f671ccf55 100644 --- a/verde/tests/test_vector.py +++ b/verde/tests/test_vector.py @@ -4,7 +4,6 @@ # # This code is part of the Fatiando a Terra project (https://www.fatiando.org) # -# pylint: disable=redefined-outer-name """ Test the vector data interpolators """ @@ -12,12 +11,12 @@ import numpy.testing as npt import pytest -from ..datasets.synthetic import CheckerBoard +from ..base import n_1d_arrays from ..coordinates import grid_coordinates +from ..datasets.synthetic import CheckerBoard from ..trend import Trend -from ..base import n_1d_arrays +from ..vector import Vector, VectorSpline2D from .utils import requires_numba -from ..vector import VectorSpline2D, Vector @pytest.fixture @@ -144,7 +143,7 @@ def test_vector_trend_weights(simple_2d_model): outlier = np.abs(data[0]).max() * 3 data_out = tuple(i.copy() for i in data) weights = tuple(np.ones_like(i) for i in data) - for i, coef in enumerate(coefs): + for i, _ in enumerate(coefs): data_out[i][20, 20] += outlier weights[i][20, 20] = 1e-10 trend = Vector([Trend(degree=1), Trend(degree=1)]) diff --git a/verde/utils.py b/verde/utils.py index 5143adaca..ee6289d8b 100644 --- a/verde/utils.py +++ b/verde/utils.py @@ -13,12 +13,12 @@ import numpy as np import pandas as pd import xarray as xr -from scipy.spatial import cKDTree # pylint: disable=no-name-in-module +from scipy.spatial import cKDTree try: from pykdtree.kdtree import KDTree as pyKDTree except ImportError: - pyKDTree = None + pyKDTree = None # noqa: N816 try: import numba @@ -27,9 +27,9 @@ from .base.utils import ( check_coordinates, - check_extra_coords_names, check_data, check_data_names, + check_extra_coords_names, n_1d_arrays, ) @@ -87,7 +87,7 @@ def parse_engine(engine): return engine -def dummy_jit(**kwargs): # pylint: disable=unused-argument +def dummy_jit(**kwargs): # noqa: U100 """ Replace numba.jit if not installed with a function that raises RunTimeError @@ -110,7 +110,7 @@ def dummy_decorator(function): "The actual decorator" @functools.wraps(function) - def dummy_function(*args, **kwargs): # pylint: disable=unused-argument + def dummy_function(*args, **kwargs): # noqa: U100 "Just raise an exception." raise RuntimeError("Could not find numba.") diff --git a/verde/vector.py b/verde/vector.py index dd70b67ff..8b478927d 100644 --- a/verde/vector.py +++ b/verde/vector.py @@ -12,10 +12,10 @@ import numpy as np from sklearn.utils.validation import check_is_fitted -from .base import n_1d_arrays, check_fit_input, least_squares, BaseGridder +from .base import BaseGridder, check_fit_input, least_squares, n_1d_arrays +from .coordinates import get_region from .spline import warn_weighted_exact_solution from .utils import parse_engine -from .coordinates import get_region try: import numba @@ -449,7 +449,7 @@ def predict_2d_numba( ): "Calculate the predicted data using numba to speed things up." 
     nforces = forces.size // 2
-    for i in numba.prange(east.size):  # pylint: disable=not-an-iterable
+    for i in numba.prange(east.size):
         vec_east[i] = 0
         vec_north[i] = 0
         for j in range(nforces):
@@ -466,7 +466,7 @@ def jacobian_2d_numba(east, north, force_east, force_north, mindist, poisson, ja
     "Calculate the Jacobian matrix using numba to speed things up."
     nforces = force_east.size
     npoints = east.size
-    for i in numba.prange(npoints):  # pylint: disable=not-an-iterable
+    for i in numba.prange(npoints):
         for j in range(nforces):
             green_ee, green_nn, green_ne = GREENS_FUNC_2D_JIT(
                 east[i] - force_east[j], north[i] - force_north[j], mindist, poisson