Add codespell spelling/typo linter and fix errors #556

Merged · 4 commits · Aug 18, 2024
7 changes: 7 additions & 0 deletions .pre-commit-config.yaml
@@ -10,6 +10,13 @@ repos:
      - id: trailing-whitespace
        args: [--markdown-linebreak-ext=md]

+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.3.0
+    hooks:
+      - id: codespell
+        additional_dependencies:
+          - tomli
+
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.6.0
    hooks:
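A note on the `tomli` dependency added above: codespell can read its settings from a `[tool.codespell]` table in `pyproject.toml`, and on Python versions without the standard-library `tomllib` it needs `tomli` to parse that file. A minimal sketch of such a configuration (illustrative values, not necessarily what this PR configures):

```toml
# Hypothetical pyproject.toml excerpt -- the keys mirror codespell's CLI flags.
[tool.codespell]
skip = "*.csv,*.json"            # globs codespell should leave alone
ignore-words-list = "nd,mapp"    # project-specific tokens that look like typos
```

Once the hook is installed, `pre-commit run codespell --all-files` checks the whole repository.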
2 changes: 1 addition & 1 deletion ATTRIBUTION.rst
@@ -99,7 +99,7 @@ to identify core issues and propose solutions to challenges around software cred
For software, current best practices (`e.g. <https://arxiv.org/pdf/2012.13117.pdf>`_) emphasize the importance of having a document
such as this one to describe an individual community's policies for credit, authorship, and attribution.
This document is an effort to describe icepyx's policies, with an awareness that they may change
-to accomodate community growth, best practices, and feedback.
+to accommodate community growth, best practices, and feedback.

We do not attempt to identify contribution levels through the number of commits made to the repository (e.g. ``git shortlog -sne``)
or active engagement on GitHub (e.g. through issues, discussions, and pull requests) and Discourse.
2 changes: 1 addition & 1 deletion doc/source/community/resources/2020_IS2_HW.rst
@@ -7,7 +7,7 @@ first virtual Hackweek held by the University of Washington.
While originally planned as a five-day, in-person workshop, the event was shifted to a fully virtual/remote setting in light of
stay-at-home orders and travel restrictions in place to curb the spread of COVID-19.

-To accomodate multiple time zones and limit the daily duration of online tutorial sessions, the event was spread out over the course of ten days.
+To accommodate multiple time zones and limit the daily duration of online tutorial sessions, the event was spread out over the course of ten days.
The first week had three half-days of interactive tutorials/lectures.
The second week had four days that included some interactive tutorials/lectures and scheduled times where instructors were
available to help participants with a facilitated exploration of datasets and hands-on software development.
2 changes: 1 addition & 1 deletion doc/source/contributing/icepyx_internals.rst
@@ -13,7 +13,7 @@ by any modules that need an Earthdata login.

**Property Access**

-Even though they aren't explicity defined in the init method, properties
+Even though they aren't explicitly defined in the init method, properties
like ``.session`` are accessible on a Query object because they are inherited. The
code that indicates this to Python is ``EarthdataAuthMixin.__init__(self)``.

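The inherited-property behavior this doc change describes can be illustrated with a minimal sketch (simplified stand-ins, not icepyx's actual implementation):

```python
class EarthdataAuthMixin:
    def __init__(self, auth=None):
        self._auth = auth
        self._session = None  # created lazily; see the property below

    @property
    def session(self):
        if self._session is None:
            self._session = "an-authenticated-session"  # stand-in object
        return self._session


class Query(EarthdataAuthMixin):
    def __init__(self):
        # Explicitly run the mixin's __init__ so its attributes exist on Query.
        EarthdataAuthMixin.__init__(self)


q = Query()
print(q.session)  # resolves via the mixin, even though Query never defines it
```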
14 changes: 7 additions & 7 deletions doc/source/example_notebooks/IS2_DEM_comparison_WIP.ipynb
@@ -81,7 +81,7 @@
"import shutil\n",
"import h5py\n",
"import xarray as xr\n",
-"# depedencies\n",
+"# dependencies\n",
"import getpass\n",
"#from topolib.subsetDat import subsetBBox;\n",
"from topolib import icesat2_data\n",
@@ -265,7 +265,7 @@
"metadata": {},
"source": [
"## Preprocess #2\n",
-"- Convert data into geopandas dataframe, which allows for doing basing geospatial opertaions"
+"- Convert data into geopandas dataframe, which allows for doing basing geospatial operations"
]
},
{
@@ -316,7 +316,7 @@
"metadata": {},
"outputs": [],
"source": [
-"# dict containing data entries to retrive\n",
+"# dict containing data entries to retrieve\n",
"dataset_dict = {'land_segments':['delta_time','longitude','latitude','atl06_quality_summary','quality','terrain_flg'], 'land_segments/terrain':['h_te_best_fit']}"
]
},
@@ -438,7 +438,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"## We will use the TANDEM-X Global DEM for our comparison. The resolution of the globally avaialable product is 90 m, with *horizontal* and *vertical* accuracy better than 2 to 3 m.\n",
+"## We will use the TANDEM-X Global DEM for our comparison. The resolution of the globally available product is 90 m, with *horizontal* and *vertical* accuracy better than 2 to 3 m.\n",
"- TANDEM-X DEM for the region was downloaded and preprocessed, filtered using scripts from the [tandemx](https://github.com/dshean/tandemx) repository"
]
},
@@ -741,7 +741,7 @@
"metadata": {},
"outputs": [],
"source": [
-"### Save the geodataframe in the specified way as expected by Ames Stereo Pipline\n",
+"### Save the geodataframe in the specified way as expected by Ames Stereo Pipeline\n",
"icesat2_pc = '/home/jovyan/icesat2/icesat2_colombia_pc.csv' \n",
"gdf_colombia_dem_extent[['latitude','longitude','h_te_best_fit']].to_csv(icesat2_pc,header=False,index=None)"
]
@@ -761,7 +761,7 @@
"metadata": {},
"outputs": [],
"source": [
-"### Save the geodataframe in the specified way as expected by Ames Stereo Pipline\n",
+"### Save the geodataframe in the specified way as expected by Ames Stereo Pipeline\n",
"icesat2_pc = '/home/jovyan/icesat2/icesat2_colombia_pc.csv'\n",
"pc_rename_dict = {'latitude':'lat','longitude':'lon','h_te_best_fit':'height_above_datum'}\n",
"gdf_colombia_dem_extent = gdf_colombia_dem_extent.rename(columns=pc_rename_dict)\n",
@@ -867,7 +867,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"- We see that after coregistration, the bias reduces to an extent. Note that this is a very preliminary analysis, results will be better after filtering the ATL08 points based on quality metrics and finding truly static surfaces (snow free during acquisiton time of ICESat-2 points)"
+"- We see that after coregistration, the bias reduces to an extent. Note that this is a very preliminary analysis, results will be better after filtering the ATL08 points based on quality metrics and finding truly static surfaces (snow free during acquisition time of ICESat-2 points)"
]
},
{
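The "convert data into geopandas dataframe" step corrected above typically looks something like the following sketch (hypothetical values; the column names echo the notebook's `latitude`/`longitude`/`h_te_best_fit`, but this is not the notebook's exact code):

```python
import geopandas as gpd
import pandas as pd

df = pd.DataFrame(
    {"longitude": [-73.5], "latitude": [4.6], "h_te_best_fit": [2500.0]}
)

# Build point geometries from the lon/lat columns so geospatial operations
# (clipping, reprojection, spatial joins) become available on the frame.
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df.longitude, df.latitude),
    crs="EPSG:4326",
)
print(gdf.total_bounds)
```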
2 changes: 1 addition & 1 deletion doc/source/example_notebooks/IS2_cloud_data_access.ipynb
@@ -352,7 +352,7 @@
"source": [
"### Some important caveats\n",
"\n",
-"While the cloud data reading is functional within icepyx, it is very slow. Approximate timing shows it takes ~6 minutes of load time per variable per file from s3. Because of this you will recieve a warning if you try to load either more than three variables or two files at once.\n",
+"While the cloud data reading is functional within icepyx, it is very slow. Approximate timing shows it takes ~6 minutes of load time per variable per file from s3. Because of this you will receive a warning if you try to load either more than three variables or two files at once.\n",
"\n",
"The slow load speed is a demonstration of the many steps involved in making cloud data actionable - the data supply chain needs optimized source data, efficient low level data readers, and high level libraries which are enabled to use the fastest low level data readers. Not all of these pieces fully developed right now, but the progress being made it exciting and there is lots of room for contribution!"
]
4 changes: 2 additions & 2 deletions doc/source/example_notebooks/IS2_data_access.ipynb
@@ -485,7 +485,7 @@
},
"source": [
"```{admonition} Important Authentication Update\n",
-"Previously, icepyx required you to explicitly use the `.earthdata_login()` function to login. Running this function is deprecated and will result in an error, as icepyx will call the login function as needed. The user will still need to provide their credentials using one of the three methods decribed above.\n",
+"Previously, icepyx required you to explicitly use the `.earthdata_login()` function to login. Running this function is deprecated and will result in an error, as icepyx will call the login function as needed. The user will still need to provide their credentials using one of the three methods described above.\n",
"```"
]
},
@@ -504,7 +504,7 @@
"\n",
"#### More details about the configuration parameters\n",
"`request_mode` is \"asynchronous\" by default, which allows concurrent requests to be queued and processed without the need for a continuous connection between you and the API endpoint.\n",
-"In contrast, using a \"synchronous\" `request_mode` means that the request relies on a direct, continous connection between you and the API endpoint.\n",
+"In contrast, using a \"synchronous\" `request_mode` means that the request relies on a direct, continuous connection between you and the API endpoint.\n",
"Outputs are directly downloaded, or \"streamed\", to your working directory.\n",
"For this tutorial, we will set the request mode to asynchronous.\n",
"\n",
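The asynchronous/synchronous distinction corrected above can be sketched generically — this is an illustration of the submit-then-poll pattern only; the function names and status string are placeholders, not icepyx's or the NSIDC API's actual interface:

```python
import time


def order_synchronously(submit):
    # One long-lived connection; the output streams back in the response.
    return submit()


def order_asynchronously(submit, poll_status, fetch_result, interval=10):
    # Submit the order, then poll until the queued request finishes, so no
    # continuous connection to the endpoint is required in the meantime.
    order_id = submit()
    while poll_status(order_id) != "complete":
        time.sleep(interval)
    return fetch_result(order_id)
```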
2 changes: 1 addition & 1 deletion doc/source/example_notebooks/IS2_data_read-in.ipynb
@@ -15,7 +15,7 @@
"\n",
"### Motivation\n",
"Most often, when you open a data file, you must specify the underlying data structure and how you'd like the information to be read in.\n",
-"A simple example of this, for instance when opening a csv or similarly delimited file, is letting the software know if the data contains a header row, what the data type is (string, double, float, boolean, etc.) for each column, what the delimeter is, and which columns or rows you'd like to be loaded.\n",
+"A simple example of this, for instance when opening a csv or similarly delimited file, is letting the software know if the data contains a header row, what the data type is (string, double, float, boolean, etc.) for each column, what the delimiter is, and which columns or rows you'd like to be loaded.\n",
"Many ICESat-2 data readers are quite manual in nature, requiring that you accurately type out a list of string paths to the various data variables.\n",
"\n",
"icepyx simplifies this process by relying on its awareness of ICESat-2 specific data file variable storage structure.\n",
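The "manual" reader pattern this notebook text contrasts with icepyx might look like this with pandas (hypothetical data and column names, chosen just to show the header/dtype/delimiter/column specification):

```python
import io

import pandas as pd

csv_text = "lat,lon,height\n69.9,-49.3,1203.5\n"  # stand-in for a real file

df = pd.read_csv(
    io.StringIO(csv_text),                       # any file path works here
    sep=",",                                     # the delimiter
    header=0,                                    # first row holds column names
    usecols=["lat", "lon", "height"],            # which columns to load
    dtype={"lat": float, "lon": float, "height": float},
)
print(df.dtypes)
```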
6 changes: 3 additions & 3 deletions doc/source/example_notebooks/IS2_data_variables.ipynb
@@ -278,7 +278,7 @@
"user_expressions": []
},
"source": [
-"Now that you know how to create or access Variables the remainder of this notebook showcases the functions availble for building and modifying variables lists. Remember, the example shown below uses a Query object, but the same methods are available if you are using a Read object or a Variables object."
+"Now that you know how to create or access Variables the remainder of this notebook showcases the functions available for building and modifying variables lists. Remember, the example shown below uses a Query object, but the same methods are available if you are using a Read object or a Variables object."
]
},
{
@@ -360,7 +360,7 @@
"### ICESat-2 data variables\n",
"\n",
"ICESat-2 data is natively stored in a nested file format called hdf5.\n",
-"Much like a directory-file system on a computer, each variable (file) has a unique path through the heirarchy (directories) within the file.\n",
+"Much like a directory-file system on a computer, each variable (file) has a unique path through the hierarchy (directories) within the file.\n",
"Thus, some variables (e.g. `'latitude'`, `'longitude'`) have multiple paths (one for each of the six beams in most products).\n",
"\n",
"#### Determine what variables are available\n",
@@ -887,7 +887,7 @@
"source": [
"#### Example 2.8: add all possible values for variables+paths\n",
"Append all `longitude` paths and all variables/paths with keyword `high_rate`.\n",
-"Simlarly to what is shown in Example 4, if you submit only one `append` call as `region_a.order_vars.append(var_list=['longitude'], keyword_list=['high_rate'])` rather than the two `append` calls shown below, you will only add the variable `longitude` and only paths containing `high_rate`, not ALL paths for `longitude` and ANY variables with `high_rate` in their path."
+"Similarly to what is shown in Example 4, if you submit only one `append` call as `region_a.order_vars.append(var_list=['longitude'], keyword_list=['high_rate'])` rather than the two `append` calls shown below, you will only add the variable `longitude` and only paths containing `high_rate`, not ALL paths for `longitude` and ANY variables with `high_rate` in their path."
]
},
{
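Spelled out, the distinction in Example 2.8 looks like this (assuming `region_a` is a Query object, as elsewhere in the notebook):

```python
# Two separate calls: add ALL paths for `longitude`, plus ANY variable whose
# path contains `high_rate`.
region_a.order_vars.append(var_list=['longitude'])
region_a.order_vars.append(keyword_list=['high_rate'])

# One combined call: adds only `longitude` variables, and only on paths
# containing `high_rate`.
region_a.order_vars.append(var_list=['longitude'], keyword_list=['high_rate'])
```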
2 changes: 1 addition & 1 deletion doc/source/example_notebooks/IS2_data_visualization.ipynb
@@ -145,7 +145,7 @@
"\n",
"**Note: this function currently only supports products `ATL06, ATL07, ATL08, ATL10, ATL12, ATL13`**\n",
"\n",
-"Now that we have produced an interactive map showing the spatial extent of ICESat-2 data to be requested from NSIDC using icepyx, what if we want to have a quick check on the ICESat-2 elevations we plan to download from NSIDC? [OpenAltimetry API](https://openaltimetry.org/data/swagger-ui/#/) provides a nice way to achieve this. By sending metadata (product, date, bounding box, trackId) of each ICESat-2 file to the API, it can return elevation data almost instantaneously. The major drawback is requests are limited to 5x5 degree spatial bounding box selection for most of the ICESat-2 L3A products [ATL06, ATL07, ATL08, ATL10, ATL12, ATL13](https://icesat-2.gsfc.nasa.gov/science/data-products). To solve this issue, if you input spatial extent exceeds the 5 degree maximum in either horizontal dimension, your input spatial extent will be splited into 5x5 degree lat/lon grids first, use icepyx to query the metadata of ICESat-2 files located in each grid, and send each request to OpenAltimetry. Data sampling rates are 1/50 for ATL06 and 1/20 for other products.\n",
+"Now that we have produced an interactive map showing the spatial extent of ICESat-2 data to be requested from NSIDC using icepyx, what if we want to have a quick check on the ICESat-2 elevations we plan to download from NSIDC? [OpenAltimetry API](https://openaltimetry.org/data/swagger-ui/#/) provides a nice way to achieve this. By sending metadata (product, date, bounding box, trackId) of each ICESat-2 file to the API, it can return elevation data almost instantaneously. The major drawback is requests are limited to 5x5 degree spatial bounding box selection for most of the ICESat-2 L3A products [ATL06, ATL07, ATL08, ATL10, ATL12, ATL13](https://icesat-2.gsfc.nasa.gov/science/data-products). To solve this issue, if you input spatial extent exceeds the 5 degree maximum in either horizontal dimension, your input spatial extent will be split into 5x5 degree lat/lon grids first, use icepyx to query the metadata of ICESat-2 files located in each grid, and send each request to OpenAltimetry. Data sampling rates are 1/50 for ATL06 and 1/20 for other products.\n",
"\n",
"There are multiple ways to access icepyx's visualization module. This option assumes you are visualizing the data as part of a workflow that will result in a data download. Alternative options for accessing the OpenAltimetry-based visualization module directly are provided at the end of this example."
]
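The 5x5-degree splitting described in the text above can be sketched as a simple axis-aligned tiling (an illustration of the idea, not the visualization module's actual code):

```python
# Divide a large bounding box into <=5-degree lon/lat tiles, clipping the
# final row/column so the tiles never extend past the requested extent.
def split_bbox(lon_min, lat_min, lon_max, lat_max, step=5):
    tiles = []
    lon = lon_min
    while lon < lon_max:
        lat = lat_min
        while lat < lat_max:
            tiles.append(
                (lon, lat, min(lon + step, lon_max), min(lat + step, lat_max))
            )
            lat += step
        lon += step
    return tiles


print(split_bbox(-110, 30, -98, 42))  # a 12x12-degree box -> nine 4-5 degree tiles
```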
6 changes: 3 additions & 3 deletions icepyx/core/APIformatting.py
@@ -243,7 +243,7 @@ def poss_keys(self):
@property
def fmted_keys(self):
"""
-Returns the dictionary of formated keys associated with the
+Returns the dictionary of formatted keys associated with the
parameter object.
"""
return self._fmted_keys
@@ -320,7 +320,7 @@ def check_req_values(self):
if all(keys in self.fmted_keys.keys() for keys in reqkeys):
assert all(
self.fmted_keys.get(key, -9999) != -9999 for key in reqkeys
-), "One of your formated parameters is missing a value"
+), "One of your formatted parameters is missing a value"
return True
else:
return False
@@ -340,7 +340,7 @@ def check_values(self):
if any(keys in self._fmted_keys.keys() for keys in spatial_keys):
assert any(
self.fmted_keys.get(key, -9999) != -9999 for key in spatial_keys
-), "One of your formated parameters is missing a value"
+), "One of your formatted parameters is missing a value"
return True
else:
return False
2 changes: 1 addition & 1 deletion icepyx/core/auth.py
@@ -49,7 +49,7 @@ class EarthdataAuthMixin:

def __init__(self, auth=None):
self._auth = copy.deepcopy(auth)
-# initializatin of session and s3 creds is not allowed because those are generated
+# initialization of session and s3 creds is not allowed because those are generated
# from the auth object
self._session = None
self._s3login_credentials = None
4 changes: 2 additions & 2 deletions icepyx/core/query.py
@@ -450,7 +450,7 @@ def __str__(self):
@property
def dataset(self):
"""
-Legacy property included to provide depracation warning.
+Legacy property included to provide deprecation warning.

See Also
--------
@@ -722,7 +722,7 @@ def order_vars(self):
@property
def granules(self):
"""
-Return the granules object, which provides the underlying funtionality for searching, ordering,
+Return the granules object, which provides the underlying functionality for searching, ordering,
and downloading granules for the specified product.
Users are encouraged to use the built-in wrappers
rather than trying to access the granules object themselves.
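The legacy-property pattern this docstring refers to generally looks like the sketch below. It is illustrative only: the assumption that the replacement attribute is named `product` comes from icepyx's docs, and the real property may raise rather than return.

```python
import warnings


class Query:
    def __init__(self, product):
        self.product = product

    @property
    def dataset(self):
        # Keep the old name working while nudging users toward the new one.
        warnings.warn(
            "`dataset` is deprecated; use `product` instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.product
```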
2 changes: 1 addition & 1 deletion icepyx/core/read.py
@@ -164,7 +164,7 @@ def _confirm_proceed():
class Read(EarthdataAuthMixin):
"""
Data object to read ICESat-2 data into the specified formats.
-Provides flexiblity for reading nested hdf5 files into common analysis formats.
+Provides flexibility for reading nested hdf5 files into common analysis formats.

Parameters
----------
8 changes: 4 additions & 4 deletions icepyx/core/variables.py
@@ -437,7 +437,7 @@ def append(self, defaults=False, var_list=None, beam_list=None, keyword_list=Non
----------
defaults : boolean, default False
Include the variables in the default variable list. Defaults are defined per-data product.
-When specified in conjuction with a var_list, default variables not on the user-
+When specified in conjunction with a var_list, default variables not on the user-
specified list will be added to the order.

var_list : list of strings, default None
@@ -451,8 +451,8 @@ def append(self, defaults=False, var_list=None, beam_list=None, keyword_list=Non
For all other products, acceptable values are ['gt1l', 'gt1r', 'gt2l', 'gt2r', 'gt3l', 'gt3r'].

keyword_list : list of strings, default None
-A list of subdirectory names (keywords), from any heirarchy level within the data structure, to select variables within
-the product that include that keyword in their path. A list of availble keywords can be obtained by
+A list of subdirectory names (keywords), from any hierarchy level within the data structure, to select variables within
+the product that include that keyword in their path. A list of available keywords can be obtained by
entering `keyword_list=['']` into the function.

Notes
@@ -545,7 +545,7 @@ def remove(self, all=False, var_list=None, beam_list=None, keyword_list=None):
For all other products, acceptable values are ['gt1l', 'gt1r', 'gt2l', 'gt2r', 'gt3l', 'gt3r'].

keyword_list : list of strings, default None
-A list of subdirectory names (keywords), from any heirarchy level within the data structure, to select variables within
+A list of subdirectory names (keywords), from any hierarchy level within the data structure, to select variables within
the product that include that keyword in their path.

Notes
2 changes: 1 addition & 1 deletion icepyx/quest/quest.py
@@ -157,7 +157,7 @@ def add_argo(self, params=["temperature"], presRange=None) -> None:
# error handling? what happens when the user tries to re-query?
def search_all(self, **kwargs):
"""
-Searches for requred dataset within platform (i.e. ICESat-2, Argo) of interest.
+Searches for required dataset within platform (i.e. ICESat-2, Argo) of interest.

Parameters
----------
2 changes: 1 addition & 1 deletion icepyx/tests/test_granules.py
@@ -30,7 +30,7 @@
# region_a.avail_granules(ids=True)

# add test that s3urls are gotten for ALL products (e.g. ATL15 was failing
-# due to .nc extention instead of .h5))
+# due to .nc extension instead of .h5))


# DevNote: clearly there's a better way that doesn't make the function so long...
2 changes: 1 addition & 1 deletion icepyx/tests/test_quest.py
@@ -15,7 +15,7 @@ def quest_instance(scope="module", autouse=True):
########## PER-DATASET ADDITION TESTS ##########


-# Paramaterize these add_dataset tests once more datasets are added
+# Parameterize these add_dataset tests once more datasets are added
def test_add_is2(quest_instance):
# Add ATL06 as a test to QUEST

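The parametrization that comment anticipates would typically use `pytest.mark.parametrize`, sketched below. The method names and argument sets are placeholders (only `add_argo(params=["temperature"])` is taken from the diff above; `add_icesat2`'s signature and the `datasets` attribute are assumptions):

```python
import pytest


@pytest.mark.parametrize(
    "add_method,kwargs",
    [
        ("add_icesat2", {"product": "ATL06"}),       # assumed signature
        ("add_argo", {"params": ["temperature"]}),   # mirrors add_argo above
    ],
)
def test_add_dataset(quest_instance, add_method, kwargs):
    # Look up the per-dataset add method on the shared QUEST fixture and call it.
    getattr(quest_instance, add_method)(**kwargs)
    assert add_method.removeprefix("add_") in quest_instance.datasets  # assumed attribute
```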