diff --git a/README.md b/README.md index d46b948..c53b08e 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ approach to assessing the likelihood of extreme events. #### Dependencies ``` -$ conda install xarray dask dask-jobqueue zarr pyyaml cmdline_provenance gitpython geopandas regionmask +$ conda install xarray dask dask-jobqueue zarr pyyaml cmdline_provenance gitpython geopandas regionmask xclim ``` #### Reference diff --git a/notebooks/preprocess.ipynb b/notebooks/preprocess.ipynb index b6542b0..b136032 100644 --- a/notebooks/preprocess.ipynb +++ b/notebooks/preprocess.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "730db7d9", "metadata": {}, "outputs": [], @@ -11,6 +11,7 @@ "sys.path.append('../unseen')\n", "\n", "from dask.distributed import Client, LocalCluster\n", + "import xclim\n", "\n", "import myfuncs\n", "import indices" @@ -18,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "9b47934e", "metadata": {}, "outputs": [], @@ -29,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "9b3140c1", "metadata": {}, "outputs": [ @@ -41,7 +42,7 @@ "\n", "

Client

\n", "\n", "\n", @@ -57,10 +58,10 @@ "" ], "text/plain": [ - "" + "" ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -71,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 7, "id": "17f50e50", "metadata": {}, "outputs": [ @@ -79,7 +80,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Tue Jun 22 17:03:19 2021: /g/data/e14/dbi599/miniconda3/envs/unseen/bin/jupyter notebook /home/599/dbi599/unseen/notebooks/preprocess.ipynb\n" + "Thu Jun 24 12:05:30 2021: /g/data/e14/dbi599/miniconda3/envs/unseen/bin/jupyter notebook /home/599/dbi599/unseen/notebooks/preprocess.ipynb\n" ] } ], @@ -98,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "926d464a", "metadata": {}, "outputs": [], @@ -109,12 +110,12 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "cd03403a", "metadata": {}, "outputs": [], "source": [ - "cafe_kwargs = {'metadata_file': '../config/cafe.yml',\n", + "cafe_kwargs = {'metadata_file': '../config/dataset_cafe.yml',\n", " 'no_leap_days': True,\n", " 'region': 'TAS-POINT',\n", " 'variables': ['pr'],\n", @@ -124,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "50ff756b", "metadata": {}, "outputs": [], @@ -134,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "id": "ef43e2a0", "metadata": {}, "outputs": [ @@ -500,27 +501,27 @@ " * init_date (init_date) datetime64[ns] 1991-11-01 1992-11-01\n", " time (lead_time, init_date) datetime64[ns] 1991-11-01 ... 2002-10-29\n", "Data variables:\n", - " pr (init_date, lead_time, ensemble) float64 dask.array<chunksize=(1, 28, 96), meta=np.ndarray>\n", + " pr (init_date, lead_time, ensemble) float32 dask.array<chunksize=(1, 28, 96), meta=np.ndarray>\n", "Attributes:\n", " comment: pressure level interpolator, version 3.0, precision=double\n", " filename: atmos_isobaric_daily.zarr\n", " grid_tile: N/A\n", " grid_type: regular\n", " title: AccessOcean-AM2\n", - " history: Tue Jun 22 17:00:09 2021: /g/data/e14/dbi599/miniconda3/envs/...
  • comment :
    pressure level interpolator, version 3.0, precision=double
    filename :
    atmos_isobaric_daily.zarr
    grid_tile :
    N/A
    grid_type :
    regular
    title :
    AccessOcean-AM2
    history :
    Thu Jun 24 11:20:51 2021: /g/data/e14/dbi599/miniconda3/envs/unseen/bin/jupyter notebook /home/599/dbi599/unseen/notebooks/preprocess.ipynb
  • " ], "text/plain": [ "\n", @@ -627,17 +628,17 @@ " * init_date (init_date) datetime64[ns] 1991-11-01 1992-11-01\n", " time (lead_time, init_date) datetime64[ns] 1991-11-01 ... 2002-10-29\n", "Data variables:\n", - " pr (init_date, lead_time, ensemble) float64 dask.array\n", + " pr (init_date, lead_time, ensemble) float32 dask.array\n", "Attributes:\n", " comment: pressure level interpolator, version 3.0, precision=double\n", " filename: atmos_isobaric_daily.zarr\n", " grid_tile: N/A\n", " grid_type: regular\n", " title: AccessOcean-AM2\n", - " history: Tue Jun 22 17:00:09 2021: /g/data/e14/dbi599/miniconda3/envs/..." + " history: Thu Jun 24 11:20:51 2021: /g/data/e14/dbi599/miniconda3/envs/..." ] }, - "execution_count": 10, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -650,7 +651,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "id": "1b48837a", "metadata": {}, "outputs": [ @@ -1009,7 +1010,7 @@ " fill: currentColor;\n", "}\n", "
    <xarray.DataArray 'pr' (init_date: 2, lead_time: 3650, ensemble: 96)>\n",
    -       "dask.array<concatenate, shape=(2, 3650, 96), dtype=float64, chunksize=(1, 28, 96), chunktype=numpy.ndarray>\n",
    +       "dask.array<concatenate, shape=(2, 3650, 96), dtype=float32, chunksize=(1, 28, 96), chunktype=numpy.ndarray>\n",
            "Coordinates:\n",
            "  * ensemble   (ensemble) int64 1 2 3 4 5 6 7 8 9 ... 88 89 90 91 92 93 94 95 96\n",
            "  * lead_time  (lead_time) int64 0 1 2 3 4 5 6 ... 3644 3645 3646 3647 3648 3649\n",
    @@ -1020,7 +1021,7 @@
            "    interp_method:  conserve_order1\n",
            "    long_name:      Total precipitation rate\n",
            "    time_avg_info:  average_T1,average_T2,average_DT\n",
    -       "    units:          kg/m2/s
    " + " dtype='datetime64[ns]')
  • cell_methods :
    time: mean
    interp_method :
    conserve_order1
    long_name :
    Total precipitation rate
    time_avg_info :
    average_T1,average_T2,average_DT
    units :
    kg/m2/s
  • " ], "text/plain": [ "\n", - "dask.array\n", + "dask.array\n", "Coordinates:\n", " * ensemble (ensemble) int64 1 2 3 4 5 6 7 8 9 ... 88 89 90 91 92 93 94 95 96\n", " * lead_time (lead_time) int64 0 1 2 3 4 5 6 ... 3644 3645 3646 3647 3648 3649\n", @@ -1147,7 +1148,7 @@ " units: kg/m2/s" ] }, - "execution_count": 11, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -1158,7 +1159,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 19, "id": "c7733f60", "metadata": {}, "outputs": [], @@ -1168,7 +1169,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 20, "id": "faa22b55", "metadata": {}, "outputs": [ @@ -1527,7 +1528,7 @@ " fill: currentColor;\n", "}\n", "
    <xarray.DataArray 'pr' (init_date: 2, lead_time: 3650, ensemble: 96)>\n",
    -       "dask.array<rechunk-merge, shape=(2, 3650, 96), dtype=float64, chunksize=(1, 50, 96), chunktype=numpy.ndarray>\n",
    +       "dask.array<mul, shape=(2, 3650, 96), dtype=float32, chunksize=(1, 50, 96), chunktype=numpy.ndarray>\n",
            "Coordinates:\n",
            "  * ensemble   (ensemble) int64 1 2 3 4 5 6 7 8 9 ... 88 89 90 91 92 93 94 95 96\n",
            "  * lead_time  (lead_time) int64 0 1 2 3 4 5 6 ... 3644 3645 3646 3647 3648 3649\n",
    @@ -1538,7 +1539,7 @@
            "    interp_method:  conserve_order1\n",
            "    long_name:      Total precipitation rate\n",
            "    time_avg_info:  average_T1,average_T2,average_DT\n",
    -       "    units:          kg/m2/s
    " ], "text/plain": [ "\n", - "dask.array\n", + "dask.array\n", "Coordinates:\n", " * ensemble (ensemble) int64 1 2 3 4 5 6 7 8 9 ... 88 89 90 91 92 93 94 95 96\n", " * lead_time (lead_time) int64 0 1 2 3 4 5 6 ... 3644 3645 3646 3647 3648 3649\n", @@ -1708,10 +1709,10 @@ " interp_method: conserve_order1\n", " long_name: Total precipitation rate\n", " time_avg_info: average_T1,average_T2,average_DT\n", - " units: kg/m2/s" + " units: mm d-1" ] }, - "execution_count": 13, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1722,7 +1723,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 21, "id": "282ca523", "metadata": {}, "outputs": [], @@ -1740,7 +1741,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 4, "id": "1524f430", "metadata": {}, "outputs": [], @@ -1751,12 +1752,12 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 5, "id": "65f4bb78", "metadata": {}, "outputs": [], "source": [ - "awap_kwargs = {'metadata_file': '../config/awap.yml',\n", + "awap_kwargs = {'metadata_file': '../config/dataset_awap.yml',\n", " 'no_leap_days': True,\n", " 'region': 'TAS-POINT',\n", " 'variables': ['pr'],\n", @@ -1766,7 +1767,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 8, "id": "9a100cce", "metadata": {}, "outputs": [ @@ -2131,14 +2132,14 @@ "Data variables:\n", " pr (time) float64 dask.array<chunksize=(44136,), meta=np.ndarray>\n", "Attributes:\n", - " history: Tue Jun 22 17:00:09 2021: /g/data/e14/dbi599/miniconda3/envs/un...
  • analysis_time :
    2010-10-23 14:49:46 UTC
    analysis_version_number :
    3.01
    cell_methods :
    time: sum
    frequency :
    daily
    grid_mapping :
    crs
    length_scale_for_analysis :
    80.0
    long_name :
    Daily precipitation
    number_of_stations_reporting :
    2757
    source :
    Interpolated surface observations
    standard_name :
    lwe_thickness_of_precipitation_amount
    units :
    mm d-1
    valid_range :
    [-1.0, 100000.0]
  • " ], "text/plain": [ "\n", @@ -2633,11 +2634,11 @@ " number_of_stations_reporting: 2757\n", " source: Interpolated surface observations\n", " standard_name: lwe_thickness_of_precipitation_amount\n", - " units: mm\n", + " units: mm d-1\n", " valid_range: [-1.0, 100000.0]" ] }, - "execution_count": 20, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } diff --git a/unseen/myfuncs.py b/unseen/myfuncs.py index 73da253..cb5c5e9 100644 --- a/unseen/myfuncs.py +++ b/unseen/myfuncs.py @@ -18,6 +18,7 @@ import geopandas as gp import regionmask import cmdline_provenance as cmdprov +import xclim ## Miscellanous utilities @@ -45,35 +46,20 @@ def __call__(self, parser, namespace, values, option_string=None): def convert_units(da, target_units): - """Convert kg m-2 s-1 to mm day-1. + """Convert units. Args: - da (xarray DataArray): Precipitation data + da (xarray DataArray) + target_units (str) """ - #TODO: Consider using the pint-xarray package for unit conversion - - xr.set_options(keep_attrs=True) - error_msg = f'{da.units} to {target_units} conversion not supported' - - if da.units == target_units: - pass - elif da.units in ['kg m-2 s-1', 'kg/m2/s']: - assert target_units == 'mm/day', error_msg - da = da * 86400 - elif da.units == 'mm': - assert target_units == 'mm/day', error_msg - elif da.units in ['K', 'deg_k']: - assert target_units == 'C', error_msg - da = da - 273.15 - elif da.units == 'm/s': - assert target_units == 'km/h', error_msg - da = da * 3.6 - elif da.units == 'percent': - assert target_units == '%' - else: - raise ValueError(f"Unrecognised input units: {da.units}") - da.attrs['units'] == target_units + xclim_unit_check = {'deg_k': 'degK', + 'mm' : 'mm d-1' + } + if da.units in xclim_unit_check: + da.attrs['units'] = xclim_unit_check[da.units] + + da = xclim.units.convert_units_to(da, target_units) return da