From 1893b20a7b755004f561037161c242db24e2870c Mon Sep 17 00:00:00 2001 From: "Adam R. Jensen" <39184289+AdamRJensen@users.noreply.github.com> Date: Mon, 14 Mar 2022 18:37:58 +0100 Subject: [PATCH] Add variable mapping of psm3 (#1374) * Add variable mapping of psm3 * Add enhancement entry in whatsnew * Fix stickler * Map keys in metadata dict * Remove double spaces in docs * Fix stickler * Doc update Co-authored-by: Kevin Anderson <57452607+kanderso-nrel@users.noreply.github.com> * Reformatting - changes by kanderso-nrel * Update docstring table with 2020 * Add deprecation warning test coverage * Rename to VARIABLE_MAP * Change apparent_zenith to solar_zenith Based on the decision in #1403 * Update attributes docstring * Change elevation to altitude when mapping variables * Update psm3 variable mapping test Co-authored-by: Kevin Anderson <57452607+kanderso-nrel@users.noreply.github.com> --- docs/sphinx/source/whatsnew/v0.9.1.rst | 2 + pvlib/iotools/psm3.py | 79 +++++++++++++++++++++----- pvlib/tests/iotools/test_psm3.py | 56 +++++++++++++++--- 3 files changed, 115 insertions(+), 22 deletions(-) diff --git a/docs/sphinx/source/whatsnew/v0.9.1.rst b/docs/sphinx/source/whatsnew/v0.9.1.rst index 685a7880a4..8aa1abb71f 100644 --- a/docs/sphinx/source/whatsnew/v0.9.1.rst +++ b/docs/sphinx/source/whatsnew/v0.9.1.rst @@ -19,6 +19,8 @@ Deprecations Enhancements ~~~~~~~~~~~~ +* Added ``map_variables`` option to :py:func:`pvlib.iotools.get_psm3` and + :py:func:`pvlib.iotools.read_psm3` (:pull:`1374`) * Added `pvlib.bifacial.infinite_sheds`, containing a model for irradiance on front and back surfaces of bifacial arrays. (:pull:`717`) * Added ``map_variables`` option to :func:`~pvlib.iotools.read_crn` (:pull:`1368`) diff --git a/pvlib/iotools/psm3.py b/pvlib/iotools/psm3.py index 10ff919c56..a8f9781c22 100644 --- a/pvlib/iotools/psm3.py +++ b/pvlib/iotools/psm3.py @@ -1,4 +1,3 @@ - """ Get PSM3 TMY see https://developer.nrel.gov/docs/solar/nsrdb/psm3_data_download/ @@ -8,6 +7,8 @@ import requests import pandas as pd from json import JSONDecodeError +import warnings +from pvlib._deprecation import pvlibDeprecationWarning NSRDB_API_BASE = "https://developer.nrel.gov" PSM_URL = NSRDB_API_BASE + "/api/nsrdb/v2/solar/psm3-download.csv" @@ -20,12 +21,31 @@ 'surface_pressure', 'wind_direction', 'wind_speed') PVLIB_PYTHON = 'pvlib python' +# Dictionary mapping PSM3 names to pvlib names +VARIABLE_MAP = { + 'GHI': 'ghi', + 'DHI': 'dhi', + 'DNI': 'dni', + 'Clearsky GHI': 'ghi_clear', + 'Clearsky DHI': 'dhi_clear', + 'Clearsky DNI': 'dni_clear', + 'Solar Zenith Angle': 'solar_zenith', + 'Temperature': 'temp_air', + 'Relative Humidity': 'relative_humidity', + 'Dew point': 'temp_dew', + 'Pressure': 'pressure', + 'Wind Direction': 'wind_direction', + 'Wind Speed': 'wind_speed', + 'Surface Albedo': 'albedo', + 'Precipitable Water': 'precipitable_water', +} + def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60, attributes=ATTRIBUTES, leap_day=False, full_name=PVLIB_PYTHON, - affiliation=PVLIB_PYTHON, timeout=30): + affiliation=PVLIB_PYTHON, map_variables=None, timeout=30): """ - Retrieve NSRDB PSM3 timeseries weather data from the PSM3 API. The NSRDB + Retrieve NSRDB PSM3 timeseries weather data from the PSM3 API. The NSRDB is described in [1]_ and the PSM3 API is described in [2]_, [3]_, and [4]_. .. versionchanged:: 0.9.0 @@ -48,19 +68,23 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60, PSM3 API parameter specifing year or TMY variant to download, see notes below for options interval : int, {60, 5, 15, 30} - interval size in minutes, must be 5, 15, 30 or 60. Only used for + interval size in minutes, must be 5, 15, 30 or 60. Only used for single-year requests (i.e., it is ignored for tmy/tgy/tdy requests). attributes : list of str, optional meteorological fields to fetch. If not specified, defaults to ``pvlib.iotools.psm3.ATTRIBUTES``. See references [2]_, [3]_, and [4]_ - for lists of available fields. + for lists of available fields. Alternatively, pvlib names may also be + used (e.g. 'ghi' rather than 'GHI'); see :const:`VARIABLE_MAP`. leap_day : boolean, default False - include leap day in the results. Only used for single-year requests + include leap day in the results. Only used for single-year requests (i.e., it is ignored for tmy/tgy/tdy requests). full_name : str, default 'pvlib python' optional affiliation : str, default 'pvlib python' optional + map_variables: boolean, optional + When true, renames columns of the Dataframe to pvlib variable names + where applicable. See variable :const:`VARIABLE_MAP`. timeout : int, default 30 time in seconds to wait for server response before timeout @@ -96,14 +120,15 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60, +===========+=============================================================+ | Year | 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, | | | 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, | - | | 2018, 2019 | + | | 2018, 2019, 2020 | +-----------+-------------------------------------------------------------+ | TMY | tmy, tmy-2016, tmy-2017, tdy-2017, tgy-2017, | | | tmy-2018, tdy-2018, tgy-2018, tmy-2019, tdy-2019, tgy-2019 | + | | tmy-2020, tdy-2020, tgy-2020 | +-----------+-------------------------------------------------------------+ .. warning:: PSM3 is limited to data found in the NSRDB, please consult the - references below for locations with available data. Additionally, + references below for locations with available data. Additionally, querying data with < 30-minute resolution uses a different API endpoint with fewer available fields (see [4]_). @@ -133,6 +158,13 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60, # convert to string to accomodate integer years being passed in names = str(names) + # convert pvlib names in attributes to psm3 convention (reverse mapping) + # unlike psm3 columns, attributes are lower case and with underscores + amap = {value: key.lower().replace(' ', '_') for (key, value) in + VARIABLE_MAP.items()} + attributes = [amap.get(a, a) for a in attributes] + attributes = list(set(attributes)) # remove duplicate values + # required query-string parameters for request to PSM3 API params = { 'api_key': api_key, @@ -167,12 +199,12 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60, # the CSV is in the response content as a UTF-8 bytestring # to use pandas we need to create a file buffer from the response fbuf = io.StringIO(response.content.decode('utf-8')) - return parse_psm3(fbuf) + return parse_psm3(fbuf, map_variables) -def parse_psm3(fbuf): +def parse_psm3(fbuf, map_variables=None): """ - Parse an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB + Parse an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB is described in [1]_ and the SAM CSV format is described in [2]_. .. versionchanged:: 0.9.0 @@ -184,6 +216,9 @@ def parse_psm3(fbuf): ---------- fbuf: file-like object File-like object containing data to read. + map_variables: bool + When true, renames columns of the Dataframe to pvlib variable names + where applicable. See variable VARIABLE_MAP. Returns ------- @@ -296,12 +331,25 @@ def parse_psm3(fbuf): tz = 'Etc/GMT%+d' % -metadata['Time Zone'] data.index = pd.DatetimeIndex(dtidx).tz_localize(tz) + if map_variables is None: + warnings.warn( + 'PSM3 variable names will be renamed to pvlib conventions by ' + 'default starting in pvlib 0.11.0. Specify map_variables=True ' + 'to enable that behavior now, or specify map_variables=False ' + 'to hide this warning.', pvlibDeprecationWarning) + map_variables = False + if map_variables: + data = data.rename(columns=VARIABLE_MAP) + metadata['latitude'] = metadata.pop('Latitude') + metadata['longitude'] = metadata.pop('Longitude') + metadata['altitude'] = metadata.pop('Elevation') + return data, metadata -def read_psm3(filename): +def read_psm3(filename, map_variables=None): """ - Read an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB + Read an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB is described in [1]_ and the SAM CSV format is described in [2]_. .. versionchanged:: 0.9.0 @@ -313,6 +361,9 @@ def read_psm3(filename): ---------- filename: str Filename of a file containing data to read. + map_variables: bool + When true, renames columns of the Dataframe to pvlib variable names + where applicable. See variable VARIABLE_MAP. Returns ------- @@ -334,5 +385,5 @@ def read_psm3(filename): `_ """ with open(str(filename), 'r') as fbuf: - content = parse_psm3(fbuf) + content = parse_psm3(fbuf, map_variables) return content diff --git a/pvlib/tests/iotools/test_psm3.py b/pvlib/tests/iotools/test_psm3.py index 92451a23f1..2c20155cdd 100644 --- a/pvlib/tests/iotools/test_psm3.py +++ b/pvlib/tests/iotools/test_psm3.py @@ -4,13 +4,14 @@ import os from pvlib.iotools import psm3 -from ..conftest import DATA_DIR, RERUNS, RERUNS_DELAY +from ..conftest import DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal import numpy as np import pandas as pd import pytest from requests import HTTPError from io import StringIO import warnings +from pvlib._deprecation import pvlibDeprecationWarning TMY_TEST_DATA = DATA_DIR / 'test_psm3_tmy-2017.csv' YEAR_TEST_DATA = DATA_DIR / 'test_psm3_2017.csv' @@ -76,7 +77,8 @@ def assert_psm3_equal(data, metadata, expected): def test_get_psm3_tmy(nrel_api_key): """test get_psm3 with a TMY""" data, metadata = psm3.get_psm3(LATITUDE, LONGITUDE, nrel_api_key, - PVLIB_EMAIL, names='tmy-2017') + PVLIB_EMAIL, names='tmy-2017', + map_variables=False) expected = pd.read_csv(TMY_TEST_DATA) assert_psm3_equal(data, metadata, expected) @@ -86,7 +88,8 @@ def test_get_psm3_tmy(nrel_api_key): def test_get_psm3_singleyear(nrel_api_key): """test get_psm3 with a single year""" data, metadata = psm3.get_psm3(LATITUDE, LONGITUDE, nrel_api_key, - PVLIB_EMAIL, names='2017', interval=30) + PVLIB_EMAIL, names='2017', + map_variables=False, interval=30) expected = pd.read_csv(YEAR_TEST_DATA) assert_psm3_equal(data, metadata, expected) @@ -96,7 +99,8 @@ def test_get_psm3_singleyear(nrel_api_key): def test_get_psm3_5min(nrel_api_key): """test get_psm3 for 5-minute data""" data, metadata = psm3.get_psm3(LATITUDE, LONGITUDE, nrel_api_key, - PVLIB_EMAIL, names='2019', interval=5) + PVLIB_EMAIL, names='2019', interval=5, + map_variables=False) assert len(data) == 525600/5 first_day = data.loc['2019-01-01'] expected = pd.read_csv(YEAR_TEST_DATA_5MIN) @@ -108,7 +112,7 @@ def test_get_psm3_5min(nrel_api_key): def test_get_psm3_check_leap_day(nrel_api_key): data_2012, _ = psm3.get_psm3(LATITUDE, LONGITUDE, nrel_api_key, PVLIB_EMAIL, names="2012", interval=60, - leap_day=True) + leap_day=True, map_variables=False) assert len(data_2012) == (8760 + 24) @@ -133,7 +137,7 @@ def test_get_psm3_tmy_errors( """ with pytest.raises(HTTPError) as excinfo: psm3.get_psm3(latitude, longitude, api_key, PVLIB_EMAIL, - names=names, interval=interval) + names=names, interval=interval, map_variables=False) # ensure the HTTPError caught isn't due to overuse of the API key assert "OVER_RATE_LIMIT" not in str(excinfo.value) @@ -149,13 +153,49 @@ def io_input(request): def test_parse_psm3(io_input): """test parse_psm3""" - data, metadata = psm3.parse_psm3(io_input) + data, metadata = psm3.parse_psm3(io_input, map_variables=False) expected = pd.read_csv(YEAR_TEST_DATA) assert_psm3_equal(data, metadata, expected) def test_read_psm3(): """test read_psm3""" - data, metadata = psm3.read_psm3(MANUAL_TEST_DATA) + data, metadata = psm3.read_psm3(MANUAL_TEST_DATA, map_variables=False) expected = pd.read_csv(YEAR_TEST_DATA) assert_psm3_equal(data, metadata, expected) + + +def test_read_psm3_map_variables(): + """test read_psm3 map_variables=True""" + data, metadata = psm3.read_psm3(MANUAL_TEST_DATA, map_variables=True) + columns_mapped = ['Year', 'Month', 'Day', 'Hour', 'Minute', 'dhi', 'dni', + 'ghi', 'dhi_clear', 'dni_clear', 'ghi_clear', + 'Cloud Type', 'Dew Point', 'apparent_zenith', + 'Fill Flag', 'albedo', 'wind_speed', + 'precipitable_water', 'wind_direction', + 'relative_humidity', 'temp_air', 'pressure'] + data, metadata = psm3.read_psm3(MANUAL_TEST_DATA, map_variables=True) + assert_index_equal(data.columns, pd.Index(columns_mapped)) + + +@pytest.mark.remote_data +@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) +def test_get_psm3_attribute_mapping(nrel_api_key): + """Test that pvlib names can be passed in as attributes and get correctly + reverse mapped to PSM3 names""" + data, meta = psm3.get_psm3(LATITUDE, LONGITUDE, nrel_api_key, PVLIB_EMAIL, + names=2019, interval=60, + attributes=['ghi', 'wind_speed'], + map_variables=True) + assert 'ghi' in data.columns + assert 'wind_speed' in data.columns + assert 'latitude' in meta.keys() + assert 'longitude' in meta.keys() + assert 'altitude' in meta.keys() + + +@pytest.mark.remote_data +@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) +def test_psm3_variable_map_deprecation_warning(nrel_api_key): + with pytest.warns(pvlibDeprecationWarning, match='names will be renamed'): + _ = psm3.read_psm3(MANUAL_TEST_DATA)