-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adding new reader and lat/lon match transform (#184)
We are adding a new reader to handle geoval files, but with the idea that it can be generalized to other types of files. A new latlon match transform was added using some work that @danholdaway had developed. List of changes: - Method to `data_collections.py` to retrieve a collection - Generic `DataFile` reader - `latlon_match` transform to match lat/lon coordinates from one collection to another - `DataFile` test yaml and some files for testing Resolves #177
- Loading branch information
Showing
9 changed files
with
315 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
# (C) Copyright 2024 NOAA/NWS/EMC | ||
# | ||
# (C) Copyright 2024 United States Government as represented by the Administrator of the | ||
# National Aeronautics and Space Administration. All Rights Reserved. | ||
# | ||
# This software is licensed under the terms of the Apache Licence Version 2.0 | ||
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. | ||
|
||
# -------------------------------------------------------------------------------------------------- | ||
|
||
import os | ||
import netCDF4 as nc | ||
import numpy as np | ||
from xarray import Dataset, open_dataset | ||
from eva.utilities.config import get | ||
from eva.data.eva_dataset_base import EvaDatasetBase | ||
from eva.utilities.utils import parse_channel_list | ||
|
||
|
||
class GeovalSpace(EvaDatasetBase):

    """
    Dataset reader for geoval files.

    Reads a single netCDF geoval file into an xarray Dataset, keeps only the
    variables requested in the configuration, renames each kept variable to
    '<instrument>::<variable>' (and the second dimension to 'Level' when the
    variable is 2D) and adds the result to the data collections.
    """

    def execute(self, dataset_config, data_collections, timing):

        """
        Executes the processing of data file dataset.

        Args:
            dataset_config (dict): Configuration dictionary for the dataset.
            data_collections (DataCollections): Object for managing data collections.
            timing (Timing): Timing object for tracking execution time.
        """

        # Set the collection name
        # -----------------------
        collection_name = get(dataset_config, self.logger, 'name')

        # Get missing value threshold
        # ---------------------------
        threshold = float(get(dataset_config, self.logger, 'missing_value_threshold', 1.0e30))

        # Get levels to plot profiles
        # ---------------------------
        levels_str_or_list = get(dataset_config, self.logger, 'levels', [])

        # Convert levels to list.
        # Bug fix: the previous check 'levels_str_or_list is not []' was always
        # True ('is' compares identity against a brand-new list literal); a
        # truthiness test is what was intended. The parsed levels are validated
        # here but not otherwise consumed in this method yet.
        levels = []
        if levels_str_or_list:
            levels = parse_channel_list(levels_str_or_list, self.logger)

        # Filename to be used for reads
        # -----------------------------
        data_filename = get(dataset_config, self.logger, 'data_file')

        # Get instrument name (used as the prefix in renamed variable names)
        instr_name = get(dataset_config, self.logger, 'instrument_name')

        # Open instrument file as an xarray dataset
        instr_ds = open_dataset(data_filename)

        # Enforce that a variables list exists; do not default to all variables
        variables = get(dataset_config, self.logger, 'variables')
        if not variables:
            self.logger.abort('A variables list needs to be defined in the config file.')

        # Drop every variable that was not requested
        vars_to_remove = list(set(instr_ds.keys()) - set(variables))
        instr_ds = instr_ds.drop_vars(vars_to_remove)

        # Rename variables to '<instrument>::<variable>' and rename the second
        # (vertical/nval) dimension to 'Level' for multi-dimensional variables.
        rename_dict = {}
        rename_dims_dict = {}
        for v in variables:
            # Retrieve dimension names for this variable
            dims = instr_ds[v].dims
            if len(dims) > 1:
                rename_dims_dict[dims[1]] = 'Level'
            rename_dict[v] = f'{instr_name}::{v}'
        instr_ds = instr_ds.rename(rename_dict)
        instr_ds = instr_ds.rename_dims(rename_dims_dict)

        # Add the dataset to the collections
        data_collections.create_or_add_to_collection(collection_name, instr_ds)

        # Nan out unphysical values
        data_collections.nan_float_values_outside_threshold(threshold)

        # Display the contents of the collections for helping the user with making plots
        data_collections.display_collections()

    def generate_default_config(self, filenames, collection_name):

        """
        Generate a default configuration for the dataset.

        This method generates a default configuration for the dataset based on the provided
        filenames and collection name. It can be used as a starting point for creating a
        configuration for the dataset.

        Args:
            filenames: Filenames or file paths relevant to the dataset.
            collection_name (str): Name of the collection for the dataset.

        Returns:
            dict: A dictionary representing the default configuration for the dataset.
        """

        # Not implemented for geoval files yet.
        pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
# Test configuration exercising the GeovalSpace reader and the latlon_match
# transform: experiment (JEDI/swell) and control (GSI/ncdiag) geoval
# collections are read, the control is reordered to the experiment's
# observation locations, and the difference is mapped.
datasets:

  # Experiment geovals restricted by a level selection (3D variable)
  - name: exp_geovals_with_lvls
    type: GeovalSpace
    data_file: ${data_input_path}/swell-hofx.amsua_n19-geovals.20211211T210000Z.nc4
    levels: &exp_levels 33,60
    instrument_name: amsua_n19
    variables: &exp_vars_with_lvls ['mole_fraction_of_carbon_dioxide_in_air']

  # Experiment geovals without level selection (2D/surface variables)
  - name: exp_geovals
    type: GeovalSpace
    data_file: ${data_input_path}/swell-hofx.amsua_n19-geovals.20211211T210000Z.nc4
    instrument_name: amsua_n19
    variables: &exp_vars ['vegetation_area_fraction', 'leaf_area_index']

  # Experiment observation lat/lon metadata (target locations for matching)
  - name: exp_latlon
    type: IodaObsSpace
    filenames:
      - ${data_input_path}/swell-hofx.amsua_n19.20211211T210000Z.nc4
    groups:
      - name: MetaData

  # Control geovals restricted by a level selection (3D variable)
  - name: ctrl_geovals_with_lvls
    type: GeovalSpace
    data_file: ${data_input_path}/ncdiag.x0048v2-geovals.ob.PT6H.amsua_n19.2021-12-11T21:00:00Z.nc4
    levels: &ctrl_levels 33,60
    instrument_name: amsua_n19
    variables: &ctrl_vars_with_lvls ['mole_fraction_of_carbon_dioxide_in_air']

  # Control geovals without level selection (2D/surface variables)
  - name: ctrl_geovals
    type: GeovalSpace
    data_file: ${data_input_path}/ncdiag.x0048v2-geovals.ob.PT6H.amsua_n19.2021-12-11T21:00:00Z.nc4
    instrument_name: amsua_n19
    variables: &ctrl_vars ['vegetation_area_fraction', 'leaf_area_index']

  # Control observation lat/lon metadata (source locations for matching)
  - name: ctrl_latlon
    type: IodaObsSpace
    filenames:
      - ${data_input_path}/ncdiag.x0048v2.ob.PT6H.amsua_n19.2021-12-11T21:00:00Z.nc4
    groups:
      - name: MetaData

transforms:

  # Reorder the control geovals onto the experiment's lat/lon locations
  - transform: latlon_match
    new_collection_name: ctrl_geovals_matched_index
    base_latlon: ctrl_latlon
    match_base_latlon_to: exp_latlon
    base_collection: ctrl_geovals::amsua_n19::${variable}
    for:
      variable: *ctrl_vars

  # Same matching for the level-selected control geovals
  - transform: latlon_match
    new_collection_name: ctrl_geovals_with_lvls_matched_index
    base_latlon: ctrl_latlon
    match_base_latlon_to: exp_latlon
    base_collection: ctrl_geovals_with_lvls::amsua_n19::${variable}
    for:
      variable: *ctrl_vars_with_lvls

  # Difference (experiment minus matched control) per variable
  - transform: arithmetic
    new name: exp_geovals::amsua_n19::exp_minus_ctrl_${variable}
    equals: exp_geovals::amsua_n19::${variable}-ctrl_geovals_matched_index::amsua_n19::${variable}
    for:
      variable: *exp_vars

graphics:

  plotting_backend: Emcpy
  figure_list:

    # One global map per variable showing the experiment-minus-control field
    - batch figure:
        variables: *exp_vars
      dynamic options:
        - type: vminvmaxcmap
          data variable: exp_geovals::amsua_n19::exp_minus_ctrl_${variable}
      figure:
        figure size: [20,10]
        layout: [1,1]
        title: 'JEDI - GSI | AMSU-A NOAA-19 | Geoval | ${variable}'
        output name: map_plots/geovals/amsua_n19/${variable}/observations_amsua_n19_${variable}.png
      plots:
        - mapping:
            projection: plcarr
            domain: global
          add_map_features: ['coastline']
          add_colorbar:
            label: '${variable}'
          layers:
            - type: MapScatter
              longitude:
                variable: exp_latlon::MetaData::longitude
              latitude:
                variable: exp_latlon::MetaData::latitude
              data:
                variable: exp_geovals::amsua_n19::exp_minus_ctrl_${variable}
              markersize: 2
              cmap: ${dynamic_cmap}
              vmin: ${dynamic_vmin}
              vmax: ${dynamic_vmax}
3 changes: 3 additions & 0 deletions
3
src/eva/tests/data/ncdiag.x0048v2-geovals.ob.PT6H.amsua_n19.2021-12-11T21:00:00Z.nc4
Git LFS file not shown
3 changes: 3 additions & 0 deletions
3
src/eva/tests/data/ncdiag.x0048v2.ob.PT6H.amsua_n19.2021-12-11T21:00:00Z.nc4
Git LFS file not shown
3 changes: 3 additions & 0 deletions
3
src/eva/tests/data/swell-hofx.amsua_n19-geovals.20211211T210000Z.nc4
Git LFS file not shown
Git LFS file not shown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
# (C) Copyright 2024 NOAA/NWS/EMC | ||
# | ||
# (C) Copyright 2024 United States Government as represented by the Administrator of the | ||
# National Aeronautics and Space Administration. All Rights Reserved. | ||
# | ||
# This software is licensed under the terms of the Apache Licence Version 2.0 | ||
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. | ||
|
||
import numpy as np | ||
from xarray import Dataset, DataArray | ||
from eva.utilities.config import get | ||
from eva.utilities.logger import Logger | ||
from eva.transforms.transform_utils import parse_for_dict, split_collectiongroupvariable | ||
|
||
|
||
def latlon_match(config, data_collections):

    """
    Applies lat/lon match transform to a given collection.

    For every point in the 'match_base_latlon_to' collection, the nearest point
    in the 'base_latlon' collection is found (minimum of |dlat| + |dlon|), and
    the variables of the base collection are reordered by those indices into a
    new collection.

    Args:
        config (dict): A configuration dictionary containing transformation parameters.
        data_collections (DataCollections): An instance of the DataCollections class containing
            input data.

    Returns:
        None

    Config keys:
        base_collection: collection to perform the latlon matching on
        base_latlon: the collection with lat/lon coordinates corresponding to base collection
        match_base_latlon_to: the collection with lat/lon coordinates corresponding to what you
            want to match the base latlon to.
        new_collection_name: name under which the matched collection is stored.

    NOTE(review): the metric is a plain |dlat| + |dlon|, so it does not handle
    longitude wrap-around at the dateline — confirm both collections use the
    same longitude convention.
    """

    # Create a logger
    logger = Logger('LatLonMatchTransform')

    # Parse the for dictionary
    _, _, variables = parse_for_dict(config, logger)

    # Parse config for names
    base_collection = get(config, logger, 'base_collection')
    base_latlon_name = get(config, logger, 'base_latlon')
    match_latlon_name = get(config, logger, 'match_base_latlon_to')

    # Extract collection and group
    cgv = split_collectiongroupvariable(logger, base_collection)

    # Retrieve lat/lon arrays using collection names
    base_lat = data_collections.get_variable_data_array(base_latlon_name, 'MetaData',
                                                        'latitude').to_numpy()
    base_lon = data_collections.get_variable_data_array(base_latlon_name, 'MetaData',
                                                        'longitude').to_numpy()
    match_lat = data_collections.get_variable_data_array(match_latlon_name, 'MetaData',
                                                         'latitude').to_numpy()
    match_lon = data_collections.get_variable_data_array(match_latlon_name, 'MetaData',
                                                         'longitude').to_numpy()

    # For each target (match) point, find the index of the nearest base point.
    # Bug fix: the loop must run over the match points, not the base points;
    # the original 'range(len(base_lat))' only worked when both collections
    # happened to have the same length. (This can be updated using dask.)
    matching_index = []
    for i in range(len(match_lat)):
        matching_index.append(int((np.abs(base_lat - match_lat[i]) +
                                   np.abs(base_lon - match_lon[i])).argmin()))

    # Retrieve data collection from data collections
    match_ds = data_collections.get_data_collection(cgv[0])

    # Reorder each variable by the matched indices and store it in the dataset.
    # NOTE(review): assigning back through .values assumes the matched length
    # equals the base length (true when both collections cover the same set of
    # observations); unequal lengths would require rebuilding the DataArray.
    for variable in variables:
        var_array = data_collections.get_variable_data_array(cgv[0], cgv[1], variable)

        # Index data array with matching_index and then save to new collection
        var_array.values = var_array.values[matching_index]
        match_ds[f'{cgv[1]}::{variable}'] = var_array

    # Get new collection name
    new_collection_name = get(config, logger, 'new_collection_name')

    # Add new collection to data collections
    data_collections.create_or_add_to_collection(new_collection_name, match_ds)
    match_ds.close()