Skip to content

Commit

Permalink
Add Var.split_by_season
Browse files Browse the repository at this point in the history
Bug introduced: The documentation shows the wrong module name for
Var.split_by_season. It is shown as ClimaAnalysis.Utils.split_by_season instead
of ClimaAnalysis.Var.split_by_season.
  • Loading branch information
ph-kev committed Sep 9, 2024
1 parent 3757c91 commit 9b3c473
Show file tree
Hide file tree
Showing 5 changed files with 204 additions and 2 deletions.
38 changes: 38 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,44 @@ julia> long_name(integrated_var) # updated long name to reflect the data being i
"f integrated over lon (-179.5 to 179.5degrees_east) and integrated over lat (-89.5 to 89.5degrees_north)"
```

### Split by season
`OutputVar`s can be split by seasons using `split_by_season(var)` provided that a start date
can be found in `var.attributes["start_date"]` and time is a dimension in the `OutputVar`.
The unit of time is expected to be seconds. The function `split_by_season(var)` returns a
vector of four `OutputVar`s with each `OutputVar` corresponding to a season. The months of
the seasons are March to May, June to August, September to November, and December to
February. The order of the vector is MAM, JJA, SON, and DJF.

```@julia split_by_season
julia> attribs = Dict("start_date" => "2024-1-1");
julia> time = [0., 5_184_000., 13_132_800., 21_081_600.]; # correspond to dates 2024-1-1, 2024-3-1, 2024-6-1, 2024-9-1
julia> dims = OrderedDict(["time" => time]);
julia> dim_attribs = OrderedDict(["time" => Dict("units" => "s")]);
julia> data = [1., 2., 3., 4.];
julia> var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data);
julia> MAM, JJA, SON, DJF = ClimaAnalysis.split_by_season(var);
julia> [MAM.dims["time"], JJA.dims["time"], SON.dims["time"], DJF.dims["time"]]
4-element Vector{Vector{Float64}}:
[5.184e6]
[1.31328e7]
[2.10816e7]
[0.0]
julia> [MAM.data, JJA.data, SON.data, DJF.data]
4-element Vector{SubArray{Float64, 1, Vector{Float64}, Tuple{Vector{Int64}}, false}}:
[2.0]
[3.0]
[4.0]
[1.0]
```

## Bug fixes

- Increased the default value for `warp_string` to 72.
Expand Down
1 change: 1 addition & 0 deletions docs/src/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ Var.convert_units
Var.integrate_lonlat
Var.integrate_lat
Var.integrate_lon
Var.split_by_season(var::OutputVar)
```

## Utilities
Expand Down
38 changes: 38 additions & 0 deletions docs/src/var.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,41 @@ julia> integrated_var.data # approximately 4π (the surface area of a sphere)
julia> long_name(integrated_var) # updated long name to reflect the data being integrated
"f integrated over lon (-179.5 to 179.5degrees_east) and integrated over lat (-89.5 to 89.5degrees_north)"
```

## Split by season
`OutputVar`s can be split by seasons using `split_by_season(var)` provided that a start date
can be found in `var.attributes["start_date"]` and time is a dimension in the `OutputVar`.
The unit of time is expected to be seconds. The function `split_by_season(var)` returns a
vector of four `OutputVar`s with each `OutputVar` corresponding to a season. The months of
the seasons are March to May, June to August, September to November, and December to
February. The order of the vector is MAM, JJA, SON, and DJF.

```@julia split_by_season
julia> attribs = Dict("start_date" => "2024-1-1");
julia> time = [0., 5_184_000., 13_132_800., 21_081_600.]; # correspond to dates 2024-1-1, 2024-3-1, 2024-6-1, 2024-9-1
julia> dims = OrderedDict(["time" => time]);
julia> dim_attribs = OrderedDict(["time" => Dict("units" => "s")]); # unit is second
julia> data = [1., 2., 3., 4.];
julia> var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data);
julia> MAM, JJA, SON, DJF = ClimaAnalysis.split_by_season(var);
julia> [MAM.dims["time"], JJA.dims["time"], SON.dims["time"], DJF.dims["time"]]
4-element Vector{Vector{Float64}}:
[5.184e6]
[1.31328e7]
[2.10816e7]
[0.0]
julia> [MAM.data, JJA.data, SON.data, DJF.data]
4-element Vector{SubArray{Float64, 1, Vector{Float64}, Tuple{Vector{Int64}}, false}}:
[2.0]
[3.0]
[4.0]
[1.0]
```
69 changes: 67 additions & 2 deletions src/Var.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
module Var

import Dates
import NCDatasets
import OrderedCollections: OrderedDict

Expand All @@ -8,7 +9,14 @@ import Statistics: mean
import NaNStatistics: nanmean

import ..Numerics
import ..Utils: nearest_index, seconds_to_prettystr, squeeze
import ..Utils:
nearest_index,
seconds_to_prettystr,
squeeze,
split_by_season,
time_to_date,
date_to_time,
_data_at_dim_vals

export OutputVar,
read_var,
Expand All @@ -34,7 +42,8 @@ export OutputVar,
convert_units,
integrate_lonlat,
integrate_lon,
integrate_lat
integrate_lat,
split_by_season

"""
Representing an output variable
Expand Down Expand Up @@ -886,6 +895,62 @@ function _integrate_over_angle(var::OutputVar, integrate_on, angle_dim_name)
return integrated_var
end


"""
    split_by_season(var::OutputVar)

Return a vector of four `OutputVar`s split by season.

The months of the seasons are March to May, June to August, September to November, and
December to February. The order of the vector is MAM, JJA, SON, and DJF.

The function uses the start date in `var.attributes["start_date"]` and requires that time
is a dimension of `var` with units of seconds (`"s"`); otherwise, an error is thrown. If
no time falls in a season, the time dimension of the corresponding `OutputVar` is empty
and its `"start_date"` attribute is set to the empty string.

Also, the interpolations will be inaccurate in time intervals outside of their respective
season for the returned `OutputVar`s.
"""
function split_by_season(var::OutputVar)
    # Check time exists and unit is second
    has_time(var) || error("Time is not a dimension in var")
    t_name = time_name(var)
    dim_units(var, t_name) == "s" || error("Unit for time is not second")

    # Check start date exists
    haskey(var.attributes, "start_date") ||
        error("Start date is not found in var")
    start_date = Dates.DateTime(var.attributes["start_date"])

    # Convert the times to dates, group the dates by season, and convert each group back
    # to times relative to the start date. The result is materialized once so that the
    # conversion is not repeated for every use below.
    season_dates = split_by_season(time_to_date.(start_date, var.dims[t_name]))
    season_times =
        [date_to_time.(start_date, season) for season in season_dates]

    # Construct an OutputVar for each season
    return map(season_times) do season_time
        # Select the part of the data that belongs to the times of this season
        season_data = _data_at_dim_vals(
            var.data,
            var.dims[t_name],
            var.dim2index[t_name],
            season_time,
        )

        # Copy the metadata so that the returned OutputVars do not share it with var
        ret_dims = deepcopy(var.dims)
        ret_attribs = deepcopy(var.attributes)
        ret_dim_attribs = deepcopy(var.dim_attributes)

        ret_dims[t_name] = season_time

        # Put empty string for starting date if time array is empty
        # otherwise, use the start date in the provided OutputVar
        isempty(season_time) && (ret_attribs["start_date"] = "")

        return OutputVar(ret_attribs, ret_dims, ret_dim_attribs, season_data)
    end
end

"""
overload_binary_op(op)
Expand Down
60 changes: 60 additions & 0 deletions test/test_Var.jl
Original file line number Diff line number Diff line change
Expand Up @@ -821,3 +821,63 @@ end
var,
)
end

# Tests for splitting an OutputVar by season: checks the shape of the data and the times of
# each season, the start date attribute, and the error paths (no start date, time not in
# seconds, and no time dimension).
@testset "split_by_season" begin
    lon = collect(range(-179.5, 179.5, 360))
    lat = collect(range(-89.5, 89.5, 180))
    time = [0.0]
    push!(time, 5_184_000.0) # corresponds to 2024-3-1
    push!(time, 5_184_001.0)
    push!(time, 13_132_800.0) # corresponds to 2024-6-1
    push!(time, 13_132_802.0)
    push!(time, 13_132_803.0)
    data = ones(length(lat), length(time), length(lon))
    dims = OrderedDict(["lat" => lat, "time" => time, "lon" => lon])
    attribs = Dict("long_name" => "hi", "start_date" => "2024-1-1")
    dim_attribs = OrderedDict([
        "lat" => Dict("units" => "deg"),
        "time" => Dict("units" => "s"),
        "lon" => Dict("units" => "deg"),
    ])
    var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data)

    MAM, JJA, SON, DJF = ClimaAnalysis.split_by_season(var)

    # Check size of data
    @test size(MAM.data) == (length(lat), 2, length(lon))
    @test size(JJA.data) == (length(lat), 3, length(lon))
    @test size(SON.data) == (length(lat), 0, length(lon))
    @test size(DJF.data) == (length(lat), 1, length(lon))

    # Check times are correct in OutputVars
    @test MAM.dims["time"] == [5_184_000.0, 5_184_001.0]
    @test JJA.dims["time"] == [13_132_800.0, 13_132_802.0, 13_132_803.0]
    @test isempty(SON.dims["time"])
    @test DJF.dims["time"] == [0.0]

    # Check start date (an empty season gets an empty string as its start date)
    @test MAM.attributes["start_date"] == "2024-1-1"
    @test SON.attributes["start_date"] == ""

    # Check error handling: no start date in the attributes
    attribs_no_start_date = Dict("long_name" => "hi")
    var =
        ClimaAnalysis.OutputVar(attribs_no_start_date, dims, dim_attribs, data)
    @test_throws ErrorException ClimaAnalysis.split_by_season(var)

    # Check error handling: unit of time is not second
    dim_attribs_no_sec = OrderedDict([
        "lat" => Dict("units" => "deg"),
        "time" => Dict("units" => "min"),
        "lon" => Dict("units" => "deg"),
    ])
    var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs_no_sec, data)
    @test_throws ErrorException ClimaAnalysis.split_by_season(var)

    # Check error handling: time is not a dimension
    lon = collect(range(-179.5, 179.5, 360))
    data = ones(length(lon))
    dims = OrderedDict(["lon" => lon])
    attribs = Dict("long_name" => "hi", "start_date" => "2024-1-1")
    dim_attribs = OrderedDict(["lon" => Dict("units" => "deg")])
    var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data)
    @test_throws ErrorException ClimaAnalysis.split_by_season(var)
end

0 comments on commit 9b3c473

Please sign in to comment.