Skip to content

Commit

Permalink
Add Var.split_by_season
Browse files Browse the repository at this point in the history
Bug introduced: The documentation show the wrong module name for
Var.split_by_season. It is ClimaAnalysis.Utils.split_by_season instead
of ClimaAnalysis.Var.split_by_season.
  • Loading branch information
ph-kev committed Sep 10, 2024
1 parent 83b4dbb commit 313225b
Show file tree
Hide file tree
Showing 5 changed files with 209 additions and 2 deletions.
39 changes: 39 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,45 @@ julia> long_name(integrated_var) # updated long name to reflect the data being i
"f integrated over lon (-179.5 to 179.5degrees_east) and integrated over lat (-89.5 to 89.5degrees_north)"
```

### Split by season
`OutputVar`s can be split by seasons using `split_by_season(var)` provided that a start date
can be found in `var.attributes["start_date"]` and time is a dimension in the `OutputVar`.
The unit of time is expected to be second. The function `split_by_season(var)` returns a
vector of four `OutputVar`s with each `OutputVar` corresponding to a season. The months of
the seasons are March to May, June to August, September to November, and December to
February. The order of the vector is MAM, JJA, SON, and DJF. If there are no dates found for
a season, then the `OutputVar` for that season will be an empty `OutputVar`.

```@julia split_by_season
julia> attribs = Dict("start_date" => "2024-1-1");
julia> time = [0., 5_184_000., 13_132_800., 21_081_600.]; # correspond to dates 2024-1-1, 2024-3-1, 2024-6-1, 2024-9-1
julia> dims = OrderedDict(["time" => time]);
julia> dim_attribs = OrderedDict(["time" => Dict("units" => "s")]);
julia> data = [1., 2., 3., 4.];
julia> var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data);
julia> MAM, JJA, SON, DJF = ClimaAnalysis.split_by_season(var);
julia> [MAM.dims["time"], JJA.dims["time"], SON.dims["time"], DJF.dims["time"]]
4-element Vector{Vector{Float64}}:
[5.184e6]
[1.31328e7]
[2.10816e7]
[0.0]
julia> [MAM.data, JJA.data, SON.data, DJF.data]
4-element Vector{SubArray{Float64, 1, Vector{Float64}, Tuple{Vector{Int64}}, false}}:
[2.0]
[3.0]
[4.0]
[1.0]
```

## Bug fixes

- Increased the default value for `warp_string` to 72.
Expand Down
1 change: 1 addition & 0 deletions docs/src/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ Var.convert_units
Var.integrate_lonlat
Var.integrate_lat
Var.integrate_lon
Var.split_by_season(var::OutputVar)
```

## Utilities
Expand Down
39 changes: 39 additions & 0 deletions docs/src/var.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,42 @@ julia> integrated_var.data # approximately 4π (the surface area of a sphere)
julia> long_name(integrated_var) # updated long name to reflect the data being integrated
"f integrated over lon (-179.5 to 179.5degrees_east) and integrated over lat (-89.5 to 89.5degrees_north)"
```

## Split by season
`OutputVar`s can be split by seasons using `split_by_season(var)` provided that a start date
can be found in `var.attributes["start_date"]` and time is a dimension in the `OutputVar`.
The unit of time is expected to be second. The function `split_by_season(var)` returns a
vector of four `OutputVar`s with each `OutputVar` corresponding to a season. The months of
the seasons are March to May, June to August, September to November, and December to
February. The order of the vector is MAM, JJA, SON, and DJF. If there are no dates found for
a season, then the `OutputVar` for that season will be an empty `OutputVar`.

```@julia split_by_season
julia> attribs = Dict("start_date" => "2024-1-1");
julia> time = [0., 5_184_000., 13_132_800., 21_081_600.]; # correspond to dates 2024-1-1, 2024-3-1, 2024-6-1, 2024-9-1
julia> dims = OrderedDict(["time" => time]);
julia> dim_attribs = OrderedDict(["time" => Dict("units" => "s")]); # unit is second
julia> data = [1., 2., 3., 4.];
julia> var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data);
julia> MAM, JJA, SON, DJF = ClimaAnalysis.split_by_season(var);
julia> [MAM.dims["time"], JJA.dims["time"], SON.dims["time"], DJF.dims["time"]]
4-element Vector{Vector{Float64}}:
[5.184e6]
[1.31328e7]
[2.10816e7]
[0.0]
julia> [MAM.data, JJA.data, SON.data, DJF.data]
4-element Vector{SubArray{Float64, 1, Vector{Float64}, Tuple{Vector{Int64}}, false}}:
[2.0]
[3.0]
[4.0]
[1.0]
```
69 changes: 67 additions & 2 deletions src/Var.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
module Var

import Dates
import NCDatasets
import OrderedCollections: OrderedDict

Expand All @@ -8,7 +9,14 @@ import Statistics: mean
import NaNStatistics: nanmean

import ..Numerics
import ..Utils: nearest_index, seconds_to_prettystr, squeeze
import ..Utils:
nearest_index,
seconds_to_prettystr,
squeeze,
split_by_season,
time_to_date,
date_to_time,
_data_at_dim_vals

export OutputVar,
read_var,
Expand All @@ -35,7 +43,8 @@ export OutputVar,
integrate_lonlat,
integrate_lon,
integrate_lat,
isempty
isempty,
split_by_season

"""
Representing an output variable
Expand Down Expand Up @@ -901,6 +910,62 @@ function _integrate_over_angle(var::OutputVar, integrate_on, angle_dim_name)
return integrated_var
end


"""
split_by_season(var::OutputVar)
Return a vector of four `OutputVar`s split by season.
The months of the seasons are March to May, June to August, September to November, and
December to February. The order of the vector is MAM, JJA, SON, and DJF. If there are no
dates found for a season, then the `OutputVar` for that season will be an empty `OutputVar`.
The function will use the start date in `var.attributes["start_date"]`. The unit of time is
expected to be second. Also, the interpolations will be inaccurate in time intervals
outside of their respective season for the returned `OutputVar`s.
"""
function split_by_season(var::OutputVar)
# Check time exists and unit is second
has_time(var) || error("Time is not a dimension in var")
dim_units(var, time_name(var)) == "s" ||
error("Unit for time is not second")

# Check start date exists
haskey(var.attributes, "start_date") ?
start_date = Dates.DateTime(var.attributes["start_date"]) :
error("Start date is not found in var")

season_dates = split_by_season(time_to_date.(start_date, times(var)))
season_times =
(date_to_time.(start_date, season) for season in season_dates)

# Split data according to seasons
season_data = (
collect(
_data_at_dim_vals(
var.data,
times(var),
var.dim2index[time_name(var)],
season_time,
),
) for season_time in season_times
)

# Construct an OutputVar for each season
return map(season_times, season_data) do time, data
if isempty(time)
dims = empty(var.dims)
data = similar(var.data, 0)
return OutputVar(dims, data)
end
ret_dims = deepcopy(var.dims)
ret_attribs = deepcopy(var.attributes)
ret_dim_attribs = deepcopy(var.dim_attributes)
ret_dims[time_name(var)] = time
OutputVar(ret_attribs, ret_dims, ret_dim_attribs, data)
end
end

"""
overload_binary_op(op)
Expand Down
63 changes: 63 additions & 0 deletions test/test_Var.jl
Original file line number Diff line number Diff line change
Expand Up @@ -841,3 +841,66 @@ end
var,
)
end

@testset "split_by_season" begin
lon = collect(range(-179.5, 179.5, 360))
lat = collect(range(-89.5, 89.5, 180))
time = [0.0]
push!(time, 5_184_000.0) # correspond to 2024-3-1
push!(time, 5_184_001.0)
push!(time, 13_132_800.0) # correspond to 2024-6-1
push!(time, 13_132_802.0)
push!(time, 13_132_803.0)
data = ones(length(lat), length(time), length(lon))
dims = OrderedDict(["lat" => lat, "time" => time, "lon" => lon])
attribs = Dict("long_name" => "hi", "start_date" => "2024-1-1")
dim_attribs = OrderedDict([
"lat" => Dict("units" => "deg"),
"time" => Dict("units" => "s"),
"lon" => Dict("units" => "deg"),
])
var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data)

MAM, JJA, SON, DJF = ClimaAnalysis.split_by_season(var)

# Check size of data
@test size(MAM.data) == (length(lat), 2, length(lon))
@test size(JJA.data) == (length(lat), 3, length(lon))
@test size(SON.data) == (0,)
@test size(DJF.data) == (length(lat), 1, length(lon))

# Check times are correct in OutputVars
@test MAM.dims["time"] == [5_184_000.0, 5_184_001.0]
@test JJA.dims["time"] == [13_132_800.0, 13_132_802.0, 13_132_803.0]
@test DJF.dims["time"] == [0.0]

# Check start date
MAM.attributes["start_date"] == "2024-1-1"
JJA.attributes["start_date"] == "2024-1-1"
DJF.attributes["start_date"] == "2024-1-1"

# Check empty OutputVar
@test isempty(SON) == true

# Check error handling
attribs_no_start_date = Dict("long_name" => "hi")
var =
ClimaAnalysis.OutputVar(attribs_no_start_date, dims, dim_attribs, data)
@test_throws ErrorException ClimaAnalysis.split_by_season(var)

dim_attribs_no_sec = OrderedDict([
"lat" => Dict("units" => "deg"),
"time" => Dict("units" => "min"),
"lon" => Dict("units" => "deg"),
])
var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs_no_sec, data)
@test_throws ErrorException ClimaAnalysis.split_by_season(var)

lon = collect(range(-179.5, 179.5, 360))
data = ones(length(lon))
dims = OrderedDict(["lon" => lon])
attribs = Dict("long_name" => "hi", "start_date" => "2024-1-1")
dim_attribs = OrderedDict(["lon" => Dict("units" => "deg")])
var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data)
@test_throws ErrorException ClimaAnalysis.split_by_season(var)
end

0 comments on commit 313225b

Please sign in to comment.