diff --git a/NEWS.md b/NEWS.md index 439c225..2ecaa6a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -164,6 +164,45 @@ julia> long_name(integrated_var) # updated long name to reflect the data being i "f integrated over lon (-179.5 to 179.5degrees_east) and integrated over lat (-89.5 to 89.5degrees_north)" ``` +### Split by season +`OutputVar`s can be split by seasons using `split_by_season(var)` provided that a start date +can be found in `var.attributes["start_date"]` and time is a dimension in the `OutputVar`. +The unit of time is expected to be second. The function `split_by_season(var)` returns a +vector of four `OutputVar`s with each `OutputVar` corresponding to a season. The months of +the seasons are March to May, June to August, September to November, and December to +February. The order of the vector is MAM, JJA, SON, and DJF. If there are no dates found for +a season, then the `OutputVar` for that season will be an empty `OutputVar`. + +```@julia split_by_season +julia> attribs = Dict("start_date" => "2024-1-1"); + +julia> time = [0., 5_184_000., 13_132_800., 21_081_600.]; # correspond to dates 2024-1-1, 2024-3-1, 2024-6-1, 2024-9-1 + +julia> dims = OrderedDict(["time" => time]); + +julia> dim_attribs = OrderedDict(["time" => Dict("units" => "s")]); + +julia> data = [1., 2., 3., 4.]; + +julia> var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data); + +julia> MAM, JJA, SON, DJF = ClimaAnalysis.split_by_season(var); + +julia> [MAM.dims["time"], JJA.dims["time"], SON.dims["time"], DJF.dims["time"]] +4-element Vector{Vector{Float64}}: + [5.184e6] + [1.31328e7] + [2.10816e7] + [0.0] + +julia> [MAM.data, JJA.data, SON.data, DJF.data] +4-element Vector{SubArray{Float64, 1, Vector{Float64}, Tuple{Vector{Int64}}, false}}: + [2.0] + [3.0] + [4.0] + [1.0] +``` + ## Bug fixes - Increased the default value for `warp_string` to 72. diff --git a/docs/src/api.md b/docs/src/api.md index 890f1c9..819ba1a 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -59,6 +59,7 @@ Var.convert_units Var.integrate_lonlat Var.integrate_lat Var.integrate_lon +Var.split_by_season(var::OutputVar) ``` ## Utilities diff --git a/docs/src/var.md b/docs/src/var.md index 81d2517..8cfac87 100644 --- a/docs/src/var.md +++ b/docs/src/var.md @@ -100,3 +100,42 @@ julia> integrated_var.data # approximately 4π (the surface area of a sphere) julia> long_name(integrated_var) # updated long name to reflect the data being integrated "f integrated over lon (-179.5 to 179.5degrees_east) and integrated over lat (-89.5 to 89.5degrees_north)" ``` + +## Split by season +`OutputVar`s can be split by seasons using `split_by_season(var)` provided that a start date +can be found in `var.attributes["start_date"]` and time is a dimension in the `OutputVar`. +The unit of time is expected to be second. The function `split_by_season(var)` returns a +vector of four `OutputVar`s with each `OutputVar` corresponding to a season. The months of +the seasons are March to May, June to August, September to November, and December to +February. The order of the vector is MAM, JJA, SON, and DJF. If there are no dates found for +a season, then the `OutputVar` for that season will be an empty `OutputVar`. + +```@julia split_by_season +julia> attribs = Dict("start_date" => "2024-1-1"); + +julia> time = [0., 5_184_000., 13_132_800., 21_081_600.]; # correspond to dates 2024-1-1, 2024-3-1, 2024-6-1, 2024-9-1 + +julia> dims = OrderedDict(["time" => time]); + +julia> dim_attribs = OrderedDict(["time" => Dict("units" => "s")]); # unit is second + +julia> data = [1., 2., 3., 4.]; + +julia> var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data); + +julia> MAM, JJA, SON, DJF = ClimaAnalysis.split_by_season(var); + +julia> [MAM.dims["time"], JJA.dims["time"], SON.dims["time"], DJF.dims["time"]] +4-element Vector{Vector{Float64}}: + [5.184e6] + [1.31328e7] + [2.10816e7] + [0.0] + +julia> [MAM.data, JJA.data, SON.data, DJF.data] +4-element Vector{SubArray{Float64, 1, Vector{Float64}, Tuple{Vector{Int64}}, false}}: + [2.0] + [3.0] + [4.0] + [1.0] +``` \ No newline at end of file diff --git a/src/Var.jl b/src/Var.jl index e7a2c9d..91c3fd7 100644 --- a/src/Var.jl +++ b/src/Var.jl @@ -1,5 +1,6 @@ module Var +import Dates import NCDatasets import OrderedCollections: OrderedDict @@ -8,7 +9,14 @@ import Statistics: mean import NaNStatistics: nanmean import ..Numerics -import ..Utils: nearest_index, seconds_to_prettystr, squeeze +import ..Utils: + nearest_index, + seconds_to_prettystr, + squeeze, + split_by_season, + time_to_date, + date_to_time, + _data_at_dim_vals export OutputVar, read_var, @@ -35,7 +43,8 @@ export OutputVar, integrate_lonlat, integrate_lon, integrate_lat, - isempty + isempty, + split_by_season """ Representing an output variable @@ -901,6 +910,62 @@ function _integrate_over_angle(var::OutputVar, integrate_on, angle_dim_name) return integrated_var end + +""" + split_by_season(var::OutputVar) + +Return a vector of four `OutputVar`s split by season. + +The months of the seasons are March to May, June to August, September to November, and +December to February. The order of the vector is MAM, JJA, SON, and DJF. If there are no +dates found for a season, then the `OutputVar` for that season will be an empty `OutputVar`. + +The function will use the start date in `var.attributes["start_date"]`. The unit of time is +expected to be second. Also, the interpolations will be inaccurate in time intervals +outside of their respective season for the returned `OutputVar`s. +""" +function split_by_season(var::OutputVar) + # Check time exists and unit is second + has_time(var) || error("Time is not a dimension in var") + dim_units(var, time_name(var)) == "s" || + error("Unit for time is not second") + + # Check start date exists + haskey(var.attributes, "start_date") ? + start_date = Dates.DateTime(var.attributes["start_date"]) : + error("Start date is not found in var") + + season_dates = split_by_season(time_to_date.(start_date, times(var))) + season_times = + (date_to_time.(start_date, season) for season in season_dates) + + # Split data according to seasons + season_data = ( + collect( + _data_at_dim_vals( + var.data, + times(var), + var.dim2index[time_name(var)], + season_time, + ), + ) for season_time in season_times + ) + + # Construct an OutputVar for each season + return map(season_times, season_data) do time, data + if isempty(time) + dims = empty(var.dims) + data = similar(var.data, 0) + return OutputVar(dims, data) + end + ret_dims = deepcopy(var.dims) + ret_attribs = deepcopy(var.attributes) + ret_dim_attribs = deepcopy(var.dim_attributes) + ret_dims[time_name(var)] = time + OutputVar(ret_attribs, ret_dims, ret_dim_attribs, data) + end +end + """ overload_binary_op(op) diff --git a/test/test_Var.jl b/test/test_Var.jl index 5c0c520..9019e60 100644 --- a/test/test_Var.jl +++ b/test/test_Var.jl @@ -841,3 +841,66 @@ end var, ) end + +@testset "split_by_season" begin + lon = collect(range(-179.5, 179.5, 360)) + lat = collect(range(-89.5, 89.5, 180)) + time = [0.0] + push!(time, 5_184_000.0) # correspond to 2024-3-1 + push!(time, 5_184_001.0) + push!(time, 13_132_800.0) # correspond to 2024-6-1 + push!(time, 13_132_802.0) + push!(time, 13_132_803.0) + data = ones(length(lat), length(time), length(lon)) + dims = OrderedDict(["lat" => lat, "time" => time, "lon" => lon]) + attribs = Dict("long_name" => "hi", "start_date" => "2024-1-1") + dim_attribs = OrderedDict([ + "lat" => Dict("units" => "deg"), + "time" => Dict("units" => "s"), + "lon" => Dict("units" => "deg"), + ]) + var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data) + + MAM, JJA, SON, DJF = ClimaAnalysis.split_by_season(var) + + # Check size of data + @test size(MAM.data) == (length(lat), 2, length(lon)) + @test size(JJA.data) == (length(lat), 3, length(lon)) + @test size(SON.data) == (0,) + @test size(DJF.data) == (length(lat), 1, length(lon)) + + # Check times are correct in OutputVars + @test MAM.dims["time"] == [5_184_000.0, 5_184_001.0] + @test JJA.dims["time"] == [13_132_800.0, 13_132_802.0, 13_132_803.0] + @test DJF.dims["time"] == [0.0] + + # Check start date + MAM.attributes["start_date"] == "2024-1-1" + JJA.attributes["start_date"] == "2024-1-1" + DJF.attributes["start_date"] == "2024-1-1" + + # Check empty OutputVar + @test isempty(SON) == true + + # Check error handling + attribs_no_start_date = Dict("long_name" => "hi") + var = + ClimaAnalysis.OutputVar(attribs_no_start_date, dims, dim_attribs, data) + @test_throws ErrorException ClimaAnalysis.split_by_season(var) + + dim_attribs_no_sec = OrderedDict([ + "lat" => Dict("units" => "deg"), + "time" => Dict("units" => "min"), + "lon" => Dict("units" => "deg"), + ]) + var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs_no_sec, data) + @test_throws ErrorException ClimaAnalysis.split_by_season(var) + + lon = collect(range(-179.5, 179.5, 360)) + data = ones(length(lon)) + dims = OrderedDict(["lon" => lon]) + attribs = Dict("long_name" => "hi", "start_date" => "2024-1-1") + dim_attribs = OrderedDict(["lon" => Dict("units" => "deg")]) + var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data) + @test_throws ErrorException ClimaAnalysis.split_by_season(var) +end