diff --git a/NEWS.md b/NEWS.md index 9130e0b..e508913 100644 --- a/NEWS.md +++ b/NEWS.md @@ -120,7 +120,7 @@ contour2D_on_globe!(fig, CairoMakie.save("myfigure.pdf", fig) ``` -## Integrating `OutputVar` with respect to longitude or latitude +### Integrating `OutputVar` with respect to longitude or latitude You can use the `integrate_lon(var)`, `integrate_lat(var)`, or `integrate_lonlat(var)` functions for integrating along longitude, latitude, or both respectively. The bounds of @@ -164,6 +164,46 @@ julia> long_name(integrated_var) # updated long name to reflect the data being i "f integrated over lon (-179.5 to 179.5degrees_east) and integrated over lat (-89.5 to 89.5degrees_north)" ``` +### Split by season +`OutputVar`s can be split by seasons using `split_by_season(var)` provided that a start date +can be found in `var.attributes["start_date"]` and time is a dimension in the `OutputVar`. +The unit of time is expected to be second. The function `split_by_season(var)` returns a +vector of four `OutputVar`s with each `OutputVar` corresponding to a season. The months of +the seasons are March to May, June to August, September to November, and December to +February. The order of the vector is MAM, JJA, SON, and DJF. If there are no dates found for +a season, then the `OutputVar` for that season will be an empty `OutputVar`. + +```@julia split_by_season +julia> attribs = Dict("start_date" => "2024-1-1"); + +julia> time = [0.0, 5_184_000.0, 13_132_800.0]; # correspond to dates 2024-1-1, 2024-3-1, 2024-6-1 + +julia> dims = OrderedDict(["time" => time]); + +julia> dim_attribs = OrderedDict(["time" => Dict("units" => "s")]); # unit is second + +julia> data = [1.0, 2.0, 3.0]; + +julia> var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data); + +julia> MAM, JJA, SON, DJF = ClimaAnalysis.split_by_season(var); + +julia> ClimaAnalysis.isempty(SON) # empty OutputVar because no dates between September to November +true + +julia> [MAM.dims["time"], JJA.dims["time"], DJF.dims["time"]] +3-element Vector{Vector{Float64}}: + [5.184e6] + [1.31328e7] + [0.0] + +julia> [MAM.data, JJA.data, DJF.data] +3-element Vector{Vector{Float64}}: + [2.0] + [3.0] + [1.0] +``` + ## Bug fixes - Increased the default value for `warp_string` to 72. diff --git a/docs/src/api.md b/docs/src/api.md index 9c7a1c6..819ba1a 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -20,6 +20,7 @@ Sim.available_periods Var.OutputVar Var.read_var Var.is_z_1D +Base.isempty(var::OutputVar) Var.short_name Var.long_name Var.units @@ -58,6 +59,7 @@ Var.convert_units Var.integrate_lonlat Var.integrate_lat Var.integrate_lon +Var.split_by_season(var::OutputVar) ``` ## Utilities @@ -71,7 +73,6 @@ Utils.nearest_index Utils.kwargs Utils.seconds_to_prettystr Utils.warp_string -Utils.split_by_season ``` ## Atmos diff --git a/docs/src/var.md b/docs/src/var.md index 81d2517..64eedc6 100644 --- a/docs/src/var.md +++ b/docs/src/var.md @@ -100,3 +100,43 @@ julia> integrated_var.data # approximately 4π (the surface area of a sphere) julia> long_name(integrated_var) # updated long name to reflect the data being integrated "f integrated over lon (-179.5 to 179.5degrees_east) and integrated over lat (-89.5 to 89.5degrees_north)" ``` + +## Split by season +`OutputVar`s can be split by seasons using `split_by_season(var)` provided that a start date +can be found in `var.attributes["start_date"]` and time is a dimension in the `OutputVar`. +The unit of time is expected to be second. The function `split_by_season(var)` returns a +vector of four `OutputVar`s with each `OutputVar` corresponding to a season. The months of +the seasons are March to May, June to August, September to November, and December to +February. The order of the vector is MAM, JJA, SON, and DJF. If there are no dates found for +a season, then the `OutputVar` for that season will be an empty `OutputVar`. + +```@julia split_by_season +julia> attribs = Dict("start_date" => "2024-1-1"); + +julia> time = [0.0, 5_184_000.0, 13_132_800.0]; # correspond to dates 2024-1-1, 2024-3-1, 2024-6-1 + +julia> dims = OrderedDict(["time" => time]); + +julia> dim_attribs = OrderedDict(["time" => Dict("units" => "s")]); # unit is second + +julia> data = [1.0, 2.0, 3.0]; + +julia> var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data); + +julia> MAM, JJA, SON, DJF = ClimaAnalysis.split_by_season(var); + +julia> ClimaAnalysis.isempty(SON) # empty OutputVar because no dates between September to November +true + +julia> [MAM.dims["time"], JJA.dims["time"], DJF.dims["time"]] +3-element Vector{Vector{Float64}}: + [5.184e6] + [1.31328e7] + [0.0] + +julia> [MAM.data, JJA.data, DJF.data] +3-element Vector{Vector{Float64}}: + [2.0] + [3.0] + [1.0] +``` \ No newline at end of file diff --git a/src/Utils.jl b/src/Utils.jl index 326ae5a..fd3659e 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -1,12 +1,7 @@ module Utils export match_nc_filename, - squeeze, - nearest_index, - kwargs, - seconds_to_prettystr, - warp_string, - split_by_season + squeeze, nearest_index, kwargs, seconds_to_prettystr, warp_string import Dates @@ -316,4 +311,139 @@ function _isequispaced(arr::Vector) return all(diff(arr) .≈ arr[begin + 1] - arr[begin]) end +""" + time_to_date(start_date::Dates.DateTime, time::AbstractFloat) + +Convert the given time to a calendar date starting from `start_date`. + +Examples +========= + +```jldoctest +julia> import Dates + +julia> Utils.time_to_date(Dates.DateTime(2013, 7, 1, 12), 86400.0) +2013-07-02T12:00:00 + +julia> Utils.time_to_date(Dates.DateTime(2013, 7, 1, 12), 3600.0) +2013-07-01T13:00:00 + +julia> Utils.time_to_date(Dates.DateTime(2013, 7, 1, 12), 60.0) +2013-07-01T12:01:00 + +julia> Utils.time_to_date(Dates.DateTime(2013, 7, 1, 12), 1.0) +2013-07-01T12:00:01 +``` +""" +function time_to_date(start_date::Dates.DateTime, time::AbstractFloat) + # We go through milliseconds to allow fractions of a second (otherwise, Second(0.8) + # would fail). Milliseconds is the level of resolution that one gets when taking the + # difference between two DateTimes. In addition to this, we add a round to account for + # floating point errors. If the floating point error is small enough, round will correct + # it. + milliseconds = Dates.Millisecond.(round.(1_000 * time)) + return start_date + milliseconds +end + +""" + date_to_time(reference_date::Dates.DateTime, date::Dates.DateTime) + +Convert the given calendar date to a time (in seconds) where t=0 is `reference_date`. + +Examples +========= + +```jldoctest +julia> import Dates + +julia> Utils.date_to_time(Dates.DateTime(2013, 7, 1, 12), Dates.DateTime(2013, 7, 2, 12)) +86400.0 + +julia> Utils.date_to_time(Dates.DateTime(2013, 7, 1, 12), Dates.DateTime(2013, 7, 1, 13)) +3600.0 + +julia> Utils.date_to_time(Dates.DateTime(2013, 7, 1, 12), Dates.DateTime(2013, 7, 1, 12, 1)) +60.0 + +julia> Utils.date_to_time(Dates.DateTime(2013, 7, 1, 12), Dates.DateTime(2013, 7, 1, 12, 0, 1)) +1.0 +``` +""" +function date_to_time(reference_date::Dates.DateTime, date::Dates.DateTime) + return period_to_seconds_float(date - reference_date) +end + +""" + period_to_seconds_float(period::Dates.Period) + +Convert the given `period` to seconds in Float64. + +Examples +========= + +```jldoctest +julia> import Dates + +julia> Utils.period_to_seconds_float(Dates.Millisecond(1)) +0.001 + +julia> Utils.period_to_seconds_float(Dates.Second(1)) +1.0 + +julia> Utils.period_to_seconds_float(Dates.Minute(1)) +60.0 + +julia> Utils.period_to_seconds_float(Dates.Hour(1)) +3600.0 + +julia> Utils.period_to_seconds_float(Dates.Day(1)) +86400.0 + +julia> Utils.period_to_seconds_float(Dates.Week(1)) +604800.0 + +julia> Utils.period_to_seconds_float(Dates.Month(1)) +2.629746e6 + +julia> Utils.period_to_seconds_float(Dates.Year(1)) +3.1556952e7 +``` +""" +function period_to_seconds_float(period::Dates.Period) + # See https://github.com/JuliaLang/julia/issues/55406 + period isa Dates.OtherPeriod && + (period = Dates.Second(Dates.Day(1)) * Dates.days(period)) + return period / Dates.Second(1) +end + +""" + _data_at_dim_vals(data, dim_arr, dim_idx, vals) + +Return a view of `data` by slicing along `dim_idx`. The slices are indexed by the indices +corresponding to values in `dim_arr` closest to the values in `vals`. + +Examples +========= + +```jldoctest +julia> data = [[1, 4, 7] [2, 5, 8] [3, 6, 9]]; + +julia> dim_arr = [1.0, 2.0, 4.0]; + +julia> dim_idx = 2; + +julia> vals = [1.1, 4.0]; + +julia> Utils._data_at_dim_vals(data, dim_arr, dim_idx, vals) +3×2 view(::Matrix{Int64}, :, [1, 3]) with eltype Int64: + 1 3 + 4 6 + 7 9 +``` +""" +function _data_at_dim_vals(data, dim_arr, dim_idx, vals) + nearest_indices = map(val -> nearest_index(dim_arr, val), vals) + return selectdim(data, dim_idx, nearest_indices) +end + end diff --git a/src/Var.jl b/src/Var.jl index 4259767..9546a11 100644 --- a/src/Var.jl +++ b/src/Var.jl @@ -1,5 +1,6 @@ module Var +import Dates import NCDatasets import OrderedCollections: OrderedDict @@ -8,7 +9,14 @@ import Statistics: mean import NaNStatistics: nanmean import ..Numerics -import ..Utils: nearest_index, seconds_to_prettystr, squeeze +import ..Utils: + nearest_index, + seconds_to_prettystr, + squeeze, + split_by_season, + time_to_date, + date_to_time, + _data_at_dim_vals export OutputVar, read_var, @@ -34,7 +42,9 @@ export OutputVar, convert_units, integrate_lonlat, integrate_lon, - integrate_lat + integrate_lat, + isempty, + split_by_season """ Representing an output variable @@ -302,6 +312,20 @@ function is_z_1D(var::OutputVar) return length(size(altitudes(var))) == 1 end +""" + isempty(var::OutputVar) + +Determine whether an OutputVar is empty. +""" +function Base.isempty(var::OutputVar) + # Do not include :interpolant because var.interpolant is Nothing if data is + # zero dimensional and empty and isempty(Nothing) throws an error + return map( + fieldname -> isempty(getproperty(var, fieldname)), + filter(x -> x != :interpolant, fieldnames(OutputVar)), + ) |> all +end + function Base.copy(var::OutputVar) fields = fieldnames(OutputVar) # We have nested containers and we have to make sure we hand ownership off, @@ -886,6 +910,62 @@ function _integrate_over_angle(var::OutputVar, integrate_on, angle_dim_name) return integrated_var end + +""" + split_by_season(var::OutputVar) + +Return a vector of four `OutputVar`s split by season. + +The months of the seasons are March to May, June to August, September to November, and +December to February. The order of the vector is MAM, JJA, SON, and DJF. If there are no +dates found for a season, then the `OutputVar` for that season will be an empty `OutputVar`. + +The function will use the start date in `var.attributes["start_date"]`. The unit of time is +expected to be second. Also, the interpolations will be inaccurate in time intervals +outside of their respective season for the returned `OutputVar`s. +""" +function split_by_season(var::OutputVar) + # Check time exists and unit is second + has_time(var) || error("Time is not a dimension in var") + dim_units(var, time_name(var)) == "s" || + error("Unit for time is not second") + + # Check start date exists + haskey(var.attributes, "start_date") ? + start_date = Dates.DateTime(var.attributes["start_date"]) : + error("Start date is not found in var") + + season_dates = split_by_season(time_to_date.(start_date, times(var))) + season_times = + (date_to_time.(start_date, season) for season in season_dates) + + # Split data according to seasons + season_data = ( + collect( + _data_at_dim_vals( + var.data, + times(var), + var.dim2index[time_name(var)], + season_time, + ), + ) for season_time in season_times + ) + + # Construct an OutputVar for each season + return map(season_times, season_data) do time, data + if isempty(time) + dims = empty(var.dims) + data = similar(var.data, 0) + return OutputVar(dims, data) + end + ret_dims = deepcopy(var.dims) + ret_attribs = deepcopy(var.attributes) + ret_dim_attribs = deepcopy(var.dim_attributes) + ret_dims[time_name(var)] = time + OutputVar(ret_attribs, ret_dims, ret_dim_attribs, data) + end +end + """ overload_binary_op(op) diff --git a/test/test_Utils.jl b/test/test_Utils.jl index 4b1ecfb..e59a1b8 100644 --- a/test/test_Utils.jl +++ b/test/test_Utils.jl @@ -103,3 +103,46 @@ end equispaced = Utils._isequispaced([0.0, 2.0, 3.0]) @test equispaced == false end + +@testset "Date and time conversion" begin + reference_date = Dates.DateTime(2013, 7, 1, 12) + date_one_day = Dates.DateTime(2013, 7, 2, 12) + date_one_hour = Dates.DateTime(2013, 7, 1, 13) + date_one_min = Dates.DateTime(2013, 7, 1, 12, 1) + date_one_sec = Dates.DateTime(2013, 7, 1, 12, 0, 1) + + # Test time_to_date + @test Utils.time_to_date(reference_date, 86400.0) == date_one_day + @test Utils.time_to_date(reference_date, 3600.0) == date_one_hour + @test Utils.time_to_date(reference_date, 60.0) == date_one_min + @test Utils.time_to_date(reference_date, 1.0) == date_one_sec + + # Test date_to_time + @test Utils.date_to_time(reference_date, date_one_day) == 86400.0 + @test Utils.date_to_time(reference_date, date_one_hour) == 3600.0 + @test Utils.date_to_time(reference_date, date_one_min) == 60.0 + @test Utils.date_to_time(reference_date, date_one_sec) == 1.0 + + # Test period_to_seconds_float + @test Utils.period_to_seconds_float(Dates.Millisecond(1)) == 0.001 + @test Utils.period_to_seconds_float(Dates.Second(1)) == 1.0 + @test Utils.period_to_seconds_float(Dates.Minute(1)) == 60.0 + @test Utils.period_to_seconds_float(Dates.Hour(1)) == 3600.0 + @test Utils.period_to_seconds_float(Dates.Day(1)) == 86400.0 + @test Utils.period_to_seconds_float(Dates.Week(1)) == 604800.0 + @test Utils.period_to_seconds_float(Dates.Month(1)) == 2.629746e6 + @test Utils.period_to_seconds_float(Dates.Year(1)) == 3.1556952e7 +end + +@testset "data_at_dim_vals" begin + data = [[1, 2, 3] [4, 5, 6] [7, 8, 9]] + dim_arr = [2.0, 3.0, 4.0] + dim_idx = 2 + + @test Utils._data_at_dim_vals(data, dim_arr, dim_idx, []) == + reshape([], 3, 0) + @test Utils._data_at_dim_vals(data, dim_arr, dim_idx, [2.1]) == + reshape([1; 2; 3], 3, 1) + @test Utils._data_at_dim_vals(data, dim_arr, dim_idx, [2.1, 2.9, 4.0]) == + data +end diff --git a/test/test_Var.jl b/test/test_Var.jl index d451d92..0c4acdb 100644 --- a/test/test_Var.jl +++ b/test/test_Var.jl @@ -85,6 +85,26 @@ import Unitful: @u_str ) end +@testset "empty" begin + dims = OrderedDict{String, Vector{Float64}}() + data = Float64[] + empty_var = ClimaAnalysis.OutputVar(dims, data) + @test ClimaAnalysis.isempty(empty_var) + + dims = OrderedDict{String, Vector{Float64}}() + data = fill(1.0) + empty_var = ClimaAnalysis.OutputVar(dims, data) + @test !ClimaAnalysis.isempty(empty_var) + + long = 0.0:180.0 |> collect + dims = OrderedDict(["long" => long]) + data = ones(size(long)) + dim_attributes = OrderedDict(["lon" => Dict("b" => 2)]) + attribs = Dict("short_name" => "bob", "long_name" => "hi") + not_empty_var = ClimaAnalysis.OutputVar(attribs, dims, dim_attributes, data) + @test !ClimaAnalysis.isempty(not_empty_var) +end + @testset "Arithmetic operations" begin long = 0.0:180.0 |> collect lat = 0.0:90.0 |> collect @@ -821,3 +841,66 @@ end var, ) end + +@testset "split_by_season" begin + lon = collect(range(-179.5, 179.5, 360)) + lat = collect(range(-89.5, 89.5, 180)) + time = [0.0] + push!(time, 5_184_000.0) # correspond to 2024-3-1 + push!(time, 5_184_001.0) + push!(time, 13_132_800.0) # correspond to 2024-6-1 + push!(time, 13_132_802.0) + push!(time, 13_132_803.0) + data = ones(length(lat), length(time), length(lon)) + dims = OrderedDict(["lat" => lat, "time" => time, "lon" => lon]) + attribs = Dict("long_name" => "hi", "start_date" => "2024-1-1") + dim_attribs = OrderedDict([ + "lat" => Dict("units" => "deg"), + "time" => Dict("units" => "s"), + "lon" => Dict("units" => "deg"), + ]) + var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data) + + MAM, JJA, SON, DJF = ClimaAnalysis.split_by_season(var) + + # Check size of data + @test size(MAM.data) == (length(lat), 2, length(lon)) + @test size(JJA.data) == (length(lat), 3, length(lon)) + @test size(SON.data) == (0,) + @test size(DJF.data) == (length(lat), 1, length(lon)) + + # Check times are correct in OutputVars + @test MAM.dims["time"] == [5_184_000.0, 5_184_001.0] + @test JJA.dims["time"] == [13_132_800.0, 13_132_802.0, 13_132_803.0] + @test DJF.dims["time"] == [0.0] + + # Check start date + MAM.attributes["start_date"] == "2024-1-1" + JJA.attributes["start_date"] == "2024-1-1" + DJF.attributes["start_date"] == "2024-1-1" + + # Check empty OutputVar + @test isempty(SON) + + # Check error handling + attribs_no_start_date = Dict("long_name" => "hi") + var = + ClimaAnalysis.OutputVar(attribs_no_start_date, dims, dim_attribs, data) + @test_throws ErrorException ClimaAnalysis.split_by_season(var) + + dim_attribs_no_sec = OrderedDict([ + "lat" => Dict("units" => "deg"), + "time" => Dict("units" => "min"), + "lon" => Dict("units" => "deg"), + ]) + var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs_no_sec, data) + @test_throws ErrorException ClimaAnalysis.split_by_season(var) + + lon = collect(range(-179.5, 179.5, 360)) + data = ones(length(lon)) + dims = OrderedDict(["lon" => lon]) + attribs = Dict("long_name" => "hi", "start_date" => "2024-1-1") + dim_attribs = OrderedDict(["lon" => Dict("units" => "deg")]) + var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data) + @test_throws ErrorException ClimaAnalysis.split_by_season(var) +end