diff --git a/NEWS.md b/NEWS.md index 5d4273a..a270466 100644 --- a/NEWS.md +++ b/NEWS.md @@ -378,6 +378,17 @@ ClimaAnalysis.find_worst_single_model(rmse_var, category_name = "DJF") ClimaAnalysis.median(rmse_var) ``` +#### Plotting RMSEVariable +`RMSEVariable` can be visualized as a box plot or heat map using `plot_boxplot!` and +`plot_leaderboard!`. The function `plot_boxplot!(fig, rmse_var::ClimaAnalysis.RMSEVariable; +model_names = ["CliMA"], ploc = (1, 1), best_and_worst_category_name = "ANN")` makes a box +plot for each category in the `RMSEVariable` and plots any other models as specified by +`model_names`. The function `plot_leaderboard!(fig, +rmse_vars::ClimaAnalysis.RMSEVariable...; ploc = (1, 1), model_names = ["CliMA"], +best_category_name = "ANN")` makes a heatmap of the RMSEs between the variables of interest +and the categories. The values of the heatmap are normalized by dividing over the median +model's RMSEs for each variable. + ## Bug fixes - Increased the default value for `warp_string` to 72. diff --git a/docs/make.jl b/docs/make.jl index 0a72879..3747f21 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -31,6 +31,7 @@ makedocs(; "OutputVars" => "var.md", "Visualizing OutputVars" => "visualize.md", "RMSEVariables" => "rmse_var.md", + "Visualizing RMSEVariables" => "visualize_rmse_var.md", "APIs" => "api.md", "How do I?" => "howdoi.md", ], diff --git a/docs/src/api.md b/docs/src/api.md index 557566e..d77164d 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -123,6 +123,8 @@ Visualize.line_plot1D! Visualize.sliced_line_plot! Visualize.sliced_plot! Visualize.plot! +Visualize.plot_boxplot! +Visualize.plot_leaderboard! ``` ## GeoMakie diff --git a/docs/src/visualize_rmse_var.md b/docs/src/visualize_rmse_var.md new file mode 100644 index 0000000..c547bc1 --- /dev/null +++ b/docs/src/visualize_rmse_var.md @@ -0,0 +1,107 @@ +# Visualizing `RMSEVariable`s + +Instead of computing summary statistics, it may be more helpful to plot a box plot or a +heatmap. `ClimaAnalysis` provides the functions `plot_boxplot!` and `plot_leaderboard!` +to help visualize the root mean squared errors (RMSEs) in a `RMSEVariable`. + +The function `plot_boxplot!(fig, rmse_var::ClimaAnalysis.RMSEVariable; model_names = +["CliMA"], ploc = (1, 1), best_and_worst_category_name = "ANN")` makes a box plot for each +category in the `RMSEVariable`. The best model and worst model and any other models in +`model_names` are plotted. The category to find the best and worst model defaults to +"ANN", but can be changed using the parameter `best_and_worst_category_name`. + +The function `plot_leaderboard!(fig, rmse_vars::ClimaAnalysis.RMSEVariable...; +ploc = (1, 1), model_names = ["CliMA"], best_category_name = "ANN")` makes a heatmap of the +RMSEs between the variables of interest and the categories. The best model for each variable +of interest and the models in `model_names` are shown in the heatmap. Similar to +`plot_boxplot!`, the category to find the best model defaults to "ANN", but can be changed +using the parameter `best_category_name`. The values of the heatmap are normalized by +dividing over the median model's RMSEs for each variable. + +```@setup plotting +import ClimaAnalysis +import CairoMakie + +csv_file_path = "./data/test_csv.csv" +rmse_var_ta = ClimaAnalysis.read_rmses(csv_file_path, "ta") +rmse_var_ta = ClimaAnalysis.add_model(rmse_var_ta, "CliMA", "test1", "test2") +rmse_var_ta[:, :] = [ + [10.0 11.0 12.0 13.0 14.0] + [36.0 37.0 38.0 39.0 30.0] + [11.0 12.0 13.0 14.0 15.0] + [13.0 13.0 13.0 13.0 15.0] + [24.0 24.0 24.0 24.0 24.0] +] +ClimaAnalysis.add_unit!( + rmse_var_ta, + Dict( + "ACCESS-ESM1-5" => "K", + "ACCESS-CM2" => "K", + "CliMA" => "K", + "test1" => "K", + "test2" => "K", + ), +) + +rmse_var_pr = ClimaAnalysis.read_rmses(csv_file_path, "pr") +rmse_var_pr = ClimaAnalysis.add_model(rmse_var_pr, "CliMA") +rmse_var_pr[:, :] = [ + [6.0 7.0 8.0 9.0 10.0] + [11.0 12.0 13.0 14.0 15.0] + [1.0 2.0 3.0 4.0 11.0] +] +ClimaAnalysis.add_unit!( + rmse_var_pr, + Dict( + "ACCESS-ESM1-5" => "kg m^-2 s^-1", + "ACCESS-CM2" => "kg m^-2 s^-1", + "CliMA" => "kg m^-2 s^-1", + ), +) + +rmse_var_ha = ClimaAnalysis.read_rmses(csv_file_path, "ha") +rmse_var_ha = ClimaAnalysis.add_model(rmse_var_ha, "CliMA") +rmse_var_ha[:, :] = [ + [0.5 1.0 1.5 2.0 2.5] + [6.0 7.0 8.0 9.0 10.0] + [11.0 12.0 13.0 14.0 7.0] +] +ClimaAnalysis.add_unit!( + rmse_var_ha, + Dict( + "ACCESS-ESM1-5" => "m^2 s^-2", + "ACCESS-CM2" => "m^2 s^-2", + "CliMA" => "m^2 s^-2", + ), +) +``` + +```@example plotting +import ClimaAnalysis +import CairoMakie + +# Plot box plots +rmse_vars = (rmse_var_ta, rmse_var_pr, rmse_var_ha) +fig = CairoMakie.Figure(; size = (800, 300 * 3 + 400), fontsize = 20) +for i in 1:3 + ClimaAnalysis.Visualize.plot_boxplot!( + fig, + rmse_vars[i], + ploc = (i, 1), + best_and_worst_category_name = "ANN", + ) +end + +# Plot leaderboard +ClimaAnalysis.Visualize.plot_leaderboard!( + fig, + rmse_vars..., + best_category_name = "ANN", + ploc = (4, 1), +) +CairoMakie.save("./assets/boxplot_and_leaderboard.png", fig) + +nothing # hide +``` + +![box plot](./assets/boxplot_and_leaderboard.png) \ No newline at end of file diff --git a/ext/ClimaAnalysisMakieExt.jl b/ext/ClimaAnalysisMakieExt.jl index 4b03dab..11e5101 100644 --- a/ext/ClimaAnalysisMakieExt.jl +++ b/ext/ClimaAnalysisMakieExt.jl @@ -630,4 +630,240 @@ function Visualize._constrained_cmap( return cmap end +""" + Visualize.plot_boxplot!(fig, + rmse_var::ClimaAnalysis.RMSEVariable; + model_names = ["CliMA"], + ploc = (1, 1), + best_and_worst_category_name = "ANN") + +Plot a Tukey style boxplot for each category in `rmse_var`. + +The best and worst single models are found for the category `best_and_worst_category_name` +and are plotted on the boxplot. Additionally, any model in `model_names` will also be +plotted on the boxplot. + +The parameter `ploc` determines where to place the plot on the figure. +""" +function Visualize.plot_boxplot!( + fig, + rmse_var::ClimaAnalysis.RMSEVariable; + model_names = ["CliMA"], + ploc = (1, 1), + best_and_worst_category_name = "ANN", +) + # Unit checking + ClimaAnalysis.Leaderboard._unit_check(rmse_var) + + num_cats = length(rmse_var.category2index) + units = values(rmse_var.units) |> collect |> first + + # Title and labels for x-axis and y-axis + ax = Makie.Axis( + fig[ploc...], + ylabel = "$(rmse_var.short_name) [$units]", + xticks = (1:num_cats, ClimaAnalysis.category_names(rmse_var)), + title = "Global RMSE $(rmse_var.short_name) [$units]", + ) + + # Set up for box plot + cats = reduce( + vcat, + [ + fill(cat_val, length(rmse_var.model2index)) for + cat_val in 1:length(rmse_var.category2index) + ], + ) + vals = reduce(vcat, rmse_var.RMSEs) + + # Filter out NaNs because we can't plot with NaNs + not_nan_idices = findall(!isnan, vals) + cats = cats[not_nan_idices] + vals = vals[not_nan_idices] + + # Add box plot + Makie.boxplot!( + ax, + cats, + vals, + whiskerwidth = 1, + width = 0.35, + mediancolor = :black, + color = :gray, + whiskerlinewidth = 1, + ) + + # Plotting best and worst model + absolute_worst_values, absolute_worst_model_name = + ClimaAnalysis.find_worst_single_model( + rmse_var, + category_name = best_and_worst_category_name, + ) + absolute_best_values, absolute_best_model_name = + ClimaAnalysis.find_best_single_model( + rmse_var, + category_name = best_and_worst_category_name, + ) + Makie.scatter!( + ax, + 1:num_cats, + absolute_worst_values, + label = absolute_worst_model_name, + ) + Makie.scatter!( + ax, + 1:num_cats, + absolute_best_values, + label = absolute_best_model_name, + ) + + # Plotting the median model + Makie.scatter!( + ax, + 1:num_cats, + ClimaAnalysis.median(rmse_var), + label = "Median", + color = :black, + marker = :hline, + markersize = 10, + visible = false, + ) + + # Plot CliMA model and other models + for model_name in model_names + ClimaAnalysis.Leaderboard._model_name_check(rmse_var, model_name) + if model_name == "CliMA" + Makie.scatter!( + ax, + 1:num_cats, + rmse_var[model_name], + label = model_name, + marker = :star5, + markersize = 20, + color = :green, + ) + else + Makie.scatter!( + ax, + 1:num_cats, + rmse_var[model_name], + label = model_name, + markersize = 20, + color = :red, + ) + end + end + + # Hack to make legend appear better + Makie.axislegend() + Makie.scatter!(ax, [num_cats + 2.5], [0.1], markersize = 0.01) +end + +""" + Visualize.plot_leaderboard!(fig, + rmse_vars::ClimaAnalysis.RMSEVariable...; + ploc = (1, 1), + model_names = ["CliMA"], + best_category_name = "ANN") + +Plot a heatmap over the categories and models. The models that appear is the best model +as found for the category `best_category_name` and any other models in `model_names`. The +root mean squared errors for each variable of interest is normalized by dividing over the +median root mean squared error of each variable. + +The parameter `ploc` determines where to place the plot on the figure. +""" +function Visualize.plot_leaderboard!( + fig, + rmse_vars::ClimaAnalysis.RMSEVariable...; + ploc = (1, 1), + model_names = ["CliMA"], + best_category_name = "ANN", +) + # Check if rmse_model_vars all have the same categories + categories_names = ClimaAnalysis.category_names.(rmse_vars) + categories_same = length(unique(categories_names)) == 1 + categories_same || + error("Categories are not all the same across the RMSEVariable") + + rmse_var = first(rmse_vars) + categ_names = ClimaAnalysis.category_names(rmse_var) + num_variables = length(rmse_vars) + num_boxes = length(categ_names) # number of categories + num_models = 1 + length(model_names) # best model plus the other models in model_names + + # Initialize variables we need for storing RMSEs for plotting and short names for axis + rmse_normalized_arr = zeros(num_boxes * num_models, num_variables) + short_names = String[] + + for (idx, var) in enumerate(reverse(rmse_vars)) + # Get all the short name of the rmse_vars + push!(short_names, var.short_name) + + # Compute median and best values for RMSE + med_vals = ClimaAnalysis.median(var) + best_vals, _ = ClimaAnalysis.find_best_single_model( + var, + category_name = best_category_name, + ) + + # Find normalized values for the models we are interested in and the normalized best + # value and store them + normalized_vals = [var[model] ./ med_vals for model in model_names] + normalized_vals = reduce(vcat, normalized_vals) + rmse_normalized_arr[:, idx] = + vcat(normalized_vals, best_vals ./ med_vals)' + end + + # Finding the midpoint for placing labels + start_x_tick = div(num_boxes, 2, RoundUp) + + ax_bottom_and_left = Makie.Axis( + fig[ploc...], + yticks = (1:length(short_names), short_names), + xticks = ( + [start_x_tick, start_x_tick + num_boxes], + vcat(model_names, ["Best model"]), + ), + aspect = num_boxes * num_models, + xgridvisible = false, + ygridvisible = false, + ) + ax_top = Makie.Axis( + fig[ploc...], + xaxisposition = :top, + xticks = (0.5:1.0:length(categ_names), categ_names), + aspect = num_boxes * num_models, + xgridvisible = false, + ygridvisible = false, + ) + Makie.hidespines!(ax_top) + Makie.hideydecorations!(ax_top) + + colormap = Makie.Reverse(:RdYlGn) + + # Filter out NaNs here because we need to take the maximum and extrema for the + # colorrange and limits + rmse_no_nan_vec = rmse_normalized_arr |> vec |> filter(!isnan) + Makie.heatmap!( + ax_bottom_and_left, + rmse_normalized_arr, + colormap = colormap, + # Trick to exclude the zeros + lowclip = :white, + colorrange = (1e-10, maximum(rmse_no_nan_vec)), + ) + for idx in eachindex(model_names) + Makie.vlines!(ax_top, num_boxes * idx, color = :black, linewidth = 3.0) + end + row, col = ploc + col += 1 + Makie.Colorbar( + fig[row, col], + limits = extrema(rmse_no_nan_vec), + label = "RMSE/median(RMSE)", + colormap = colormap, + ) +end + end diff --git a/src/Visualize.jl b/src/Visualize.jl index a5abc9e..5800e87 100644 --- a/src/Visualize.jl +++ b/src/Visualize.jl @@ -30,4 +30,8 @@ function heatmap2D_on_globe! end function plot_bias_on_globe! end +function plot_boxplot! end + +function plot_leaderboard! end + end diff --git a/test/test_MakieExt.jl b/test/test_MakieExt.jl index f69a302..3159cd9 100644 --- a/test/test_MakieExt.jl +++ b/test/test_MakieExt.jl @@ -177,4 +177,173 @@ using OrderedCollections output_name = joinpath(tmp_dir, "test2D_title.png") Makie.save(output_name, fig) + # Plotting box plot + csv_file_path = joinpath(@__DIR__, "sample_data/test_csv.csv") + rmse_var = ClimaAnalysis.read_rmses(csv_file_path, "ta") + rmse_var = ClimaAnalysis.add_model(rmse_var, "CliMA") + rmse_var["CliMA", :] = [12.0, 12.0, 11.0, 14.0, 6.0] + ClimaAnalysis.add_unit!( + rmse_var, + Dict( + "ACCESS-ESM1-5" => "units", + "ACCESS-CM2" => "units", + "CliMA" => "units", + ), + ) + rmse_var[2, 5] = 4.0 + fig = Makie.Figure(; size = (800, 300 * 3 + 400), fontsize = 20) + ClimaAnalysis.Visualize.plot_boxplot!( + fig, + rmse_var, + model_names = ["CliMA"], + best_and_worst_category_name = "ANN", + ) + ClimaAnalysis.Visualize.plot_boxplot!( + fig, + rmse_var, + model_names = ["CliMA"], + ploc = (2, 1), + best_and_worst_category_name = "ANN", + ) + ClimaAnalysis.Visualize.plot_boxplot!( + fig, + rmse_var, + model_names = ["CliMA", "ACCESS-ESM1-5"], + ploc = (3, 1), + best_and_worst_category_name = "ANN", + ) + output_name = joinpath(tmp_dir, "test_boxplots.png") + Makie.save(output_name, fig) + + # Plotting leaderboard + csv_file_path = joinpath(@__DIR__, "sample_data/test_csv.csv") + rmse_var = ClimaAnalysis.read_rmses(csv_file_path, "ta") + rmse_var = ClimaAnalysis.add_model(rmse_var, "CliMA") + rmse_var[:, :] = [ + [10.0 11.0 12.0 13.0 14.0] + [36.0 37.0 38.0 39.0 30.0] + [11.0 12.0 13.0 14.0 15.0] + ] + ClimaAnalysis.add_unit!( + rmse_var, + Dict( + "ACCESS-ESM1-5" => "units", + "ACCESS-CM2" => "units", + "CliMA" => "units", + ), + ) + + rmse_var1 = ClimaAnalysis.read_rmses(csv_file_path, "ta1") + rmse_var1 = ClimaAnalysis.add_model(rmse_var1, "CliMA") + rmse_var1[:, :] = [ + [6.0 7.0 8.0 9.0 10.0] + [11.0 12.0 13.0 14.0 15.0] + [1.0 2.0 3.0 4.0 5.0] + ] + ClimaAnalysis.add_unit!( + rmse_var1, + Dict( + "ACCESS-ESM1-5" => "units", + "ACCESS-CM2" => "units", + "CliMA" => "units", + ), + ) + + rmse_var2 = ClimaAnalysis.read_rmses(csv_file_path, "ta2") + rmse_var2 = ClimaAnalysis.add_model(rmse_var2, "CliMA") + rmse_var2[:, :] = [ + [0.5 1.0 1.5 2.0 2.5] + [6.0 7.0 8.0 9.0 10.0] + [11.0 12.0 13.0 14.0 15.0] + ] + ClimaAnalysis.add_unit!( + rmse_var2, + Dict( + "ACCESS-ESM1-5" => "units", + "ACCESS-CM2" => "units", + "CliMA" => "units", + ), + ) + + + # Normalized RMSEs should improve going from ta to ta1 to ta2 for CliMA model + # Colors for ta of both models should be similar (close to 1) + # Colors for ta2 of best model should be greener (improve) from ta1 to ta2 + # for CliMA, the normalized RMSEs from greatest to least should be ta1, ta, and ta2 + fig = Makie.Figure(; fontsize = 20) + ClimaAnalysis.Visualize.plot_leaderboard!( + fig, + rmse_var, + rmse_var1, + rmse_var2, + best_category_name = "ANN", + ) + output_name = joinpath(tmp_dir, "test_leaderboard.png") + Makie.save(output_name, fig) + + # Plot box plots and leaderboard in one plot + rmse_vars = (rmse_var, rmse_var1, rmse_var2) + fig = Makie.Figure(; size = (800, 300 * 3 + 400), fontsize = 20) + for i in 1:3 + ClimaAnalysis.Visualize.plot_boxplot!( + fig, + rmse_vars[i], + ploc = (i, 1), + best_and_worst_category_name = "ANN", + ) + end + ClimaAnalysis.Visualize.plot_leaderboard!( + fig, + rmse_vars..., + best_category_name = "ANN", + ploc = (4, 1), + ) + output_name = joinpath(tmp_dir, "test_boxplot_and_leaderboard.png") + Makie.save(output_name, fig) + + # Plotting box plot with NaN + rmse_var = ClimaAnalysis.read_rmses(csv_file_path, "ta") + rmse_var = ClimaAnalysis.add_model(rmse_var, "CliMA") + ClimaAnalysis.add_unit!( + rmse_var, + Dict( + "ACCESS-ESM1-5" => "units", + "ACCESS-CM2" => "units", + "CliMA" => "units", + ), + ) + rmse_var[2, 5] = 10.0 + fig = Makie.Figure(; fontsize = 20) + ClimaAnalysis.Visualize.plot_boxplot!( + fig, + rmse_var, + model_names = ["CliMA"], + best_and_worst_category_name = "ANN", + ) + output_name = joinpath(tmp_dir, "test_boxplot_nan.png") + Makie.save(output_name, fig) + + fig = Makie.Figure(; fontsize = 20) + ClimaAnalysis.Visualize.plot_leaderboard!( + fig, + rmse_var, + model_names = ["CliMA"], + best_category_name = "ANN", + ) + output_name = joinpath(tmp_dir, "test_leaderboard_nan.png") + Makie.save(output_name, fig) + + # Test error handling for plot_leaderboard + csv_file_path = joinpath(@__DIR__, "sample_data/test_csv.csv") + rmse_var1 = ClimaAnalysis.read_rmses(csv_file_path, "ta") + rmse_var1 = ClimaAnalysis.add_category(rmse_var1, "hi") + rmse_var2 = ClimaAnalysis.read_rmses(csv_file_path, "ta") + rmse_var2 = ClimaAnalysis.add_category(rmse_var2, "hello") + @test_throws ErrorException ClimaAnalysis.Visualize.plot_leaderboard!( + fig, + rmse_var1, + rmse_var2, + model_names = ["CliMA"], + best_category_name = "ANN", + ) end