From 232734dd86a27611b8ce9da52015a52700e25a73 Mon Sep 17 00:00:00 2001
From: odow <o.dowson@gmail.com>
Date: Thu, 15 Jun 2023 09:24:54 +1200
Subject: [PATCH 1/5] WIP: add quadratic regularization for deterministic
 first-stage

---
 docs/src/tutorial/regularization.jl | 40 +++++++++++++++++++
 src/plugins/forward_passes.jl       | 61 +++++++++++++++++++++++++++++
 2 files changed, 101 insertions(+)
 create mode 100644 docs/src/tutorial/regularization.jl

diff --git a/docs/src/tutorial/regularization.jl b/docs/src/tutorial/regularization.jl
new file mode 100644
index 000000000..973c46942
--- /dev/null
+++ b/docs/src/tutorial/regularization.jl
@@ -0,0 +1,40 @@
+using SDDP, Gurobi
+
+function main(capacity_cost)
+    graph = SDDP.LinearGraph(2)
+    SDDP.add_edge(graph, 2 => 2, 0.95)
+    model = SDDP.PolicyGraph(
+        graph;
+        sense = :Min,
+        lower_bound = 0.0,
+        optimizer = Gurobi.Optimizer,
+    ) do sp, node
+        @variable(sp, 0 <= x_capacity <= 400, SDDP.State, initial_value = 0)
+        @variable(sp, 0 <= x_prod, SDDP.State, initial_value = 0)
+        if node == 1
+            @stageobjective(sp, capacity_cost * x_capacity.out)
+            @constraint(sp, x_prod.out == x_prod.in)
+        else
+            @variable(sp, 0 <= u_prod <= 200)
+            @variable(sp, u_overtime >= 0)
+            @stageobjective(sp, 100u_prod + 300u_overtime + 50x_prod.out)
+            @constraint(sp, x_capacity.out == x_capacity.in)
+            @constraint(sp, x_prod.out <= x_capacity.in)
+            @constraint(sp, c_bal, x_prod.out == x_prod.in + u_prod + u_overtime)
+            SDDP.parameterize(sp,  [100, 300]) do ω
+                JuMP.set_normalized_rhs(c_bal, -ω)
+            end
+        end
+        return
+    end
+    SDDP.train(
+        model;
+        forward_pass = SDDP.RegularizedForwardPass(),
+    )
+    results = SDDP.simulate(model, 1, [:x_capacity])
+    return results[1][1][:x_capacity].out
+end
+
+# results = main.([0, 100, 200, 300, 400])
+
+main(100)
diff --git a/src/plugins/forward_passes.jl b/src/plugins/forward_passes.jl
index 61e221793..d14fab2e5 100644
--- a/src/plugins/forward_passes.jl
+++ b/src/plugins/forward_passes.jl
@@ -290,3 +290,64 @@ function forward_pass(
     )
     return pass
 end
+
+mutable struct RegularizedForwardPass{T<:AbstractForwardPass} <: AbstractForwardPass
+    forward_pass::T
+    trial_centre::Dict{Symbol,Float64}
+    penalty::Union{Nothing,Float64}
+    ρ::Float64
+
+    function RegularizedForwardPass(;
+        rho::Float64 = 0.97,
+        forward_pass::AbstractForwardPass = DefaultForwardPass(),
+    )
+        centre = Dict{Symbol,Float64}()
+        return new{typeof(forward_pass)}(forward_pass, centre, nothing, rho)
+    end
+end
+
+function forward_pass(
+    model::PolicyGraph,
+    options::Options,
+    fp::RegularizedForwardPass,
+)
+    if length(model.root_children) != 1
+        error("RegularizedForwardPass cannot be applied")
+    end
+    node = model[model.root_children[1].term]
+    if length(node.noise_terms) > 1
+        error("RegularizedForwardPass cannot be applied")
+    end
+    if isempty(fp.trial_centre)
+        for (k, v) in model.initial_root_state
+            fp.trial_centre[k] = v
+        end
+    end
+    penalty(x, y::Float64) = (x - y)^2 / max(1.0, abs(y))
+    @show fp.penalty
+    penalty_obj = @expression(
+        node.subproblem,
+        sum(
+            something(fp.penalty, 1) * penalty(x.out, fp.trial_centre[k]) for
+            (k, x) in node.states
+        ),
+    )
+    original_objective = node.stage_objective
+    set_stage_objective(node.subproblem, node.stage_objective + penalty_obj)
+    pass = forward_pass(model, options, fp.forward_pass)
+    divergence = sum(
+        penalty(pass.sampled_states[1][k], fp.trial_centre[k]) for
+        (k, x) in node.states
+    )
+    if fp.penalty === nothing
+        N = length(pass.scenario_path)
+        fp.penalty = pass.cumulative_value / max(1, divergence) / N
+    end
+    @show fp.penalty * divergence
+    for k in keys(fp.trial_centre)
+        fp.trial_centre[k] = pass.sampled_states[1][k]
+    end
+    fp.penalty *= fp.ρ
+    set_stage_objective(node.subproblem, original_objective)
+    return pass
+end

From 49636e7aa6737ed93fe9b63458916d86413c927c Mon Sep 17 00:00:00 2001
From: odow <o.dowson@gmail.com>
Date: Thu, 15 Jun 2023 17:16:42 +1200
Subject: [PATCH 2/5] Update

---
 docs/src/apireference.md            |  1 +
 docs/src/tutorial/regularization.jl | 40 ---------------
 src/plugins/forward_passes.jl       | 75 +++++++++++++++++------------
 test/plugins/forward_passes.jl      | 59 +++++++++++++++++++++++
 4 files changed, 104 insertions(+), 71 deletions(-)
 delete mode 100644 docs/src/tutorial/regularization.jl

diff --git a/docs/src/apireference.md b/docs/src/apireference.md
index 26ed0823e..9008286bd 100644
--- a/docs/src/apireference.md
+++ b/docs/src/apireference.md
@@ -79,6 +79,7 @@ SDDP.RevisitingForwardPass
 SDDP.RiskAdjustedForwardPass
 SDDP.AlternativeForwardPass
 SDDP.AlternativePostIterationCallback
+SDDP.RegularizedForwardPass
 ```
 
 ### Risk Measures
diff --git a/docs/src/tutorial/regularization.jl b/docs/src/tutorial/regularization.jl
deleted file mode 100644
index 973c46942..000000000
--- a/docs/src/tutorial/regularization.jl
+++ /dev/null
@@ -1,40 +0,0 @@
-using SDDP, Gurobi
-
-function main(capacity_cost)
-    graph = SDDP.LinearGraph(2)
-    SDDP.add_edge(graph, 2 => 2, 0.95)
-    model = SDDP.PolicyGraph(
-        graph;
-        sense = :Min,
-        lower_bound = 0.0,
-        optimizer = Gurobi.Optimizer,
-    ) do sp, node
-        @variable(sp, 0 <= x_capacity <= 400, SDDP.State, initial_value = 0)
-        @variable(sp, 0 <= x_prod, SDDP.State, initial_value = 0)
-        if node == 1
-            @stageobjective(sp, capacity_cost * x_capacity.out)
-            @constraint(sp, x_prod.out == x_prod.in)
-        else
-            @variable(sp, 0 <= u_prod <= 200)
-            @variable(sp, u_overtime >= 0)
-            @stageobjective(sp, 100u_prod + 300u_overtime + 50x_prod.out)
-            @constraint(sp, x_capacity.out == x_capacity.in)
-            @constraint(sp, x_prod.out <= x_capacity.in)
-            @constraint(sp, c_bal, x_prod.out == x_prod.in + u_prod + u_overtime)
-            SDDP.parameterize(sp,  [100, 300]) do ω
-                JuMP.set_normalized_rhs(c_bal, -ω)
-            end
-        end
-        return
-    end
-    SDDP.train(
-        model;
-        forward_pass = SDDP.RegularizedForwardPass(),
-    )
-    results = SDDP.simulate(model, 1, [:x_capacity])
-    return results[1][1][:x_capacity].out
-end
-
-# results = main.([0, 100, 200, 300, 400])
-
-main(100)
diff --git a/src/plugins/forward_passes.jl b/src/plugins/forward_passes.jl
index d14fab2e5..668f1dffe 100644
--- a/src/plugins/forward_passes.jl
+++ b/src/plugins/forward_passes.jl
@@ -291,18 +291,41 @@ function forward_pass(
     return pass
 end
 
+"""
+    RegularizedForwardPass(;
+        rho::Float64 = 0.05,
+        forward_pass::AbstractForwardPass = DefaultForwardPass(),
+    )
+
+A forward pass that regularizes the outgoing first-stage state variables with an
+L-infty trust-region constraint about the previous iteration's solution.
+Specifically, the bounds of the outgoing state variable `x` are updated from
+`(l, u)` to `max(l, x^k - rho * (u - l)) <= x <= min(u, x^k + rho * (u - l))`,
+where `x^k` is the optimal solution of `x` in the previous iteration. On the
+first iteration, the value of the state at the root node is used.
+
+By default, `rho` is set to 5%, which seems to work well empirically.
+
+Pass a different `forward_pass` to control the forward pass within the
+regularized forward pass.
+
+This forward pass is largely intended to be used for investment problems in
+which the first stage makes a series of capacity decisions that then influence
+the rest of the graph. An error is thrown if the first stage problem is not
+deterministic, and states are silently skipped if they do not have finite
+bounds.
+"""
 mutable struct RegularizedForwardPass{T<:AbstractForwardPass} <: AbstractForwardPass
     forward_pass::T
     trial_centre::Dict{Symbol,Float64}
-    penalty::Union{Nothing,Float64}
     ρ::Float64
 
     function RegularizedForwardPass(;
-        rho::Float64 = 0.97,
+        rho::Float64 = 0.05,
         forward_pass::AbstractForwardPass = DefaultForwardPass(),
     )
         centre = Dict{Symbol,Float64}()
-        return new{typeof(forward_pass)}(forward_pass, centre, nothing, rho)
+        return new{typeof(forward_pass)}(forward_pass, centre, rho)
     end
 end
 
@@ -312,42 +335,32 @@ function forward_pass(
     fp::RegularizedForwardPass,
 )
     if length(model.root_children) != 1
-        error("RegularizedForwardPass cannot be applied")
+        error(
+            "RegularizedForwardPass cannot be applied because first-stage is " *
+            "not deterministic",
+        )
     end
     node = model[model.root_children[1].term]
     if length(node.noise_terms) > 1
-        error("RegularizedForwardPass cannot be applied")
+        error(
+            "RegularizedForwardPass cannot be applied because first-stage is " *
+            "not deterministic",
+        )
     end
-    if isempty(fp.trial_centre)
-        for (k, v) in model.initial_root_state
-            fp.trial_centre[k] = v
+    old_bounds = Dict{Symbol,Tuple{Float64,Float64}}()
+    for (k, v) in node.states
+        if has_lower_bound(v.out) && has_upper_bound(v.out)
+            old_bounds[k] = (l, u) = (lower_bound(v.out), upper_bound(v.out))
+            x = get(fp.trial_centre, k, model.initial_root_state[k])
+            set_lower_bound(v.out, max(l, x - fp.ρ * (u - l)))
+            set_upper_bound(v.out, min(u, x + fp.ρ * (u - l)))
         end
     end
-    penalty(x, y::Float64) = (x - y)^2 / max(1.0, abs(y))
-    @show fp.penalty
-    penalty_obj = @expression(
-        node.subproblem,
-        sum(
-            something(fp.penalty, 1) * penalty(x.out, fp.trial_centre[k]) for
-            (k, x) in node.states
-        ),
-    )
-    original_objective = node.stage_objective
-    set_stage_objective(node.subproblem, node.stage_objective + penalty_obj)
     pass = forward_pass(model, options, fp.forward_pass)
-    divergence = sum(
-        penalty(pass.sampled_states[1][k], fp.trial_centre[k]) for
-        (k, x) in node.states
-    )
-    if fp.penalty === nothing
-        N = length(pass.scenario_path)
-        fp.penalty = pass.cumulative_value / max(1, divergence) / N
-    end
-    @show fp.penalty * divergence
-    for k in keys(fp.trial_centre)
+    for (k, (l, u)) in old_bounds
         fp.trial_centre[k] = pass.sampled_states[1][k]
+        set_lower_bound(node.states[k].out, l)
+        set_upper_bound(node.states[k].out, u)
     end
-    fp.penalty *= fp.ρ
-    set_stage_objective(node.subproblem, original_objective)
     return pass
 end
diff --git a/test/plugins/forward_passes.jl b/test/plugins/forward_passes.jl
index 74f98e40d..8563934b3 100644
--- a/test/plugins/forward_passes.jl
+++ b/test/plugins/forward_passes.jl
@@ -8,6 +8,7 @@ module TestForwardPasses
 using SDDP
 using Test
 import HiGHS
+import Random
 
 function runtests()
     for name in names(@__MODULE__, all = true)
@@ -222,6 +223,64 @@ function test_DefaultForwardPass_acyclic_include_last_node()
     return
 end
 
+function test_RegularizedForwardPass()
+    function main(capacity_cost, forward_pass, hint)
+        Random.seed!(1245)
+        graph = SDDP.LinearGraph(2)
+        SDDP.add_edge(graph, 2 => 2, 0.95)
+        model = SDDP.PolicyGraph(
+            graph;
+            sense = :Min,
+            lower_bound = 0.0,
+            optimizer = HiGHS.Optimizer,
+        ) do sp, node
+            @variable(
+                sp,
+                0 <= x_capacity <= 400,
+                SDDP.State,
+                initial_value = hint,
+            )
+            @variable(sp, 0 <= x_prod, SDDP.State, initial_value = 0)
+            if node == 1
+                @stageobjective(sp, capacity_cost * x_capacity.out)
+                @constraint(sp, x_prod.out == x_prod.in)
+            else
+                @variable(sp, 0 <= u_prod <= 200)
+                @variable(sp, u_overtime >= 0)
+                @stageobjective(sp, 100u_prod + 300u_overtime + 50x_prod.out)
+                @constraint(sp, x_capacity.out == x_capacity.in)
+                @constraint(sp, x_prod.out <= x_capacity.in)
+                @constraint(sp, c_bal, x_prod.out == x_prod.in + u_prod + u_overtime)
+                SDDP.parameterize(sp,  [100, 300]) do ω
+                    set_normalized_rhs(c_bal, -ω)
+                end
+            end
+            return
+        end
+        SDDP.train(
+            model;
+            print_level = 0,
+            forward_pass = forward_pass,
+        )
+        results = SDDP.simulate(model, 1, [:x_capacity])
+        log = model.most_recent_training_results.log
+        return results[1][1][:x_capacity].out, length(log)
+    end
+    for (cost, hint) in [(0, 400), (200, 100), (400, 0)]
+        fp = SDDP.RegularizedForwardPass()
+        reg_capacity, reg_num_iterations = main(cost, fp, hint)
+        capacity, num_iterations = main(cost, SDDP.DefaultForwardPass(), hint)
+        @test isapprox(reg_capacity, capacity; atol = 1e-2)
+        @test reg_num_iterations <= num_iterations
+    end
+    fp = SDDP.RegularizedForwardPass()
+    reg_capacity, reg_num_iterations = main(0, fp, 0)
+    capacity, num_iterations = main(0, SDDP.DefaultForwardPass(), 0)
+    @test isapprox(reg_capacity, capacity; atol = 1e-2)
+    @test reg_num_iterations > num_iterations
+    return
+end
+
 end  # module
 
 TestForwardPasses.runtests()

From 3056339e16047830ea22556fe8bbc883945e31d8 Mon Sep 17 00:00:00 2001
From: odow <o.dowson@gmail.com>
Date: Thu, 15 Jun 2023 17:21:49 +1200
Subject: [PATCH 3/5] Update

---
 test/plugins/forward_passes.jl | 34 +++++++++++++---------------------
 1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/test/plugins/forward_passes.jl b/test/plugins/forward_passes.jl
index 8563934b3..7bfd9df1e 100644
--- a/test/plugins/forward_passes.jl
+++ b/test/plugins/forward_passes.jl
@@ -234,37 +234,29 @@ function test_RegularizedForwardPass()
             lower_bound = 0.0,
             optimizer = HiGHS.Optimizer,
         ) do sp, node
-            @variable(
-                sp,
-                0 <= x_capacity <= 400,
-                SDDP.State,
-                initial_value = hint,
-            )
-            @variable(sp, 0 <= x_prod, SDDP.State, initial_value = 0)
+            @variable(sp, 0 <= x <= 400, SDDP.State, initial_value = hint)
+            @variable(sp, 0 <= y, SDDP.State, initial_value = 0)
             if node == 1
-                @stageobjective(sp, capacity_cost * x_capacity.out)
-                @constraint(sp, x_prod.out == x_prod.in)
+                @stageobjective(sp, capacity_cost * x.out)
+                @constraint(sp, y.out == y.in)
             else
                 @variable(sp, 0 <= u_prod <= 200)
                 @variable(sp, u_overtime >= 0)
-                @stageobjective(sp, 100u_prod + 300u_overtime + 50x_prod.out)
-                @constraint(sp, x_capacity.out == x_capacity.in)
-                @constraint(sp, x_prod.out <= x_capacity.in)
-                @constraint(sp, c_bal, x_prod.out == x_prod.in + u_prod + u_overtime)
-                SDDP.parameterize(sp,  [100, 300]) do ω
+                @stageobjective(sp, 100u_prod + 300u_overtime + 50y.out)
+                @constraint(sp, x.out == x.in)
+                @constraint(sp, y.out <= x.in)
+                @constraint(sp, c_bal, y.out == y.in + u_prod + u_overtime)
+                SDDP.parameterize(sp, [100, 300]) do ω
                     set_normalized_rhs(c_bal, -ω)
+                    return
                 end
             end
             return
         end
-        SDDP.train(
-            model;
-            print_level = 0,
-            forward_pass = forward_pass,
-        )
-        results = SDDP.simulate(model, 1, [:x_capacity])
+        SDDP.train(model; print_level = 0, forward_pass = forward_pass)
+        results = SDDP.simulate(model, 1, [:x])
         log = model.most_recent_training_results.log
-        return results[1][1][:x_capacity].out, length(log)
+        return results[1][1][:x].out, length(log)
     end
     for (cost, hint) in [(0, 400), (200, 100), (400, 0)]
         fp = SDDP.RegularizedForwardPass()

From f9e7a64b03e9be007fd58cf2c72ec509d0395a8d Mon Sep 17 00:00:00 2001
From: odow <o.dowson@gmail.com>
Date: Thu, 15 Jun 2023 17:22:19 +1200
Subject: [PATCH 4/5] Update

---
 src/plugins/forward_passes.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/plugins/forward_passes.jl b/src/plugins/forward_passes.jl
index 668f1dffe..a62db0f5c 100644
--- a/src/plugins/forward_passes.jl
+++ b/src/plugins/forward_passes.jl
@@ -315,7 +315,8 @@ the rest of the graph. An error is thrown if the first stage problem is not
 deterministic, and states are silently skipped if they do not have finite
 bounds.
 """
-mutable struct RegularizedForwardPass{T<:AbstractForwardPass} <: AbstractForwardPass
+mutable struct RegularizedForwardPass{T<:AbstractForwardPass} <:
+               AbstractForwardPass
     forward_pass::T
     trial_centre::Dict{Symbol,Float64}
     ρ::Float64

From 15d1f2a2c3303b848b98927fedd3028f23de3251 Mon Sep 17 00:00:00 2001
From: odow <o.dowson@gmail.com>
Date: Fri, 30 Jun 2023 15:41:11 +1200
Subject: [PATCH 5/5] Update

---
 test/plugins/forward_passes.jl | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/test/plugins/forward_passes.jl b/test/plugins/forward_passes.jl
index 7bfd9df1e..5b89353c9 100644
--- a/test/plugins/forward_passes.jl
+++ b/test/plugins/forward_passes.jl
@@ -253,23 +253,25 @@ function test_RegularizedForwardPass()
             end
             return
         end
-        SDDP.train(model; print_level = 0, forward_pass = forward_pass)
-        results = SDDP.simulate(model, 1, [:x])
-        log = model.most_recent_training_results.log
-        return results[1][1][:x].out, length(log)
+        SDDP.train(
+            model;
+            print_level = 0,
+            forward_pass = forward_pass,
+            iteration_limit = 10,
+        )
+        return SDDP.calculate_bound(model)
     end
     for (cost, hint) in [(0, 400), (200, 100), (400, 0)]
         fp = SDDP.RegularizedForwardPass()
-        reg_capacity, reg_num_iterations = main(cost, fp, hint)
-        capacity, num_iterations = main(cost, SDDP.DefaultForwardPass(), hint)
-        @test isapprox(reg_capacity, capacity; atol = 1e-2)
-        @test reg_num_iterations <= num_iterations
+        reg_bound = main(cost, fp, hint)
+        bound = main(cost, SDDP.DefaultForwardPass(), hint)
+        @test reg_bound >= bound - 1e-6
     end
+    # Test that initializingn with a bad guess performs poorly
     fp = SDDP.RegularizedForwardPass()
-    reg_capacity, reg_num_iterations = main(0, fp, 0)
-    capacity, num_iterations = main(0, SDDP.DefaultForwardPass(), 0)
-    @test isapprox(reg_capacity, capacity; atol = 1e-2)
-    @test reg_num_iterations > num_iterations
+    reg_bound = main(400, fp, 400)
+    bound = main(400, SDDP.DefaultForwardPass(), 0)
+    @test reg_bound < bound
     return
 end