notes.tex

\documentclass[10pt,a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{graphicx}
\usepackage{caption}
\usepackage{subcaption}
\usepackage{epstopdf}
\usepackage{abstract}
\usepackage[toc,page]{appendix}
\usepackage[sort]{cite}


\usepackage{hyperref}


\newcommand{\half}[0]{\frac{1}{2}}
\newcommand{\bvec}[1]{\mathbf{#1}}
\newcommand{\bigO}[2]{\mathcal{O}\left(#1^{#2} \right)}
\newcommand{\dotprod}[2]{ \left<#1 , #2 \right> }

\newcommand{\dd}[0]{ \mathrm{d} }

\title{ Some notes on implicit Runge-Kutta methods }
\author{ Stefan Paquay }
\date{  }

\begin{document}
\maketitle

\section{The idea}
Runge-Kutta methods are multi-stage, single-step methods aimed towards solving (systems of) ODEs.
They work by constructing at each time step approximations to the derivative of the function in between the current time level $t$ and the next $t + \Delta t,$ after which a linear combination of these stages is used to advance the numerical approximation to $y$ to the next level.
That is, if we have $\dd t/ \dd t = f(t,y),$ and $y_n$ is the numerical approximation to $y$ at $t_n = n \Delta t,$ then we have
\begin{equation*}
  k_i = f\left( t + c_i \Delta t, y_n + \Delta t \sum_{j=0}^{N-1} \alpha_{i,j} k_j \right), \qquad y_{n+1} = y_n + \Delta t \sum_{i=0}^{N-1} b_i k_i.
\end{equation*}
The methods can be summarized easily in so-called Butcher tableaus, which conveniently list $b_i,~c_i$ and $\alpha_{i,j}.$ See \ref{tab:butcher} for the Butcher tableau of some methods.

\begin{table}[b]
  \centering
  \caption{Butcher tableaus for some Runge-Kutta methods: The implicit midpoint (left), the classical Runge-Kutta method (center), and the fourth order Gauss-Legendre method (right). \label{tab:butcher}}
  \begin{tabular}{c|c}
    $\half$ & $\half$ \\ \hline
    & 1
  \end{tabular} \quad
  \begin{tabular}{c|cccc}
    0 & & & & \\
    $\half$ & $\half$ & & & \\
    $\half$ & & $\half$ & & \\
    1 & & & 1 & \\ \hline
    {} & $\frac{1}{6}$ & $\frac{2}{6}$ & $\frac{2}{6}$ & $\frac{1}{6}$
  \end{tabular}
  \begin{tabular}{c|cc}
    $\half - \frac{\sqrt{3}}{6}$ & $\frac{1}{4}$ & $\frac{1}{4} - \frac{\sqrt{3}}{6}$ \\
    $\half + \frac{\sqrt{3}}{6}$ & $\frac{1}{4} + \frac{\sqrt{3}}{6}$ & $\frac{1}{4}$ \\ \hline
    {} & $\half$ & $\half$ \\
    {} & $\half + \half\sqrt{3}$ & $\half - \half\sqrt{3}$ \\
  \end{tabular}
\end{table}

\section{The implementation}
A general, implicit Runge-Kutta method (RK method) involves solving a system of non-linear equations every time step.
The exact shape of this system depends on the number of equations in the system of ODEs \emph{and} the number of stages in the method.
If the ODE has $N_{ode}$ equations and the RK method has $N_s$ stages then the total non-linear system has $N_{ode} \times N_s$ equations.
To ease implementation of a general implicit RK method we deduce the general form of the system of equations to solve for a general number of stages and ODEs.
If we denote $\bvec{f}(t,\bvec{y}) = (\bvec{f}_0, \bvec{f}_1, \hdots, \bvec{f}_{N-1})^T$ then we have
\begin{align*}
  \bvec{F} = 
  \begin{pmatrix}
    \bvec{f}\left( t + c_0\Delta t, \bvec{y} + \Delta t \sum_{j=0}^{N_s-1} \alpha_{0,j} \bvec{k}_j \right) - \bvec{k}_0 \\
    \vdots \\
    \bvec{f}\left( t + c_0\Delta t, \bvec{y} + \Delta t \sum_{j=0}^{N_s-1} \alpha_{n,j} \bvec{k}_j \right) - \bvec{k}_n \\
    \vdots \\
        \bvec{f}\left( t + c_0\Delta t, \bvec{y} + \Delta t \sum_{j=0}^{N_s-1} \alpha_{N_s-1,j} \bvec{k}_j \right) - \bvec{k}_{N_s-1}
      \end{pmatrix} =
  \bvec{0}
\end{align*}
To solve such a system we employ Newton iteration.
Although Newton iteration is typically costly, we shall see we only need to evaluate $N_{s}$ evaluations of $\bvec{f}$ and its Jacobi matrix $\bvec{J}.$
To find the general expression of the Jacobi matrix, we will apply a general three-stage method to the following system of ODEs:
\begin{align*}
  \bvec{f}(t, \bvec{y}) = -\lambda \bvec{y}
\end{align*}
Then we have
\begin{align*}
  \bvec{F} = \begin{pmatrix}
    -\lambda \bvec{y} - \lambda \Delta t \left( \alpha_{0,0}\bvec{k}_0 + \alpha_{0,1}\bvec{k}_1 + \alpha_{0,2}\bvec{k}_2  \right) - \bvec{k}_0 \\
    -\lambda \bvec{y} - \lambda \Delta t \left( \alpha_{1,0}\bvec{k}_0 + \alpha_{1,1}\bvec{k}_1 + \alpha_{1,2}\bvec{k}_2  \right) - \bvec{k}_1 \\
    -\lambda \bvec{y} - \lambda \Delta t \left( \alpha_{2,0}\bvec{k}_0 + \alpha_{2,1}\bvec{k}_1 + \alpha_{2,2}\bvec{k}_2  \right) - \bvec{k}_2
\end{pmatrix}\end{align*}
The Jacobi matrix of $\bvec{f}$ is given by $-\lambda \bvec{I},$ so the Jacobi matrix of $F$ with respect to $\bvec{k}_i$ becomes
\begin{equation*}
  \bvec{J}_\bvec{k} = \begin{pmatrix}
    -\lambda \Delta t \alpha_{0,0}\bvec{I} - \bvec{I} & -\lambda \Delta t \alpha_{0,1}\bvec{I} & -\lambda \Delta t \alpha_{0,2}\bvec{I} \\
    -\lambda \Delta t \alpha_{1,0}\bvec{I}  & -\lambda \Delta t \alpha_{1,1}\bvec{I}  - \bvec{I} & -\lambda \Delta t \alpha_{1,2}\bvec{I} \\
    -\lambda \Delta t \alpha_{2,0}\bvec{I} & -\lambda \Delta t \alpha_{2,1}\bvec{I} & -\lambda \Delta t \alpha_{2,2}\bvec{I} - \bvec{I} \\
  \end{pmatrix}
\end{equation*}
Now note that $-\lambda \bvec{I}$ is really the Jacobi matrix of $\bvec{J}$ evaluated at specific points. If we write $\bvec{J}\left( t + c_i \Delta t, \bvec{y}_n + \Delta t \sum_{j=0}^{N_s-1} \alpha_{i,j} \bvec{k}_j \right) := \bvec{J}_{i}$ then we have in general that
\begin{align*}
  \bvec{J}_\bvec{k} =& \Delta t\begin{pmatrix}
      \alpha_{0,0} \bvec{J}_0 &  \alpha_{0,1} \bvec{J}_0 &  \alpha_{0,2} \bvec{J}_0 \\
     \alpha_{1,0} \bvec{J}_1  &  \alpha_{1,1} \bvec{J}_1   &  \alpha_{1,2} \bvec{J}_2 \\
     \alpha_{2,0} \bvec{J}_2 &  \alpha_{2,1} \bvec{J}_2 &  \alpha_{2,2} \bvec{J}_2  \\
   \end{pmatrix} - \bvec{I}
\end{align*}
Note that we only need $N_s$ evaluations of the Jacobi matrix of $f$ and not $N_s^2.$
If we have a more general system of ODEs
\begin{equation*}
  \bvec{f}(t,\bvec{y}) = \begin{pmatrix} f_1( t, \bvec{y} ) \\
    f_2( t, \bvec{y} ) 
  \end{pmatrix}, \qquad \left(\frac{\partial f_k}{\partial z}\right)_i := \frac{ \partial f}{\partial z}\left(t + c_i \Delta t, \bvec{y} + \Delta t \sum_{j=0}^{N_s-1}\alpha_{i,j} \bvec{k}_j\right)
\end{equation*}
with $z = x,y$ and $k = 1,2$ then the total Jacobi matrix system becomes
\begin{align*}
  \bvec{J}_\bvec{k} =& \Delta t\begin{pmatrix}
    \alpha_{0,0} \left( \frac{\partial f_1}{\partial x} \right)_0 & \alpha_{0,0} \left( \frac{\partial f_1}{\partial y} \right)_0 &  \alpha_{0,1} \left( \frac{\partial f_1}{\partial x} \right)_0 & \alpha_{0,1} \left( \frac{\partial f_1}{\partial y} \right)_0 &  \alpha_{0,2} \left( \frac{\partial f_1}{\partial x} \right)_0 & \alpha_{0,2} \left( \frac{\partial f_1}{\partial y} \right)_0 \\
    \alpha_{0,0} \left( \frac{\partial f_2}{\partial x} \right)_0 & \alpha_{0,0} \left( \frac{\partial f_2}{\partial y} \right)_0 &  \alpha_{0,1} \left( \frac{\partial f_2}{\partial x} \right)_0 & \alpha_{0,1} \left( \frac{\partial f_2}{\partial y} \right)_0 &  \alpha_{0,2} \left( \frac{\partial f_2}{\partial x} \right)_0 & \alpha_{0,2} \left( \frac{\partial f_2}{\partial y} \right)_0 \\
    \alpha_{1,0} \left( \frac{\partial f_1}{\partial x} \right)_1 & \alpha_{1,0} \left( \frac{\partial f_1}{\partial y} \right)_1 &  \alpha_{1,1} \left( \frac{\partial f_1}{\partial x} \right)_1 & \alpha_{1,1} \left( \frac{\partial f_1}{\partial y} \right)_1 &  \alpha_{1,2} \left( \frac{\partial f_1}{\partial x} \right)_1 & \alpha_{1,2} \left( \frac{\partial f_1}{\partial y} \right)_1 \\
    \alpha_{1,0} \left( \frac{\partial f_2}{\partial x} \right)_1 & \alpha_{1,0} \left( \frac{\partial f_2}{\partial y} \right)_1 &  \alpha_{1,1} \left( \frac{\partial f_2}{\partial x} \right)_1 & \alpha_{1,1} \left( \frac{\partial f_2}{\partial y} \right)_1 &  \alpha_{1,2} \left( \frac{\partial f_2}{\partial x} \right)_1 & \alpha_{1,2} \left( \frac{\partial f_2}{\partial y} \right)_1 \\
    \alpha_{2,0} \left( \frac{\partial f_1}{\partial x} \right)_2 & \alpha_{2,0} \left( \frac{\partial f_1}{\partial y} \right)_2 &  \alpha_{2,1} \left( \frac{\partial f_1}{\partial x} \right)_2 & \alpha_{2,1} \left( \frac{\partial f_1}{\partial y} \right)_2 &  \alpha_{2,2} \left( \frac{\partial f_1}{\partial x} \right)_2 & \alpha_{2,2} \left( \frac{\partial f_1}{\partial y} \right)_2 \\
    \alpha_{2,0} \left( \frac{\partial f_2}{\partial x} \right)_2 & \alpha_{2,0} \left( \frac{\partial f_2}{\partial y} \right)_2 &  \alpha_{2,1} \left( \frac{\partial f_2}{\partial x} \right)_2 & \alpha_{2,1} \left( \frac{\partial f_2}{\partial y} \right)_2 &  \alpha_{2,2} \left( \frac{\partial f_2}{\partial x} \right)_2 & \alpha_{2,2} \left( \frac{\partial f_2}{\partial y} \right)_2 \\    
   \end{pmatrix} - \bvec{I}
\end{align*}
which, in ``prettier'' notation, is
\begin{equation*}
  \bvec{J}_\bvec{k} = \Delta t \begin{pmatrix}
    \alpha_{0,0}\left( \frac{\partial \bvec{f}}{\partial \bvec{y}} \right)_0 & \alpha_{0,1}\left( \frac{\partial \bvec{f}}{\partial \bvec{y}} \right)_0 & \alpha_{0,2}\left( \frac{\partial \bvec{f}}{\partial \bvec{y}} \right)_0 \\
    \alpha_{1,0}\left( \frac{\partial \bvec{f}}{\partial \bvec{y}} \right)_1 & \alpha_{1,1}\left( \frac{\partial \bvec{f}}{\partial \bvec{y}} \right)_1 & \alpha_{1,2}\left( \frac{\partial \bvec{f}}{\partial \bvec{y}} \right)_1 \\
    \alpha_{2,0}\left( \frac{\partial \bvec{f}}{\partial \bvec{y}} \right)_2 & \alpha_{2,1}\left( \frac{\partial \bvec{f}}{\partial \bvec{y}} \right)_2 & \alpha_{2,2}\left( \frac{\partial \bvec{f}}{\partial \bvec{y}} \right)_2
  \end{pmatrix} - \bvec{I}
\end{equation*}

Since we have now deduced a general form for the non-linear function that defines all the stages as well as its Jacobi matrix, we can use Newton iteration to solve the system of equations.
For explicit methods, we will of course not resort to Newton iteration because the stages can be computed in a single step.

\section{A test case}
A simple way to test the integrators is to apply them to an ODE whose solution is known.
We consider
\begin{equation*}
  \frac{\dd}{\dd t} \bvec{y} = \begin{pmatrix}
    -\alpha & -\omega \\
    \omega & -\alpha \end{pmatrix} \bvec{y}
\end{equation*}
The solution can be composed in terms of the eigenvalues $\lambda_\pm$ of the matrix as
\begin{equation*}
  \bvec{y} = \bvec{C}_1e^{\lambda_+t} + \bvec{C}_2e^{\lambda_-t}, \qquad \lambda_\pm = \left[-\alpha \pm i \omega\right].
\end{equation*}
In other words, we have $\bvec{y} = \left[ \bvec{A}_1\cos(\omega t) + \bvec{A}_2 \sin(\omega t) \right]e^{-\alpha t}.$
To find $\bvec{A}_1$ and $\bvec{A}_2$, we apply the initial value $(x,y) = (1,0)^T$ and find
\begin{equation*}
  \bvec{y} = \left[ \begin{pmatrix} 1 \\ 0 \end{pmatrix} \cos(\omega t) +
  \begin{pmatrix} 0 \\ 1 \end{pmatrix} \sin( \omega t ) \right]e^{-\alpha t}
\end{equation*}

\section{Adaptive time step control and embedding methods}

Given a Runge-Kutta method, it is sometimes possible to find a second set of weights $\hat{\bvec{b}}$ in such a way that the approximation $\hat{\bvec{y}} = $

\section{Stability regions}

For a given Runge-Kutta method, the stability region can be determined by applying the method to a
simple test problem $y' = \lambda y.$
For a general Runge-Kutta method, the stages are implicitly defined as
\begin{align*}
  \begin{pmatrix} \bvec{k}_0 \\
    \bvec{k}_1 \\
    \vdots \\
    \bvec{k}_{N-1} 
  \end{pmatrix}
  =& \lambda \begin{pmatrix} \bvec{y}_0 \\
    \bvec{y}_0 \\
    \vdots \\
    \bvec{y}_0
  \end{pmatrix}
  + \lambda \Delta t \sum_{j=0}^{N-1} \begin{pmatrix}
    \alpha_{0j}\bvec{k}_j \\
    \alpha_{1j}\bvec{k}_j \\
    \vdots \\
    \alpha_{(N-1)j}\bvec{k}_j
  \end{pmatrix} \\
  =& \lambda \begin{pmatrix} \bvec{y}_0 \\
    \bvec{y}_0 \\
    \vdots \\
    \bvec{y}_0
  \end{pmatrix} + \lambda \Delta t \begin{pmatrix}
    \alpha_{00}\bvec{I} &\alpha_{01}\bvec{I} &\hdots &\alpha_{0(N-1)}\bvec{I} \\
    \alpha_{00}\bvec{I} &\alpha_{1j}\bvec{I} &\hdots &\alpha_{1(N-1)}\bvec{I} \\
    \vdots & \vdots & \ddots & \vdots \\
    \alpha_{00}\bvec{I} &\alpha_{(N-1)j}\bvec{I} &\hdots &\alpha_{(N-1)(N-1)}\bvec{I} \\
  \end{pmatrix} \begin{pmatrix}
    \bvec{k}_0 \\
    \bvec{k}_1 \\
    \vdots \\
    \bvec{k}_{N-1} 
  \end{pmatrix}
\end{align*}
and hence are given by
\begin{align*}
  \left[ \bvec{I}_{MN\times MN} - \lambda \Delta t \begin{pmatrix}
    \alpha_{00}\bvec{I} &\alpha_{01}\bvec{I} &\hdots &\alpha_{0(N-1)}\bvec{I} \\
    \alpha_{00}\bvec{I} &\alpha_{1j}\bvec{I} &\hdots &\alpha_{1(N-1)}\bvec{I} \\
    \vdots & \vdots & \ddots & \vdots \\
    \alpha_{00}\bvec{I} &\alpha_{(N-1)j}\bvec{I} &\hdots &\alpha_{(N-1)(N-1)}\bvec{I} \\
  \end{pmatrix}\right] \begin{pmatrix} \bvec{k}_0 \\
    \bvec{k}_1 \\
    \vdots \\
    \bvec{k}_{N-1} 
  \end{pmatrix} = \lambda
  \begin{pmatrix}
    \bvec{y}_0 \\
    \bvec{y}_0 \\
    \vdots \\
    \bvec{y}_0 \\
  \end{pmatrix},
\end{align*}
where $\bvec{I}_{MN\times MN}$ is a $MN$ ny $MN$ identity matrix, as opposed to $\bvec{I}$, which is an $N\times N$ identity matrix, with $M$ the number of equations (1 for our problem) and $N$ the number of stages.
From the solution of this system the new value of $\bvec{y}$ is computed as
\begin{equation*}
  \bvec{y}_1 = \bvec{y}_0 + \Delta t
  \begin{pmatrix}
    \vert & \vert & \hdots & \vert \\
    \bvec{k}_0 & \bvec{k}_1 & \hdots & \bvec{k}_{N-1}\\
        \vert & \vert & \hdots & \vert \\
  \end{pmatrix}\begin{pmatrix}
    b_0 \\
    b_1 \\
    \vdots \\
    b_{N-1}
  \end{pmatrix}
\end{equation*}.
Since we only have one equation, the solution for $\bvec{K} := \left(k_0, k_1, \hdots, k_{N-1}\right)^T$ becomes
\begin{equation*}
  \bvec{K} = \lambda \left[ \bvec{I} - \lambda \Delta t \bvec{A}  \right]^{-1}
  \begin{pmatrix}
    y_0 \\
    y_0 \\
    \vdots \\
    y_0
  \end{pmatrix}
\end{equation*}
and so
\begin{equation*}
  y_1/y_0 = 1 + \lambda \Delta t \left(b_0, b_1, \hdots, b_{N-1}\right)\left[\bvec{I} - \lambda \Delta t \bvec{A}\right]^{-1}\left(1, 1, \hdots, 1\right)^T
\end{equation*}

\section{General linear methods}
A generalization to Runge-Kutta methods is the family of so-called general linear methods (GLM).
They can be thought of as a combination of RK and multistep methods.
A GLM consists of $s$ stages and $r$ history points. For $r=1$ we recover normal Runge-Kutta methods.
The $s$ stage points $Y_i,~0<i<s-1$ and stage derivatives $F_i$ are defined as follows:
\begin{align*}
  Y_i = \sum_{j=0}^{s-1} a_{ij}\Delta t F_j + \sum_{j=0}^{r-1}u_{ij} y_{n-j}, \qquad F_j := f(t+\Delta t c_i, Y_j).
\end{align*}
As one can see, we now have two matrices $\bvec{A} = (a_{ij})$ and $\bvec{U} = (u_{ij})$ that define the stages.
For the update to the numerical solution we similarly have two vectors $\bvec{b}$ and $\bvec{v}$:
\begin{align*}
  y_{n+1} = \sum_{i=0}^{s-1} b_i\Delta t F_i + \sum_{i=0}^{r-1}v_i y_{n-i}.
\end{align*}
Applying a GLM to the test problem $f(t,y) = \lambda y$ leads to the following:
\begin{align*}
  Y_i =& \sum_{j=0}^{s-1} z a_{ij} Y_j + \sum_{j=0}^{r-1} u_{ij}y_{n-j} \\
  y_{n+1} =& \sum_{i=0}^{s-1} z b_i Y_i + \sum_{i=0}^{r-1} v_iy_{n-i},
\end{align*}
where $z:=\lambda \Delta t.$
Applying some linear algebra we can write the stage values the following way:
\begin{align*}
  \bvec{Y} := \begin{pmatrix}
    Y_0 \\
    Y_1 \\
    Y_2 \\
    \vdots \\
    Y_{s-1} 
  \end{pmatrix} = z \bvec{A} \bvec{Y} + \bvec{U}\begin{pmatrix}
    y_n \\
    y_{n-1} \\
    y_{n-2} \\
    \vdots \\
    y_{n-r+1}
  \end{pmatrix},
\end{align*}
which can be compacted into $(\bvec{I} - z \bvec{A})\bvec{Y} = \bvec{U} \bvec{y},$ with $\bvec{y} = (y_n, y_{n-1}, y_{n-2}, \hdots, y_{n-r+1})^T.$
Thus, for the stage points we simply find
\begin{equation*}
  \bvec{Y} = (\bvec{I} - z\bvec{A})^{-1}\bvec{Uy}
\end{equation*}
and hence the update becomes
\begin{equation*}
  \bvec{y}(t_{n+1}) = z \bvec{b} \cdot (\bvec{I} - z\bvec{A})^{-1}\bvec{Uy} + \bvec{v}\cdot\bvec{y}.
\end{equation*}
As a check, consider $\bvec{U}=1$ and $\bvec{v} = 1$ and $r=1,$ in which case we should recover the result for RK methods.
We see that in this case
\begin{equation*}
  \bvec{y}(t_{n+1}) = z \bvec{b} \cdot (\bvec{I} - z\bvec{A})^{-1}\bvec{y}(t_n) + \bvec{y}(t_n).
\end{equation*}
If we have $y(t_n) = 1$ and divide by it, we recover the stability function:
\begin{equation*}
  y(t_{n+1})/y(t_n) = z \bvec{b} \cdot (\bvec{I} - z\bvec{A})^{-1}(1,1,1,\hdots,1)^T + 1,
\end{equation*}
which indeed is the stability function for RK methods.


\end{document}