Basically finished first draft of blueprint!

teorth · Nov 16, 2023 · ab757c1 · ab757c1
1 parent 85f7710
commit ab757c1
Show file tree

Hide file tree

Showing 4 changed files with 95 additions and 15 deletions.
diff --git a/PFR/endgame.lean b/PFR/endgame.lean
@@ -36,3 +36,11 @@ lemma sum_dist_diff_le : 0 = 1 := by sorry
 /-- $U+V+W=0$. -/
 lemma sum_uvw_eq_zero : 0 = 1 := by sorry
 
+/-- If $T_1, T_2, T_3$ are $G$-valued randomv variables with $T_1+T_2+T_3=0$ holds identically and
+$$ \delta := \sum_{1 \leq i < j \leq 3} I[T_i;T_j]$$
+Then there exist random variables $T'_1, T'_2$ such that
+$$ d[T'_1;T'_2] + \eta (d[X_1^0;T'_1] - d[X_1^0;X_1]) + \eta(d[X_2^0;T'_2] - d[X_2^0;X_2]) $$
+is at most
+$$\delta + \frac{\eta}{3} \biggl( \delta + \sum_{i=1}^2 \sum_{j = 1}^3 (d[X^0_i;T_j] - d[X^0_i; X_i]) \biggr).$$
+-/
+lemma construct_good : 0 = 1 := by sorry
diff --git a/blueprint/src/chapter/distance.tex b/blueprint/src/chapter/distance.tex
@@ -4,11 +4,12 @@ \chapter{Ruzsa calculus}
 
 \begin{lemma}[Negation preserves entropy]\label{neg-ent}
   \uses{entropy-def}
-  \lean{ent-of-neg}
+  \lean{entropy-neg}\leanok
   If $X$ is $G$-valued, then $\bbH[-X]=\bbH[X]$.
 \end{lemma}
 
-\begin{proof}\uses{relabeled-entropy} Immediate from Lemma \ref{relabeled-entropy}.
+\begin{proof}
+  \uses{relabeled-entropy}\leanok Immediate from Lemma \ref{relabeled-entropy}.
 \end{proof}
 
 \begin{lemma}[Lower bound of sumset]\label{sumset-lower-gen}
@@ -78,15 +79,15 @@ \chapter{Ruzsa calculus}
 
 \begin{definition}[Ruzsa distance]\label{ruz-dist-def}
   \uses{entropy-def, independent-exist}
-  \lean{dist}
+  \lean{rdist_def}\leanok
   Let $X,Y$ be $G$-valued random variables (not necessarily on the same sample space).  The \emph{Ruzsa distance} $d[X;Y]$ between $X$ and $Y$ is defined to be
 $$ d[X;Y] := H[X' - Y'] - H[X']/2 - H[Y']/2$$
 where $X',Y'$ are (the canonical) independent copies of $X,Y$ from Lemma \ref{independent-exist}.
 \end{definition}
 
 \begin{lemma}[Copy preserves Ruzsa distance]\label{ruz-copy}
   \uses{ruz-dist-def,copy-def}
-  \lean{dist_of_copy}
+  \lean{rdist_of_copy}
   If $X',Y'$ are copies of $X,Y$ respectively then $d[X';Y']=d[X;Y]$.
 \end{lemma}
 
@@ -95,27 +96,27 @@ \chapter{Ruzsa calculus}
 
 \begin{lemma}[Ruzsa distance in independent case]\label{ruz-indep}
   \uses{ruz-dist-def, entropy-def, independent-def}
-  \lean{dist_of_indep}
+  \lean{rdist_of_indep}\leanok
   If $X,Y$ are independent $G$-random variables then
   $$ d[X;Y] := H[X - Y] - H[X]/2 - H[Y]/2.$$
 \end{lemma}
 
-\begin{proof} \uses{relabeled-entropy, copy-ent} Immediate from Definition \ref{ruz-dist-def} and Lemmas \ref{relabeled-entropy}, \ref{copy-ent}.
+\begin{proof} \uses{relabeled-entropy, copy-ent}\leanok Immediate from Definition \ref{ruz-dist-def} and Lemmas \ref{relabeled-entropy}, \ref{copy-ent}.
 \end{proof}
 
 \begin{lemma}[Distance symmetric]\label{ruzsa-symm}
   \uses{ruz-dist-def}
-  \lean{dist_symm}
+  \lean{rdist_symm}\leanok
   If $X,Y$ are $G$-valued random variables, then
   $$ d[X;Y] = d[Y;X].$$
 \end{lemma}
 
-\begin{proof} \uses{neg-ent} Immediate from Lemma \ref{neg-ent} and Definition \ref{ruz-dist-def}.
+\begin{proof} \uses{neg-ent}\leanok Immediate from Lemma \ref{neg-ent} and Definition \ref{ruz-dist-def}.
 \end{proof}
 
 \begin{lemma}[Distance controls entropy difference]\label{ruzsa-diff}
   \uses{entropy-def, ruz-dist-def}
-  \lean{diff_ent_le_dist}
+  \lean{diff_ent_le_rdist}\leanok
   If $X,Y$ are $G$-valued random variables, then
 $$|H[X]-H[Y]| \leq 2 d[X;Y].$$
 \end{lemma}
@@ -125,7 +126,7 @@ \chapter{Ruzsa calculus}
 
 \begin{lemma}[Distance controls entropy growth]\label{ruzsa-growth}
   \uses{ruz-dist-def, entropy-def}
-  \lean{diff_ent_le_dist'}
+  \lean{diff_ent_le_rdist'}
   If $X,Y$ are $G$-valued random variables, then
 $$  \bbH[X-Y] - \bbH[X], \bbH[X-Y] - \bbH[Y] \leq 2d[X;Y].$$
 \end{lemma}
@@ -135,7 +136,7 @@ \chapter{Ruzsa calculus}
 
 \begin{lemma}[Distance nonnegative]\label{ruzsa-nonneg}
   \uses{ruz-dist-def}
-  \lean{ruzsa_dist_nonneg}
+  \lean{rdist_nonneg}\leanok
   If $X,Y$ are $G$-valued random variables, then
   $$ d[X;Y] \geq 0.$$
 \end{lemma}
@@ -144,7 +145,7 @@ \chapter{Ruzsa calculus}
 \end{proof}
 
 \begin{lemma}[Ruzsa triangle inequality]\label{ruzsa-triangle}\uses{ruz-dist-def}
-\lean{Ruzsa-triangle}
+\lean{rdist-triangle}\leanok
   If $X,Y,Z$ are $G$-valued random variables, then
 $$ d[X;Y] \leq d[X;Z] + d[Z;Y].$$
 \end{lemma}

diff --git a/blueprint/src/chapter/entropy.tex b/blueprint/src/chapter/entropy.tex
@@ -23,7 +23,7 @@ \chapter{Shannon entropy inequalities}
   \uses{entropy-def}
   \lean{ProbabilityTheory.entropy_le_log_card}
   \leanok
-  If $X$ is an $S$-valued random variable, then $H[X] \leq \log |X|$.
+  If $X$ is an $S$-valued random variable, then $H[X] \leq \log |S|$.
 \end{lemma}
 
 \begin{proof}\uses{jensen}\leanok

diff --git a/blueprint/src/chapter/pfr-entropy.tex b/blueprint/src/chapter/pfr-entropy.tex
@@ -278,8 +278,55 @@ \subsection{Endgame}
 \begin{proof} Obvious because we are in characteristic two.
 \end{proof}
 
+\begin{lemma}[Constructing good variables]\label{construct-good}
+  \lean{construct_good}
+  Let $G=\F_2^n$ and let $(T_1,T_2,T_3)$ be a $G^3$-valued random variable such that $T_1+T_2+T_3=0$ holds identically. Set
+  \begin{equation}\label{delta-t1t2t3-def}
+    \delta := \sum_{1 \leq i < j \leq 3} I[T_i;T_j].
+  \end{equation}
+  Then there exist random variables $T'_1, T'_2$ such that
+  \begin{align*}  d[T'_1;T'_2] + & \eta (d[X_1^0;T'_1] - d[X_1^0;X_1]) + \eta(d[X_2^0;T'_2] - d[X_2^0;X_2]) \\ & \leq  \delta + \frac{\eta}{3} \biggl( \delta + \sum_{i=1}^2 \sum_{j = 1}^3 (d[X^0_i;T_j] - d[X^0_i; X_i]) \biggr).
+  \end{align*}
+\end{lemma}
+
+\begin{proof}\uses{lem-bsg, cond-dist-fact}
+  We apply Lemma \ref{lem-bsg} with $(A,B) = (T_1, T_2)$ there.
+  Since $T_1 + T_2 = T_3$, the conclusion is that
+  \begin{align} \nonumber \sum_{t_3} P[T_3 = t_3] & d[(T_1 | T_3 = t_3); (T_2 | T_3 = t_3)] \\ & \leq 3 I[{T_1 : T_2] + 2 H[T_3] - H[T_1] - H[T_2].\label{bsg-t1t2}\end{align}
+  The right-hand side in~\eqref{bsg-t1t2} can be rearranged as
+  \begin{align*} & 2( H[T_1] + H[T_2] + H[T_3]) - 3 H[T_1,T_2] \\ & = 2(H[T_1] + H[T_2] + H[T_3]) - H[T_1,T_2] - H[T_2,T_3] - H[T_1, T_3] = \delta,\end{align*}
+  using the fact (from Lemma \ref{relabeled-entropy}) that all three terms $H[T_i,T_j]$ are equal to $H[T_1,T_2,T_3]$ and hence to each other.
+  We also have
+  \begin{align*}
+  &  \sum_{t_3} P[T_3 = t_3] \bigl(d[X^0_1; (T_1 | T_3=t_3)] - d[X^0_1;X_1]\bigr) \\
+  &\quad = d[X^0_1; T_1 | T_3] - d[X^0_1;X_1] \leq d[X^0_1;T_1] - d[X^0_1;X_1] + \tfrac{1}{2} I[T_1 : T_3]
+  \end{align*}
+  by Lemma \ref{cond-dist-fact}, and similarly
+  \begin{align*}
+  &  \sum_{t_3} P[T_3 = t_3] (d[X^0_2;(T_2 | T_3=t_3)] - d[X^0_2; X_2]) \\
+  &\quad\quad\quad\quad\quad\quad \leq d[X^0_2;T_2] - d[X^0_2;X_2] + \tfrac{1}{2} I[T_2 : T_3].
+  \end{align*}
+  Temporarily define
+  \[ \psi[Y_1; Y_2] := d[Y_1;Y_2] +  \eta (d[X_1^0;Y_1] - d[X_1^0;X_1]) + \eta(d[X_2^0;Y_2] - d[X_2^0;X_2]).\]
+
+  Putting the above observations together, we have
+  \begin{align*}
+   \sum_{t_3}p P[T_3=t_3] \psi[(T_1 | T_3=t_3); (T_2 | T_3=t_3)] \leq \delta + \eta (d[X^0_1;T_1]-d[X^0_1;X_1]) \\
+     + \eta (d[X^0_2;T_2]-d[X^0_2;X_2]) + \tfrac12 \eta I[T_1:T_3] + \tfrac12 \eta I[T_2:T_3].
+   \end{align*}
+   Choosing some $t_3$ in the support of $T_3$ that minimizes the $\psi[-;-]$ value, and setting $T'_{1,3} \coloneqq (T_1 | T_3 = t_3)$, $T'_{2,3} \coloneqq (T_2 | T_3 = t_3)$, we have
+  \begin{align}\nonumber
+   \psi[T_{1,3}';T_{2,3}'] \leq \delta + \eta (& d[X^0_1;T_1]-d[X^0_1;X_1])
+      + \eta (d[X^0_2;T_2]-d[X^0_2;X_2]) \\ & + \tfrac12 \eta I[T_1:T_3] + \tfrac12 \eta I[T_2:T_3].
+     \label{eq:t1323}
+  \end{align}
+  We now repeat this analysis for all permutations of $\{T_1,T_2,T_3\}$ to get variables $T'_{\alpha,\gamma}, T'_{\beta,\gamma}$ for $\{\alpha,\beta,\gamma\}$ ranging over all six permutations of $\{1,2,3\}$.
+  Averaging the resulting inequalities~\eqref{eq:t1323}, and recalling the definition~\eqref{delta-t1t2t3-def} of $\delta$, we get
+  \[ \tfrac16 \sum_{\alpha,\beta,\gamma} \psi[T_{\alpha,\gamma}';T_{\beta,\gamma}']  \leq  \delta + \frac{\eta}{3} \biggl( \delta + \sum_{i=1}^2 \sum_{j = 1}^3 (d[X^0_i;T_j]-d[X^0_i;X_i]) \biggr),
+  \]
+  from which the result follows (taking $T'_1,T'_2$ to be $T'_{\alpha,\gamma},T'_{\beta,\gamma}$ for some $(\alpha,\beta,\gamma)$ that gives at most the average value).
+\end{proof}
 
-...
 
 
 
@@ -291,7 +338,31 @@ \subsection{Endgame}
 $$
 \end{theorem}
 
-\begin{proof}  Sorry!
+\begin{proof}
+  \uses{construct-good, cond-distance-lower, uvw-sum, uvw-s, total-dist, first-est}
+  We take contrapostives and assume that $X_1,X_2$ are tau-minimizers, and wish to show that $k := d[X_1;X_2]$ vanishes.  Applying Lemma \ref{construct-good} with any random variables $(T_1,T_2,T_3)$ such that $T_1+T_2+T_3=0$ holds identically, and applying Lemma \ref{cond-distance-lower} with $X'_1 = T'_1$, $X'_2 = T'_2$, we deduce that
+\[
+  k \leq \delta + \frac{\eta}{3} \biggl( \delta + \sum_{i=1}^2 \sum_{j = 1}^3 (d[X^0_1;T_j] - d[X^0_i;X_i]) \biggr).
+\]
+Note that $\delta$ is still defined by~\eqref{delta-t1t2t3-def} and thus depends on $T_1,T_2,T_3$.
+In particular we may apply this for
+\[
+  T_1 = (U | S = s),\qquad T_2 = (V | S = s), \qquad T_3 = (W | S = s)
+\]
+for $s$ in the range of $S$ (which is a valid choice by Lemma \ref{uvw-sum}) and then average over $s$ with weights $p_S(s)$, to obtain
+\[ k \leq \tilde \delta + \frac{\eta}{3} \biggl( \tilde \delta + \sum_{i=1}^2 \sum_{A\in\{U,V,W\}} \bigl(  d[X^0_i;A|S] - d[X^0_i;X_i]\bigr) \biggr),\]
+where
+\[
+  \tilde \delta \coloneqq  I[U : V | S] + I[V : W | S] + I[W : U | S].
+\]
+Putting this together with Lemma \ref{uvw-s} and Lemma \ref{total-dist}, we conclude that
+\begin{align*}
+  k &\leq \Bigl(1+\frac{\eta}{3}\Bigr)\Bigl(6\eta k-\frac{1-5\eta}{1-\eta}(2\eta k-I_1)\Bigr)+\frac{\eta}{3}\Bigl((6-3\eta)k+3(2\eta k-I_1)\Bigr)\\
+  &= (8\eta + \eta^2) k - \biggl( \frac{1 - 5 \eta}{1-\eta}\Bigl(1 + \frac{\eta}{3}\Bigr) -  \eta \biggr)(2 \eta k - I_1)\\
+  &\leq (8 \eta + \eta^2) k
+ \end{align*}
+since the quantity $2 \eta k - I_1$ is non-negative (by Lmma \ref{first-est}), and its coefficient in the above expression is non-positive provided that $\eta(2\eta + 17) \le 3$, which is certainly the case for our choice $\eta = \frac{1}{9}$ (and in fact for any $\eta \leq \frac{1}{6}$).
+Moreover, for\footnote{In fact we can take any $\eta<\frac{1}{4 + \sqrt{17}} = \frac{1}{8.1231\dots}$, and the other constants in the paper can be improved accordingly.} $\eta=\tfrac{1}{9}$ we have $8 \eta + \eta^2 < 1$. It follows that $k=0$, as desired.
 \end{proof}