From bb51fcf897a62bb5b19688fb3ce304e69a9810f9 Mon Sep 17 00:00:00 2001 From: Meven Date: Fri, 15 Apr 2022 17:41:31 +0200 Subject: [PATCH] =?UTF-8?q?nits=20before=20release=20(including=20Martin?= =?UTF-8?q?=E2=80=99s=20proofreading)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Manuscript/biblio.bib | 2 +- Manuscript/bidir-ccw.tex | 8 +-- Manuscript/bidir-intro.tex | 2 +- Manuscript/bidir-pcuic.tex | 4 +- Manuscript/gradual-dependent.tex | 85 +++++++++++++++------------ Manuscript/gradual-elab.tex | 32 +++++----- Manuscript/intro-en.tex | 24 ++++---- Manuscript/intro-fr.tex | 19 +++--- Manuscript/kaobook | 2 +- Manuscript/knowledge_declarations.tex | 25 +++++--- Manuscript/main.tex | 6 +- Manuscript/metacoq-intro.tex | 2 +- Manuscript/technical-intro.tex | 15 +++-- 13 files changed, 130 insertions(+), 96 deletions(-) diff --git a/Manuscript/biblio.bib b/Manuscript/biblio.bib index 745c076..ae3b09b 100644 --- a/Manuscript/biblio.bib +++ b/Manuscript/biblio.bib @@ -3457,7 +3457,7 @@ @InProceedings{Nguyen2019 } @Online{CoqManual, - author = {{The Coq Development Team}}, + author = {{Coq Development Team}, The}, date = {2022-01}, title = {The Coq proof assistant reference manual}, url = {https://coq.inria.fr/refman/}, diff --git a/Manuscript/bidir-ccw.tex b/Manuscript/bidir-ccw.tex index 0ca10eb..b809c02 100644 --- a/Manuscript/bidir-ccw.tex +++ b/Manuscript/bidir-ccw.tex @@ -236,7 +236,7 @@ \subsection{Constrained inference in disguise} and the other judgements are inlined. Once again, reduction is not enough to perform constrained inference, this time because type constructors can be hidden in subsets: -a type such as $\{f : \Nat \to \Nat \mid f\ 0 = 0 \}$ +an inhabitant of a type such as $\{f : \Nat \to \Nat \mid f\ 0 = 0 \}$ should be usable as a function of type $\Nat \to \Nat$. An erasure procedure is therefore required on top of reduction to remove subsets in the places where we use constrained inference. @@ -270,8 +270,8 @@ \section{Properties of the Bidirectional System} The first two relate it to the undirected one: it is both \kl(bidir){correct} – terms typeable in the bidirectional system are typeable in the undirected system – and \kl(bidir){complete} – all terms typeable in the undirected system are also typeable in the bidirectional system. Next, we investigate \kl{uniqueness of types}, and its relation to the choice of a strategy for reduction. -Finally, we show how \kl{strengthening} can be shown for undirected \kl{CCω} by proving it on the -directed side. +Finally, we explain how \kl{strengthening} can be shown for undirected \kl{CCω} +by proving it on the bidirectional side. \subsection{Correctness} @@ -480,7 +480,7 @@ \subsection{Uniqueness} \end{theorem} \begin{proof} - Since $\Gamma \vdash t \ty T'$, then by correctness + Since $\Gamma \vdash t \ty T$, by correctness there exists some $T'$ such that $\inferty{\Gamma}{t}{T'}$ and moreover $T' \conv T$. Similarly, there exists some $S'$ such that $\inferty{\Gamma}{t}{S'}$ and moreover $S' \conv S$.
diff --git a/Manuscript/bidir-intro.tex b/Manuscript/bidir-intro.tex index f0f9cbf..b979dd0 100644 --- a/Manuscript/bidir-intro.tex +++ b/Manuscript/bidir-intro.tex @@ -47,7 +47,7 @@ In the context of \kl{Church-style} abstraction, the closest there is to a description of bidirectional typing for \kl{CIC} is probably the one given by the \kl{Matita} team \sidecite{Asperti2012}, -who however again concentrate on the challenges posed by the +which however concentrates again on the challenges posed by the elaboration and unification algorithms. They also do not consider the problem of completeness with respect to a given undirected system, as it would fail in their setting due to the undecidability of higher order unification. diff --git a/Manuscript/bidir-pcuic.tex b/Manuscript/bidir-pcuic.tex index 90d7c05..48fdc48 100644 --- a/Manuscript/bidir-pcuic.tex +++ b/Manuscript/bidir-pcuic.tex @@ -233,5 +233,5 @@ \subsection{Polymorphic Inductive Types} in theory and in practice. Can we give a presentation of polymorphic inductive types that is as lightweight as pair types in \cref{fig:bidir-pair}? The bidirectional presentation is valuable there, because now -it is clear what the specification of an alternative syntax: -it should stay be complete, in the sense of \cref{thm:comp-cumul}. \ No newline at end of file +it is clear what the specification of an alternative syntax is: +it should remain complete, in the sense of \cref{thm:comp-cumul}. \ No newline at end of file diff --git a/Manuscript/gradual-dependent.tex b/Manuscript/gradual-dependent.tex index 00584eb..7e9d5be 100644 --- a/Manuscript/gradual-dependent.tex +++ b/Manuscript/gradual-dependent.tex @@ -187,7 +187,7 @@ \subsection{Fundamental Trade-Offs} and revisiting them in the context of a dependent type theory (\cref{sec:graduality}). This finally leads us to establish a fundamental impossibility in the gradualization of \kl{CIC}, which means that at least one of the desired properties has to be sacrificed (\cref{sec:fire-triangle}). -With all set up, we can finally present our \kl{gradual}, +With all set up, we can finally present our \kl(typ){gradual}, \kl{dependently} typed system, \kl{GCIC}, and its main characteristics (\cref{sec:gcic-overview}). @@ -250,7 +250,7 @@ \section{Safety and Normalization, Endangered}[Safety and Normalization] term $\Omega$, defined as $\delta~\delta$ where $\delta$ is $\l x.\ x\ x$. In the \intro{simply-typed lambda calculus}% - \sidenote[][2em]{Hereafter abbreviated as \intro{STLC}.}, + \sidenote{Hereafter abbreviated as \intro{STLC}.}, as in \kl{CIC}, \emph{self-applications} like $\delta\ \delta$ and $x\ x$ are ill-typed. However, when introducing gradual types, one usually expects to accommodate such idioms, and therefore in a standard gradually-typed calculus such as @@ -274,7 +274,7 @@ \subsection{Axioms} why would one want to gradualize \kl{CIC} instead of simply postulating an axiom for any term (be it a program or a proof) that one does not feel like providing (yet)? -Indeed, we can augment \kl{CIC} with a wildcard axiom $\axiom \ty \P A : \uni.\ A$. +\AP Indeed, we can augment \kl{CIC} with a wildcard axiom $\axiom \ty \P A : \uni.\ A$.
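In \kl{Coq}, such a wildcard is a one-line postulate. A minimal sketch of ours – the name \texttt{wildcard} and the example definition are purely illustrative:
\begin{verbatim}
(* The wildcard axiom: it inhabits any type whatsoever. *)
Axiom wildcard : forall A : Type, A.

(* It can discharge any pending definition or proof obligation... *)
Definition head (A : Type) (l : list A) : A := wildcard A.
(* ...but terms built from it are stuck: they never compute. *)
\end{verbatim}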
The resulting system, called \intro{CICax}, has an obvious practical benefit: we can use $\axiom A$% %\sidenote{Hereafter written $\axiom[A]$.} @@ -329,7 +329,7 @@ \subsection{Axioms} \subsection{Exceptions} \label{sec:extt} -\sidetextcite[0em]{Pedrot2018} present the exceptional type theory \intro{ExTT}, +\AP \sidetextcite{Pedrot2018} present the exceptional type theory \intro{ExTT}, demonstrating that it is possible to extend a type theory with a wildcard term while enjoying a satisfying notion of \kl{safety}, which coincides with that of programming languages with exceptions. @@ -343,7 +343,7 @@ \subsection{Exceptions} For instance, in \kl{ExTT} the following conversion holds: \[\ind{\Bool}{\rai[\Bool]}{\Nat}{0,1} \conv \rai[\Nat]\] -Notably, such exceptions are \intro{call-by-name} exceptions, so one can only +\AP Notably, such exceptions are \intro{call-by-name} exceptions, so one can only discriminate exceptions on positive types – \ie inductive types –, not on negative types – \ie function types. In particular, in \kl{ExTT}, $\rai[A \to B]$ reduces to $\l x : A.\ \rai[B]$, so the two are \kl{convertible}. @@ -405,7 +405,8 @@ \section{Gradual Simple Types} \subsection{Static semantics} -\intro[gradual]{Gradually typed} languages introduce the \reintro{unknown type}, written $\?$, +\AP \intro[gradual typing]{Gradually typed} languages introduce the +\reintro{unknown type}, written $\?$, which is used to indicate the lack of static typing information \sidecite{Siek2006}. One can understand such an unknown type as an abstraction of the set of possible types that it stands for \sidecite{Garcia2016}. @@ -438,7 +439,7 @@ \subsection{Dynamic semantics} a gradual language must detect inconsistencies at runtime if it is to satisfy \kl{safety}, which therefore has to be formulated in a way that encompasses runtime errors. -For instance, if the function $f$ above returns $\false$, +\AP For instance, if the function $f$ above returns $\false$, then an \kl{error} must be raised to avoid reducing to $\false + 1$ – a closed stuck term, corresponding to a violation of safety. The traditional approach to do so is to avoid giving a direct reduction semantics to gradual @@ -460,7 +461,8 @@ \subsection{Dynamic semantics} There are many ways to realize each of these approaches, which vary in terms of efficiency and eagerness of checking \sidecite{Herman2010,TobinHochstadt2008,Siek2010,Siek2009,Toro2020,BanadosSchwerter2021}. -\subsection{Conservativity} A first important property of a gradual language is that it is a +\subsection{Conservativity} +\AP A first important property of a gradual language is that it is a \reintro{conservative extension} of a related static typing discipline: the gradual and static systems should coincide on static terms. This property is hereafter called \reintro{Conservativity}, @@ -483,7 +485,7 @@ \subsection{Conservativity} A first important property of a gradual language is \kl{CIC} \emph{as a logic} – which it clearly is not! \subsection{Gradual guarantees} -The early accounts of gradual typing emphasized \kl(grad){consistency} as the central idea. +\AP The early accounts of gradual typing emphasized \kl(grad){consistency} as the central idea. However, \sidetextcite{Siek2015} observed that this characterization left too many possibilities for the impact of type information on program behaviour, compared to what was originally intended \sidecite{Siek2006}.
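To make this runtime checking concrete, here is a minimal worked example of ours, in simplified notation – we elide the casts inserted by elaboration and write $\mathtt{err}$ for a runtime \kl{error}: the application $(\l x : \?.\ x + 1)\ \true$ is well-typed, because the type $\Bool$ of $\true$ is \kl(grad){consistent} with $\?$; at runtime, however, the underlying ascription of $\true$ to $\Nat$ must fail, so that
\[(\l x : \?.\ x + 1)\ \true \rightsquigarrow \asc{\true}{\Nat} + 1 \rightsquigarrow \mathtt{err},\]
rather than execution reaching the stuck term $\true + 1$.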
@@ -507,10 +509,14 @@ \subsection{Gradual guarantees} \AP The \kl{static gradual guarantee} (\intro{SGG}) ensures that imprecision does not break typeability: + +\begin{minipage}{\textwidth} \begin{property}[\intro{Static Gradual Guarantee}] If $t \pre u$ and $\vdash t \ty T$, then $\vdash u \ty U$ for some $U$ such that $T \pre U$. -\end{property} +\end{property} +\end{minipage} + % This \kl{SGG} captures the intuition that “sprinkling $\?$ over a term” maintains its typeability. As such, the notion of \kl{precision} $\pre$ used to @@ -579,8 +585,8 @@ \subsection{Graduality} characterize the good dynamic behaviour of a gradual language: the runtime checking mechanism used to define it, such as casting, should only perform type-checking, and not otherwise affect behaviour. -% -Specifically, they mandate that precision gives rise + +\AP Specifically, they mandate that precision gives rise to \intro{embedding-projection pairs} (\reintro{ep-pairs}): the cast induced by two types related by precision forms an adjunction, which induces a retraction. @@ -611,7 +617,7 @@ \subsection{Graduality} equivalent} to the original one. % A couple of additional observations need to be made here, as they will play a major role in the development that follows. -\AP These two approaches to characterizing gradual typing highlight +These two approaches to characterizing gradual typing highlight the need to distinguish \emph{syntactic} from \emph{semantic} notions of precision. Indeed, with the usual syntactic \kl{precision} from \sidetextcite{Siek2015}, @@ -640,7 +646,7 @@ \subsection{Graduality} But as we uncover later on, it turns out that in certain settings – and in particular dependent types – the \kl{embedding-projection} property imposes \emph{more} desirable constraints on the behaviour of casts than the \kl{DGG} alone. -In regard of these two remarks, in what follows we use the term \intro{graduality} +\AP In light of these two remarks, in what follows we use the term \intro{graduality} for the \kl{DGG} established with respect to a notion of \kl{precision} which also induces \kl{embedding-projection pairs}. @@ -718,7 +724,7 @@ \subsection{Relaxing conversion} The proper notion to relax in the gradual dependently-typed setting is therefore \kl{conversion}, not syntactic equality. -\sidetextcite{Garcia2016} give a general framework for gradual typing that explains how to relax any type predicate to account for imprecision: +\AP \sidetextcite{Garcia2016} give a general framework for gradual typing that explains how to relax any type predicate to account for imprecision: for a binary type predicate $P$, its \intro{consistent lifting} $\mathop{\tilde{P}}(A,B)$ holds if there exist static types $A'$ and $B'$ in the denotation% \sidenote{Concretization, in abstract interpretation parlance.} @@ -805,8 +811,8 @@ \subsection{Observational refinement} The situation in the dependent setting is however more complicated if the theory admits divergence.% \sidenote{ - There exist non-gradual dependently-typed programming languages that admit divergence, \eg \kl{Dependent Haskell} \cite{Eisenberg2016}, - \kl{Idris} \sidecite{Brady2013}. + There exist non-gradual dependently-typed programming languages that admit divergence, \eg \kl{Dependent Haskell} \cite{Eisenberg2016} or + \kl{Idris} \cite{Brady2013}. We will also present one such theory in this thesis.
}% \margincite{Eisenberg2016}% @@ -891,7 +897,13 @@ \subsection{Gradualizing \kl(tit){STLC}} We show that $\Omega$ is \emph{necessarily} a well-typed, diverging term in any gradualization of \kl{STLC} that satisfies the other properties. -\begin{minipage}{\textwidth} +\pagebreak + +\begin{marginfigure} + \includegraphics{Fire_triangle.pdf} + \caption{The Fire Triangle of Graduality} +\end{marginfigure} + \begin{theorem}[\reintro{Fire Triangle of Graduality} for \kl{STLC}] \label{thm:triangle-STLC} \kl{STLC} and \kl{graduality}. Then it cannot be \kl{normalizing}. \end{theorem} -\end{minipage} - -\begin{marginfigure}[-4em] - \includegraphics{Fire_triangle.pdf} - \caption{The Fire Triangle of Graduality} -\end{marginfigure} \begin{proof} + We define $\Omega \coloneqq \delta\ (\asc{\delta}{\?})$ with $\delta \coloneqq \l x : \?.\ (\asc{x}{\? \to \?})~x$ and show that it must necessarily be a well-typed, diverging term. @@ -969,8 +976,8 @@ \subsection{Gradualizing \kl(tit){CIC}} Additionally, \kl{graduality} can be specialized to the simply-typed fragment of the theory, by setting the unknown type $\?$ to be $\?[\uni[0]]$. - Therefore, we can apply \cref{thm:triangle-STLC}, - and we get a well-typed term that diverges, finishing the proof. + We can then apply \cref{thm:triangle-STLC}, + and get a diverging well-typed term, finishing the proof. \end{proof} \subsection{The Fire Triangle in practice} @@ -990,7 +997,8 @@ \subsection{The Fire Triangle in practice} there are effectively-terminating programs with imprecise variants that yield termination contract errors. -To date, the only related work that considers the gradualization of full dependent types with +\AP To date, the only related work that considers +the gradualization of full dependent types with $\?$ as both a term and a type is the work on \intro{GDTL} \sidecite{Eremondi2019}. \kl{GDTL} is a programming language with a clear separation between the typing and execution phases, like \kl{Idris} \sidecite{Brady2013}. @@ -1034,7 +1042,7 @@ \subsection{Three in One} \kl{GCIC}, with two parameters% \sidenote{This system is precisely detailed in \cref{fig:ccic-ty}}. -The first parameter characterizes how the universe level of a Π-type is determined +\AP The first parameter characterizes how the universe level of a Π-type is determined in typing rules: either as taking the \emph{maximum} of the levels of the involved types – as in standard \kl{CIC} – or as the \emph{successor} of that maximum. The latter option yields a variant of \kl{CIC} that we call \intro{CICs} – read “\kl{CIC}-shift”. @@ -1057,7 +1065,7 @@ \subsection{Three in One} \paragraph{… and three meaningful theories.} Based on these parameters, we develop the following three variants of \kl{GCIC}, -whose properties are summarized in \cref{tab:gcic} +whose properties are summarized in \cref{fig:gcic-summary} % with pointers to the respective theorems – because \kl{GCIC} is one common parametrized framework, we are able to establish most properties for all variants at once. @@ -1070,13 +1078,13 @@ \subsection{Three in One} with \kl{errors} and \kl{unknown terms}, and replacing \kl{conversion} with \kl(grad){consistency}. This results in a theory that is not normalizing. -Next, \intro{GCICs} satisfies both \kl{normalization} and \kl{graduality}, +\AP Next, \intro{GCICs} satisfies both \kl{normalization} and \kl{graduality}, and supports \kl{conservativity}, but only with respect to \kl{CICs}.
This theory uses the universe hierarchy at the \emph{typing level} to detect and forbid the potential non-termination induced by the use of \kl(grad){consistency} instead of \kl{conversion}. -Finally, \intro{GCICT} satisfies both \kl{conservativity} with respect to \kl{CIC} +\AP Finally, \intro{GCICT} satisfies both \kl{conservativity} with respect to \kl{CIC} and \kl{normalization}, but does not fully validate \kl{graduality}. This theory uses the universe hierarchy at the \emph{computational level} to detect potential divergence, eagerly raising errors. @@ -1084,7 +1092,7 @@ \subsection{Three in One} and hence \kl{graduality}, as well as the \kl{SGG}, since in our dependent setting it depends on the \kl{DGG}. -\begin{table*}[h] +\begin{figure*}[h] \begin{tabular}{ccccccc} & \kl{Safety} & \kl{Normalization} & \kl{Conservativity} wrt. & \kl{Graduality} & \kl{SGG} & \kl{DGG} \\ \kl{GCICP} \rule{0pt}{4ex} @@ -1113,8 +1121,8 @@ \subsection{Three in One} \end{tabular}\\ \caption{\kl{GCIC} variants and their properties} - \label{tab:gcic} -\end{table*} + \label{fig:gcic-summary} +\end{figure*} \paragraph{Practical implications of \kl{GCIC} variants.} Regarding our introductory examples, all three variants of \kl{GCIC} @@ -1227,7 +1235,7 @@ \subsection{Typing, Conversion and Bidirectional Elaboration} and the downcast (gain of precision) $\castrev{v}{\?}{\Nat}$ reduces successfully if $v$ is such a tagged natural number, or to an error otherwise. -We follow a similar approach for \kl{GCIC}, which is +\AP We follow a similar approach for \kl{GCIC}, which is elaborated in a type-directed manner to a second calculus, named \reintro{CastCIC} (\cref{sec:cast-calculus}). The interplay between typing and cast insertion is however more subtle in the @@ -1240,7 +1248,8 @@ \subsection{Typing, Conversion and Bidirectional Elaboration} \cite[Core language]{CoqManual}, where terms input by the user in the \kl{Gallina} language are first elaborated in order to add implicit arguments, coercions, etc. The computation steps required by conversion are -performed on the elaborated terms, never on the raw input syntax.} +performed on the elaborated terms, never on the raw input syntax.}% +\margincite{CoqManual}% In order to satisfy \kl{conservativity} with respect to \kl{CIC}, ascriptions in \kl{GCIC} are required to satisfy \kl(grad){consistency}. For instance, $\asc{\asc{\true}{\?}}{\Nat}$ is well-typed by \kl(grad){consistency} – used twice –, but $\asc{\true}{\Nat}$ is ill-typed. @@ -1273,7 +1282,7 @@ \subsection{Precisions and Properties} As explained earlier (\cref{sec:graduality}), we need three different notions of precision to deal with \kl{SGG} and \kl{graduality}. -At the source level – \kl{GCIC} –, +\AP At the source level – \kl{GCIC} –, we introduce a notion of \reintro{syntactic precision}, that captures the intuition of a more imprecise term as “the same term with sub-terms and/or type annotations replaced by $\?$”, and is defined without any assumption of typing. @@ -1296,7 +1305,7 @@ \subsection{Precisions and Properties} for \kl{CastCIC} – in its variants \kl{CCICP} and \kl{CCICs} – on \kl{structural precision}. -However, as explained in \cref{sec:grad-simple}, we cannot expect to prove \kl{graduality} +\AP However, as explained in \cref{sec:grad-simple}, we cannot expect to prove \kl{graduality} for these \kl{CastCIC} variants with respect to \kl{structural precision} directly.
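To fix intuitions about these notions of precision, a small example of ours, in the spirit of “sprinkling $\?$ over a term”: with \kl{syntactic precision} one has for instance
\[\l x : \Nat.\ x + 1 \pre \l x : \?.\ x + 1 \pre \l x : \?.\ \? \pre \?,\]
each step replacing one more type annotation or sub-term by $\?$.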
In order to overcome this problem, and to justify the design of \kl{CastCIC}, we build two kinds of models for \kl{CastCIC}. The first% diff --git a/Manuscript/gradual-elab.tex b/Manuscript/gradual-elab.tex index 76c6a16..4a98a79 100644 --- a/Manuscript/gradual-elab.tex +++ b/Manuscript/gradual-elab.tex @@ -21,7 +21,7 @@ \chapter{From \kl(tit){GCIC} to \kl(tit){CastCIC}: Bidirectional Elaboration} the gradual guarantees (\cref{sec:gcic-theorems}). In this whole chapter, we do not treat indexed inductive types, thus the system should be -seen as an extension of \kl{CIC-} rather than full-blown \kl{CIC}. We come back to this +seen as an extension of \kl{CIC-}, rather than full-blown \kl{CIC}. We come back to this issue in \cref{chap:beyond-gcic}. The original reference \sidecite[4em]{LennonBertrand2022} considers the case of general inductive types; here we restrict the presentation to $\List$ to ease readability. @@ -32,7 +32,7 @@ \section{\kl(tit){CastCIC}} \subsection{System Definition} \paragraph{Syntax.} -The syntax of \intro{CCIC}% +\AP The syntax of \intro{CCIC}% \sidenote{Written using a \targetcolor{dark blue colour}.} extends that of \kl{CIC-} with three new term constructors: the \intro{unknown term} $\tcol{\?[T]}$ and @@ -72,7 +72,7 @@ \subsection{System Definition} \label{fig:univ-param} \end{figure} -\kl{CCIC} is parametrized by two functions, +\AP \kl{CCIC} is parametrized by two functions, described in \cref{fig:univ-param}, to account for the three different variants of \kl{GCIC} we consider – see \cref{sec:gcic:-3-1}: \intro{CCICP}, \intro{CCICs} and \intro{CCICT}. @@ -171,7 +171,8 @@ \subsection{System Definition} The typing rules provide little insight into the new primitives; the interesting part really lies in their \kl{reduction} behaviour. -\kl{Reduction} relies on two auxiliary functions relating \intro{head constructors} $h \in \H$ +\AP \kl{Reduction} relies on two auxiliary functions relating +\intro{head constructors} $h \in \H$ to those terms that start with either $\P$, $\uni$ or an inductive type – in our running example, $\List$ – the set of which we call $\types{\kl{CCIC}}$. These are defined in \cref{fig:head-germ}. @@ -179,7 +180,7 @@ \subsection{System Definition} The first is the function $\hd$, which returns the head constructor of a type. % -In the other direction, the \intro{germ} function $\stalkCIC{i}{h}$ constructs the least +\AP In the other direction, the \intro{germ} function $\stalkCIC{i}{h}$ constructs the least precise type with head $h$ at level $i$. In the case where no such type exists – \eg when $\castOfPi{i} < 0$ – this least precise type is the error. @@ -393,7 +394,7 @@ \subsection{System Definition} \cref{sec:realizing-cast-calculus}, but similar decompositions appear \eg in \sidetextcite{Siek2015}, where the equivalents of our germs are called ground types. The side conditions guarantee that this rule is used when no other applies. -% + \ruleref{rule:up-down} erases the succession of an upcast to $\tcol{\?}$ and a downcast from it. Note that in this rule the upcast $\tcol{\castrev{}{\stalkCIC{h}{i}}{\?[\uni[i]]}}$ acts like a constructor for @@ -407,8 +408,7 @@ \subsection{System Definition} Here, these correspond exactly to the canonical forms of $\tcol{\?[\uni]}$, but we also have to account for the many neutral forms that appear in open contexts.
}% -\margincite{Siek2006} -% + Finally, \ruleref{rule:size-err} corresponds to a peculiar kind of error, which only happens due to the presence of a type hierarchy: $\tcol{\?[\uni[i]]}$ is only universal with respect to types at level $i$, and so a type might be of a level too high to fit into it. @@ -434,7 +434,8 @@ \subsection{Safety} \begin{proof} - We follow again the Tait-Martin-Löf proof, as exposed by \sidetextcite{Takahashi1995} extend the notion of parallel reduction from \cref{thm:pcuic-confluence} + We follow again the Tait-Martin-Löf proof, as presented by \sidetextcite{Takahashi1995}, + extending the notion of parallel reduction from \cref{thm:pcuic-confluence} to account for our additional reduction. The triangle property still holds, because as before there is no real critical pair between our rules – we carefully set them up to that effect! @@ -929,6 +930,8 @@ \subsection{Direct properties} Next come the more “algorithmic” properties: elaboration is decidable, and outputs are unique – up to \kl{conversion} if no strategy is fixed. +\pagebreak + \begin{theorem}[Decidability of elaboration] \label{thm:decidability} The elaboration relation of \cref{fig:gcic-infer-stat,fig:gcic-infer-unk,fig:gcic-check-pinfer} @@ -1075,7 +1078,7 @@ \subsection{Illustration: Back to Ω} The first step corresponds to the first three above, the only difference being the value of $\castOfPi{i}$. The reductions however differ in the next step because $\tcol{\?_i \to \?_{i-1}} \neq \tcol{\stalkCIC{i}{\Pi}}$, so \ruleref{rule:prod-germ} applies before \ruleref{rule:up-down}. For the third step, note that $\tcol{\?_{i-1} \to \?_{i-1}} = \tcol{\stalkCIC{i}{\P}}$, -so that \ruleref{rule:red-size-err} applies in the rightmost sequence of casts. +so that \ruleref{rule:size-err} applies in the rightmost sequence of casts. The last three steps of reduction then propagate the error by first using \ruleref{rule:prod-germ}, \ruleref{rule:up-down} and \ruleref{rule:red-prod-prod}, then the β-rule, and finally \ruleref{rule:red-down-err}, @@ -1118,7 +1121,7 @@ \section{Precision is a simulation for reduction} \textcite{Siek2015}, \cref{lem:catchup-univ,lem:catchup-type} are completely novel. } -Technically, we need to distinguish between two notions of precision, +\AP Technically, we need to distinguish between two notions of precision, one for \kl{GCIC} and one for \kl{CCIC}: \reintro{syntactic precision}, on terms in \kl{GCIC}, which corresponds to the usual syntactic precision of gradual typing, such as that of \textcite{Siek2015}; @@ -1344,7 +1347,7 @@ \subsection{\kl{Structural precision} for \kl(tit){CCIC}} \subsection{Catch-up lemmas} -The fact that \kl{structural precision} is a \kl{simulation} relies on a series of lemmas, +\AP The fact that \kl{structural precision} is a \kl{simulation} relies on a series of lemmas, which constitute the technical core of this whole chapter. They all have the same form: under the assumption that a term $\tcol{t'}$ is less precise than a term $\tcol{t}$ @@ -1385,7 +1388,6 @@ \subsection{Catch-up lemmas} reduce away. \end{proof} -\begin{minipage}{\textwidth} \begin{lemma}[Types catchup] \label{lem:catchup-type} Under the hypothesis that $\tcol{\fs{\GG}} \capre \tcol{\sn{\GG}}$, we have the following: @@ -1407,7 +1409,6 @@ \subsection{Catch-up lemmas} such that $\tcol{\GG} \vdash \tcol{\List(A)} \capre \tcol{\List(A')}$. \end{itemize} \end{lemma} -\end{minipage} \begin{proof} @@ -1549,6 +1550,7 @@ \subsection{Simulation} \ie $\tcol{u} \hred \tcol{u'}$.
\end{theorem} +\pagebreak \begin{proof} The case of \kl{definitional precision} holds by \kl{confluence} of reduction. @@ -1756,7 +1758,7 @@ \subsection{Elaboration Graduality} \label{fig:apre-gcic} \end{figure*} -Next, we turn to \kl{elaboration graduality}, the equivalent of the \kl{static gradual +\AP Next, we turn to \kl{elaboration graduality}, the equivalent of the \kl{static gradual guarantee} (SGG) of \sidetextcite{Siek2015} in our setting. % We state it with respect to a notion of precision for terms in \kl{GCIC}, diff --git a/Manuscript/intro-en.tex b/Manuscript/intro-en.tex index 781c4af..d929a16 100644 --- a/Manuscript/intro-en.tex +++ b/Manuscript/intro-en.tex @@ -179,10 +179,11 @@ \subsection{Proof assistants} Computers excel where humans are weak: their speciality is to treat large volumes of information in a very precise way, exactly the kind of needs brought up when manipulating -formalized mathematics. Therefore, already at the beginning of the 70s,% \sidenote{With systems like Automath \cite{DeBruijn1970}, or Mizar \cite{Rudnicki1992}.}% -\margincite{DeBruijn1970}\margincite{Rudnicki1992} +formalized mathematics. Therefore, already at the beginning of the 70s% \sidenote{With systems like Automath \cite{DeBruijn1970}, or Mizar \cite{Rudnicki1992}.}% +\margincite{DeBruijn1970}% +\margincite{Rudnicki1992}, software tools, collectively called \intro{proof assistants}, had started to appear. They are dedicated to writing and verifying formal proofs. Through the formalization of proofs and the verification by computers that they @@ -231,8 +232,9 @@ \subsection{Logics, programming and type theory} and \kl[dependent type]{dependent type theory}. The proof assistant \kl{Coq}, which is at the heart of my work, belongs to this family. -If one compares a computer program with a text in a natural language, \intro{types} -are a sort of equivalent of grammatical categories. However, contrarily to natural +If one compares a computer program with a text in a natural language, +\intro(en){types} +are a kind of equivalent of grammatical categories. However, contrary to natural languages, these types are conceived at the same time as the programming language, in order to mirror properties of the objects it manipulates. Their first use is to detect manifest errors. For instance, if a procedure @@ -380,7 +382,7 @@ \subsection{\kl{MetaCoq}, a formalization in \kl{Coq}, for \kl{Coq}}[\kl{MetaCoq % length later on.} consists in implementing a type-checking algorithm as close as possible to that of the \kl{kernel}, directly in \kl{Gallina}% -\sidenote{Indeed, thanks to the \kl{Curry-Horward correspondence}, \kl{Gallina} is of +\sidenote{Indeed, thanks to the \kl{Curry-Howard correspondence}, \kl{Gallina} is of course a proof language, but also a true programming language!}. We show, while defining the algorithm, that it is indeed \reintro(bidir){correct}% \sidenote{If the algorithm claims that a term is well-typed, then this is indeed the case.} @@ -429,8 +431,9 @@ \subsection{Gradual types: some flexibility in a desperately static world} for the good execution of a program. The strictness of static typing, conversely, allows for error detection earlier in the development, and imposes invariants useful to optimize compilation or execution.
-Instead of opting exclusively for one of the two approaches, \intro{gradual typing} -\sidecite{Siek2015} aims at integrating the static and dynamic disciplines in one and the +Instead of opting exclusively for one of the two approaches, +\reintro{gradual typing} \sidecite{Siek2015} aims at integrating +the static and dynamic disciplines in one and the same language. This gives access to a whole spectrum of options, from a rigid, completely static discipline to a flexible dynamic one. It particularly allows for a fine-grained, local choice of how each part of a program is type-checked. @@ -446,7 +449,7 @@ \subsection{Gradual types: some flexibility in a desperately static world} and it would often be nice to temporarily lift the strong guarantees of typing to facilitate experimentation. The idea would then be to take inspiration from gradual typing, in order to pave the way for a more flexible logical or software development. Once again, -\kl{Curry-Horward correspondence} is at work, since we adapt concepts from the world of +the \kl{Curry-Howard correspondence} is at work, since we adapt concepts from the world of programming languages to the logical one. \section{And this thesis?} @@ -487,7 +490,7 @@ \subsection{Gradual dependent types} usual presentation of dependent types turns out to be unsuited, as it is too flexible. The additional structure provided by bidirectional typing is thus key. It also appeared relevant to present the type-directed elaboration of terms from a source language -to a target one, an important characteristic shared by all \kl[gradual]{gradual languages}. +to a target one, an important characteristic shared by all \kl[gradual@typ]{gradual languages}. The use of a bidirectional elaboration, and the properties it allows us to obtain, are described in \cref{chap:bidir-gradual-elab}. Finally, \cref{chap:beyond-gcic} succinctly describes the rest of the work I took part in regarding gradual types, but which is not directly @@ -495,7 +498,8 @@ \subsection{Technical contributions} My doctoral work started with the study of \kl(typ){gradual} +\kl(typ){dependent} types. I contributed, together with Kenji Maillard, Nicolas Tabareau and Éric Tanter, to \sidetextcite{LennonBertrand2022}, where we study a gradual extension to the Calculus of Inductive Constructions. My main technical contribution corresponds diff --git a/Manuscript/intro-fr.tex b/Manuscript/intro-fr.tex index f565cc0..c64d613 100644 --- a/Manuscript/intro-fr.tex +++ b/Manuscript/intro-fr.tex @@ -289,7 +289,7 @@ \subsection{Logique, programmation et théorie des types} travail. Si on compare un programme informatique à un texte dans une langue naturelle, -les \intro{types} sont une sorte d’équivalent des catégories grammaticales. +les \intro(fr){types} sont une sorte d’équivalent des catégories grammaticales. Cependant, contrairement aux langues naturelles, ces types sont conçus en même temps que le langage de programmation, de manière à refléter des propriétés des objets manipulés par celui-ci.
@@ -297,15 +297,16 @@ \subsection{Logique, programmation et théorie des types} Par exemple, si une procédure attendant un objet de type “image” est appliquée à un objet de type “chaîne de caractères”, une erreur pourra être rapportée à la programmeuse.\sidenote{ - Un slogan dû à \sidetextcite[4em]{Milner1978} affirme que + Un slogan dû à \textcite{Milner1978} affirme que « Les programmes bien typés ne peuvent pas mal s’exécuter. » -} +}% +\margincite{Milner1978} Mais les types sont très versatiles, et leur capacité à encoder des propriétés des programmes sous-jacents peut servir à la compilation, la documentation, et bien d’autres choses. Dans notre cadre, par exemple, les types correspondent à la validité d’un raisonnement logique. -\begin{marginfigure}[2em] +\begin{marginfigure} % \begin{mathpar} % \inferrule{ \Gamma, A \vdash B}{\Gamma \vdash A \Rightarrow B} \and @@ -404,7 +405,7 @@ \section{Coq et son noyau} \label{fig:coq} \end{figure} -\kl{Coq} est basé sur la \kl{correspondance de Curry-Howard} : les preuves sont vues comme des programmes dans un langage appelé \intro{Gallina}, +\kl{Coq} est basé sur la \kl{correspondance de Curry-Howard} : les preuves sont vues comme des programmes dans un langage appelé \intro(fr){Gallina}, et leur vérification est effectuée par un algorithme proche de ceux utilisés pour les types des langages conventionnels. Cependant, si, dans les premières versions des années 80, \kl{Coq} ressemblait à un langage de programmation un peu étrange, ce n’est actuellement plus du tout le cas. @@ -440,15 +441,15 @@ \subsection{\kl{MetaCoq}, une formalisation en \kl{Coq}, pour \kl{Coq}}[\kl{Meta Une fois ces propriétés établies, la deuxième étape % \sidenote{C’est celle sur laquelle j’ai principalement travaillé, et sur laquelle % on reviendra plus longuement.} -consiste à implémenter un algorithme de vérification de type ressemblant au maximum à celui du \kl{noyau}, directement en \kl{Gallina}\sidenote{ - En effet, grâce à la \kl{correspondance de Curry-Howard}, \kl{Gallina} est certes un langage +consiste à implémenter un algorithme de vérification de type ressemblant au maximum à celui du \kl{noyau}, directement en \kl(fr){Gallina}\sidenote{ + En effet, grâce à la \kl{correspondance de Curry-Howard}, \kl(fr){Gallina} est certes un langage de preuve, mais aussi un véritable langage de programmation !}. On démontre en même temps qu’il est bien \reintro(bidir){correct}% \sidenote{ Si l’algorithme prétend qu’un terme est bien typé, alors c’est bien le cas.} et \reintro[Completeness]{complet}% \sidenote{L’algorithme répond bien affirmativement sur tous les termes bien typés.}. -Enfin, lors d’une troisième étape, on extrait de ce programme \kl{Gallina} certifié +Enfin, lors d’une troisième étape, on extrait de ce programme \kl(fr){Gallina} certifié un autre programme plus efficace, en effaçant le contenu lié à la preuve de correction pour ne garder que celui qui est algorithmiquement intéressant. Cette extraction est une étape complexe, @@ -456,7 +457,7 @@ \subsection{\kl{MetaCoq}, une formalisation en \kl{Coq}, pour \kl{Coq}}[\kl{Meta efficacité raisonnable. C’est pourquoi on prouve là encore que ladite extraction est correcte% \sidenote{C’est-à-dire qu’elle préserve la sémantique des programmes.}, -en la programmant à nouveau en \kl{Gallina}. +en la programmant à nouveau en \kl(fr){Gallina}. 
\subsection{Vérification, inférence et typage bidirectionnel}[Typage bidirectionnel] diff --git a/Manuscript/kaobook b/Manuscript/kaobook index 73895e1..a233bef 160000 --- a/Manuscript/kaobook +++ b/Manuscript/kaobook @@ -1 +1 @@ -Subproject commit 73895e1b56b7f055ec457d14776cb4213884de2b +Subproject commit a233befed3b4c9ea8a9847833a4b34c8cb619a00 diff --git a/Manuscript/knowledge_declarations.tex b/Manuscript/knowledge_declarations.tex index b21476c..546051b 100644 --- a/Manuscript/knowledge_declarations.tex +++ b/Manuscript/knowledge_declarations.tex @@ -22,6 +22,9 @@ \knowledge{system} | Agda +\knowledge{system} + | Gallina@fr + \knowledge{system} | Gallina @@ -40,8 +43,8 @@ %Chapter 1 \knowledge{ignore} - | type - | types + | type@fr + | types@fr \knowledge{notion} | type dépendant @@ -77,8 +80,13 @@ % Chapter 2 +\knowledge{ignore} + | type@en + | types@en + \knowledge{notion} | dependent type + | dependent types | dependent@typ | dependently @@ -89,10 +97,15 @@ | kernel \knowledge{notion} - | De Bruijn’s criterion + | De Bruijn criterion \knowledge{notion} | proof assistant + | proof assistants + +\knowledge{notion} + | bidirectional + | bidirectional typing % Chapter 3 @@ -335,7 +348,7 @@ | indexed inductive type | indexed inductive types -\knowledge{notion, text ={CIC\textsubscript{-}}} +\knowledge{notion, text ={CIC\textsubscript{$-$}}} | CIC- % Section 3.6 @@ -439,6 +452,7 @@ | gradual@typ | Gradual typing | gradual typing + | gradual types \knowledge{notion} | consistency@grad @@ -471,9 +485,6 @@ | unknown type | unknown -\knowledge{notion} - | gradual - \knowledge{notion} | gradual guarantee | gradual guarantees diff --git a/Manuscript/main.tex b/Manuscript/main.tex index 9938d23..3d1ca0f 100644 --- a/Manuscript/main.tex +++ b/Manuscript/main.tex @@ -33,11 +33,11 @@ % Load the package for hyperreferences \usepackage{kaobook/kaorefs} -\includeonly{intro-fr,intro-en} +%\includeonly{gradual-dependent} \graphicspath{{./figures/}} % Paths where images are looked for -\makeindex[columns=3, title=Alphabetical Index, intoc] % Make LaTeX produce the files required to compile the index +%\makeindex[columns=3, title=Alphabetical Index, intoc] % Make LaTeX produce the files required to compile the index \input{knowledge_declarations.tex} @@ -121,6 +121,6 @@ % The index needs to be compiled on the command line with 'makeindex main' from the template directory -\printindex % Output the index +% \printindex % Output the index \end{document} diff --git a/Manuscript/metacoq-intro.tex b/Manuscript/metacoq-intro.tex index 5599e54..2916844 100644 --- a/Manuscript/metacoq-intro.tex +++ b/Manuscript/metacoq-intro.tex @@ -5,7 +5,7 @@ \sidenote{A \href{https://github.com/coq/coq/blob/master/dev/doc/critical-bugs}{compilation} of those is maintained by \kl{Coq}’s development team.} Although it is not easy to exploit these to actually derive an inconsistency, even less -so inadvertedly, simply relying on \kl{De Bruijn’s criterion} is not enough if +so inadvertently, simply relying on the \kl{De Bruijn criterion} is not enough if one wants to have maximal trust in the kernel – and thus \kl{Coq} as a whole. Although \kl{CIC} is well-understood and has been widely studied, this is much less true of the type theory which is actually implemented, \kl{PCUIC}.
diff --git a/Manuscript/technical-intro.tex b/Manuscript/technical-intro.tex index b4f7a4e..1eb3329 100644 --- a/Manuscript/technical-intro.tex +++ b/Manuscript/technical-intro.tex @@ -146,7 +146,7 @@ \subsection{Functions and applications} if from a hypothesis $A$ one can deduce $T$, then $A \to T$ holds; conversely if $A \to T$ and $A$ both hold, then $T$ does as well. -\begin{marginfigure}[-4em] +\begin{marginfigure} \ContinuedFloat* \begin{mathpar} \inferdef{Abs}{\Gamma \vdash A \ty \uni \\ \Gamma, x : A \vdash t \ty T} @@ -692,7 +692,7 @@ \subsection{Stability under basic operations} The most essential properties of a type system are its stability under basic type-theoretic operations. The first is stability under renaming, which states that a context can be replaced by another -one which declares at least variables with the same types: +one which contains at least the same variables, up to renaming: \begin{property}[\intro{Stability under renaming}] \label{prop:stab-renaming} \begin{itemize} \item $x_1 : A_1 \dots x_n : A_n \vdash t \ty T$ \item $\vdash \Delta$ - \item for all $i$, there is a variable $y_i$ such that $(y_i : \multisubs{A}{x_1 \into y_1 \dots x_n \into y_n}) \in \Delta$ + \item for all $i$, there is a variable $y_i$ such that $(y_i : \multisubs{A_i}{x_1 \into y_1 \dots x_n \into y_n}) \in \Delta$% \end{itemize} we have that $\Delta \vdash \multisubs{t}{x_1 \into y_1 \dots x_n \into y_n} \ty \multisubs{T}{x_1 \into y_1 \dots x_n \into y_n}$. \end{property} +Since the context $x_1 : A_1 \dots x_n : A_n$ is well-formed – a consequence of +\kl{validity}, another property we are about to see –, $A_i$ can only depend on variables +$x_1 \dots x_{i-1}$, so we do not actually need to substitute the variables +$x_{i+1} \dots x_n$ in it. However, this presentation, where the same substitution is applied +to all types even if it applies to variables which we know are not present in them, +is easier to work with in practice. + A direct consequence is the \kl{weakening} property: \begin{property}[\intro{Weakening}] @@ -718,7 +725,7 @@ \subsection{Stability under basic operations} \begin{property}[\intro{Stability under substitution}] \label{prop:stab-subst} - For any substitution $\sigma$ (function from variable to terms) + For any substitution $\sigma$ (function from variables to terms) such that the following hold \begin{itemize} \item $x_1 : A_1 \dots x_n : A_n \vdash t \ty T$