% bsc-consolidated/src/02_preliminaries.tex

\chapter{Preliminaries}

We assume familiarity with basic categorical notions, in particular: categories, functors, functor algebras and natural transformations, as well as special objects like (co)products, terminal and initial objects and special classes of morphisms like isomorphisms (isos), epimorphisms (epis) and monomorphisms (monos). % chktex 36
In this chapter we will introduce notation that will be used throughout the thesis and also introduce some notions that are crucial to this thesis in more detail.
We write \(\obj{\C}\) for the objects of a category \( \C \), \(id_X\) for the identity morphism on \(X\), \((-) \circ (-)\) for the composition of morphisms and \(\C(X,Y)\) for the set of morphisms from \(X\) to \(Y\).
We will also sometimes omit indices of the identity and of natural transformations in favor of readability.

\section{Distributive and Cartesian Closed Categories}
Let us first introduce notation for binary (co)products by giving their usual diagrams: % chktex 36

% https://q.uiver.app/#q=WzAsOCxbMiwwLCJBIFxcdGltZXMgQiJdLFswLDAsIkEiXSxbNCwwLCJCIl0sWzIsMiwiQyJdLFs4LDAsIkEgKyBCIl0sWzYsMCwiQSJdLFsxMCwwLCJCIl0sWzgsMiwiQyJdLFswLDEsIlxccGlfMSIsMl0sWzAsMiwiXFxwaV8yIl0sWzMsMiwiZyIsMl0sWzMsMSwiZiJdLFszLDAsIlxcZXhpc3RzISBcXGxhbmdsZSBmICwgZyBcXHJhbmdsZSIsMix7InN0eWxlIjp7ImJvZHkiOnsibmFtZSI6ImRhc2hlZCJ9fX1dLFs1LDQsImlfMSJdLFs2LDQsImlfMiIsMl0sWzUsNywiZiIsMl0sWzYsNywiZyJdLFs0LDcsIlxcZXhpc3RzICEgW2YgLCBnXSIsMV1d
\[
  \begin{tikzcd}
    A && {A \times B} && B && A && {A + B} && B \\
    \\
    && C &&&&&& C
    \arrow["{\pi_1}"', from=1-3, to=1-1]
    \arrow["{\pi_2}", from=1-3, to=1-5]
    \arrow["g"', from=3-3, to=1-5]
    \arrow["f", from=3-3, to=1-1]
    \arrow["{\exists! \langle f , g \rangle}"', dashed, from=3-3, to=1-3]
    \arrow["{i_1}", from=1-7, to=1-9]
    \arrow["{i_2}"', from=1-11, to=1-9]
    \arrow["f"', from=1-7, to=3-9]
    \arrow["g", from=1-11, to=3-9]
    \arrow["{\exists ! [f , g]}", dashed, from=1-9, to=3-9]
  \end{tikzcd}
\]

We will furthermore overload this notation and write \(f \times g := \langle f \circ \pi_1 , g \circ \pi_2 \rangle \) and \(f + g := \lbrack i_1 \circ f ,  i_2 \circ g \rbrack \) on morphisms. To avoid parentheses we will use the convention that products bind stronger than coproducts.

We write \(1\) for the terminal object together with the unique morphism \(! : A \rightarrow 1\) and \(0\) for the initial object with the unique morphism \(¡ : 0 \rightarrow A\).

Categories with finite products (i.e.\ binary products and a terminal object) are also called Cartesian and categories with finite coproducts (i.e.\ binary coproducts and an initial object) are called coCartesian.

\begin{definition}[Distributive Category]~\label{def:distributive}
  A Cartesian and coCartesian category \(\C \) is called distributive if the canonical (left) distributivity morphism \(dstl^{-1}\) is an isomorphism:
  % https://q.uiver.app/#q=WzAsMixbMCwwLCJYIFxcdGltZXMgWSArIFggXFx0aW1lcyBaIl0sWzMsMCwiWCBcXHRpbWVzIChZICsgWikiXSxbMCwxLCJkc3RsXnstMX0gOj0ge1xcbGJyYWNrIGlkIFxcdGltZXMgaV8xICwgaWQgXFx0aW1lcyBpXzIgXFxyYnJhY2t9IiwwLHsiY3VydmUiOi0zfV0sWzEsMCwiZHN0bCIsMCx7ImN1cnZlIjotM31dXQ==
  \[
    \begin{tikzcd}
      {X \times Y + X \times Z} &&& {X \times (Y + Z)}
      \arrow["{dstl^{-1} := {\lbrack id \times i_1 , id \times i_2 \rbrack}}", curve={height=-18pt}, from=1-1, to=1-4]
      \arrow["dstl", curve={height=-18pt}, from=1-4, to=1-1]
    \end{tikzcd}
  \]

\end{definition}

\begin{remark}
  Definition~\ref{def:distributive} can equivalently be expressed by requiring that the canonical right distributivity morphism is an iso, giving these inverse morphisms:
  % https://q.uiver.app/#q=WzAsMixbMCwwLCJZIFxcdGltZXMgWCArIFpcXHRpbWVzIFgiXSxbMywwLCIoWSArIFopIFxcdGltZXMgWCJdLFswLDEsImRzdHJeey0xfSA6PSBbIGlfMSBcXHRpbWVzIGlkICwgaV8yIFxcdGltZXMgaWQgXSIsMCx7ImN1cnZlIjotM31dLFsxLDAsImRzdHIiLDAseyJjdXJ2ZSI6LTN9XV0=
  \[
    \begin{tikzcd}
      {Y \times X + Z\times X} &&& {(Y + Z) \times X}
      \arrow["{dstr^{-1} := [ i_1 \times id , i_2 \times id ]}", curve={height=-18pt}, from=1-1, to=1-4]
      \arrow["dstr", curve={height=-18pt}, from=1-4, to=1-1]
    \end{tikzcd}
  \]

  These two can be derived from each other by taking either
  \[dstr := (swap + swap) \circ dstl \circ swap \]
  or
  \[dstl := (swap + swap) \circ dstr \circ swap \]
  where \(swap := \langle \pi_2 , \pi_1 \rangle : A \times B \rightarrow B \times A\).
\end{remark}

\begin{proposition}
  The distribution morphisms can be viewed as natural transformations i.e.\ they satisfy the following diagrams:
  % https://q.uiver.app/#q=WzAsOCxbMCwwLCJYIFxcdGltZXMgKFkgK1opIl0sWzIsMCwiQSBcXHRpbWVzIChCICsgQykiXSxbMCwxLCJYIFxcdGltZXMgWSArIFggXFx0aW1lcyBaIl0sWzIsMSwiQSBcXHRpbWVzIEIgKyBBIFxcdGltZXMgQyJdLFszLDAsIihZICsgWikgXFx0aW1lcyBYIl0sWzUsMCwiKEIgKyBDKSBcXHRpbWVzIEEiXSxbMywxLCJZIFxcdGltZXMgWCArIFogXFx0aW1lcyBYIl0sWzUsMSwiQiBcXHRpbWVzIEEgKyBDIFxcdGltZXMgQSJdLFswLDEsImYgXFx0aW1lcyAoZyArIGgpIl0sWzIsMywiZiBcXHRpbWVzIGcgKyBmIFxcdGltZXMgaCJdLFswLDIsImRzdGwiXSxbMSwzLCJkc3RsIl0sWzQsNSwiKGcgKyBoKSBcXHRpbWVzIGYiXSxbNCw2LCJkc3RyIiwyXSxbNSw3LCJkc3RyIl0sWzYsNywiZyBcXHRpbWVzIGYgKyBoIFxcdGltZXMgZiJdXQ==
  \[
    \begin{tikzcd}[column sep=4ex]
      {X \times (Y +Z)} && {A \times (B + C)} & {(Y + Z) \times X} && {(B + C) \times A} \\
      {X \times Y + X \times Z} && {A \times B + A \times C} & {Y \times X + Z \times X} && {B \times A + C \times A}
      \arrow["{f \times (g + h)}", from=1-1, to=1-3]
      \arrow["{f \times g + f \times h}", from=2-1, to=2-3]
      \arrow["dstl", from=1-1, to=2-1]
      \arrow["dstl", from=1-3, to=2-3]
      \arrow["{(g + h) \times f}", from=1-4, to=1-6]
      \arrow["dstr"', from=1-4, to=2-4]
      \arrow["dstr", from=1-6, to=2-6]
      \arrow["{g \times f + h \times f}", from=2-4, to=2-6]
    \end{tikzcd}
  \]
\end{proposition}
\begin{proof}
  We will prove naturality of \(dstl\), naturality for \(dstr\) is symmetric. We use the fact that \(dstl^{-1}\) is an iso and therefore also an epi.

  \begin{alignat*}{1}
        & dstl \circ (f \times (g + h)) \circ dstl^{-1}                                            \\
    =\; & dstl \circ (f \times (g + h)) \circ \lbrack id \times i_1 , id \times i_2 \rbrack        \\
    =\; & dstl \circ \lbrack f \times ((g + h) \circ i_1) , f \times ((g + h) \circ i_2) \rbrack   \\
    =\; & dstl \circ \lbrack f \times (i_1 \circ g) , f \times (i_2 \circ h) \rbrack               \\
    =\; & dstl \circ \lbrack id \times i_1 , id \times i_2 \rbrack \circ (f \times g + f \times h) \\
    =\; & dstl \circ dstl^{-1} \circ (f \times g + f \times h)                                     \\
    =\; & (f \times g + f \times h)                                                                \\
    =\; & (f \times g + f \times h) \circ dstl \circ dstl^{-1}\tag*{\qedhere}
  \end{alignat*}
\end{proof}

\begin{proposition}
  The distribution morphisms satisfy the following properties:

  \begin{enumerate}
    \item \(dstl \circ (id \times i_1) = i_1\)
    \item \(dstl \circ (id \times i_2) = i_2\)
    \item \([ \pi_1 , \pi_1 ] \circ dstl = \pi_1\)
    \item \(( \pi_2 + \pi_2 ) \circ dstl = \pi_2\)
    \item \(dstl \circ swap = (swap + swap) \circ dstr\)
    \item \(dstr \circ (i_1 \times id) = i_1\)
    \item \(dstr \circ (i_2 \times id) = i_2\)
    \item \((\pi_1 + \pi_1) \circ dstr = \pi_1\)
    \item \([ \pi_2 , \pi_2 ] \circ dstr = \pi_2\)
    \item \(dstr \circ swap = (swap + swap) \circ dstl\)
  \end{enumerate}
\end{proposition}
\begin{proof}
  Let us verify the five properties concerning \(dstl\), the ones concerning \(dstr\) follow symmetrically:

  \begin{enumerate}
    \item
          \begin{alignat*}{1}
             & dstl \circ (id \times i_1)
            \\=\;&dstl \circ [ id \times i_1 , id \times i_2 ] \circ i_1
            \\=\;&dstl \circ dstl^{-1} \circ i_1
            \\=\;&i_1
          \end{alignat*}
    \item
          \begin{alignat*}{1}
             & dstl \circ (id \times i_2)
            \\=\;&dstl \circ [ id \times i_1 , id \times i_2 ] \circ i_2
            \\=\;&dstl \circ dstl^{-1} \circ i_2
            \\=\;&i_2
          \end{alignat*}
    \item
          \begin{alignat*}{1}
             & \pi_1
            \\=\;&\pi_1 \circ dstl^{-1} \circ dstl
            \\=\;&[ \pi_1 \circ (id \times i_1) , \pi_1 \circ (id \times i_2) ] \circ dstl
            \\=\;&[ \pi_1 , \pi_1 ] \circ dstl
          \end{alignat*}
    \item
          \begin{alignat*}{1}
             & \pi_2
            \\=\;&\pi_2 \circ dstl^{-1} \circ dstl
            \\=\;&[ \pi_2 \circ (id \times i_1) , \pi_2 \circ (id \times i_2) ] \circ dstl
            \\=\;&(\pi_2 + \pi_2) \circ dstl
          \end{alignat*}
    \item
          \begin{alignat*}{1}
             & dstl \circ swap
            \\=\;&dstl \circ swap \circ dstr^{-1} \circ dstr
            \\=\;&dstl \circ [ swap \circ (i_1 \times id) , swap \circ (i_2 \times id) ] \circ dstr
            \\=\;&dstl \circ [ (id \times i_1) \circ swap , (id \times i_2) \circ swap ] \circ dstr
            \\=\;&dstl \circ [ id \times i_1 , id \times i_2 ] \circ (swap + swap) \circ dstr
            \\=\;&dstl \circ dstl^{-1} \circ (swap + swap) \circ dstr
            \\=\;&(swap + swap) \circ dstr\tag*{\qedhere}
          \end{alignat*}
  \end{enumerate}
\end{proof}

\begin{definition}[Exponential Object]
  Let \(\C \) be a Cartesian category and \(X , Y \in \obj{\C} \).
  An object \(X^Y\) is called an exponential object (of \(X\) and \(Y\)) if there exists an evaluation morphism \(eval : X^Y \times Y \rightarrow X\) and for any \(f : Z \times Y \rightarrow X\) there exists a morphism \(curry\; f : Z \rightarrow X^Y\) that is unique with respect to the following diagram:
  % https://q.uiver.app/#q=WzAsMyxbMCwwLCJaIFxcdGltZXMgWSJdLFsyLDAsIlheWSBcXHRpbWVzIFkiXSxbMiwyLCJYIl0sWzEsMiwiZXZhbCJdLFswLDEsImN1cnJ5XFw7ZiBcXHRpbWVzIGlkIl0sWzAsMiwiZiIsMl1d
  \[
    \begin{tikzcd}
      {Z \times Y} && {X^Y \times Y} \\
      \\
      && X
      \arrow["eval", from=1-3, to=3-3]
      \arrow["{curry\;f \times id}", from=1-1, to=1-3]
      \arrow["f"', from=1-1, to=3-3]
    \end{tikzcd}
  \]
\end{definition}

\begin{proposition}
  Every exponential object \(Z^Y\) satisfies the following properties:

  \begin{enumerate}
    \item The mapping \(curry : \C(X \times Y , Z) \rightarrow \C(X , Z^Y)\) is injective,
    \item \(curry(eval \circ (f \times id)) = f\) for any \(f : X \rightarrow Z^Y\),
    \item \(curry\;f \circ g = curry(f \circ (g \times id))\) for any \(f : X \times Y \rightarrow Z, g : A \rightarrow X\).
  \end{enumerate}
\end{proposition}
\begin{proof}
  \begin{enumerate}
    \item Let \(f, g : X \times Y \rightarrow Z\) and \(curry\;f = curry\;g\), then indeed
          \[f = eval \circ (curry\; f \times id) = eval \circ (curry\;g \times id) = g. \]

    \item \(curry(eval \circ (f \times id)) = f\) follows instantly by uniqueness of \(curry(eval \circ (f \times id))\).
    \item Note that \(eval \circ (curry\;f \circ g \times id) = eval \circ (curry\;f \times id) \circ (g \times id) = f \circ (g \times id)\), thus we are done by uniqueness of \(curry(f \circ (g \times id))\).
          \qedhere
  \end{enumerate}
\end{proof}

A Cartesian closed category is a Cartesian category \(\C \) that also has an exponential object \(X^Y\) for any \(X, Y \in \obj{\C} \).
The internal logic of Cartesian closed categories is the simply typed \(\lambda \)-calculus, which makes them a suitable environment for interpreting programming languages.
For the rest of this thesis we will work in an ambient distributive category \(\C\), which, however, need not be Cartesian closed, so as to be more general.

\section{F-Coalgebras}
Let \(F : \C \rightarrow \C \) be an endofunctor. Recall that F-algebras are tuples \((X, \alpha : FX \rightarrow X)\) consisting of an object of \(\C \) and a morphism out of the functor. Initial F-algebras have been studied extensively as a means of modeling inductive data types together with induction and recursion principles~\cite{inductive}. For this thesis we will be more interested in the dual concept namely terminal coalgebras; let us formally introduce them now.

\begin{definition}[F-Coalgebra]
  A tuple \((X \in \obj{\C}, \alpha : X \rightarrow FX)\) is called an \emph{F-coalgebra} (hereafter referred to as just \emph{coalgebra}).
\end{definition}

\begin{definition}[Coalgebra Morphisms]\label{def:coalgmorph}
  Let \((X, \alpha : X \rightarrow FX)\) and \((Y, \beta : Y \rightarrow FY)\) be two coalgebras. A morphism between these coalgebras is a morphism \(f : X \rightarrow Y\) such that the following diagram commutes:
  % https://q.uiver.app/#q=WzAsNCxbMCwwLCJYIl0sWzAsMiwiWSJdLFsyLDAsIkZYIl0sWzIsMiwiRlkiXSxbMSwzLCJcXGJldGEiXSxbMCwyLCJcXGFscGhhIl0sWzAsMSwiZiIsMl0sWzIsMywiRmYiXV0=
  \[
    \begin{tikzcd}[ampersand replacement=\&]
      X \&\& FX \\
      \\
      Y \&\& FY
      \arrow["\beta", from=3-1, to=3-3]
      \arrow["\alpha", from=1-1, to=1-3]
      \arrow["f"', from=1-1, to=3-1]
      \arrow["Ff", from=1-3, to=3-3]
    \end{tikzcd}
  \]

\end{definition}

Coalgebras on a given functor together with their morphisms form a category that we call \(\coalgs{F}\).

\begin{proposition}
  \(\coalgs{F}\) is a category.
\end{proposition}
\begin{proof}
  Let \((X , \alpha : X \rightarrow FX)\) be a coalgebra. The identity morphism on \((X , \alpha)\) is the identity morphism of \(\C\) that trivially satisfies \(\alpha \circ id = Fid \circ \alpha \).

  Let \((X , \alpha : X \rightarrow FX), (Y, \beta : Y \rightarrow FY)\) and \((Z , \gamma : Z \rightarrow FZ)\) be coalgebras.
  Composition of \(f : (X, \alpha) \rightarrow (Y, \beta)\) and \(g : (Y, \beta) \rightarrow (Z, \gamma)\) is composition of the underlying morphisms in \(\C \) where:
  \begin{alignat*}{1}
        & \gamma \circ g \circ f                   \\
    =\; & Fg \circ \beta \circ f                   \\
    =\; & Fg \circ Ff \circ \alpha                 \\
    =\; & F(g \circ f) \circ \alpha\tag*{\qedhere}
  \end{alignat*}
\end{proof}

The terminal object of \(\coalgs{F}\) is sometimes called \textit{final coalgebra}, we will however call it the \textit{terminal coalgebra} for consistency with initial F-algebras.
Similarly to initial F-algebras, the terminal coalgebra can be used for modeling the semantics of coinductive data types where terminality of the coalgebra yields corecursion as a definitional principle and coinduction as a proof principle.
Let us make the universal property of terminal coalgebras concrete.

\begin{definition}[Terminal Coalgebra]
  A coalgebra \((T, t : T \rightarrow FT)\) is called a terminal coalgebra if for any other coalgebra \((X, \alpha : X \rightarrow FX)\) there exists a unique morphism \(\coalg{\alpha} : X \rightarrow T\) satisfying:

  % https://q.uiver.app/#q=WzAsNCxbMCwwLCJYIl0sWzIsMCwiRlgiXSxbMCwyLCJUIl0sWzIsMiwiRlQiXSxbMCwxLCJcXGFscGhhIl0sWzIsMywidCJdLFswLDIsIlxcbGxicmFja2V0IFxcYWxwaGEgXFxycmJyYWNrZXQiLDIseyJzdHlsZSI6eyJib2R5Ijp7Im5hbWUiOiJkYXNoZWQifX19XSxbMSwzLCJGXFxsbGJyYWNrZXQgXFxhbHBoYSBcXHJyYnJhY2tldCJdXQ==
  \[
    \begin{tikzcd}[ampersand replacement=\&]
      X \&\& FX \\
      \\
      T \&\& FT
      \arrow["\alpha", from=1-1, to=1-3]
      \arrow["t", from=3-1, to=3-3]
      \arrow["{\coalg{\alpha}}"', dashed, from=1-1, to=3-1]
      \arrow["{F\coalg{\alpha}}", from=1-3, to=3-3]
    \end{tikzcd}
  \]
  We use the common notation \(\nu F\) to denote the terminal coalgebra for \(F\) (if it exists).
\end{definition}

We will discuss the concrete form that induction and coinduction take in a type theory in \autoref{chp:agda-cat}. Let us now reiterate a famous lemma concerning terminal F-coalgebras.

\begin{lemma}[Lambek's Lemma~\cite{lambek}]\label{lem:lambek}
  Let \((T, t : T \rightarrow FT)\) be a terminal coalgebra. Then \(t\) is an isomorphism.
\end{lemma}
% \begin{proof}
%   First note that \((FT, Ft : FT \rightarrow FFT)\) is also an F-coalgebra. This yields the unique morphism \(\coalg{Ft} : FT \rightarrow T\) satisfying:
%   % https://q.uiver.app/#q=WzAsNCxbMCwwLCJGVCJdLFsyLDAsIkZGVCJdLFswLDIsIlQiXSxbMiwyLCJGVCJdLFswLDEsIkZ0Il0sWzIsMywidCJdLFswLDIsIlxcbGxicmFja2V0IEZ0IFxccnJicmFja2V0IiwyLHsic3R5bGUiOnsiYm9keSI6eyJuYW1lIjoiZGFzaGVkIn19fV0sWzEsMywiRlxcbGxicmFja2V0IEZ0IFxccnJicmFja2V0Il1d
%   \[
%     \begin{tikzcd}[ampersand replacement=\&]
%       FT \&\& FFT \\
%       \\
%       T \&\& FT
%       \arrow["Ft", from=1-1, to=1-3]
%       \arrow["t", from=3-1, to=3-3]
%       \arrow["{\coalg{Ft}}"', dashed, from=1-1, to=3-1]
%       \arrow["{F\coalg{Ft}}", from=1-3, to=3-3]
%     \end{tikzcd}
%   \]

%   \(\coalg{Ft}\) is inverse to \(t\):

%   \begin{enumerate}
%     \item \(\coalg{Ft} \circ t : (T, t) \rightarrow (T, t)\) is a morphism between F-coalgebras since
%           \begin{alignat*}{1}
%                 & F(\coalg{Ft} \circ t) \circ t \\
%             =\; & F \coalg{Ft} \circ t \circ t  \\
%             =\; & F \coalg{Ft} \circ Ft \circ t \\
%             =\; & t \circ \coalg{Ft} \circ t
%           \end{alignat*}
%           By uniqueness of the identity on \((T, t)\) we follow that \(\coalg{Ft} \circ t = id\).

%     \item \(t \circ \coalg{Ft} = id : (FT, Ft) \rightarrow (FT, Ft)\) follows by:
%           \begin{alignat*}{1}
%                 & t \circ \coalg{Ft}    \\
%             =\; & F\coalg{Ft} \circ Ft  \\
%             =\; & F(\coalg{Ft} \circ t) \\
%             =\; & F(id)                 \\
%             =\; & id
%           \end{alignat*}
%   \end{enumerate}
% \end{proof}

\section{Monads}
Monads are widely known in functional programming as a means for modeling effects in ``pure'' languages and are also central to this thesis. Let us recall the basic definitions~\cite{Lane1971,moggi}.

\begin{definition}[Monad]
  A monad \(\mathbf{T}\) on a category \(\C \) is a triple \((T, \eta, \mu)\), where \(T : \C \rightarrow \C \) is an endofunctor and \(\eta : Id \rightarrow T, \mu : TT \rightarrow T\) are natural transformations, satisfying the following laws:
  \begin{alignat*}{2}
     & \mu_X \circ \mu_{TX}  &  & = \mu_X \circ T\mu_X \tag*{(M1)}\label{M1} \\
     & \mu_X \circ \eta_{TX} &  & = id_{TX} \tag*{(M2)}\label{M2}            \\
     & \mu_X \circ T\eta_X   &  & = id_{TX} \tag*{(M3)}\label{M3}
  \end{alignat*}

  These laws are expressed by the following diagrams:
  % with indices: % https://q.uiver.app/#q=WzAsOCxbMCwwLCJUVFRYIl0sWzIsMCwiVFRYIl0sWzAsMiwiVFRYIl0sWzIsMiwiVFgiXSxbNCwwLCJUWCJdLFs2LDAsIlRUWCJdLFs4LDAsIlRYIl0sWzYsMiwiVFgiXSxbMCwxLCJcXG11X3tUWH0iXSxbMCwyLCJUXFxtdV9YIiwyXSxbMSwzLCJcXG11X1giXSxbNSw3LCJcXG11X1giXSxbNCw1LCJcXGV0YV97VFh9Il0sWzYsNSwiVFxcZXRhX1giXSxbNCw3LCJpZF97VFh9IiwyXSxbNiw3LCJpZF97VFh9IiwyXSxbMiwzLCJcXG11X1giLDJdXQ==
  % https://q.uiver.app/#q=WzAsOCxbMCwwLCJUVFRYIl0sWzIsMCwiVFRYIl0sWzAsMiwiVFRYIl0sWzIsMiwiVFgiXSxbNCwwLCJUWCJdLFs2LDAsIlRUWCJdLFs4LDAsIlRYIl0sWzYsMiwiVFgiXSxbMCwxLCJcXG11Il0sWzAsMiwiVFxcbXUiLDJdLFsxLDMsIlxcbXUiXSxbNSw3LCJcXG11Il0sWzQsNSwiXFxldGEiXSxbNiw1LCJUIl0sWzQsNywiaWQiLDJdLFs2LDcsImlkIiwyXSxbMiwzLCJcXG11IiwyXV0=
  \[
    \begin{tikzcd}
      TTTX && TTX && TX && TTX && TX \\
      \\
      TTX && TX &&&& TX
      \arrow["\mu", from=1-1, to=1-3]
      \arrow["T\mu"', from=1-1, to=3-1]
      \arrow["\mu", from=1-3, to=3-3]
      \arrow["\mu", from=1-7, to=3-7]
      \arrow["\eta", from=1-5, to=1-7]
      \arrow["T\eta", from=1-9, to=1-7]
      \arrow["id"', from=1-5, to=3-7]
      \arrow["id"', from=1-9, to=3-7]
      \arrow["\mu"', from=3-1, to=3-3]
    \end{tikzcd}
  \]
\end{definition}

\begin{definition}[Monad Morphism]\label{def:monadmorphism}
  A morphism between monads \((S : \C \rightarrow \C, \eta^S, \mu^S)\) and \((T : \C \rightarrow \C, \eta^T, \mu^T)\) is a natural transformation \(\alpha : S \rightarrow T\) between the underlying functors such that the following diagrams commute.
  % https://q.uiver.app/#q=WzAsOCxbMCwwLCJYIl0sWzIsMCwiU1giXSxbMiwxLCJUWCJdLFszLDAsIlNTWCJdLFs1LDAsIlNUWCJdLFszLDEsIlNYIl0sWzcsMCwiVFRYIl0sWzcsMSwiVFgiXSxbMCwxLCJcXGV0YV5TIl0sWzEsMiwiXFxhbHBoYSJdLFswLDIsIlxcZXRhXlQiLDJdLFszLDQsIlNcXGFscGhhIl0sWzMsNSwiXFxtdV5TIiwyXSxbNCw2LCJcXGFscGhhIl0sWzUsNywiXFxhbHBoYSIsMl0sWzYsNywiXFxtdV5UIl1d
  \[
    \begin{tikzcd}[ampersand replacement=\&]
      X \&\& SX \& SSX \&\& STX \&\& TTX \\
      \&\& TX \& SX \&\&\&\& TX
      \arrow["{\eta^S}", from=1-1, to=1-3]
      \arrow["\alpha", from=1-3, to=2-3]
      \arrow["{\eta^T}"', from=1-1, to=2-3]
      \arrow["S\alpha", from=1-4, to=1-6]
      \arrow["{\mu^S}"', from=1-4, to=2-4]
      \arrow["\alpha", from=1-6, to=1-8]
      \arrow["\alpha"', from=2-4, to=2-8]
      \arrow["{\mu^T}", from=1-8, to=2-8]
    \end{tikzcd}
  \]
\end{definition}

This yields a category of monads on a given category \(\C\) that we call \(\monads{\C}\).

\begin{proposition}\label{prop:monadscat}
  \(\monads{\C}\) is a category.
\end{proposition}
\begin{proof}
  The identity morphism of \(\monads{\C}\) is the identity natural transformation \(Id : T \rightarrow T\), which trivially respects the monad unit and multiplication. Composition of monad morphisms is composition of the underlying natural transformations, the diagrams then also follow easily.
\end{proof}

Monads can also be specified in a second equivalent way that is better suited to describe computation.

\begin{definition}[Kleisli Triple]
  A Kleisli triple on a category \(\C \) is a triple \((F, \eta, {(-)}^*)\), where \(F : \obj{\C} \rightarrow \obj{\C}\) is a mapping on objects, \({(\eta_X : X \rightarrow FX)}_{X\in\obj{\C}}\) is a family of morphisms and for every morphism \(f : X \rightarrow FY\) there exists a morphism \(f^* : FX \rightarrow FY\) called the Kleisli lifting, where the following laws hold:
  \begin{alignat*}{3}
     & \eta_X^*         &  & = id_{FX} \tag*{(K1)}\label{K1}                                                                                        \\
     & f^* \circ \eta_X &  & = f                             &  & \text{ for any } f : X \rightarrow FY \tag*{(K2)}\label{K2}                       \\
     & f^* \circ g^*    &  & = {(f^* \circ g)}^*             &  & \text{ for any } f : Y \rightarrow FZ, g : X \rightarrow FY \tag*{(K3)}\label{K3}
  \end{alignat*}
\end{definition}

Let \(f : Y \rightarrow TZ, g : X \rightarrow TY\) be two programs, where \(T\) is a Kleisli triple. These programs can be composed by taking: \(f^* \circ g : X \rightarrow TZ\), which is called Kleisli composition. Haskell's do-notation is a useful tool for writing Kleisli composition in a legible way. We will sometimes express \((f^* \circ g) x\) equivalently as
\begin{minted}{haskell}
  do y <- g x
     f y
\end{minted}

This yields the category of programs for a Kleisli triple that is called the Kleisli category.

\begin{definition}[Kleisli Category]
  Given a monad \(T\) on a category \(\C \), the Kleisli category \(\C^T\) is defined as:
  \begin{itemize}
    \item \(\obj{\C^T} = \obj{\C}\)
    \item \(\C^T(X, Y) = \C(X, TY)\)
    \item Composition of programs is Kleisli composition.
    \item The identity morphisms are the unit morphisms of \(T\), \(id_X = \eta_X : X \rightarrow TX\)
  \end{itemize}
  The laws of categories then follow from the Kleisli triple laws.
\end{definition}

\begin{proposition}[\cite{manes}] The notions of Kleisli triple and monad are equivalent.
\end{proposition}
\begin{proof}
  The crux of this proof is defining the triples, the proofs of the corresponding laws (functoriality, naturality, monad and Kleisli triple laws) are left out.

  ``\(\Rightarrow \)'':
  Given a Kleisli triple \((F, \eta, {(-)}^*)\),
  we obtain a monad \((F, \eta, \mu)\) where \(F\) is the object mapping of the Kleisli triple together with the functor action \(F(f : X \rightarrow Y) = {(\eta_Y \circ f)}^*\),
  \(\eta \) is the morphism family of the Kleisli triple where naturality is easy to show and \(\mu \) is a natural transformation defined as \(\mu_X = id_{FX}^*\).


  ``\(\Leftarrow \)'':
  Given a monad \((F, \eta, \mu)\),
  we obtain a Kleisli triple \((F, \eta, {(-)}^*)\) by restricting the functor \(F\) on objects,
  taking the underlying mapping of \(\eta \) and defining \(f^* = \mu_Y \circ Ff\) for any \(f : X \rightarrow FY\).
\end{proof}

For the rest of this thesis we will use both equivalent notions interchangeably to make definitions easier.

\section{Strong and Commutative Monads}
Consider the following program in do-notation
\begin{minted}{haskell}
  do y <- g x
     f (x , y)
\end{minted}
where \(g : X \rightarrow TY\) and \(f : X \times Y \rightarrow TZ\) are programs and \(\mathbf{T}\) is a monad. Kleisli composition does not suffice for interpreting this program; we will get stuck at
\[X \overset{\langle id , g \rangle}{\longrightarrow} X \times TY \overset{?}{\longrightarrow} T(X \times Y) \overset{f^*}{\longrightarrow} TZ. \]

Instead, one needs the following stronger notion of monad.
\begin{definition}[Strong Monad] A monad \((T, \eta, \mu)\) on a Cartesian category \(\C \) is called strong if there exists a natural transformation \(\tau_{X,Y} : X \times TY \rightarrow T(X \times Y)\) that satisfies the following conditions:
  \begin{alignat*}{2}
     & T\pi_2 \circ \tau_{1,X}                     &  & = \pi_2 \tag*{(S1)}\label{S1}                                                                     \\
     & \tau_{X,Y} \circ (id_X \times \eta_Y)       &  & = \eta_{X\times Y} \tag*{(S2)}\label{S2}                                                          \\
     & \tau_{X,Y} \circ (id_X \times \mu_Y)        &  & = \mu_{X\times Y} \circ T\tau_{X,Y} \circ \tau_{X,TY} \tag*{(S3)}\label{S3}                       \\
     & T \alpha_{X,Y,Z} \circ \tau_{X \times Y, Z} &  & = \tau_{X, Y\times Z} \circ (id_X \times \tau_{Y, Z}) \circ \alpha_{X,Y,TZ} \tag*{(S4)}\label{S4}
  \end{alignat*}
  where \(\alpha_{X,Y,Z} = \langle \pi_1 \circ \pi_1 , \langle \pi_2 \circ \pi_1 , \pi_2 \rangle \rangle : (X \times Y) \times Z \rightarrow X \times (Y \times Z)\) is the associativity morphism on products.
\end{definition}

\begin{definition}[Strong Monad Morphism]\label{def:strongmonadmorphism}
  A morphism between two strong monads \((S : \C \rightarrow \C, \eta^S, \mu^S, \tau^S)\) and \((T : \C \rightarrow \C, \eta^T, \mu^T, \tau^T)\) is a morphism between monads as in \autoref{def:monadmorphism} where additionally the following diagram commutes.
  % https://q.uiver.app/#q=WzAsNCxbMCwwLCJYIFxcdGltZXMgU1kiXSxbMCwyLCJTKFggXFx0aW1lcyBZKSJdLFsyLDIsIlQoWCBcXHRpbWVzIFkpIl0sWzIsMCwiWCBcXHRpbWVzIFRZIl0sWzAsMSwiXFx0YXVeUyJdLFsxLDIsIlxcYWxwaGEiXSxbMCwzLCJpZCBcXHRpbWVzIFxcYWxwaGEiXSxbMywyLCJcXHRhdV5UIiwyXV0=
  \[
    \begin{tikzcd}[ampersand replacement=\&]
      {X \times SY} \&\& {X \times TY} \\
      \\
      {S(X \times Y)} \&\& {T(X \times Y)}
      \arrow["{\tau^S}", from=1-1, to=3-1]
      \arrow["\alpha", from=3-1, to=3-3]
      \arrow["{id \times \alpha}", from=1-1, to=1-3]
      \arrow["{\tau^T}"', from=1-3, to=3-3]
    \end{tikzcd}
  \]
\end{definition}

As with monads this yields a category of strong monads on \(\C\) that we call \(\strongmonads{\C}\).

Let us now consider the following two programs
\begin{multicols}{2}
  \begin{minted}{haskell}
    do x <- p
       y <- q
       return (x, y)
    \end{minted}

  \begin{minted}{haskell}
    do y <- q
       x <- p
       return (x, y)
  \end{minted}
\end{multicols}
where \(p : TX\) and \(q : TY\) are computations of some monad \(T\). A monad where these programs are equal is called commutative.

\begin{definition}[Commutative Monad]
  A strong monad \(\mathbf{T}\) is called commutative if the (right) strength \(\tau \) commutes with the induced left strength
  \[\sigma_{X,Y} = Tswap \circ \tau_{Y,X} \circ swap : TX \times Y \rightarrow T(X \times Y)\]
  that satisfies symmetrical conditions to the ones \(\tau \) satisfies.
  Concretely, \(\mathbf{T}\) is called commutative if the following diagram commutes:
  % https://q.uiver.app/#q=WzAsNCxbMCwyLCJUKFggXFx0aW1lcyBUWSkiXSxbMiwwLCJUKFRYIFxcdGltZXMgWSkiXSxbMiwyLCJUKFggXFx0aW1lcyBZKSJdLFswLDAsIlRYIFxcdGltZXMgVFkiXSxbMywxLCJcXHRhdSJdLFszLDAsIlxcc2lnbWEiLDJdLFswLDIsIlxcdGF1XioiLDJdLFsxLDIsIlxcc2lnbWFeKiJdXQ==
  \[
    \begin{tikzcd}
      {TX \times TY} && {T(TX \times Y)} \\
      \\
      {T(X \times TY)} && {T(X \times Y)}
      \arrow["\tau", from=1-1, to=1-3]
      \arrow["\sigma"', from=1-1, to=3-1]
      \arrow["{\tau^*}"', from=3-1, to=3-3]
      \arrow["{\sigma^*}", from=1-3, to=3-3]
    \end{tikzcd}
  \]
\end{definition}

\section{Free Objects}
Free objects, roughly speaking, are constructions for instantiating structure declarations in a minimal way.
We will rely on free structures in \autoref{chp:iteration} to define a monad in a general setting. We recall the definition
to establish some notation and then describe how to obtain a monad via existence of free objects.

\begin{definition}[Free Object]\label{def:free}
  Let \(\C, \D \) be categories and \(U : \C \rightarrow \D \) be a forgetful functor (whose construction usually is obvious). A free object on some object \(X \in \obj{\D}\) is an object \(FX \in \obj{\C}\) together with a morphism \(\eta : X \rightarrow UFX\) such that for any \(Y \in \obj{\C}\) and \(f : X \rightarrow UY\) there exists a unique morphism \(\free{f} : FX \rightarrow Y\) satisfying:
  % https://q.uiver.app/#q=WzAsMyxbMCwwLCJYIl0sWzEsMCwiVVkiXSxbMCwxLCJVRlgiXSxbMCwxLCJmIl0sWzAsMiwiXFxldGEiLDJdLFsyLDEsIlVcXGZyZWV7Zn0iLDIseyJzdHlsZSI6eyJib2R5Ijp7Im5hbWUiOiJkYXNoZWQifX19XV0=
  \[
    \begin{tikzcd}[ampersand replacement=\&]
      X \& UY \\
      UFX
      \arrow["f", from=1-1, to=1-2]
      \arrow["\eta"', from=1-1, to=2-1]
      \arrow["{U\free{f}}"', dashed, from=2-1, to=1-2]
    \end{tikzcd}
  \]
\end{definition}

\begin{proposition}\label{thm:freemonad}
  Let \(U : \C \rightarrow \D \) be a forgetful functor.
  If for every \(X \in \obj{\D}\) a free object \(FX \in \obj{\C}\) exists then \((X \mapsto UFX, \eta : X \rightarrow UFX, \free{(f : X \rightarrow UFY)} : UFX \rightarrow UFY)\) is a Kleisli triple on \(\D \).
\end{proposition}
\begin{proof}
  We are left to check the laws of Kleisli triples.

  \begin{itemize}
    \item[\ref{K1}] \(\free{\eta} = id\)

          By uniqueness of \(\free{\eta}\) it suffices to show that \(id \circ \eta = \eta \) which holds trivially.
    \item[\ref{K2}] \(\free{f} \circ \eta = f\) for any \(f : X \rightarrow UFY\)

          This is the universal property concerning \(\free{f}\).
    \item[\ref{K3}] \(\free{f} \circ \free{g} = \free{\freee{f} \circ g}\) for any \(f : Y \rightarrow UFZ, g : X \rightarrow UFY\)

          By uniqueness of \(\free{\freee{f} \circ g}\) we are left to show \(\free{f} \circ \free{g} \circ \eta = \free{f} \circ g\) which again follows directly by the universal property of \(\free{g}\).
          \qedhere
  \end{itemize}
\end{proof}