diff --git a/docs/documentation.tex b/docs/documentation.tex
index 356d18551fee84643d177deac56ac9217a25bb4c..e239ffb77efa443b4c9feffb280b7aa759d23379 100644
--- a/docs/documentation.tex
+++ b/docs/documentation.tex
@@ -12,13 +12,21 @@
 
 %\usepackage{isodate}
 \usepackage{graphicx}
-
-\title{An (potential) Engineers Guide to the Truncated Newton Nonsmooth (Maybe Multigrid) Method}
+\usepackage{listings}
+\title{An Engineer's Guide to the Truncated Nonsmooth Newton Method}
+\subtitle{An example in small-strain elastoplasticity}
 %\docuntertitel{An example in small-strain elastoplasticity}
 
+\newcommand{\pythonfile}{../elasticity_fufem.py}
+
 \usepackage{preample}
 \usepackage[backend=biber]{biblatex}
 
+\lstset{rangeprefix=\#\{\ ,% curly left brace plus space
+        rangesuffix=\ \}}% space plus curly right brace
+
+\newcommand{\inputtaggedcode}[2][0]{\lstinputlisting[includerangemarker=false,firstnumber=0,widthgobble=#1,language={[3]Python},linerange=#2-end]{\pythonfile}}
+
 \addbibresource{literature.bib}
 
 \begin{document}
@@ -37,564 +45,15 @@ The Implementation will also be documented in the appendix, explaining the neces
 
 \section{The TNNMG Method}
 
-\subsection{The abstract setting}
-
-The \textbf{truncated newton nonsmooth multigrid method} (TNNMG) introduced by~\textcite{KornhuberGraeser2009a} and further generalized in~\textcite{Graeser2017a} is
-a numerical method for finding solutions of convex block-separable nonsmooth minimization problems, which for example, arise in small strain elastoplasticity~\cite{Sander2019cm}, but also 
-fracture phase-field methods  as well as contact problems.
-
-The TNNMG Method is applicable to functionals $J: \R^n \rightarrow \R \cup \{+\infty\} $ with $x\in \R^n$ of the form: \begin{equation}
-    J(x) = J_0(x) + \sum_{i}^m \varphi_i(R_i x) \quad x \in \R^n
-\end{equation}
-where $J_0$ is a twice continuously differentiable functional, the $\varphi_i$ are convex, proper, and lower-semicontinuous (= non-smooth)
-and the \textbf{restriction maps} $R_k : \R^n \rightarrow \R^{n_k}$ with $k < n$.
-
-We call Functionals which can be split into a smooth part $J_0$ and many nonsmooth parts $\varphi_i$ and restriction maps 
-which restrict into compatable subspaces \textbf{block-separable}.
-
-\subsubsection*{A note on block-separability}
-
-One key property of the block-separability is, that a \textit{minimization} of the distinct parts \textit{minimize} the whole non-smooth part of the functional,
-therefore, we can minimize each non-smooth part on it's own, resulting in a sequence of very small minimization problems, which can be solved efficiently,
-as the problem size of the blocks are small.
-
-To put this fact into a formal statement, we need some definitions.
-
-We say that the domain of $J$ can be decomposed to into $m$ subspace with dimension $n_k$:\begin{align}
-    \R^n &= \sum_{k=1}^m V_k \label{eqn:decomposedSpace}
-\end{align} where we identify each subspace $V_k$ with a corresponding $\R^{n_k}$ via Prolongation maps $P_k: \R^{n_k} \rightarrow \R^n$,
-where the expression \eqref{eqn:decomposedSpace} can now be written as:
-\begin{align}
-    \R^n &= \sum_{k=1}^m V_k = \sum_{k=1}^m P_k(\R^{n_k})
-\end{align}
-which is a complicated way of saying that any element in $\R^{n_k}$ has a direct representant in the whole space $\R^n$ by means of its prolongation map $P_k$.
-
-\begin{example}
-    Given $n,k$ and $n_k = 2$ one could write for the prolongation:
-    \begin{equation*}
-        P_k: \begin{bmatrix}
-            a_1 \\
-            a_2 \\
-        \end{bmatrix} \mapsto \begin{bmatrix}
-            0 \\
-            \vdots \\
-            a_1 \\
-            a_2 \\ 
-            \vdots \\
-            0 \\
-        \end{bmatrix}
-    \end{equation*}    
-\end{example}
-
-These prolongation operators allow to \textit{communicate} from the restricted parts of the functional 
-to the whole functional.
-
-To communicate from the whole functional to the restricted parts, the \textit{restriction maps} $R_k$ need to be defined clearly.
-These $R_k: \R^n \rightarrow \R^{\tilde{n}_k}$ can be the most natural restriction of an element $v$ in $\R^n$ 
-to some components $\{v_{i},v_{i+1},\ldots, v_{i+\tilde{n}_k}\}$, writing them as a smaller vector in $\R^{\tilde{n}_k}$ 
-
-\begin{example}
-    The restriction $R_k$ in the above example would be the map\begin{align*}
-        R_k: \begin{bmatrix}
-            0 \\
-            \vdots \\
-            a_1 \\
-            a_2 \\ 
-            \vdots \\
-            0 \\
-        \end{bmatrix} &\mapsto \begin{bmatrix}
-            a_1 \\
-            a_2 \\
-        \end{bmatrix}
-    \end{align*}
-\end{example}
-
-
-If the subspaces of dimension $\tilde{n}_k$ are defined in such a way that $n = \sum \tilde{n}_k$, then the whole space $\R^n$ can be decomposed into the following 
-subspaces arising from the restriction maps: \begin{align}
-    \R^n &= \bigoplus\limits_{k=1}^{m} {(\ker R_k)}^{\bot} \label{eqn:decompProperty}
-\end{align}
-
-\begin{example}
-    Take $n = 6$, $n_1 = 3$, $n_2 = 2$, $n_3 = 1$ with the Restriction Maps $R^k$ \begin{align*}
-        R_1: \begin{bmatrix}
-            a_1 \\
-            a_2 \\
-            a_3 \\
-            a_4 \\
-            a_5 \\
-            a_6 \\
-        \end{bmatrix} &\mapsto \begin{bmatrix}
-            a_1 \\
-            a_2 \\
-            a_3 \\
-        \end{bmatrix} & R_2: \begin{bmatrix}
-            a_1 \\
-            a_2 \\
-            a_3 \\
-            a_4 \\
-            a_5 \\
-            a_6 \\
-        \end{bmatrix} &\mapsto \begin{bmatrix}
-            a_4 \\
-            a_5 \\
-        \end{bmatrix} & R_3: \begin{bmatrix}
-            a_1 \\
-            a_2 \\
-            a_3 \\
-            a_4 \\
-            a_5 \\
-            a_6 \\
-        \end{bmatrix} &\mapsto \begin{bmatrix}
-            a_6 \\
-        \end{bmatrix} 
-    \end{align*}
-
-    their kernels are spanned by the vectors \begin{align*}
-        \ker R_1 &= \spn \{ \begin{bmatrix}
-            0 \\
-            0 \\
-            0 \\
-            1 \\
-            0 \\
-            0 \\
-        \end{bmatrix},\begin{bmatrix}
-            0 \\
-            0 \\
-            0 \\
-            0 \\
-            1 \\
-            0 \\
-        \end{bmatrix},\begin{bmatrix}
-            0 \\
-            0 \\
-            0 \\
-            0 \\
-            0 \\
-            1 \\
-        \end{bmatrix}\} & \ker R_2 &= \spn \{ \begin{bmatrix}
-            1 \\
-            0 \\
-            0 \\
-            0 \\
-            0 \\
-            0 \\
-        \end{bmatrix},\begin{bmatrix}
-            0 \\
-            1 \\
-            0 \\
-            0 \\
-            0 \\
-            0 \\
-        \end{bmatrix},\begin{bmatrix}
-            0 \\
-            0 \\
-            1 \\
-            0 \\
-            0 \\
-            0 \\
-        \end{bmatrix},\begin{bmatrix}
-            0 \\
-            0 \\
-            0 \\
-            0 \\
-            0 \\
-            1 \\
-        \end{bmatrix}\} \\
-        \ker R_3 &= \spn \{ \begin{bmatrix}
-            1 \\
-            0 \\
-            0 \\
-            0 \\
-            0 \\
-            0 \\
-        \end{bmatrix},\begin{bmatrix}
-            0 \\
-            1 \\
-            0 \\
-            0 \\
-            0 \\
-            0 \\
-        \end{bmatrix},\begin{bmatrix}
-            0 \\
-            0 \\
-            1 \\
-            0 \\
-            0 \\
-            0 \\
-        \end{bmatrix},\begin{bmatrix}
-            0 \\
-            0 \\
-            0 \\
-            1 \\
-            0 \\
-            0 \\
-        \end{bmatrix},\begin{bmatrix}
-            0 \\
-            0 \\
-            0 \\
-            0 \\
-            1 \\
-            0 \\
-        \end{bmatrix}\} 
-    \end{align*}
-
-    Therefore the orthogonal complement is \begin{align*}
-        {(\ker R_1)}^{\bot} &= \spn \{ \begin{bmatrix}
-            1 \\
-            0 \\
-            0 \\
-            0 \\
-            0 \\
-            0 \\
-        \end{bmatrix},\begin{bmatrix}
-            0 \\
-            1 \\
-            0 \\
-            0 \\
-            0 \\
-            0 \\
-        \end{bmatrix},\begin{bmatrix}
-            0 \\
-            0 \\
-            1 \\
-            0 \\
-            0 \\
-            0 \\
-        \end{bmatrix}\} & {(\ker R_2)}^{\bot} &= \spn \{ \begin{bmatrix}
-            0 \\
-            0 \\
-            0 \\
-            1 \\
-            0 \\
-            0 \\
-        \end{bmatrix},\begin{bmatrix}
-            0 \\
-            0 \\
-            0 \\
-            0 \\
-            1 \\
-            0 \\
-        \end{bmatrix}\} & {(\ker R_3)}^{\bot} &= \spn \{ \begin{bmatrix}
-            0 \\
-            0 \\
-            0 \\
-            0 \\
-            0 \\
-            1 \\
-        \end{bmatrix} \}
-    \end{align*}
-
-    whose direct sum span the whole space.
-\end{example}
-% Example of 6 d vector into 3d, 2d, 1d as a compatible 
-We now write $R = \{R_1, R_2, \ldots R_M\}$ as the collection of all restriction maps, whose restrictions fulfill \eqref{eqn:decompProperty}
-which leads us to the following definition of \textbf{block-separability}:
-
-\begin{definition}[Block-separability of a functional]
-    A function $J: \R^n \rightarrow \R \cup \infty$ is \textbf{block-separable nonsmooth} with respect to the collection of restriction maps $R$,
-    if $J$ can be decomposed into a continouisly differentiable function $J_0: \R^n \rightarrow \R$ and convex, proper, lower-semicontinous functions 
-    $\phi_k: \R^{n_k} \rightarrow \R \cup \infty$ such that \begin{equation}
-        J = J_0(v) + \sum_{k=1}^{M} \phi_k(R_k v) \quad v \in \R^n
-    \end{equation}
-\end{definition}
-the current collection of restrictions $R$ into distinct subspaces might seem very rigid, but there are generalizations of 
-such restrictions. The key property of restrictions is the \textbf{compatibility} of the subspace decomposition:
-
-\begin{definition}[Compatible subspace-decomposition]
-    A subspace decomposition $\R^n = \sum V_k$ is called \textbf{compatible}, if \begin{equation}
-        J(u) \leq J(u + v) \quad \forall v \in V_k, \quad k = 1,\ldots,n
-    \end{equation} implies global optimality \begin{equation}
-        J(u) \leq J(u + v) \quad \forall v \in \R^n
-    \end{equation}
-\end{definition}
-the direct orthogonal decomposition~\eqref{eqn:decompProperty} is a compatible decomposition.
-For more details refer to~\cite{Graeser2017a}.
-
-\subsubsection*{The algorithm}
-
-While the term \textbf{multigrid} is in the name, it is by any means not required to use a multigrid step anywhere in the algorithm.
-It only helps in finding an approximate solution extremely quickly.
-
-With the definitions from the previous sections, the algorithm is as follows: 
-
-\begin{figure}[h]
-\centering
-\begin{algorithmic}[1]
-    \State $u^{0} \gets \text{Admissable initial guess}$
-    \Repeat 
-    \LComment{Step 1: Nonlinear pre-smoothing (Also Nonlinear Block-Gauss-Seidel)}
-    \BeginBox 
-    \State $w^{i,0} \gets u^{i}$
-    \For{$k = 1,\ldots,m$}
-        \State Compute approximate solution $w^{i,k}$ in the block $k$ with \begin{equation}
-            w^{i,k} \approx \argmin_{v \in w^{i,k-1} + V_k} J(v) \tag{I}\label{eqn:FirstStep}
-        \end{equation}
-    \EndFor
-    \State $u^{i + \sfrac{1}{2}} \gets w^{i,k}$
-    \EndBox
-    \LComment{Step 2: Truncated (Multigrid, Quasi-) Newton-Step in a Smooth Subspace}
-    \BeginBox
-        \State Determine a large subspace $W_i \subset \R^n$ such that $\bigl. J \bigr|_{u^{i + \sfrac{1}{2}} + W_i}$ is twice continuously differentiable.
-        \State Solve for the Newton Increment $\Delta u^i$ approximately in the subspace $W_i$: \begin{equation}
-            \nabla^{2}\bigl. J(u^{i + \sfrac{1}{2}})\bigr|_{W_i \times W_i} \cdot \Delta u^i = \nabla \bigl. J(u^{i + \sfrac{1}{2}})\bigr|_{W_i} \tag{II}\label{eqn:SecondStep}
-        \end{equation} 
-    \EndBox
-    \LComment{Step 3: Projection onto admissable space and Postprocessing }
-    \BeginBox
-        \State Compute a projection $\Delta \tilde{u}^i = \Pi_{J}(\Delta u^i)$ onto the domain of $J$
-        \State Compute a damping $\rho_i$ such that \begin{equation}
-            J(u^{i + \sfrac{1}{2}} + \rho_i \Delta \tilde{u}^i) \leq J(u^{i + \sfrac{1}{2}}) \tag{III}\label{eqn:ThirdStep}
-        \end{equation}
-        \State $u^{i+1} \gets u^{i + \sfrac{1}{2}} + \rho_i \Delta \tilde{u}^i$
-    \EndBox
-    \Until{Convergence}
-\end{algorithmic}
-\end{figure}
-
-%These steps will be explained in more detail, pointing to ressources on practical ressources, but a more rigorous treatment is found in~\cite{Graeser2017a}
-
-\subsubsection{Nonlinear Pre-Smoother / Nonlinear Block-Gauss-Seidel}
-
-In the first Step of the TNNMG method, a sequence of minimization problems must be solved~\eqref{eqn:FirstStep}.
-
-Implementation wise, this is the focus of this method, because for all other steps, existing libraries can be used, whereas the 
-implementation of the first step strongly depends on the nonsmooth blocks of the functional.
-
-In the algorithm, the block size is assumed to be small, compared to the number of the blocks,
-and because the functionals $\phi_k$ are assumed to be convex, algorithms from convex optimizations can be utilized,
-the argument being, that even if the algorithm scales badly with the problem size, it can be used because the block size is small.
-
-\subsubsection{Truncated Linear Correction}
-
-After the nonlinear pre-smoother, one linear correction step is done, where~\eqref{eqn:SecondStep} gives the correction.
-This correction only needs to be solved very inexactly. While multigrid methods are a good step for solving the system,
-a preconditioned conjugate gradient iteration could be taken, or even a direct solution of the system. A multigrid step is not 
-needed or mandatory despite the name of the method.
-
-In order to determine the admissable space, where $J$ is twice continouisly differentiable, 
-the subspaces which are \textit{not} differentiable can be filtered, using some kind of criterion.
-It is not required to use the maximal subspace, only a large subspace.
-
-\subsubsection{Projection onto the admissable domain}
-
-After computing the increment step, a projection and damping is required to complete the algorithm. 
-While the projection step onto the admissable domain is not strictly necessary, it accelerates the convergence of the method 
-by allowing bigger steps.
-
-The damping is mandatory for global convergence of the method from any initial iterate, which is common for \textsc{Newton-Raphson} like solvers.
-
-\subsection{The concrete Setting}
-
-In the choosen example, the problem of small-strain-elastoplasticity is used as an example to illustrate the steps of the method.
-To reiterate the exact formulation, as it is different from the commonly used strain-rate formulation, we reiterate the 
-problem of elastoplasticity.
-
-\subsubsection{Kinematics}
-
-Consider a simple body $\manifold{B} \subset \R^d$ as a domain with \textsc{Lipschitz} boundary $\pdiff \manifold{B}$.
-With $\smtenI{u}: \manifold{B} \rightarrow \R^d$ the displacement field, and the engineering strains $\smtenII{\varepsilon}: \manifold{B} \rightarrow \SymMat^d$\begin{equation}
-    \smtenII{\varepsilon} = \sym \grad \smtenI{u}
-\end{equation} where $\SymMat^d$ denotes the space of $d \times d$ symmetric matrices.
-
-In order to describe plasticity, a additive split of the strains is used \begin{equation}
-    \smtenII{\varepsilon} = \smtenII{\varepsilon}^{e} + \smtenII{\varepsilon}^{p} \label{eqn:additiveStrainSplit}
-\end{equation} into the \textit{elastic strains} $\smtenII{\varepsilon}^{e} \in \SymMat^d$ and \textit{plastic strains} $\smtenII{\varepsilon}^{p} \in \SymMatTr^d$,
-where $\SymMatTr^d$ denotes the space of symmetric trace free $d \times d$ matrices, as we consider crystalline materials, where changes in volume are not attributed to plastic deformation.
-The theory of the solution algorithm does not enforce this condition, but a minimizer of a local functional can be solved for analytically in the space of trace free matrices.
-
-\subsubsection{Balance Equations}
-
-We assume that balance of momentum holds for any subbody $\manifold{U} \subset \manifold{B}$, which implies the local form of 
-balance \begin{equation}
-   - \divg \smtenII{\sigma} = \smtenI{f} \label{eqn:balanceLaw}
-\end{equation} where $\smtenII{\sigma}: \manifold{B} \rightarrow \SymMat^d$ are the engineering stresses and $\smtenI{f}: \manifold{B} \rightarrow \R^d$ is a given volume force field.
-We only consider the quasi-static process, where inertial terms are omitted, but the equation is considered in a pseudo-time $t \in [0, T]$ 
-due to the irreversibility of the plastic strains.
-
-Due to the plastic \textit{Dissipation} of Energy, we consider the \textit{Principle of Maximum Dissipated Energy} by Hill, 
-which states, that the dissipated energy \begin{equation}
-    \mathcal{D} = \smtenII{\sigma} : \smtenII[\dot]{\varepsilon}^p \geq 0
-\end{equation} is maximized \begin{equation}
-    \sup_{\smtenII{\sigma} \in \mathcal{E}} \mathcal{D}(\smtenII{\sigma}) \quad \smtenII{\sigma} \in \mathcal{E}
-\end{equation} over all admissable real stress states.
-
-For this reason we introduce the \textit{real dissipated energy} $\mathcal{D}^p$ as the support function of the dissipated energy: \begin{equation}
-    \mathcal{D}^p(\smtenII[\dot]{\varepsilon}^p) \coloneq \sup_{\smtenII{\sigma} \in \mathcal{E}} \mathcal{D}(\smtenII{\sigma})
-\end{equation} which is the \textsc{Legendre}-transformation of the dissipated energy.
-
-Due to the convexity of the elastic domain and the convexity of the Dissipation function it holds \begin{equation}
-    \mathcal{D}(\smtenII{\sigma}) - \mathcal{D}(\smtenII{\sigma}^{\ast}) = (\smtenII{\sigma} - \smtenII{\sigma}^{\ast}) : \smtenII[\dot]{\varepsilon}^p \geq 0 \qquad \forall \smtenII{\sigma}^{\ast} \in \mathcal{E}
-\end{equation}
-which holds in an equivalent way for the \textsc{Legendre}-transformed function $\mathcal{D}^p$ \begin{equation}
-    \mathcal{D}^p(\smtenII[\dot]{\varepsilon}^p) - \mathcal{D}^p(\vdiff \smtenII[\dot]{\varepsilon}^p) = \smtenII{\sigma}: (\smtenII[\dot]{\varepsilon}^p - \vdiff \smtenII[\dot]{\varepsilon}^p) \geq 0 \qquad \forall \vdiff \smtenII[\dot]{\varepsilon}^p \in \mathcal{E}
-\end{equation} which implies the \textit{real dissipated energy} $\mathcal{D}^p$ to also be a convex function.
-
-\subsubsection{Constitutive Equations}
-
-For the elastic part of the strains, we use a \textsc{St.\@ Venant-Kirchhoff} Material law, which is only for simplicity, 
-more complicated laws can be used here, it just eases the implementation.
-It is stated with the two \textsc{Lamé} Constants $\lambda$ and $\mu$, which we assume to be positive constants.
-\begin{equation}
-    \smtenII{\sigma} = \lambda \trace(\smtenII{\varepsilon}^e) + 2 \mu (\smtenII{\varepsilon}^e) = \smtenIV{C} : \smtenII{\varepsilon}^e \label{eqn:StVernantKirchhoff}
-\end{equation} where $\smtenIV{C}$ denotes the \textsc{Hooke}-Tensor
-
-For the plastic regime, we describe the admissable stress states $\mathcal{E}$ as a convex region whose \textit{boundary} $\pdiff \mathcal{E}$ is described by 
-a \textit{yield function} $\phi(\smtenII{\sigma})$, attaining its name as the \textit{yield surface} of $\mathcal{E}$ defined by \begin{align}
-    \mathcal{E} &= \{ \smtenII{\sigma} \in \SymMat^d : \phi(\smtenII{\sigma}) \leq 0 \} \\
-    \pdiff \mathcal{E} &= \{ \smtenII{\sigma} \in \SymMat^d : \phi(\smtenII{\sigma}) = 0 \}
-\end{align}
-using the principle of maximum work, the \textit{associated flow rule} can be expressed as \begin{equation}
-    \smtenII[\dot]{\varepsilon}^p = \lambda^p \frac{\pdiff \phi(\smtenII{\sigma})}{\pdiff \smtenII{\sigma}} \label{eqn:smoothFlowRule}
-\end{equation} assuming $\phi$ to be differentiable everywhere.
-
-If we want to drop the differentiability of $\phi$ everywhere, we can say that the plastic flow is an element of the scaled \textbf{subdifferential} $\pdiff \phi$ of the yield surface 
-\begin{equation}
-    \smtenII[\dot]{\varepsilon}^p \in \lambda^p \pdiff \phi(\smtenII{\sigma}) \label{eqn:nonsmoothFlowRule}
-\end{equation} which is defined as
-
-\begin{definition}[Subdifferential of a convex function]
-    Given a convex function $f: X \rightarrow \R$ on a vector space $X$ we define the \textbf{subdifferential} $\pdiff f(x)$ of $f$ at $x \in X$
-    to be the subset of the dual space $X'$ defined by \begin{align}
-        \pdiff f(x) \coloneq \{ x^\ast \in X' : f(y) \geq f(x) + \scalar{x^\ast}{y - x} \quad\forall y \in X \} \label{eqn:subdifferentialSet}
-    \end{align}
-\end{definition}
-
-\begin{example}[Subdifferential of convex functions over $\R^n$]
-    Let $\R^n$ be the Vectorspace $X$ in the above definition, 
-    and let $\smtenI{x} \in \R^n$ denote a vector, and $f$ a convex function on $\R^n$, 
-    the subdifferential of $f$ at $\smtenI{x}$ is given by \begin{align}
-        \pdiff f(x) \coloneq \{ \smtenI{x}^\ast \in \R^n : f(\smtenI{y}) \geq f(\smtenI{x}) + \smtenI{x}^\ast {(\smtenI{y} - \smtenI{x})}^T \quad\forall \smtenI{y} \in \R^n \}
-    \end{align} where $\square^T$ denotes transposition.
-
-    In $\R^n$ one could interpret the subdifferential as the cone of possible subgradients of $f$ at points $\smtenI{x}$ where $f$ is not differentiable.
-\end{example}
-
-This definition allows for nonsmooth yield functions.
-
-Using the dissipation function, one can write~\eqref{eqn:nonsmoothFlowRule} in its dual form \begin{equation}
-    \smtenII{\sigma} \in \pdiff \mathcal{D}^p(\smtenII[\dot]{\varepsilon}^p) \label{eqn:StressflowRule}
-\end{equation}
-
-\subsubsection*{Example: The \textsc{von Mises} yield function}
-
-We define the stress deviator to be \begin{equation}
-    \smtenII{\sigma}^D = \smtenII{\sigma} - \frac{1}{d} \trace\smtenII{\sigma} \smtenII{I}
-\end{equation} for any $\smtenII{\sigma} \in \SymMat^d$. 
-This implies that the case with $d=2$ does not describe real materials, as the notion of a stress deviator has changed.
-But it allows for formulas which are not dependent on the dimension of the problem.
-
-The \textsc{von Mises} yield function is described by the elastic shear energy, which is described by the invariant \begin{align}
-    I_2(\smtenII{\sigma}^D) &= \frac{1}{2} \norm{\smtenII{\sigma}^D}^2
-\end{align} reaches a empirically measured critical value $\sigma_c$.
-
-The \textsc{von Mises} yield function takes the form \begin{equation}
-    \phi(\smtenII{\sigma}) \coloneq \sqrt{I_2(\smtenII{\sigma}^D)} - \sigma_c
-\end{equation}
-
-We are interested in the \textit{Dissipation Function} corresponding to $\phi$, which is the following value: \begin{align}
-    \mathcal{D}^p(\smtenII[\dot]{\varepsilon}^p) &= \sup_{\smtenII{\sigma} \in \SymMat^D} \{ \smtenII[\dot]{\varepsilon}^p : \smtenII{\sigma}; \norm{\smtenII{\sigma}^D} - \sigma_c \leq 0 \} \\
-    \shortintertext{$\smtenII[\dot]{\varepsilon}^p$ is trace free}
-    &= \sup_{\smtenII{\sigma} \in \SymMat^D} \{ \smtenII[\dot]{\varepsilon}^p : \smtenII{\sigma}^D; \norm{\smtenII{\sigma}^D} \leq \sigma_c \} \\ 
-    \shortintertext{Because the arguments are parallel $\smtenII{\sigma} = \lambda \smtenII[\dot]{\varepsilon}^p$, the first term can be written as $\smtenII[\dot]{\varepsilon}^p : \smtenII{\sigma}^D= \norm{\smtenII[\dot]{\varepsilon}^p} \cdot \norm{\smtenII{\sigma}^D}$}
-    &= \sup_{\smtenII{\sigma} \in \SymMat^D} \{ \norm{\smtenII[\dot]{\varepsilon}^p} \cdot \norm{\smtenII{\sigma}^D} ; \norm{\smtenII{\sigma}^D} \leq \sigma_c \} \\
-    \shortintertext{Because $\norm{\smtenII[\dot]{\varepsilon}^p} \geq 0$ and $\smtenII[\dot]{\varepsilon}^p$ is fixed, $\sup\{ \norm{\smtenII[\dot]{\varepsilon}^p} a \} = \norm{\smtenII[\dot]{\varepsilon}^p} \sup\{ a \}$}
-    &= \norm{\smtenII[\dot]{\varepsilon}^p} \cdot \sup_{\smtenII{\sigma} \in \SymMat^D} \{ \norm{\smtenII{\sigma}^D} ; \norm{\smtenII{\sigma}^D} \leq \sigma_c \} \\
-    \shortintertext{The suppremum is achieved when the inequality holds with equality, therefore $\norm{\smtenII{\sigma}^D} = \sigma_c$}
-    \mathcal{D}^p(\smtenII[\dot]{\varepsilon}^p) &= \norm{\smtenII[\dot]{\varepsilon}^p} \cdot \sigma_c
-\end{align} which is the Dissipation Function of the \textsc{Von-Mises} Yield Function.
-
-This technique presented here and given in~\cite{Jaap2023} is applicable to any yield function to derive the corresponding dissipation function.
-
-The \textsc{Von-Mises} Dissipation Function is already not differentiable for $\norm{\smtenII[\dot]{\varepsilon}^p} = 0$, but 
-the boundary of the elastic domain is still smooth.
-
-\subsubsection*{Example: The \textsc{Tresca} yield function}
-
-To give another example of a dissipation function the \textsc{Tresca} yield function is presented.
-Given the critical shear stress $\tau_0$, the material yields, if 
-\begin{align}
-    \phi(\smtenII{\sigma}) &= \max_{i,j} \abs{ \sigma_i - \sigma_j} - \tau_0
-\end{align} is fulfilled, $\sigma_1,\sigma_2,\sigma_3$ denoting the principal stresses of the stress tensor.
-As derived in \cite{Jaap2023}, the dissipation function is given by \begin{align}
-    \mathcal{D}^p(\smtenII[\dot]{\varepsilon}^p) = \tau_c \max\{ \abs{\varepsilon_1}, \abs{\varepsilon_2}, \abs{\varepsilon_3} \} 
-\end{align} where it is assumed, that $\smtenII[\dot]{\varepsilon}^p \in \SymMatTr$, and $\varepsilon_1,\varepsilon_2,\varepsilon_3$ being the principal strains.
-
-\subsubsection{Weak Form and Discretization of small-strain-elastoplasticity}
-
-The derivation works in three steps: \begin{enumerate}
-    \item Derivation of a \textit{variational inequality} from the dissipation function 
-    \item Derivation of the weak form of equilibrium from the balance equation, using special test functions
-    \item Summing them up to derive the total equation.
-\end{enumerate}
-afterwards, during the discretization, the time derivatives are eliminated exploiting the mathematical properties of the dissipation function.
-
-\begin{enumerate}
-    \item Dissipation function
-
-    Following~\eqref{eqn:StressflowRule} the condition \begin{equation}
-        \smtenII{\sigma} \in \pdiff \mathcal{D}(\smtenII[\dot]{\varepsilon}^p)
-    \end{equation} corresponds to the inequality of the supdifferential~\eqref{eqn:subdifferentialSet} \begin{align}
-        \mathcal{D}(\vdiff \smtenII[\dot]{\varepsilon}^p) &\geq \mathcal{D}(\smtenII[\dot]{\varepsilon}^p) + \smtenII{\sigma} : ( \vdiff \smtenII[\dot]{\varepsilon}^p - \smtenII[\dot]{\varepsilon}^p ) \quad \forall \vdiff \smtenII[\dot]{\varepsilon}^p 
-    \end{align} where we eliminate $\smtenII{\sigma}$ with~\eqref{eqn:StVernantKirchhoff} and insert the~\eqref{eqn:additiveStrainSplit} to eliminate the elastic strains.
-    \begin{align}
-        \mathcal{D}(\vdiff \smtenII[\dot]{\varepsilon}^p) &\geq \mathcal{D}(\smtenII[\dot]{\varepsilon}^p) + (\smtenIV{C} : (\smtenII{\varepsilon} - \smtenII{\varepsilon}^p)): ( \vdiff \smtenII[\dot]{\varepsilon}^p - \smtenII[\dot]{\varepsilon}^p ) \quad \forall \vdiff \smtenII[\dot]{\varepsilon}^p \label{eqn:dissipationInequalityVaria}
-    \end{align} and integrate over the domain of interest $\Omega$ \begin{equation}
-      \int_{\Omega} \mathcal{D}(\vdiff \smtenII[\dot]{\varepsilon}^p)-\mathcal{D}(\smtenII[\dot]{\varepsilon}^p) - (\smtenIV{C} : (\smtenII{\varepsilon} - \smtenII{\varepsilon}^p)): ( \vdiff \smtenII[\dot]{\varepsilon}^p - \smtenII[\dot]{\varepsilon}^p ) \diff V \geq 0 \quad \forall \vdiff \smtenII[\dot]{\varepsilon}^p
-    \end{equation}
-
-    \item Seeing~\eqref{eqn:dissipationInequalityVaria}, it \textit{feels natural} to test \eqref{eqn:balanceLaw} with $\vdiff\smtenII[\dot]{u} - \smtenII[\dot]{u}$, because \begin{align}
-        \sym\grad (\vdiff\smtenII[\dot]{u} - \smtenII[\dot]{u}) &= \sym\grad (\vdiff\smtenII[\dot]{u}) - \sym \grad(\smtenII[\dot]{u}) \\
-        &=  \vdiff \smtenII[\dot]{\varepsilon} - \smtenII[\dot]{\varepsilon}
-    \end{align} from which \begin{align}
-        - \int_\Omega \divg \smtenII{\sigma} (\vdiff\smtenII[\dot]{u} - \smtenII[\dot]{u}) \diff V &= \int_\Omega \smtenII{f}(\vdiff\smtenII[\dot]{u} - \smtenII[\dot]{u}) \diff V \\
-        \int_\Omega \smtenII{\sigma} : \grad(\vdiff\smtenII[\dot]{u} - \smtenII[\dot]{u}) \diff V &= \int_\Omega \smtenII{f}(\vdiff\smtenII[\dot]{u} - \smtenII[\dot]{u}) \diff V \\
-        \int_\Omega \smtenII{\sigma} : \sym\grad(\vdiff\smtenII[\dot]{u} - \smtenII[\dot]{u}) \diff V &= \int_\Omega \smtenII{f}(\vdiff\smtenII[\dot]{u} - \smtenII[\dot]{u}) \diff V \\
-        \int_\Omega \smtenII{\sigma} : (\vdiff \smtenII[\dot]{\varepsilon} - \smtenII[\dot]{\varepsilon}) \diff V &= \int_\Omega \smtenII{f}(\vdiff\smtenII[\dot]{u} - \smtenII[\dot]{u}) \diff V \\
-        \int_\Omega (\smtenIV{C} : (\smtenII{\varepsilon} - \smtenII{\varepsilon}^p)) : (\vdiff \smtenII[\dot]{\varepsilon} - \smtenII[\dot]{\varepsilon}) \diff V &= \int_\Omega \smtenII{f}(\vdiff\smtenII[\dot]{u} - \smtenII[\dot]{u}) \diff V
-    \end{align}
+\input{sec1-tnnmg.tex}
 
-    \item As both expressions have the same unit of Power, we add them to arrive at the variational inequality: \begin{align}
-        \int_\Omega (\smtenIV{C} : (\smtenII{\varepsilon} - \smtenII{\varepsilon}^p)) : (\vdiff \smtenII[\dot]{\varepsilon} - \smtenII[\dot]{\varepsilon}) - (\smtenIV{C} : (\smtenII{\varepsilon} - \smtenII{\varepsilon}^p)): ( \vdiff \smtenII[\dot]{\varepsilon}^p - \smtenII[\dot]{\varepsilon}^p ) \diff V +& \nonumber\\
-          \int_{\Omega} \mathcal{D}(\vdiff \smtenII[\dot]{\varepsilon}^p)-\mathcal{D}(\smtenII[\dot]{\varepsilon}^p) \diff V &\geq \int_\Omega \smtenII{f}(\vdiff\smtenII[\dot]{u} - \smtenII[\dot]{u}) \diff V \\
-    \end{align}
-    simplifying
-    \begin{align}
-    \int_\Omega (\smtenIV{C} : (\smtenII{\varepsilon} - \smtenII{\varepsilon}^p)) : \bigl[(\vdiff \smtenII[\dot]{\varepsilon} - \smtenII[\dot]{\varepsilon}) - (\vdiff \smtenII[\dot]{\varepsilon}^p - \smtenII[\dot]{\varepsilon}^p )\bigr] \diff V +
-    \int_{\Omega} \mathcal{D}(\vdiff \smtenII[\dot]{\varepsilon}^p)-\mathcal{D}(\smtenII[\dot]{\varepsilon}^p) \diff V &\geq \int_\Omega \smtenII{f}(\vdiff\smtenII[\dot]{u} - \smtenII[\dot]{u}) \diff V \label{eqn:variationalInequalityPrim1Concrete}
-    \end{align}
-    
-    Defining the stiffness functional $a((\smtenI{u},\smtenII{\varepsilon}^p),(\vdiff\smtenI[\dot]{u},\vdiff\smtenII[\dot]{\varepsilon}^p))$ and the dissipation functional $j(\smtenII[\dot]{\varepsilon}^p)$ \begin{align}
-        a((\smtenI{u},\smtenII{\varepsilon}^p),(\vdiff\smtenI[\dot]{u},\vdiff\smtenII[\dot]{\varepsilon}^p)) 
-        &\coloneq \int_\Omega (\smtenIV{C} : (\smtenII{\varepsilon} - \smtenII{\varepsilon}^p)) : (\vdiff \smtenII[\dot]{\varepsilon} - \vdiff\smtenII[\dot]{\varepsilon}^p) \diff V \\
-        j(\smtenII[\dot]{\varepsilon}^p) &\coloneq  \int_{\Omega} \mathcal{D}(\smtenII[\dot]{\varepsilon}^p) \diff V
-    \end{align} we obtain the abstract form of~\eqref{eqn:variationalInequalityPrim1Concrete}, with $w = (\smtenI{u},\smtenII{\varepsilon}^p)$ \begin{align}
-        a(w, \vdiff \dot{w} - \dot{w}) + j(\vdiff \dot{w}) - j(\dot{w}) \geq \scalarbrackets{f,\vdiff \dot{w} - \dot{w}} \label{eqn:abstractVariationalInequality}
-    \end{align}
-\end{enumerate}
+\section{A simple example}
 
-The incorporation of additional terms due to kinematic or isotropic hardening follows the derivation sketched above.
-These change the dissipation functional, as well introduce additional terms into the stiffness functional.
+\input{sec2-simpleExample.tex}
 
-We omitted the time-dependency of the unknown function $w$ in the derivation, which will be handled by a simple backward euler scheme: \begin{align}
-    \Delt w_n &= w_n - w_{n-1} \\
-    \dot{w} &= \frac{w_n - w_{n-1}}{\Delt t} = \frac{\Delt w_n}{\Delt t}
-\end{align} which leads to \begin{align}
-    a(w_n, \vdiff \dot{w} - \frac{\Delt w_n}{\Delt t}) + j(\vdiff \dot{w}) - j(\frac{\Delt w_n}{\Delt t}) \geq \scalarbrackets{f,\vdiff \dot{w} - \frac{\Delt w_n}{\Delt t}} %\label{eqn:abstractVariationalInequality}
-\end{align} because $a({}\cdot{},{}\cdot{})$ is bilinear and $j$ is $1$-homogeneous, $\Delt t$ can be factored out, leading to \begin{align}
-    a(w_n, \Delt t \vdiff \dot{w} - \Delt w_n) + j(\Delt t \vdiff \dot{w}) - j(\Delt w_n) \geq \scalarbrackets{f,\Delt t \vdiff \dot{w} - \Delt w_n} %\label{eqn:abstractVariationalInequality}    
-\end{align} because the test functions are arbitrary, the $\Delt t$ can be neglected leading to the interpretation of 
-replacing \textit{the virtual velocities} $\vdiff \dot{w}$ with their corresponding \textit{virtual displacements} $\vdiff w$\begin{align}
-    a(w_n, \vdiff w - \Delt w_n) + j(\vdiff w) - j(\Delt w_n) \geq \scalarbrackets{f,\vdiff w - \Delt w_n} %\label{eqn:abstractVariationalInequality}    
-\end{align} which can be rewritten in terms of $\Delt w_n$ yielding \begin{align}
-    a(\Delt w_n, \vdiff w - \Delt w_n) + j(\vdiff w) - j(\Delt w_n) \geq \scalarbrackets{f,\vdiff w - \Delt w_n} - a(w_{n-1}, \vdiff w - \Delt w_n) \label{eqn:incrementAbstractVariationalInequality}        
-\end{align} 
+\section{A practical example}
 
-Following Theorem 1 from \cite{Sander2019cm}, the solution $\Delt w_n$ of \eqref{eqn:incrementAbstractVariationalInequality} is 
-the minimizer of the following convex functional \begin{equation}
-    L(\Delt w_{n}) \coloneq \frac{1}{2} a(\Delt w_{n},\Delt w_{n}) + j(\Delt w_{n}) - \scalarbrackets{f, \Delt w_n} + a(w_{n-1},\Delt w_n)
-\end{equation}
-which will be discretized in space and solved using the TNNMG algorithm.
+\input{sec3-plasticityExample.tex}
 
 \printbibliography%
 
diff --git a/docs/preample.sty b/docs/preample.sty
index 6a7105f627af9b31c5ab32247e5b88a0815442a5..68c9c64f4939e3db12cd7545489fa38163eb5acd 100644
--- a/docs/preample.sty
+++ b/docs/preample.sty
@@ -5,7 +5,7 @@
 \RequirePackage[fleqn]{amsmath}
 \RequirePackage{amsthm}
 \RequirePackage{amssymb}
-\RequirePackage{nccmath}
+%\RequirePackage{nccmath}
 \RequirePackage{mathtools}
 \RequirePackage{siunitx}
 \RequirePackage{tabularx}
@@ -176,3 +176,34 @@
 
 \newcommand{\defeq}{\overset{\text{def}}{=}}
 \newcommand{\excleq}{\overset{!}{=}}
+
+
+%% Listings inputlisting indent https://tex.stackexchange.com/a/89541
+
+\newlength{\rawgobble}
+\newlength{\gobble}
+\newlength{\gobblea}
+% The width of a single space. basicstyle from lstset should be used
+\sbox0{\Huge\ttfamily \ }
+% Remove a single space
+\settowidth{\rawgobble}{\Huge\ttfamily \ }
+\setlength{\rawgobble}{-\rawgobble}
+
+\makeatletter
+\def\sepstar#1*#2\relax{%
+    \def\sepstarone{#1}%
+    \def\sepstartwo{#2}%
+}
+\lst@Key{firstlineandnumber}\relax{\def\lst@firstline{#1\relax}\def\lst@firstnumber{#1\relax}}
+\lst@Key{widthgobble}{0*0}{%
+    % Reindent a bit by multiplying with 0.9, then multiply by tabsize and number of indentation levels
+    \sepstar #1\relax
+    \setlength{\gobble}{0.9\rawgobble}%
+    \setlength{\gobble}{\sepstarone\gobble}%
+    \setlength{\gobble}{\sepstartwo\gobble}%
+    \setlength{\gobblea}{\gobble}%
+    \addtolength{\gobblea}{10pt}%
+    \def\lst@xleftmargin{\gobble}%
+    \def\lst@framexleftmargin{\gobble}%
+    \def\lst@numbersep{\gobblea}%
+}
\ No newline at end of file
diff --git a/docs/sec1-tnnmg.tex b/docs/sec1-tnnmg.tex
new file mode 100644
index 0000000000000000000000000000000000000000..343095c266132affe7ac77ccfb01e9960f5b5798
--- /dev/null
+++ b/docs/sec1-tnnmg.tex
@@ -0,0 +1,346 @@
+
+
+The \textbf{Truncated Nonsmooth Newton Multigrid method} (TNNMG), introduced by~\textcite{KornhuberGraeser2009a} and further generalized in~\textcite{Graeser2017a}, is
+a numerical method for finding solutions of convex, block-separable, nonsmooth minimization problems, which arise, for example, in small-strain elastoplasticity~\cite{Sander2019cm}, but also
+in fracture phase-field methods and contact problems.
+
+The TNNMG method is applicable to functionals $J: \R^n \rightarrow \R \cup \{+\infty\}$ of the form \begin{equation}
+    J(x) = J_0(x) + \sum_{k=1}^m \varphi_k(R_k x) \quad x \in \R^n
+\end{equation}
+where $J_0$ is a twice continuously differentiable functional, the $\varphi_k$ are convex, proper, and lower-semicontinuous (and in general nonsmooth),
+and the \textbf{restriction maps} $R_k : \R^n \rightarrow \R^{n_k}$ map into low-dimensional spaces with $n_k < n$.
+
+Functionals which can be split into a smooth part $J_0$ and many nonsmooth parts $\varphi_k$, composed with restriction maps
+into compatible subspaces, are called \textbf{block-separable}.
+
+\subsection*{A note on block-separability}
+
+One key property of block-separability is that minimizing the distinct parts separately minimizes the whole nonsmooth part of the functional.
+We can therefore minimize each nonsmooth part on its own, resulting in a sequence of very small minimization problems
+which can be solved efficiently, as each block is small.
+
+To put this fact into a formal statement, we need some definitions.
+
+We say that the domain of $J$ can be decomposed into $m$ subspaces $V_k$ of dimension $n_k$:\begin{align}
+    \R^n &= \sum_{k=1}^m V_k \label{eqn:decomposedSpace}
+\end{align} where we identify each subspace $V_k$ with a corresponding $\R^{n_k}$ via prolongation maps $P_k: \R^{n_k} \rightarrow \R^n$,
+so that the expression \eqref{eqn:decomposedSpace} can be written as
+\begin{align}
+    \R^n &= \sum_{k=1}^m V_k = \sum_{k=1}^m P_k(\R^{n_k})
+\end{align}
+which is a formal way of saying that any element of $\R^{n_k}$ has a direct representative in the whole space $\R^n$ by means of its prolongation map $P_k$.
+
+\begin{example}
+    Given $n$, $k$, and $n_k = 2$, one could write the prolongation as:
+    \begin{equation*}
+        P_k: \begin{bmatrix}
+            a_1 \\
+            a_2 \\
+        \end{bmatrix} \mapsto \begin{bmatrix}
+            0 \\
+            \vdots \\
+            a_1 \\
+            a_2 \\ 
+            \vdots \\
+            0 \\
+        \end{bmatrix}
+    \end{equation*}    
+\end{example}
+
+These prolongation operators allow us to \textit{communicate} from the restricted parts of the functional
+to the whole functional.
+
+To communicate from the whole functional to the restricted parts, the \textit{restriction maps} $R_k$ need to be defined clearly.
+These $R_k: \R^n \rightarrow \R^{\tilde{n}_k}$ can be the most natural restriction of an element $v$ in $\R^n$
+to some components $\{v_{i},v_{i+1},\ldots, v_{i+\tilde{n}_k-1}\}$, written as a smaller vector in $\R^{\tilde{n}_k}$.
+
+\begin{example}
+    The restriction $R_k$ in the above example would be the map\begin{align*}
+        R_k: \begin{bmatrix}
+            0 \\
+            \vdots \\
+            a_1 \\
+            a_2 \\ 
+            \vdots \\
+            0 \\
+        \end{bmatrix} &\mapsto \begin{bmatrix}
+            a_1 \\
+            a_2 \\
+        \end{bmatrix}
+    \end{align*}
+\end{example}
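+
+A minimal NumPy sketch of such a pair of maps (the index layout and helper names are our own illustration, not part of any library):
+\begin{lstlisting}[language={[3]Python}]
+import numpy as np
+
+n, i, n_k = 6, 2, 2  # block of size 2 starting at component index 2
+
+def prolong(a):
+    """P_k: embed a block vector from R^{n_k} into R^n."""
+    v = np.zeros(n)
+    v[i:i + n_k] = a
+    return v
+
+def restrict(v):
+    """R_k: extract the block components of a vector in R^n."""
+    return v[i:i + n_k]
+
+a = np.array([1.0, 2.0])
+assert np.allclose(restrict(prolong(a)), a)  # R_k P_k = id on R^{n_k}
+\end{lstlisting}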
+
+
+If the subspaces of dimension $\tilde{n}_k$ are defined in such a way that $n = \sum \tilde{n}_k$, then the whole space $\R^n$ can be decomposed into the following 
+subspaces arising from the restriction maps: \begin{align}
+    \R^n &= \bigoplus\limits_{k=1}^{m} {(\ker R_k)}^{\bot} \label{eqn:decompProperty}
+\end{align}
+
+\begin{example}
+    Take $n = 6$, $n_1 = 3$, $n_2 = 2$, $n_3 = 1$ with the restriction maps $R_k$ \begin{align*}
+        R_1: \begin{bmatrix}
+            a_1 \\
+            a_2 \\
+            a_3 \\
+            a_4 \\
+            a_5 \\
+            a_6 \\
+        \end{bmatrix} &\mapsto \begin{bmatrix}
+            a_1 \\
+            a_2 \\
+            a_3 \\
+        \end{bmatrix} & R_2: \begin{bmatrix}
+            a_1 \\
+            a_2 \\
+            a_3 \\
+            a_4 \\
+            a_5 \\
+            a_6 \\
+        \end{bmatrix} &\mapsto \begin{bmatrix}
+            a_4 \\
+            a_5 \\
+        \end{bmatrix} & R_3: \begin{bmatrix}
+            a_1 \\
+            a_2 \\
+            a_3 \\
+            a_4 \\
+            a_5 \\
+            a_6 \\
+        \end{bmatrix} &\mapsto \begin{bmatrix}
+            a_6 \\
+        \end{bmatrix} 
+    \end{align*}
+
+    their kernels are spanned by the vectors \begin{align*}
+        \ker R_1 &= \spn \{ \begin{bmatrix}
+            0 \\
+            0 \\
+            0 \\
+            1 \\
+            0 \\
+            0 \\
+        \end{bmatrix},\begin{bmatrix}
+            0 \\
+            0 \\
+            0 \\
+            0 \\
+            1 \\
+            0 \\
+        \end{bmatrix},\begin{bmatrix}
+            0 \\
+            0 \\
+            0 \\
+            0 \\
+            0 \\
+            1 \\
+        \end{bmatrix}\} & \ker R_2 &= \spn \{ \begin{bmatrix}
+            1 \\
+            0 \\
+            0 \\
+            0 \\
+            0 \\
+            0 \\
+        \end{bmatrix},\begin{bmatrix}
+            0 \\
+            1 \\
+            0 \\
+            0 \\
+            0 \\
+            0 \\
+        \end{bmatrix},\begin{bmatrix}
+            0 \\
+            0 \\
+            1 \\
+            0 \\
+            0 \\
+            0 \\
+        \end{bmatrix},\begin{bmatrix}
+            0 \\
+            0 \\
+            0 \\
+            0 \\
+            0 \\
+            1 \\
+        \end{bmatrix}\} \\
+        \ker R_3 &= \spn \{ \begin{bmatrix}
+            1 \\
+            0 \\
+            0 \\
+            0 \\
+            0 \\
+            0 \\
+        \end{bmatrix},\begin{bmatrix}
+            0 \\
+            1 \\
+            0 \\
+            0 \\
+            0 \\
+            0 \\
+        \end{bmatrix},\begin{bmatrix}
+            0 \\
+            0 \\
+            1 \\
+            0 \\
+            0 \\
+            0 \\
+        \end{bmatrix},\begin{bmatrix}
+            0 \\
+            0 \\
+            0 \\
+            1 \\
+            0 \\
+            0 \\
+        \end{bmatrix},\begin{bmatrix}
+            0 \\
+            0 \\
+            0 \\
+            0 \\
+            1 \\
+            0 \\
+        \end{bmatrix}\} 
+    \end{align*}
+
+    Therefore the orthogonal complements are \begin{align*}
+        {(\ker R_1)}^{\bot} &= \spn \{ \begin{bmatrix}
+            1 \\
+            0 \\
+            0 \\
+            0 \\
+            0 \\
+            0 \\
+        \end{bmatrix},\begin{bmatrix}
+            0 \\
+            1 \\
+            0 \\
+            0 \\
+            0 \\
+            0 \\
+        \end{bmatrix},\begin{bmatrix}
+            0 \\
+            0 \\
+            1 \\
+            0 \\
+            0 \\
+            0 \\
+        \end{bmatrix}\} & {(\ker R_2)}^{\bot} &= \spn \{ \begin{bmatrix}
+            0 \\
+            0 \\
+            0 \\
+            1 \\
+            0 \\
+            0 \\
+        \end{bmatrix},\begin{bmatrix}
+            0 \\
+            0 \\
+            0 \\
+            0 \\
+            1 \\
+            0 \\
+        \end{bmatrix}\} & {(\ker R_3)}^{\bot} &= \spn \{ \begin{bmatrix}
+            0 \\
+            0 \\
+            0 \\
+            0 \\
+            0 \\
+            1 \\
+        \end{bmatrix} \}
+    \end{align*}
+
+    whose direct sum spans the whole space.
+\end{example}
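+
+A quick numerical check of this decomposition (our own illustration): the rows of the matrix of $R_k$ span ${(\ker R_k)}^{\bot}$, so stacking them must give a basis of $\R^6$.
+\begin{lstlisting}[language={[3]Python}]
+import numpy as np
+
+I = np.eye(6)
+R1, R2, R3 = I[0:3], I[3:5], I[5:6]   # matrices of the three restriction maps
+B = np.vstack([R1, R2, R3])           # rows span the orthogonal complements
+assert np.linalg.matrix_rank(B) == 6  # together they span all of R^6
+\end{lstlisting}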
+% Example of 6 d vector into 3d, 2d, 1d as a compatible 
+We now write $R = \{R_1, R_2, \ldots, R_m\}$ for the collection of all restriction maps whose restrictions fulfill \eqref{eqn:decompProperty},
+which leads us to the following definition of \textbf{block-separability}:
+
+\begin{definition}[Block-separability of a functional]
+    A function $J: \R^n \rightarrow \R \cup \{+\infty\}$ is \textbf{block-separable nonsmooth} with respect to the collection of restriction maps $R$,
+    if $J$ can be decomposed into a continuously differentiable function $J_0: \R^n \rightarrow \R$ and convex, proper, lower-semicontinuous functions
+    $\phi_k: \R^{n_k} \rightarrow \R \cup \{+\infty\}$ such that \begin{equation}
+        J(v) = J_0(v) + \sum_{k=1}^{m} \phi_k(R_k v) \quad v \in \R^n
+    \end{equation}
+\end{definition}
+The current collection of restrictions $R$ into distinct subspaces might seem very rigid, but generalizations of
+such restrictions exist. The key property of a restriction is the \textbf{compatibility} of the subspace decomposition:
+
+\begin{definition}[Compatible subspace decomposition]
+    A subspace decomposition $\R^n = \sum V_k$ is called \textbf{compatible}, if \begin{equation}
+        J(u) \leq J(u + v) \quad \forall v \in V_k, \quad k = 1,\ldots,m
+    \end{equation} implies global optimality \begin{equation}
+        J(u) \leq J(u + v) \quad \forall v \in \R^n
+    \end{equation}
+\end{definition}
+The direct orthogonal decomposition~\eqref{eqn:decompProperty} is a compatible decomposition.
+For more details refer to~\cite{Graeser2017a}.
+
+\subsection*{The algorithm}
+
+While the term \textbf{multigrid} appears in the name, a multigrid step is by no means required anywhere in the algorithm.
+It merely helps to find an approximate solution extremely quickly.
+
+With the definitions from the previous sections, the algorithm is as follows: 
+
+\begin{figure}[h]
+\centering
+\begin{algorithmic}[1]
+    \State $u^{0} \gets \text{Admissible initial guess}$
+    \Repeat 
+    \LComment{Step 1: Nonlinear pre-smoothing (Also Nonlinear Block-Gauss-Seidel)}
+    \BeginBox 
+    \State $w^{i,0} \gets u^{i}$
+    \For{$k = 1,\ldots,m$}
+        \State Compute approximate solution $w^{i,k}$ in the block $k$ with \begin{equation}
+            w^{i,k} \approx \argmin_{v \in w^{i,k-1} + V_k} J(v) \tag{I}\label{eqn:FirstStep}
+        \end{equation}
+    \EndFor
+    \State $u^{i + \sfrac{1}{2}} \gets w^{i,m}$
+    \EndBox
+    \LComment{Step 2: Truncated (Multigrid, Quasi-) Newton-Step in a Smooth Subspace}
+    \BeginBox
+        \State Determine a large subspace $W_i \subset \R^n$ such that $\bigl. J \bigr|_{u^{i + \sfrac{1}{2}} + W_i}$ is twice continuously differentiable.
+        \State Solve for the Newton Increment $\Delta u^i$ approximately in the subspace $W_i$: \begin{equation}
+            \nabla^{2}\bigl. J(u^{i + \sfrac{1}{2}})\bigr|_{W_i \times W_i} \cdot \Delta u^i = -\nabla \bigl. J(u^{i + \sfrac{1}{2}})\bigr|_{W_i} \tag{II}\label{eqn:SecondStep}
+        \end{equation} 
+    \EndBox
+    \LComment{Step 3: Projection onto the admissible space and postprocessing}
+    \BeginBox
+        \State Compute a projection $\Delta \tilde{u}^i = \Pi_{J}(\Delta u^i)$ onto the domain of $J$
+        \State Compute a damping $\rho_i$ such that \begin{equation}
+            J(u^{i + \sfrac{1}{2}} + \rho_i \Delta \tilde{u}^i) \leq J(u^{i + \sfrac{1}{2}}) \tag{III}\label{eqn:ThirdStep}
+        \end{equation}
+        \State $u^{i+1} \gets u^{i + \sfrac{1}{2}} + \rho_i \Delta \tilde{u}^i$
+    \EndBox
+    \Until{Convergence}
+\end{algorithmic}
+\end{figure}
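+
+To make the structure concrete, the following is a minimal Python sketch of one TNNMG iteration. All function arguments (\texttt{minimize\_block}, \texttt{smooth\_subspace}, \texttt{project}, \texttt{line\_search}) are placeholders for the problem-specific components discussed below, not part of any library:
+\begin{lstlisting}[language={[3]Python}]
+import numpy as np
+
+def tnnmg_step(u, J, grad, hess, blocks, minimize_block,
+               smooth_subspace, project, line_search):
+    # Step 1: nonlinear block Gauss-Seidel pre-smoothing
+    for k in blocks:                    # k is an index array for block V_k
+        u[k] = minimize_block(J, u, k)  # small minimization problem in V_k
+    # Step 2: truncated Newton step in a smooth subspace W_i
+    W = smooth_subspace(u)              # boolean mask of "smooth" dofs
+    du = np.zeros_like(u)
+    du[W] = np.linalg.solve(hess(u)[np.ix_(W, W)], -grad(u)[W])
+    # Step 3: projection onto dom(J) and damping
+    du = project(u, du)
+    rho = line_search(J, u, du)         # ensures J(u + rho*du) <= J(u)
+    return u + rho * du
+\end{lstlisting}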
+
+%These steps will be explained in more detail, pointing to ressources on practical ressources, but a more rigorous treatment is found in~\cite{Graeser2017a}
+
+\subsection{Nonlinear Pre-Smoother / Nonlinear Block-Gauss-Seidel}
+
+In the first step of the TNNMG method, a sequence of small minimization problems~\eqref{eqn:FirstStep} must be solved.
+
+Implementation-wise, this step is the main effort: for all other steps existing libraries can be used, whereas the
+implementation of the first step depends strongly on the nonsmooth blocks of the functional.
+
+In the algorithm, the block size is assumed to be small compared to the number of blocks,
+and because the functionals $\phi_k$ are assumed to be convex, algorithms from convex optimization can be utilized;
+even if such an algorithm scales badly with the problem size, it remains usable because the blocks are small.
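+
+As an illustration, consider the model functional $J(x) = \frac{1}{2} x^T A x - b^T x + \sigma_c \sum_k \abs{x_k}$ with scalar blocks; the block update then has a closed form (soft shrinkage). This is a toy problem of our own, not the elastoplastic functional:
+\begin{lstlisting}[language={[3]Python}]
+import numpy as np
+
+def gauss_seidel_sweep(x, A, b, sigma_c):
+    """One nonlinear Gauss-Seidel sweep for
+    J(x) = 0.5 x^T A x - b^T x + sigma_c * sum_k |x_k|."""
+    for k in range(len(x)):
+        # residual of the smooth part with the x_k contribution removed
+        r = b[k] - A[k, :] @ x + A[k, k] * x[k]
+        # 1d minimizer of 0.5*A_kk*t^2 - r*t + sigma_c*|t|: soft shrinkage
+        x[k] = np.sign(r) * max(abs(r) - sigma_c, 0.0) / A[k, k]
+    return x
+\end{lstlisting}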
+
+\subsection{Truncated Linear Correction}
+
+After the nonlinear pre-smoother, one linear correction step is performed, where~\eqref{eqn:SecondStep} gives the correction.
+This system only needs to be solved very inexactly. While multigrid methods are a good choice for solving it,
+a preconditioned conjugate gradient iteration or even a direct solve can be used instead; despite the name of the method,
+a multigrid step is not mandatory.
+
+In order to determine the admissible subspace on which $J$ is twice continuously differentiable,
+the components at which $J$ is \textit{not} differentiable can be filtered out using a problem-specific criterion.
+It is not required to use the maximal such subspace, only a large one.
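+
+For the toy functional above, a minimal sketch of such a truncation (the criterion $x_k \neq 0$ marks the components where $\abs{\cdot}$ is differentiable; real problems use their own criteria):
+\begin{lstlisting}[language={[3]Python}]
+import numpy as np
+
+def smooth_mask(x, tol=1e-12):
+    """Mark components away from the kink of |.|."""
+    return np.abs(x) > tol
+
+def truncated_newton_increment(x, A, b, sigma_c):
+    W = smooth_mask(x)                    # the subspace W_i as a mask
+    g = A @ x - b + sigma_c * np.sign(x)  # gradient on the smooth set
+    du = np.zeros_like(x)
+    du[W] = np.linalg.solve(A[np.ix_(W, W)], -g[W])
+    return du
+\end{lstlisting}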
+
+\subsection{Projection onto the admissible domain}
+
+After computing the increment, a projection and a damping step complete the algorithm.
+While the projection onto the admissible domain is not strictly necessary, it accelerates the convergence of the method
+by allowing bigger steps.
+
+The damping is mandatory for global convergence of the method from any initial iterate, as is common for \textsc{Newton-Raphson}-like solvers.
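+
+A simple damping rule that guarantees the monotonicity condition \eqref{eqn:ThirdStep} is backtracking: halve $\rho$ until the energy no longer increases. A sketch of our own; exact strategies (e.g.\ bisection on the directional derivative) vary:
+\begin{lstlisting}[language={[3]Python}]
+def damping(J, u, du, max_halvings=30):
+    """Backtracking: halve rho until J(u + rho*du) <= J(u)."""
+    J0, rho = J(u), 1.0
+    for _ in range(max_halvings):
+        if J(u + rho * du) <= J0:
+            return rho
+        rho *= 0.5
+    return 0.0  # reject the step; the pre-smoother alone decreases J
+\end{lstlisting}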
diff --git a/docs/sec2-simpleExample.tex b/docs/sec2-simpleExample.tex
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/docs/sec3-plasticityExample.tex b/docs/sec3-plasticityExample.tex
new file mode 100644
index 0000000000000000000000000000000000000000..219e8f04f44f134f9afd4499d14cf5655c4de9b7
--- /dev/null
+++ b/docs/sec3-plasticityExample.tex
@@ -0,0 +1,246 @@
+
+The problem of small-strain elastoplasticity is used as an example to illustrate the steps of the method.
+Since the exact formulation differs from the commonly used strain-rate formulation, we restate the
+problem of elastoplasticity here.
+
+\subsection{Kinematics}
+
+Consider a simple body $\manifold{B} \subset \R^d$, a domain with \textsc{Lipschitz} boundary $\pdiff \manifold{B}$.
+Let $\smtenI{u}: \manifold{B} \rightarrow \R^d$ denote the displacement field and $\smtenII{\varepsilon}: \manifold{B} \rightarrow \SymMat^d$ the engineering strains\begin{equation}
+    \smtenII{\varepsilon} = \sym \grad \smtenI{u}
+\end{equation} where $\SymMat^d$ denotes the space of $d \times d$ symmetric matrices.
+
+In order to describe plasticity, an additive split of the strains is used, \begin{equation}
+    \smtenII{\varepsilon} = \smtenII{\varepsilon}^{e} + \smtenII{\varepsilon}^{p} \label{eqn:additiveStrainSplit}
+\end{equation} into the \textit{elastic strains} $\smtenII{\varepsilon}^{e} \in \SymMat^d$ and the \textit{plastic strains} $\smtenII{\varepsilon}^{p} \in \SymMatTr^d$,
+where $\SymMatTr^d$ denotes the space of symmetric trace-free $d \times d$ matrices; we consider crystalline materials, where changes in volume are not attributed to plastic deformation.
+The theory of the solution algorithm does not enforce this condition, but in the space of trace-free matrices the minimizer of a local functional can be computed analytically.
+
+\subsection{Balance Equations}
+
+We assume that balance of momentum holds for any subbody $\manifold{U} \subset \manifold{B}$, which implies the local form of 
+balance \begin{equation}
+   - \divg \smtenII{\sigma} = \smtenI{f} \label{eqn:balanceLaw}
+\end{equation} where $\smtenII{\sigma}: \manifold{B} \rightarrow \SymMat^d$ are the engineering stresses and $\smtenI{f}: \manifold{B} \rightarrow \R^d$ is a given volume force field.
+We only consider the quasi-static process, where inertial terms are omitted, but the equation is considered in a pseudo-time $t \in [0, T]$ 
+due to the irreversibility of the plastic strains.
+
+To account for the plastic \textit{dissipation} of energy, we invoke Hill's \textit{principle of maximum dissipation},
+which states that the dissipated energy \begin{equation}
+    \mathcal{D}(\smtenII{\sigma}) = \smtenII{\sigma} : \smtenII[\dot]{\varepsilon}^p \geq 0
+\end{equation} is maximized over all admissible stress states: \begin{equation}
+    \sup_{\smtenII{\sigma} \in \mathcal{E}} \mathcal{D}(\smtenII{\sigma})
+\end{equation}
+
+For this reason we introduce the \textit{real dissipated energy} $\mathcal{D}^p$ as the support function of the elastic domain $\mathcal{E}$: \begin{equation}
+    \mathcal{D}^p(\smtenII[\dot]{\varepsilon}^p) \coloneq \sup_{\smtenII{\sigma} \in \mathcal{E}} \mathcal{D}(\smtenII{\sigma})
+\end{equation} which is the \textsc{Legendre} transformation of the indicator function of $\mathcal{E}$.
+
+Due to the linearity of $\mathcal{D}$ in $\smtenII{\sigma}$ and the maximality of the real stress state it holds that \begin{equation}
+    \mathcal{D}(\smtenII{\sigma}) - \mathcal{D}(\smtenII{\sigma}^{\ast}) = (\smtenII{\sigma} - \smtenII{\sigma}^{\ast}) : \smtenII[\dot]{\varepsilon}^p \geq 0 \qquad \forall \smtenII{\sigma}^{\ast} \in \mathcal{E}
+\end{equation}
+with the analogous subgradient inequality for the \textsc{Legendre}-transformed function $\mathcal{D}^p$ \begin{equation}
+    \mathcal{D}^p(\vdiff \smtenII[\dot]{\varepsilon}^p) - \mathcal{D}^p(\smtenII[\dot]{\varepsilon}^p) \geq \smtenII{\sigma}: (\vdiff \smtenII[\dot]{\varepsilon}^p - \smtenII[\dot]{\varepsilon}^p) \qquad \forall \vdiff \smtenII[\dot]{\varepsilon}^p \in \SymMatTr^d
+\end{equation} which also shows the \textit{real dissipated energy} $\mathcal{D}^p$ to be a convex function.
+
+\subsection{Constitutive Equations}
+
+For the elastic part of the strains we use a \textsc{St.\@ Venant-Kirchhoff} material law. This choice is made only for simplicity,
+as it eases the implementation; more complicated laws can be used here.
+It is stated with the two \textsc{Lamé} constants $\lambda$ and $\mu$, which we assume to be positive:
+\begin{equation}
+    \smtenII{\sigma} = \lambda \trace(\smtenII{\varepsilon}^e) \smtenII{I} + 2 \mu \smtenII{\varepsilon}^e = \smtenIV{C} : \smtenII{\varepsilon}^e \label{eqn:StVernantKirchhoff}
+\end{equation} where $\smtenIV{C}$ denotes the \textsc{Hooke} tensor.
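+
+A direct NumPy transcription of this law (helper name of our own choosing):
+\begin{lstlisting}[language={[3]Python}]
+import numpy as np
+
+def hooke(eps_e, lam, mu):
+    """St. Venant-Kirchhoff / linear Hooke law: C : eps_e."""
+    d = eps_e.shape[0]
+    return lam * np.trace(eps_e) * np.eye(d) + 2.0 * mu * eps_e
+\end{lstlisting}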
+
+For the plastic regime, we describe the admissible stress states $\mathcal{E}$ as a convex region whose \textit{boundary} $\pdiff \mathcal{E}$, the \textit{yield surface}, is described by
+a \textit{yield function} $\phi(\smtenII{\sigma})$: \begin{align}
+    \mathcal{E} &= \{ \smtenII{\sigma} \in \SymMat^d : \phi(\smtenII{\sigma}) \leq 0 \} \\
+    \pdiff \mathcal{E} &= \{ \smtenII{\sigma} \in \SymMat^d : \phi(\smtenII{\sigma}) = 0 \}
+\end{align}
+Using the principle of maximum work, the \textit{associated flow rule} can be expressed as \begin{equation}
+    \smtenII[\dot]{\varepsilon}^p = \lambda^p \frac{\pdiff \phi(\smtenII{\sigma})}{\pdiff \smtenII{\sigma}} \label{eqn:smoothFlowRule}
+\end{equation} assuming $\phi$ to be differentiable everywhere.
+
+If we drop the requirement that $\phi$ be differentiable everywhere, we can instead say that the plastic flow is an element of the scaled \textbf{subdifferential} $\pdiff \phi$ of the yield function
+\begin{equation}
+    \smtenII[\dot]{\varepsilon}^p \in \lambda^p \pdiff \phi(\smtenII{\sigma}) \label{eqn:nonsmoothFlowRule}
+\end{equation} where the subdifferential is defined as follows.
+
+\begin{definition}[Subdifferential of a convex function]
+    Given a convex function $f: X \rightarrow \R$ on a vector space $X$ we define the \textbf{subdifferential} $\pdiff f(x)$ of $f$ at $x \in X$
+    to be the subset of the dual space $X'$ defined by \begin{align}
+        \pdiff f(x) \coloneq \{ x^\ast \in X' : f(y) \geq f(x) + \scalar{x^\ast}{y - x} \quad\forall y \in X \} \label{eqn:subdifferentialSet}
+    \end{align}
+\end{definition}
+
+\begin{example}[Subdifferential of convex functions over $\R^n$]
+    Let $X = \R^n$ in the above definition, let $\smtenI{x} \in \R^n$ denote a vector, and let $f$ be a convex function on $\R^n$.
+    The subdifferential of $f$ at $\smtenI{x}$ is then given by \begin{align}
+        \pdiff f(\smtenI{x}) \coloneq \{ \smtenI{x}^\ast \in \R^n : f(\smtenI{y}) \geq f(\smtenI{x}) + {(\smtenI{x}^\ast)}^T (\smtenI{y} - \smtenI{x}) \quad\forall \smtenI{y} \in \R^n \}
+    \end{align} where $\square^T$ denotes transposition.
+
+    In $\R^n$ one can interpret the subdifferential as the set of possible subgradients of $f$, which reduces to the single gradient at points $\smtenI{x}$ where $f$ is differentiable.
+\end{example}
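+
+For instance, for the absolute value $f(x) = \abs{x}$ on $\R$ one obtains \begin{equation*}
+    \pdiff \abs{\cdot}(x) = \begin{cases}
+        \{-1\} & x < 0 \\
+        {[-1, 1]} & x = 0 \\
+        \{+1\} & x > 0
+    \end{cases}
+\end{equation*} so the subdifferential is single-valued exactly where $f$ is differentiable.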
+
+This definition allows for nonsmooth yield functions.
+
+Using the dissipation function, one can write~\eqref{eqn:nonsmoothFlowRule} in its dual form \begin{equation}
+    \smtenII{\sigma} \in \pdiff \mathcal{D}^p(\smtenII[\dot]{\varepsilon}^p) \label{eqn:StressflowRule}
+\end{equation}
+
+\subsection*{Example: The \textsc{von Mises} yield function}
+
+We define the stress deviator to be \begin{equation}
+    \smtenII{\sigma}^D = \smtenII{\sigma} - \frac{1}{d} \trace(\smtenII{\sigma}) \smtenII{I}
+\end{equation} for any $\smtenII{\sigma} \in \SymMat^d$.
+This implies that the case $d=2$ does not describe real materials, as the notion of the stress deviator changes;
+it does, however, allow for formulas that are independent of the dimension of the problem.
+
+The \textsc{von Mises} criterion states that the material yields when the elastic shear energy, measured by the invariant \begin{align}
+    I_2(\smtenII{\sigma}^D) &= \frac{1}{2} \norm{\smtenII{\sigma}^D}^2
+\end{align} reaches an empirically measured critical value $\sigma_c$.
+
+Absorbing the constant factor $\sqrt{2}$ into the critical value $\sigma_c$, the \textsc{von Mises} yield function takes the form \begin{equation}
+    \phi(\smtenII{\sigma}) \coloneq \norm{\smtenII{\sigma}^D} - \sigma_c
+\end{equation}
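+
+To make these formulas concrete, the following minimal NumPy sketch evaluates the stress deviator and the 
+\textsc{von Mises} yield function for a given stress tensor; the function names are illustrative and not part of \lstinline!elasticity_fufem.py!.
+\begin{lstlisting}[language={[3]Python}]
+import numpy as np
+
+def deviator(sigma):
+    # sigma^D = sigma - (1/d) tr(sigma) I, with d the spatial dimension
+    d = sigma.shape[0]
+    return sigma - np.trace(sigma)/d * np.eye(d)
+
+def vonMisesYield(sigma, sigma_c):
+    # phi(sigma) = ||sigma^D|| - sigma_c (Frobenius norm)
+    return np.linalg.norm(deviator(sigma)) - sigma_c
+\end{lstlisting}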
+
+We are interested in the \textit{dissipation function} corresponding to $\phi$, which is given by the supremum \begin{align}
+    \mathcal{D}^p(\smtenII[\dot]{\varepsilon}^p) &= \sup_{\smtenII{\sigma} \in \SymMat^d} \{ \smtenII[\dot]{\varepsilon}^p : \smtenII{\sigma}; \norm{\smtenII{\sigma}^D} - \sigma_c \leq 0 \} \\
+    \shortintertext{As $\smtenII[\dot]{\varepsilon}^p$ is trace free, only the deviatoric part of $\smtenII{\sigma}$ contributes}
+    &= \sup_{\smtenII{\sigma} \in \SymMat^d} \{ \smtenII[\dot]{\varepsilon}^p : \smtenII{\sigma}^D; \norm{\smtenII{\sigma}^D} \leq \sigma_c \} \\ 
+    \shortintertext{The supremum is attained when the arguments are parallel, $\smtenII{\sigma}^D = \alpha \smtenII[\dot]{\varepsilon}^p$ with $\alpha \geq 0$, so the first term can be written as $\smtenII[\dot]{\varepsilon}^p : \smtenII{\sigma}^D = \norm{\smtenII[\dot]{\varepsilon}^p} \cdot \norm{\smtenII{\sigma}^D}$}
+    &= \sup_{\smtenII{\sigma} \in \SymMat^d} \{ \norm{\smtenII[\dot]{\varepsilon}^p} \cdot \norm{\smtenII{\sigma}^D} ; \norm{\smtenII{\sigma}^D} \leq \sigma_c \} \\
+    \shortintertext{Because $\norm{\smtenII[\dot]{\varepsilon}^p} \geq 0$ is fixed, it can be pulled out of the supremum}
+    &= \norm{\smtenII[\dot]{\varepsilon}^p} \cdot \sup_{\smtenII{\sigma} \in \SymMat^d} \{ \norm{\smtenII{\sigma}^D} ; \norm{\smtenII{\sigma}^D} \leq \sigma_c \} \\
+    \shortintertext{The supremum is attained when the inequality holds with equality, i.e.\@ $\norm{\smtenII{\sigma}^D} = \sigma_c$}
+    \mathcal{D}^p(\smtenII[\dot]{\varepsilon}^p) &= \norm{\smtenII[\dot]{\varepsilon}^p} \cdot \sigma_c,
+\end{align} which is the dissipation function of the \textsc{von Mises} yield function.
+
+The technique presented here, taken from~\cite{Jaap2023}, can be applied to any yield function to derive the corresponding dissipation function.
+
+Note that the \textsc{von Mises} dissipation function is already not differentiable at $\smtenII[\dot]{\varepsilon}^p = 0$, even though 
+the boundary of the elastic domain is smooth.
+
+\subsection*{Example: The \textsc{Tresca} yield function}
+
+To give another example of a dissipation function, we present the \textsc{Tresca} yield function.
+Given the critical shear stress $\tau_0$, the material yields once 
+\begin{align}
+    \phi(\smtenII{\sigma}) &= \max_{i,j} \abs{ \sigma_i - \sigma_j} - \tau_0
+\end{align} becomes non-negative, where $\sigma_1,\sigma_2,\sigma_3$ denote the principal stresses of the stress tensor.
+As derived in~\cite{Jaap2023}, the dissipation function is given by \begin{align}
+    \mathcal{D}^p(\smtenII[\dot]{\varepsilon}^p) = \tau_0 \max\{ \abs{\varepsilon_1}, \abs{\varepsilon_2}, \abs{\varepsilon_3} \} 
+\end{align} where it is assumed that $\smtenII[\dot]{\varepsilon}^p \in \SymMatTr$, and $\varepsilon_1,\varepsilon_2,\varepsilon_3$ denote the principal strains.
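+
+Assuming \lstinline!np! is imported as in the previous sketch, a purely illustrative evaluation of the \textsc{Tresca} yield function reads:
+\begin{lstlisting}[language={[3]Python}]
+def trescaYield(sigma, tau_0):
+    # The principal stresses are the eigenvalues of the symmetric stress tensor.
+    principal = np.linalg.eigvalsh(sigma)
+    # max_{i,j} |sigma_i - sigma_j| is the spread of the eigenvalues.
+    return principal.max() - principal.min() - tau_0
+\end{lstlisting}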
+
+\subsection{Weak Form and Discretization of Small-Strain Elastoplasticity}
+
+The derivation works in three steps: \begin{enumerate}
+    \item Derivation of a \textit{variational inequality} from the dissipation function.
+    \item Derivation of the weak form of equilibrium from the balance equation, using special test functions.
+    \item Adding both to obtain the total variational inequality.
+\end{enumerate}
+Afterwards, during the time discretization, the time derivatives are eliminated by exploiting the $1$-homogeneity of the dissipation function.
+
+\begin{enumerate}
+    \item Dissipation function
+
+    Following~\eqref{eqn:StressflowRule}, the condition \begin{equation}
+        \smtenII{\sigma} \in \pdiff \mathcal{D}(\smtenII[\dot]{\varepsilon}^p)
+    \end{equation} corresponds, by the definition of the subdifferential~\eqref{eqn:subdifferentialSet}, to the inequality \begin{align}
+        \mathcal{D}(\vdiff \smtenII[\dot]{\varepsilon}^p) &\geq \mathcal{D}(\smtenII[\dot]{\varepsilon}^p) + \smtenII{\sigma} : ( \vdiff \smtenII[\dot]{\varepsilon}^p - \smtenII[\dot]{\varepsilon}^p ) \quad \forall \vdiff \smtenII[\dot]{\varepsilon}^p 
+    \end{align} where we eliminate $\smtenII{\sigma}$ via~\eqref{eqn:StVernantKirchhoff} and insert the additive strain split~\eqref{eqn:additiveStrainSplit} to eliminate the elastic strains:
+    \begin{align}
+        \mathcal{D}(\vdiff \smtenII[\dot]{\varepsilon}^p) &\geq \mathcal{D}(\smtenII[\dot]{\varepsilon}^p) + (\smtenIV{C} : (\smtenII{\varepsilon} - \smtenII{\varepsilon}^p)): ( \vdiff \smtenII[\dot]{\varepsilon}^p - \smtenII[\dot]{\varepsilon}^p ) \quad \forall \vdiff \smtenII[\dot]{\varepsilon}^p \label{eqn:dissipationInequalityVaria}
+    \end{align} and integrate over the domain of interest $\Omega$ \begin{equation}
+      \int_{\Omega} \mathcal{D}(\vdiff \smtenII[\dot]{\varepsilon}^p)-\mathcal{D}(\smtenII[\dot]{\varepsilon}^p) - (\smtenIV{C} : (\smtenII{\varepsilon} - \smtenII{\varepsilon}^p)): ( \vdiff \smtenII[\dot]{\varepsilon}^p - \smtenII[\dot]{\varepsilon}^p ) \diff V \geq 0 \quad \forall \vdiff \smtenII[\dot]{\varepsilon}^p
+    \end{equation}
+
+    \item Seeing~\eqref{eqn:dissipationInequalityVaria}, it \textit{feels natural} to test \eqref{eqn:balanceLaw} with $\vdiff\smtenI[\dot]{u} - \smtenI[\dot]{u}$, because \begin{align}
+        \sym\grad (\vdiff\smtenI[\dot]{u} - \smtenI[\dot]{u}) &= \sym\grad (\vdiff\smtenI[\dot]{u}) - \sym \grad(\smtenI[\dot]{u}) \\
+        &=  \vdiff \smtenII[\dot]{\varepsilon} - \smtenII[\dot]{\varepsilon}
+    \end{align} from which \begin{align}
+        - \int_\Omega \divg \smtenII{\sigma} \cdot (\vdiff\smtenI[\dot]{u} - \smtenI[\dot]{u}) \diff V &= \int_\Omega \smtenI{f} \cdot (\vdiff\smtenI[\dot]{u} - \smtenI[\dot]{u}) \diff V \\
+        \shortintertext{integrating by parts, assuming the test functions vanish on the boundary}
+        \int_\Omega \smtenII{\sigma} : \grad(\vdiff\smtenI[\dot]{u} - \smtenI[\dot]{u}) \diff V &= \int_\Omega \smtenI{f} \cdot (\vdiff\smtenI[\dot]{u} - \smtenI[\dot]{u}) \diff V \\
+        \shortintertext{by symmetry of $\smtenII{\sigma}$, only the symmetric part of the gradient contributes}
+        \int_\Omega \smtenII{\sigma} : \sym\grad(\vdiff\smtenI[\dot]{u} - \smtenI[\dot]{u}) \diff V &= \int_\Omega \smtenI{f} \cdot (\vdiff\smtenI[\dot]{u} - \smtenI[\dot]{u}) \diff V \\
+        \int_\Omega \smtenII{\sigma} : (\vdiff \smtenII[\dot]{\varepsilon} - \smtenII[\dot]{\varepsilon}) \diff V &= \int_\Omega \smtenI{f} \cdot (\vdiff\smtenI[\dot]{u} - \smtenI[\dot]{u}) \diff V \\
+        \shortintertext{eliminating $\smtenII{\sigma}$ via~\eqref{eqn:StVernantKirchhoff} and~\eqref{eqn:additiveStrainSplit} as before}
+        \int_\Omega (\smtenIV{C} : (\smtenII{\varepsilon} - \smtenII{\varepsilon}^p)) : (\vdiff \smtenII[\dot]{\varepsilon} - \smtenII[\dot]{\varepsilon}) \diff V &= \int_\Omega \smtenI{f} \cdot (\vdiff\smtenI[\dot]{u} - \smtenI[\dot]{u}) \diff V
+    \end{align}
+
+    \item As both expressions have the physical unit of power, we may add them to arrive at the variational inequality \begin{align}
+        \int_\Omega (\smtenIV{C} : (\smtenII{\varepsilon} - \smtenII{\varepsilon}^p)) : (\vdiff \smtenII[\dot]{\varepsilon} - \smtenII[\dot]{\varepsilon}) - (\smtenIV{C} : (\smtenII{\varepsilon} - \smtenII{\varepsilon}^p)): ( \vdiff \smtenII[\dot]{\varepsilon}^p - \smtenII[\dot]{\varepsilon}^p ) \diff V +& \nonumber\\
+          \int_{\Omega} \mathcal{D}(\vdiff \smtenII[\dot]{\varepsilon}^p)-\mathcal{D}(\smtenII[\dot]{\varepsilon}^p) \diff V &\geq \int_\Omega \smtenI{f} \cdot (\vdiff\smtenI[\dot]{u} - \smtenI[\dot]{u}) \diff V
+    \end{align}
+    which simplifies to
+    \begin{align}
+    \int_\Omega (\smtenIV{C} : (\smtenII{\varepsilon} - \smtenII{\varepsilon}^p)) : \bigl[(\vdiff \smtenII[\dot]{\varepsilon} - \smtenII[\dot]{\varepsilon}) - (\vdiff \smtenII[\dot]{\varepsilon}^p - \smtenII[\dot]{\varepsilon}^p )\bigr] \diff V +
+    \int_{\Omega} \mathcal{D}(\vdiff \smtenII[\dot]{\varepsilon}^p)-\mathcal{D}(\smtenII[\dot]{\varepsilon}^p) \diff V &\geq \int_\Omega \smtenI{f} \cdot (\vdiff\smtenI[\dot]{u} - \smtenI[\dot]{u}) \diff V \label{eqn:variationalInequalityPrim1Concrete}
+    \end{align}
+    Defining the stiffness functional $a((\smtenI{u},\smtenII{\varepsilon}^p),(\vdiff\smtenI[\dot]{u},\vdiff\smtenII[\dot]{\varepsilon}^p))$ and the dissipation functional $j(\smtenII[\dot]{\varepsilon}^p)$ \begin{align}
+        a((\smtenI{u},\smtenII{\varepsilon}^p),(\vdiff\smtenI[\dot]{u},\vdiff\smtenII[\dot]{\varepsilon}^p)) 
+        &\coloneq \int_\Omega (\smtenIV{C} : (\smtenII{\varepsilon} - \smtenII{\varepsilon}^p)) : (\vdiff \smtenII[\dot]{\varepsilon} - \vdiff\smtenII[\dot]{\varepsilon}^p) \diff V \\
+        j(\smtenII[\dot]{\varepsilon}^p) &\coloneq  \int_{\Omega} \mathcal{D}(\smtenII[\dot]{\varepsilon}^p) \diff V
+    \end{align} we obtain the abstract form of~\eqref{eqn:variationalInequalityPrim1Concrete}, with $w = (\smtenI{u},\smtenII{\varepsilon}^p)$ \begin{align}
+        a(w, \vdiff \dot{w} - \dot{w}) + j(\vdiff \dot{w}) - j(\dot{w}) \geq \scalarbrackets{f,\vdiff \dot{w} - \dot{w}} \label{eqn:abstractVariationalInequality}
+    \end{align}
+\end{enumerate}
+
+The incorporation of additional terms due to kinematic or isotropic hardening follows the derivation sketched above.
+These change the dissipation functional and also introduce additional terms into the stiffness functional.
+
+We omitted the time dependency of the unknown function $w$ in the derivation; it is handled by a simple backward \textsc{Euler} scheme: \begin{align}
+    \Delt w_n &= w_n - w_{n-1} \\
+    \dot{w} &\approx \frac{w_n - w_{n-1}}{\Delt t} = \frac{\Delt w_n}{\Delt t}
+\end{align} which leads to \begin{align}
+    a(w_n, \vdiff \dot{w} - \frac{\Delt w_n}{\Delt t}) + j(\vdiff \dot{w}) - j(\frac{\Delt w_n}{\Delt t}) \geq \scalarbrackets{f,\vdiff \dot{w} - \frac{\Delt w_n}{\Delt t}}. %\label{eqn:abstractVariationalInequality}
+\end{align} Multiplying by $\Delt t > 0$, and using that $a({}\cdot{},{}\cdot{})$ is bilinear and $j$ is positively $1$-homogeneous, the factor can be moved into the arguments, leading to \begin{align}
+    a(w_n, \Delt t \vdiff \dot{w} - \Delt w_n) + j(\Delt t \vdiff \dot{w}) - j(\Delt w_n) \geq \scalarbrackets{f,\Delt t \vdiff \dot{w} - \Delt w_n}. %\label{eqn:abstractVariationalInequality}    
+\end{align} Because the test functions are arbitrary, the factor $\Delt t$ can be absorbed into them; this amounts to 
+replacing the \textit{virtual velocities} $\vdiff \dot{w}$ by corresponding \textit{virtual displacements} $\vdiff w$: \begin{align}
+    a(w_n, \vdiff w - \Delt w_n) + j(\vdiff w) - j(\Delt w_n) \geq \scalarbrackets{f,\vdiff w - \Delt w_n}, %\label{eqn:abstractVariationalInequality}    
+\end{align} which can be rewritten in terms of $\Delt w_n$, yielding \begin{align}
+    a(\Delt w_n, \vdiff w - \Delt w_n) + j(\vdiff w) - j(\Delt w_n) \geq \scalarbrackets{f,\vdiff w - \Delt w_n} - a(w_{n-1}, \vdiff w - \Delt w_n). \label{eqn:incrementAbstractVariationalInequality}        
+\end{align} 
+
+Following Theorem 1 from \cite{Sander2019cm}, the solution $\Delt w_n$ of~\eqref{eqn:incrementAbstractVariationalInequality} is 
+the minimizer of the following convex functional \begin{equation}
+    L(\Delt w_{n}) \coloneq \frac{1}{2} a(\Delt w_{n},\Delt w_{n}) + j(\Delt w_{n}) - \scalarbrackets{f, \Delt w_n} + a(w_{n-1},\Delt w_n) \label{eqn:incrementalFunctional}
+\end{equation}
+which will be discretized in space and solved using the TNNMG algorithm.
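+
+Schematically, one such convex minimization problem is solved per time step. The following sketch shows 
+the structure of the resulting driver loop; \lstinline!tnnmgSolve! is a placeholder for the TNNMG iteration 
+described above and \lstinline!numTimeSteps! is a hypothetical parameter, neither being part of the actual implementation.
+\begin{lstlisting}[language={[3]Python}]
+w = np.zeros(numDofs)          # w_0: displacements and plastic strains
+for n in range(numTimeSteps):
+    # Minimize L(dw) = 1/2 a(dw,dw) + j(dw) - <f,dw> + a(w,dw) over dw.
+    # In matrix form the smooth part has the residual b - A w as its rhs.
+    dw = tnnmgSolve(A, b - A.dot(w))
+    w = w + dw                 # w_n = w_{n-1} + Delta w_n
+\end{lstlisting}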
+
+\subsection{Spatial Discretization}
+
+\begin{itemize}
+    \item Displacements are discretized with first-order \textsc{Lagrange} elements with values in $\R^d$.
+    \item Plastic strains are discretized with piecewise constant elements with values in $\mathbb{S}^d_0$.
+    \item The isotropic hardening variable is likewise discretized with piecewise constant elements with values in $\R$ (a sketch of the resulting block layout follows this list).
+\end{itemize}
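+
+For the two-dimensional example implementation, this choice leads to the block layout of the global vector 
+of unknowns sketched below. The block sizes and the mesh counters \lstinline!numVertices! and \lstinline!numElements! 
+are assumptions of this sketch; a trace-free symmetric $2\times2$ matrix is parametrized by two numbers, cf.\@ the \lstinline!PlasticE! expression.
+\begin{lstlisting}[language={[3]Python}]
+elasticBlockSize = dimension      # displacement dofs per grid vertex
+plasticBlockSize = 2 + 1          # trace-free symmetric 2x2 matrix (2 dofs)
+                                  # plus the isotropic hardening variable
+numElasticBlocks = numVertices    # one elastic block per vertex
+numPlasticBlocks = numElements    # one plastic block per element
+numDofs = (numElasticBlocks*elasticBlockSize
+           + numPlasticBlocks*plasticBlockSize)
+\end{lstlisting}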
+
+\subsection{Implementation Details}
+
+For the implementation of the aforementioned algorithm, several things need to be considered, 
+especially regarding the restriction maps.
+The problem arises because the stiffness matrix assembled by \lstinline!fufem! is stored as a SciPy \lstinline!csr_matrix!, 
+which does not allow for reverse indexing. Therefore a strategy for creating the local subproblems must be chosen.
+
+There are multiple strategies for retrieving the entries of a sparse matrix: \begin{itemize}
+    \item Precomputing an index bitmap for reverse lookup of the entries.
+    \item Creating restriction matrices that select the rows and columns belonging to a block, together with the corresponding transposed maps.
+\end{itemize}
+Because the second option yields more readable code, it has been chosen for this implementation.
+
+A block restriction map for block $i$ with block size $N_i$ is a matrix of the form: \begin{equation}
+    \smtenII{R}_i = \begin{bmatrix}
+        \smtenI{0} \\
+        \vdots \\
+        \smtenI{I} \\
+        \vdots \\
+        \smtenI{0}
+    \end{bmatrix}\quad \smtenI{0},\smtenI{I} \in \R^{N_i \times N_i},
+\end{equation} which extracts the corresponding diagonal block of the stiffness matrix $\smtenII{K}$ via
+\begin{equation}
+    \smtenII{E}_{ii} = \smtenII{R}_i^T \smtenII{K} \smtenII{R}_i.
+\end{equation}
+
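+A minimal SciPy sketch of how such a restriction map can be assembled is given below. It assumes, for simplicity, 
+that all blocks have the same size and are numbered consecutively; the actual \lstinline!restrictionMatrix! in 
+\lstinline!elasticity_fufem.py! additionally has to account for the differing elastic and plastic block sizes.
+\begin{lstlisting}[language={[3]Python}]
+import numpy as np
+import scipy.sparse
+
+def restrictionMatrixSketch(numDofs, blockSize, blockIndex):
+    # R_i consists of a single identity block of size blockSize placed at
+    # the row offset of block i; all other entries are zero.
+    rows = np.arange(blockIndex*blockSize, (blockIndex + 1)*blockSize)
+    cols = np.arange(blockSize)
+    data = np.ones(blockSize)
+    restrict = scipy.sparse.csr_matrix((data, (rows, cols)),
+                                       shape=(numDofs, blockSize))
+    return restrict, restrict.T.tocsr()
+\end{lstlisting}
+The diagonal block is then obtained exactly as in the formula above, e.g.\@ as \lstinline!restrictT.dot(A).dot(restrict)!.
+With these restriction maps at hand, the smooth elastic blocks can be solved directly, as in the following excerpt of the implementation: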
+\inputtaggedcode[1*7]{ElasticBlock}%
diff --git a/elasticity_fufem.py b/elasticity_fufem.py
index 561f29ec703383b387ffe22060b00d10917ad676..ad6869a4df659585a353cb1e1092b8871f35d191 100644
--- a/elasticity_fufem.py
+++ b/elasticity_fufem.py
@@ -1,6 +1,6 @@
 import numpy as np
 import scipy.sparse.linalg
-from matspy import spy
+#from matspy import spy
 
 
 import dune.common
@@ -42,10 +42,46 @@ kinematicHardeningModulus = 3e6
 g = np.zeros(dimension)
 g[1] = -g_n*rho
 
+# Neumann boundary condition
+loadPullingUp = np.zeros(dimension)
+loadPullingUp[0] = 0.0
+loadPullingUp[1] = 1.0e5
+
+
 # Lamé parameters for St.Venant-Kirchhoff
 lambda_ = 1e7 # E*nu/((1.-2.*nu) * (1.+nu))
 mu = 6.5e6 #E/(2*(1+nu))
 
+############################# FuFem Hacks #####################################
+
+
+# Expression adding a GridView to the Function Signature
+class GridViewExpression(Expression):
+  def __init__(self, gridView):
+    Expression.__init__(self)
+    self.argList = [gridView]
+    self.templateArgList = ["class GridView#"]
+    self.callArgList = ["const GridView#& gridView#"]
+    self.cppExpression = "gridView#"
+
+# Expression creating a C++ lambda that builds a BoundaryPatch selecting the Neumann faces
+def makeBoundaryPatch(gridView):
+  return FunctionCallExpression(" ".join("""[&](auto p) { 
+      auto robinPatch = BoundaryPatch(p);
+      robinPatch.insertFacesByProperty([&](auto&& intersection) { 
+        auto geomX = intersection.geometry().center();
+        return std::abs(geomX[1] - 0.01) < 1e-8; 
+      });
+      return robinPatch;
+    }""".splitlines()), GridViewExpression(gridView))
+
+def IntegrateNeumannByPredicate(expr,gridView):
+  return FunctionCallExpression("integrate",expr,makeBoundaryPatch(gridView))
+
+
+def neumann(x):
+    return loadPullingUp
+
 ############################# Mechanics #######################################
 
 # Form expression calculating the symmetrized gradient of a vector field.
@@ -57,11 +93,11 @@ def E(expr):
 # The used rangeOperator is only present on the cpp side, and does not have a binding on the python side,
 # therefore the RangeOperator is baked as pure cpp here.
 def PlasticE(expr):
-  return FunctionCallExpression("""
+  return FunctionCallExpression(" ".join("""
     RangeOperator([](const auto& p) -> auto { 
       return 1.0/std::sqrt(2.0) * Dune::FieldMatrix<double,2,2>{ {p[0], p[1]},{p[1], -p[0]} };
     })
-  """
+  """.splitlines())
   ,expr)
 
 # Form Expression mapping the strains to their corresponding stresses.
@@ -132,6 +168,8 @@ def globalAssembler(basis):
 
     # Vector valued rhs coefficient function
     f = Coefficient(rhs, basis.gridView, rangeType=VectorType(dimension))
+    # Vector valued Neumann coefficient function
+    neu = Coefficient(neumann, basis.gridView, rangeType=VectorType(dimension))
 
     # Bilinear form and rhs functional
     # The Bilinear Form has been expanded, to make Fufem happy.
@@ -145,7 +183,7 @@ def globalAssembler(basis):
           + dot(sigma(PlasticE(theta)),PlasticE(eta))
           + dot(kinematicHardeningModulus*PlasticE(theta),PlasticE(eta)))
     
-    b = integrate(dot(f,v))
+    b = integrate(dot(f,v)) + IntegrateNeumannByPredicate(dot(neu,v), basis.gridView)
 
     # Assemble into matrix and vector
     assembler = GlobalAssembler(basis)
@@ -318,46 +356,33 @@ while(True):
     for blockIndex in range(numElasticBlocks+numPlasticBlocks):
         # if we are on an elastic problem:
         if blockIndex < numElasticBlocks:
+#{ ElasticBlock }
             # Get the restriction:
-            #print(blockIndex)
             restrict,restrictT = restrictionMatrix(numDofs,elasticBlockSize,blockIndex)
-    
-            # Get the diagonal block:
             a = (restrictT.dot(A)).dot(restrict)
-            # Get the current residual, and get its restriction:
             residual = b - A.dot(solution)
-    
             # Get the restriction residual
-            fRestrict = restrictT.dot( residual)
-    
-            #print(a.shape,fRestrict.shape)
-    
+            fRestrict = restrictT.dot(residual)
             deltX = scipy.sparse.linalg.spsolve(a,fRestrict)
-            #print(deltX)
             update = restrict * deltX
-    
             solution = solution + update
-            #print(np.linalg.norm(solution))
+#{ end }
         else: # Now we're in a plastic block step
+#{ PlasticBlock }
             restrict,restrictT = restrictionMatrix(numDofs,plasticBlockSize,blockIndex)
-    
             # Get Diagonal
             p = (restrictT.dot(A)).dot(restrict)
-    
             residualElastic = b - A.dot(solution)
-    
-            #print(residualElastic.shape)
             # Now we need to get the Minimizer of the local nonsmooth functional.
             fRestrict = restrictT.dot( residualElastic )
             normFRestrict = np.linalg.norm(fRestrict)
             residualStressDir = fRestrict / max(normFRestrict, 1e-30) # guard against division by a zero residual
             residualStressMagni = max(normFRestrict - yieldStress,0)/(2.0 * mu + kinematicHardeningModulus)
-    
             deltP = residualStressMagni * residualStressDir
-    
             update = restrict * deltP
-    
             solution = solution + update
+#{ end }
             #print(np.linalg.norm(solution))
             
     # Step 2: Truncated linear correction problem