From a7eaf1f13a426d1fad4e29eb2d97c60113de2270 Mon Sep 17 00:00:00 2001
From: Oliver Sander <sander@igpm.rwth-aachen.de>
Date: Fri, 22 Aug 2014 10:04:45 +0000
Subject: [PATCH] First implementation of a mixed discretization for Cosserat
 continua

'Mixed' means that the spaces for deformations and microrotations can
be chosen separately.

At this point there hasn't been a lot of testing yet.  In particular,
parallelizing the assembly is currently not supported.

[[Imported from SVN: r9852]]
---
 Makefile.am                               |  12 +-
 dune/gfe/mixedcosseratenergy.hh           | 404 +++++++++++++++
 dune/gfe/mixedgfeassembler.hh             | 339 ++++++++++++
 dune/gfe/mixedlocalgeodesicfestiffness.hh |  80 +++
 dune/gfe/mixedlocalgfeadolcstiffness.hh   | 523 +++++++++++++++++++
 dune/gfe/mixedriemanniantrsolver.cc       | 595 ++++++++++++++++++++++
 dune/gfe/mixedriemanniantrsolver.hh       | 158 ++++++
 mixed-cosserat-continuum.cc               | 472 +++++++++++++++++
 8 files changed, 2582 insertions(+), 1 deletion(-)
 create mode 100644 dune/gfe/mixedcosseratenergy.hh
 create mode 100644 dune/gfe/mixedgfeassembler.hh
 create mode 100644 dune/gfe/mixedlocalgeodesicfestiffness.hh
 create mode 100644 dune/gfe/mixedlocalgfeadolcstiffness.hh
 create mode 100644 dune/gfe/mixedriemanniantrsolver.cc
 create mode 100644 dune/gfe/mixedriemanniantrsolver.hh
 create mode 100644 mixed-cosserat-continuum.cc

diff --git a/Makefile.am b/Makefile.am
index 8b80d08e..f65db4c4 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -13,7 +13,9 @@ ADOLC_CPPFLAGS = -I/home/sander/adolc-inst/include
 ADOLC_LDFLAGS = -L/home/sander/adolc-inst/lib64
 ADOLC_LIBS = -ladolc
 
-noinst_PROGRAMS = cosserat-continuum rodobstacle rod3d harmonicmaps harmonicmaps-eoc rod-eoc
+noinst_PROGRAMS = cosserat-continuum \
+                  mixed-cosserat-continuum \
+                  rodobstacle rod3d harmonicmaps harmonicmaps-eoc rod-eoc
 
 cosserat_continuum_SOURCES  = cosserat-continuum.cc
 cosserat_continuum_CXXFLAGS = $(UG_CPPFLAGS) $(IPOPT_CPPFLAGS) \
@@ -23,6 +25,14 @@ cosserat_continuum_LDADD    = $(UG_LIBS) $(IPOPT_LIBS) \
 cosserat_continuum_LDFLAGS  = $(UG_LDFLAGS) $(IPOPT_LDFLAGS) \
                               $(ADOLC_LDFLAGS) $(PYTHON_LDFLAGS)
 
+mixed_cosserat_continuum_SOURCES  = mixed-cosserat-continuum.cc
+mixed_cosserat_continuum_CXXFLAGS = $(UG_CPPFLAGS) $(IPOPT_CPPFLAGS) \
+                              $(ADOLC_CPPFLAGS) $(PYTHON_CPPFLAGS)
+mixed_cosserat_continuum_LDADD    = $(UG_LIBS) $(IPOPT_LIBS) \
+                              $(ADOLC_LIBS) $(PYTHON_LIBS)
+mixed_cosserat_continuum_LDFLAGS  = $(UG_LDFLAGS) $(IPOPT_LDFLAGS) \
+                              $(ADOLC_LDFLAGS) $(PYTHON_LDFLAGS)
+
 rodobstacle_SOURCES = rodobstacle.cc
 rod3d_SOURCES = rod3d.cc
 
diff --git a/dune/gfe/mixedcosseratenergy.hh b/dune/gfe/mixedcosseratenergy.hh
new file mode 100644
index 00000000..4239ede3
--- /dev/null
+++ b/dune/gfe/mixedcosseratenergy.hh
@@ -0,0 +1,404 @@
+#ifndef DUNE_GFE_MIXEDCOSSERATENERGY_HH
+#define DUNE_GFE_MIXEDCOSSERATENERGY_HH
+
+#include <dune/common/fmatrix.hh>
+#include <dune/common/parametertree.hh>
+#include <dune/geometry/quadraturerules.hh>
+
+#include <dune/fufem/functions/virtualgridfunction.hh>
+#include <dune/fufem/boundarypatch.hh>
+
+#include <dune/gfe/mixedlocalgeodesicfestiffness.hh>
+#include <dune/gfe/localgeodesicfefunction.hh>
+#include <dune/gfe/rigidbodymotion.hh>
+#include <dune/gfe/tensor3.hh>
+#include <dune/gfe/orthogonalmatrix.hh>
+#include <dune/gfe/cosseratstrain.hh>
+
+#define DONT_USE_CURL
+
+//#define QUADRATIC_MEMBRANE_ENERGY
+
+
+template<class GridView, class DisplacementLocalFiniteElement, class OrientationLocalFiniteElement, int dim, class field_type=double>
+class MixedCosseratEnergy
+    : public MixedLocalGeodesicFEStiffness<GridView,
+                                           DisplacementLocalFiniteElement,RealTuple<field_type,dim>,
+                                           OrientationLocalFiniteElement,Rotation<field_type,dim> >
+{
+    // grid types
+    typedef typename GridView::Grid::ctype DT;
+    typedef field_type RT;
+    typedef typename GridView::template Codim<0>::Entity Entity;
+
+    // some other sizes
+    enum {gridDim=GridView::dimension};
+
+
+    /** \brief Compute the symmetric part of a matrix A, i.e. \f$ \frac 12 (A + A^T) \f$ */
+    static Dune::FieldMatrix<field_type,dim,dim> sym(const Dune::FieldMatrix<field_type,dim,dim>& A)
+    {
+        Dune::FieldMatrix<field_type,dim,dim> result;
+        for (int i=0; i<dim; i++)
+            for (int j=0; j<dim; j++)
+                result[i][j] = 0.5 * (A[i][j] + A[j][i]);
+        return result;
+    }
+
+    /** \brief Compute the antisymmetric part of a matrix A, i.e. \f$ \frac 12 (A - A^T) \f$ */
+    static Dune::FieldMatrix<field_type,dim,dim> skew(const Dune::FieldMatrix<field_type,dim,dim>& A)
+    {
+        Dune::FieldMatrix<field_type,dim,dim> result;
+        for (int i=0; i<dim; i++)
+            for (int j=0; j<dim; j++)
+                result[i][j] = 0.5 * (A[i][j] - A[j][i]);
+        return result;
+    }
+
+    /** \brief Return the square of the trace of a matrix */
+    template <int N>
+    static field_type traceSquared(const Dune::FieldMatrix<field_type,N,N>& A)
+    {
+        field_type trace = 0;
+        for (int i=0; i<N; i++)
+            trace += A[i][i];
+        return trace*trace;
+    }
+
+    /** \brief Compute the (row-wise) curl of a matrix R \f$
+        \param DR The partial derivatives of the matrix R
+     */
+    static Dune::FieldMatrix<field_type,dim,dim> curl(const Tensor3<field_type,dim,dim,dim>& DR)
+    {
+        Dune::FieldMatrix<field_type,dim,dim> result;
+
+        for (int i=0; i<dim; i++) {
+            result[i][0] = DR[i][2][1] - DR[i][1][2];
+            result[i][1] = DR[i][0][2] - DR[i][2][0];
+            result[i][2] = DR[i][1][0] - DR[i][0][1];
+        }
+
+        return result;
+    }
+
+public:  // for testing
+    /** \brief Compute the derivative of the rotation (with respect to x), but wrt matrix coordinates
+        \param value Value of the gfe function at a certain point
+        \param derivative First derivative of the gfe function wrt x at that point, in quaternion coordinates
+        \param DR First derivative of the gfe function wrt x at that point, in matrix coordinates
+     */
+    static void computeDR(const Rotation<field_type,3>& value,
+                          const Dune::FieldMatrix<field_type,4,gridDim>& derivative,
+                          Tensor3<field_type,3,3,3>& DR)
+    {
+        // The LocalGFEFunction class gives us the derivatives of the orientation variable,
+        // but as a map into quaternion space.  To obtain matrix coordinates we use the
+        // chain rule, which means that we have to multiply the given derivative with
+        // the derivative of the embedding of the unit quaternion into the space of 3x3 matrices.
+        // This second derivative is almost given by the method getFirstDerivativesOfDirectors.
+        // However, since the directors of a given unit quaternion are the _columns_ of the
+        // corresponding orthogonal matrix, we need to invert the i and j indices
+        //
+        // So, if I am not mistaken, DR[i][j][k] contains \partial R_ij / \partial k
+        Tensor3<field_type,3 , 3, 4> dd_dq;
+        value.getFirstDerivativesOfDirectors(dd_dq);
+
+        DR = field_type(0);
+        for (int i=0; i<3; i++)
+            for (int j=0; j<3; j++)
+                for (int k=0; k<gridDim; k++)
+                    for (int l=0; l<4; l++)
+                        DR[i][j][k] += dd_dq[j][i][l] * derivative[l][k];
+
+    }
+
+public:
+
+    /** \brief Constructor with a set of material parameters
+     * \param parameters The material parameters
+     */
+    MixedCosseratEnergy(const Dune::ParameterTree& parameters,
+                        const BoundaryPatch<GridView>* neumannBoundary,
+                        const Dune::VirtualFunction<Dune::FieldVector<double,gridDim>, Dune::FieldVector<double,3> >* neumannFunction)
+    : neumannBoundary_(neumannBoundary),
+      neumannFunction_(neumannFunction)
+    {
+        // The shell thickness
+        thickness_ = parameters.template get<double>("thickness");
+
+        // Lame constants
+        mu_ = parameters.template get<double>("mu");
+        lambda_ = parameters.template get<double>("lambda");
+
+        // Cosserat couple modulus, preferably 0
+        mu_c_ = parameters.template get<double>("mu_c");
+
+        // Length scale parameter
+        L_c_ = parameters.template get<double>("L_c");
+
+        // Curvature exponent
+        q_ = parameters.template get<double>("q");
+
+        // Shear correction factor
+        kappa_ = parameters.template get<double>("kappa");
+    }
+
+    /** \brief Assemble the energy for a single element */
+    RT energy (const Entity& e,
+               const DisplacementLocalFiniteElement& displacementLocalFiniteElement,
+               const std::vector<RealTuple<field_type,dim> >& localDisplacementConfiguration,
+               const OrientationLocalFiniteElement& orientationLocalFiniteElement,
+               const std::vector<Rotation<field_type,dim> >& localOrientationConfiguration) const;
+
+    /** \brief The energy \f$ W_{mp}(\overline{U}) \f$, as written in
+     * the first equation of (4.4) in Neff's paper
+     */
+    RT quadraticMembraneEnergy(const Dune::GFE::CosseratStrain<field_type,3,gridDim>& U) const
+    {
+        Dune::FieldMatrix<field_type,3,3> UMinus1 = U;
+        for (int i=0; i<dim; i++)
+            UMinus1[i][i] -= 1;
+
+        return mu_ * sym(UMinus1).frobenius_norm2()
+                + mu_c_ * skew(UMinus1).frobenius_norm2()
+                + (mu_*lambda_)/(2*mu_ + lambda_) * traceSquared(sym(UMinus1));
+    }
+
+    /** \brief The energy \f$ W_{mp}(\overline{U}) \f$, as written in
+     * the second equation of (4.4) in Neff's paper
+     */
+    RT longQuadraticMembraneEnergy(const Dune::GFE::CosseratStrain<field_type,3,gridDim>& U) const
+    {
+        RT result = 0;
+
+        // shear-stretch energy
+        Dune::FieldMatrix<field_type,dim-1,dim-1> sym2x2;
+        for (int i=0; i<dim-1; i++)
+            for (int j=0; j<dim-1; j++)
+                sym2x2[i][j] = 0.5 * (U.matrix()[i][j] + U.matrix()[j][i]) - (i==j);
+
+        result += mu_ * sym2x2.frobenius_norm2();
+
+        // first order drill energy
+        Dune::FieldMatrix<field_type,dim-1,dim-1> skew2x2;
+        for (int i=0; i<dim-1; i++)
+            for (int j=0; j<dim-1; j++)
+                skew2x2[i][j] = 0.5 * (U.matrix()[i][j] - U.matrix()[j][i]);
+
+        result += mu_c_ * skew2x2.frobenius_norm2();
+
+
+        // classical transverse shear energy
+        result += kappa_ * (mu_ + mu_c_)/2 * (U.matrix()[2][0]*U.matrix()[2][0] + U.matrix()[2][1]*U.matrix()[2][1]);
+
+        // elongational stretch energy
+        result += mu_*lambda_ / (2*mu_ + lambda_) * traceSquared(sym2x2);
+
+        return result;
+    }
+
+    /** \brief Energy for large-deformation problems (private communication by Patrizio Neff)
+     */
+    RT nonquadraticMembraneEnergy(const Dune::GFE::CosseratStrain<field_type,3,gridDim>& U) const
+    {
+        Dune::FieldMatrix<field_type,3,3> UMinus1 = U.matrix();
+        for (int i=0; i<dim; i++)
+            UMinus1[i][i] -= 1;
+
+        RT detU = U.determinant();
+
+        return mu_ * sym(UMinus1).frobenius_norm2()
+                + (mu_*lambda_)/(2*mu_ + lambda_) * 0.5 * ((detU-1)*(detU-1) + (1.0/detU -1)*(1.0/detU -1));
+    }
+
+    RT curvatureEnergy(const Tensor3<field_type,3,3,3>& DR) const
+    {
+#ifdef DONT_USE_CURL
+        return mu_ * std::pow(L_c_ * L_c_ * DR.frobenius_norm2(),q_/2.0);
+#else
+        return mu_ * std::pow(L_c_ * L_c_ * curl(DR).frobenius_norm2(),q_/2.0);
+#endif
+    }
+
+    RT bendingEnergy(const Dune::FieldMatrix<field_type,dim,dim>& R, const Tensor3<field_type,3,3,3>& DR) const
+    {
+        // left-multiply the derivative of the third director (in DR[][2][]) with R^T
+        Dune::FieldMatrix<field_type,3,3> RT_DR3;
+        for (int i=0; i<3; i++)
+            for (int j=0; j<3; j++) {
+                RT_DR3[i][j] = 0;
+                for (int k=0; k<3; k++)
+                    RT_DR3[i][j] += R[k][i] * DR[k][2][j];
+            }
+
+
+
+        return mu_ * sym(RT_DR3).frobenius_norm2()
+               + mu_c_ * skew(RT_DR3).frobenius_norm2()
+               + mu_*lambda_/(2*mu_+lambda_) * traceSquared(RT_DR3);
+    }
+
+    /** \brief The shell thickness */
+    double thickness_;
+
+    /** \brief Lame constants */
+    double mu_, lambda_;
+
+    /** \brief Cosserat couple modulus, preferably 0 */
+    double mu_c_;
+
+    /** \brief Length scale parameter */
+    double L_c_;
+
+    /** \brief Curvature exponent */
+    double q_;
+
+    /** \brief Shear correction factor */
+    double kappa_;
+
+    /** \brief The Neumann boundary */
+    const BoundaryPatch<GridView>* neumannBoundary_;
+
+    /** \brief The function implementing the Neumann data */
+    const Dune::VirtualFunction<Dune::FieldVector<double,gridDim>, Dune::FieldVector<double,3> >* neumannFunction_;
+};
+
+template <class GridView, class DeformationLocalFiniteElement, class OrientationLocalFiniteElement, int dim, class field_type>
+typename MixedCosseratEnergy<GridView,DeformationLocalFiniteElement,OrientationLocalFiniteElement,dim,field_type>::RT
+MixedCosseratEnergy<GridView,DeformationLocalFiniteElement,OrientationLocalFiniteElement,dim,field_type>::
+energy(const Entity& element,
+       const DeformationLocalFiniteElement& deformationLocalFiniteElement,
+       const std::vector<RealTuple<field_type,dim> >& localDeformationConfiguration,
+       const OrientationLocalFiniteElement& orientationLocalFiniteElement,
+       const std::vector<Rotation<field_type,dim> >& localOrientationConfiguration) const
+{
+    assert(element.type() == deformationLocalFiniteElement.type());
+    assert(element.type() == orientationLocalFiniteElement.type());
+    typedef typename GridView::template Codim<0>::Entity::Geometry Geometry;
+
+    RT energy = 0;
+
+    typedef LocalGeodesicFEFunction<gridDim, DT, DeformationLocalFiniteElement, RealTuple<field_type,dim> > LocalDeformationGFEFunctionType;
+    LocalDeformationGFEFunctionType localDeformationGFEFunction(deformationLocalFiniteElement,localDeformationConfiguration);
+
+    typedef LocalGeodesicFEFunction<gridDim, DT, OrientationLocalFiniteElement, Rotation<field_type,dim> > LocalOrientationGFEFunctionType;
+    LocalOrientationGFEFunctionType localOrientationGFEFunction(orientationLocalFiniteElement,localOrientationConfiguration);
+
+    // \todo Implement smarter quadrature rule selection for more efficiency, i.e., less evaluations of the Rotation GFE function
+    int quadOrder = deformationLocalFiniteElement.localBasis().order() * ((element.type().isSimplex()) ? 1 : gridDim);
+
+    const Dune::QuadratureRule<DT, gridDim>& quad
+        = Dune::QuadratureRules<DT, gridDim>::rule(element.type(), quadOrder);
+
+    for (size_t pt=0; pt<quad.size(); pt++) {
+
+        // Local position of the quadrature point
+        const Dune::FieldVector<DT,gridDim>& quadPos = quad[pt].position();
+
+        const DT integrationElement = element.geometry().integrationElement(quadPos);
+
+        const typename Geometry::JacobianInverseTransposed& jacobianInverseTransposed = element.geometry().jacobianInverseTransposed(quadPos);
+
+        DT weight = quad[pt].weight() * integrationElement;
+
+        // The value of the local deformation
+        RealTuple<field_type,dim> deformationValue = localDeformationGFEFunction.evaluate(quadPos);
+        Rotation<field_type,dim>  orientationValue = localOrientationGFEFunction.evaluate(quadPos);
+
+        // The derivative of the local function defined on the reference element
+        typename LocalDeformationGFEFunctionType::DerivativeType deformationReferenceDerivative = localDeformationGFEFunction.evaluateDerivative(quadPos,deformationValue);
+        typename LocalOrientationGFEFunctionType::DerivativeType orientationReferenceDerivative = localOrientationGFEFunction.evaluateDerivative(quadPos,orientationValue);
+
+        // The derivative of the function defined on the actual element
+        typename LocalDeformationGFEFunctionType::DerivativeType deformationDerivative;
+        typename LocalOrientationGFEFunctionType::DerivativeType orientationDerivative;
+
+        for (size_t comp=0; comp<deformationReferenceDerivative.N(); comp++)
+            jacobianInverseTransposed.mv(deformationReferenceDerivative[comp], deformationDerivative[comp]);
+
+        for (size_t comp=0; comp<orientationReferenceDerivative.N(); comp++)
+            jacobianInverseTransposed.mv(orientationReferenceDerivative[comp], orientationDerivative[comp]);
+
+        /////////////////////////////////////////////////////////
+        // compute U, the Cosserat strain
+        /////////////////////////////////////////////////////////
+        static_assert(dim>=gridDim, "Codim of the grid must be nonnegative");
+
+        //
+        Dune::FieldMatrix<field_type,dim,dim> R;
+        orientationValue.matrix(R);
+
+        Dune::GFE::CosseratStrain<field_type,dim,gridDim> U(deformationDerivative,R);
+
+        //////////////////////////////////////////////////////////
+        //  Compute the derivative of the rotation
+        //  Note: we need it in matrix coordinates
+        //////////////////////////////////////////////////////////
+
+        Tensor3<field_type,3,3,3> DR;
+        computeDR(orientationValue, orientationDerivative, DR);
+
+        // Add the local energy density
+        if (gridDim==2) {
+#ifdef QUADRATIC_MEMBRANE_ENERGY
+            //energy += weight * thickness_ * quadraticMembraneEnergy(U.matrix());
+            energy += weight * thickness_ * longQuadraticMembraneEnergy(U);
+#else
+            energy += weight * thickness_ * nonquadraticMembraneEnergy(U);
+#endif
+            energy += weight * thickness_ * curvatureEnergy(DR);
+            energy += weight * std::pow(thickness_,3) / 12.0 * bendingEnergy(R,DR);
+        } else if (gridDim==3) {
+            energy += weight * quadraticMembraneEnergy(U);
+            energy += weight * curvatureEnergy(DR);
+        } else
+            DUNE_THROW(Dune::NotImplemented, "CosseratEnergyStiffness for 1d grids");
+
+    }
+
+    //////////////////////////////////////////////////////////////////////////////
+    //   Assemble boundary contributions
+    //////////////////////////////////////////////////////////////////////////////
+
+    if (not neumannFunction_)
+        return energy;
+
+    for (typename Entity::LeafIntersectionIterator it = element.ileafbegin(); it != element.ileafend(); ++it) {
+
+        if (not neumannBoundary_ or not neumannBoundary_->contains(*it))
+            continue;
+
+        const Dune::QuadratureRule<DT, gridDim-1>& quad
+            = Dune::QuadratureRules<DT, gridDim-1>::rule(it->type(), quadOrder);
+
+        for (size_t pt=0; pt<quad.size(); pt++) {
+
+            // Local position of the quadrature point
+            const Dune::FieldVector<DT,gridDim>& quadPos = it->geometryInInside().global(quad[pt].position());
+
+            const DT integrationElement = it->geometry().integrationElement(quad[pt].position());
+
+            // The value of the local function
+            RealTuple<field_type,dim> deformationValue = localDeformationGFEFunction.evaluate(quadPos);
+
+            // Value of the Neumann data at the current position
+            Dune::FieldVector<double,3> neumannValue;
+
+            if (dynamic_cast<const VirtualGridViewFunction<GridView,Dune::FieldVector<double,3> >*>(neumannFunction_))
+                dynamic_cast<const VirtualGridViewFunction<GridView,Dune::FieldVector<double,3> >*>(neumannFunction_)->evaluateLocal(element, quadPos, neumannValue);
+            else
+                neumannFunction_->evaluate(it->geometry().global(quad[pt].position()), neumannValue);
+
+            // Only translational dofs are affected by the Neumann force
+            for (size_t i=0; i<neumannValue.size(); i++)
+                energy += thickness_ * (neumannValue[i] * deformationValue.globalCoordinates()[i]) * quad[pt].weight() * integrationElement;
+
+        }
+
+    }
+
+    return energy;
+}
+
+#endif   //#ifndef COSSERAT_ENERGY_LOCAL_STIFFNESS_HH
+
diff --git a/dune/gfe/mixedgfeassembler.hh b/dune/gfe/mixedgfeassembler.hh
new file mode 100644
index 00000000..cfa4bc65
--- /dev/null
+++ b/dune/gfe/mixedgfeassembler.hh
@@ -0,0 +1,339 @@
+#ifndef DUNE_GFE_MIXEDGFEASSEMBLER_HH
+#define DUNE_GFE_MIXEDGFEASSEMBLER_HH
+
+#include <dune/istl/bcrsmatrix.hh>
+#include <dune/common/fmatrix.hh>
+#include <dune/istl/matrixindexset.hh>
+#include <dune/istl/matrix.hh>
+
+#include <dune/gfe/mixedlocalgeodesicfestiffness.hh>
+
+
+/** \brief A global FE assembler for problems involving functions that map into non-Euclidean spaces
+ */
+template <class Basis0, class TargetSpace0, class Basis1, class TargetSpace1>
+class MixedGFEAssembler {
+
+    typedef typename Basis0::GridView GridView;
+    typedef typename GridView::template Codim<0>::template Partition<Dune::Interior_Partition>::Iterator ElementIterator;
+
+    //! Dimension of the grid.
+    enum { gridDim = GridView::dimension };
+
+    //! Dimension of a tangent space
+    enum { blocksize0 = TargetSpace0::TangentVector::dimension };
+    enum { blocksize1 = TargetSpace1::TangentVector::dimension };
+
+    //!
+    typedef Dune::FieldMatrix<double, blocksize0, blocksize0> MatrixBlock00;
+    typedef Dune::FieldMatrix<double, blocksize0, blocksize1> MatrixBlock01;
+    typedef Dune::FieldMatrix<double, blocksize1, blocksize0> MatrixBlock10;
+    typedef Dune::FieldMatrix<double, blocksize1, blocksize1> MatrixBlock11;
+
+protected:
+public:
+    const Basis0 basis0_;
+    const Basis1 basis1_;
+
+    MixedLocalGeodesicFEStiffness<GridView,
+                                  typename Basis0::LocalFiniteElement,
+                                  TargetSpace0,
+                                  typename Basis1::LocalFiniteElement,
+                                  TargetSpace1>* localStiffness_;
+
+public:
+
+    /** \brief Constructor for a given grid */
+    MixedGFEAssembler(const Basis0& basis0,
+                      const Basis1& basis1,
+                      MixedLocalGeodesicFEStiffness<GridView,
+                                               typename Basis0::LocalFiniteElement, TargetSpace0,
+                                               typename Basis0::LocalFiniteElement, TargetSpace1>* localStiffness)
+        : basis0_(basis0),
+          basis1_(basis1),
+          localStiffness_(localStiffness)
+    {}
+
+    /** \brief Assemble the tangent stiffness matrix and the functional gradient together
+     *
+     * This is more efficient than computing them separately, because you need the gradient
+     * anyway to compute the Riemannian Hessian.
+     */
+    virtual void assembleGradientAndHessian(const std::vector<TargetSpace0>& configuration0,
+                                            const std::vector<TargetSpace1>& configuration1,
+                                            Dune::BlockVector<Dune::FieldVector<double, blocksize0> >& gradient0,
+                                            Dune::BlockVector<Dune::FieldVector<double, blocksize1> >& gradient1,
+                                            Dune::BCRSMatrix<MatrixBlock00>& hessian00,
+                                            Dune::BCRSMatrix<MatrixBlock01>& hessian01,
+                                            Dune::BCRSMatrix<MatrixBlock10>& hessian10,
+                                            Dune::BCRSMatrix<MatrixBlock11>& hessian11,
+                                            bool computeOccupationPattern=true) const;
+#if 0
+    /** \brief Assemble the gradient */
+    virtual void assembleGradient(const std::vector<TargetSpace>& sol,
+                          Dune::BlockVector<Dune::FieldVector<double, blocksize> >& grad) const;
+#endif
+    /** \brief Compute the energy of a deformation state */
+    virtual double computeEnergy(const std::vector<TargetSpace0>& configuration0,
+                                 const std::vector<TargetSpace1>& configuration1) const;
+
+    //protected:
+    void getMatrixPattern(Dune::MatrixIndexSet& nb00,
+                          Dune::MatrixIndexSet& nb01,
+                          Dune::MatrixIndexSet& nb10,
+                          Dune::MatrixIndexSet& nb11) const;
+
+}; // end class
+
+
+
+template <class Basis0, class TargetSpace0, class Basis1, class TargetSpace1>
+void MixedGFEAssembler<Basis0,TargetSpace0,Basis1,TargetSpace1>::
+getMatrixPattern(Dune::MatrixIndexSet& nb00,
+                 Dune::MatrixIndexSet& nb01,
+                 Dune::MatrixIndexSet& nb10,
+                 Dune::MatrixIndexSet& nb11) const
+{
+    nb00.resize(basis0_.size(), basis0_.size());
+    nb01.resize(basis0_.size(), basis1_.size());
+    nb10.resize(basis1_.size(), basis0_.size());
+    nb11.resize(basis1_.size(), basis1_.size());
+
+    // Grid view must be the same for both bases
+    ElementIterator it    = basis0_.getGridView().template begin<0,Dune::Interior_Partition>();
+    ElementIterator endit = basis0_.getGridView().template end<0,Dune::Interior_Partition>  ();
+
+    for (; it!=endit; ++it) {
+
+        const typename Basis0::LocalFiniteElement& lfe0 = basis0_.getLocalFiniteElement(*it);
+        const typename Basis1::LocalFiniteElement& lfe1 = basis1_.getLocalFiniteElement(*it);
+
+        for (size_t i=0; i<lfe0.localBasis().size(); i++) {
+
+            int iIdx = basis0_.index(*it,i);
+
+            for (size_t j=0; j<lfe0.localBasis().size(); j++) {
+                int jIdx = basis0_.index(*it,j);
+                nb00.add(iIdx, jIdx);
+            }
+
+            for (size_t j=0; j<lfe1.localBasis().size(); j++) {
+                int jIdx = basis1_.index(*it,j);
+                nb01.add(iIdx, jIdx);
+
+            }
+
+        }
+
+        for (size_t i=0; i<lfe1.localBasis().size(); i++) {
+
+            int iIdx = basis1_.index(*it,i);
+
+            for (size_t j=0; j<lfe0.localBasis().size(); j++) {
+                int jIdx = basis0_.index(*it,j);
+                nb10.add(iIdx, jIdx);
+            }
+
+            for (size_t j=0; j<lfe1.localBasis().size(); j++) {
+                int jIdx = basis1_.index(*it,j);
+                nb11.add(iIdx, jIdx);
+
+            }
+
+        }
+
+    }
+
+}
+
+template <class Basis0, class TargetSpace0, class Basis1, class TargetSpace1>
+void MixedGFEAssembler<Basis0,TargetSpace0,Basis1,TargetSpace1>::
+assembleGradientAndHessian(const std::vector<TargetSpace0>& configuration0,
+                           const std::vector<TargetSpace1>& configuration1,
+                           Dune::BlockVector<Dune::FieldVector<double, blocksize0> >& gradient0,
+                           Dune::BlockVector<Dune::FieldVector<double, blocksize1> >& gradient1,
+                           Dune::BCRSMatrix<MatrixBlock00>& hessian00,
+                           Dune::BCRSMatrix<MatrixBlock01>& hessian01,
+                           Dune::BCRSMatrix<MatrixBlock10>& hessian10,
+                           Dune::BCRSMatrix<MatrixBlock11>& hessian11,
+                           bool computeOccupationPattern) const
+{
+    if (computeOccupationPattern) {
+
+        Dune::MatrixIndexSet pattern00;
+        Dune::MatrixIndexSet pattern01;
+        Dune::MatrixIndexSet pattern10;
+        Dune::MatrixIndexSet pattern11;
+
+        getMatrixPattern(pattern00, pattern01, pattern10, pattern11);
+
+        pattern00.exportIdx(hessian00);
+        pattern01.exportIdx(hessian01);
+        pattern10.exportIdx(hessian10);
+        pattern11.exportIdx(hessian11);
+
+    }
+
+    hessian00 = 0;
+    hessian01 = 0;
+    hessian10 = 0;
+    hessian11 = 0;
+
+    gradient0.resize(configuration0.size());
+    gradient0 = 0;
+    gradient1.resize(configuration1.size());
+    gradient1 = 0;
+
+    ElementIterator it    = basis0_.getGridView().template begin<0,Dune::Interior_Partition>();
+    ElementIterator endit = basis0_.getGridView().template end<0,Dune::Interior_Partition>  ();
+
+    for( ; it != endit; ++it ) {
+
+        const int nDofs0 = basis0_.getLocalFiniteElement(*it).localBasis().size();
+        const int nDofs1 = basis1_.getLocalFiniteElement(*it).localBasis().size();
+
+        // Extract local solution
+        std::vector<TargetSpace0> localConfiguration0(nDofs0);
+        std::vector<TargetSpace1> localConfiguration1(nDofs1);
+
+        for (int i=0; i<nDofs0; i++)
+            localConfiguration0[i] = configuration0[basis0_.index(*it,i)];
+
+        for (int i=0; i<nDofs1; i++)
+            localConfiguration1[i] = configuration1[basis1_.index(*it,i)];
+
+        std::vector<Dune::FieldVector<double,blocksize0> > localGradient0(nDofs0);
+        std::vector<Dune::FieldVector<double,blocksize1> > localGradient1(nDofs1);
+
+        // setup local matrix and gradient
+        localStiffness_->assembleGradientAndHessian(*it,
+                                                    basis0_.getLocalFiniteElement(*it), localConfiguration0,
+                                                    basis1_.getLocalFiniteElement(*it), localConfiguration1,
+                                                    localGradient0, localGradient1);
+
+        // Add element matrix to global stiffness matrix
+        for (int i=0; i<nDofs0; i++) {
+
+            int row = basis0_.index(*it,i);
+
+            for (int j=0; j<nDofs0; j++ ) {
+                int col = basis0_.index(*it,j);
+                hessian00[row][col] += localStiffness_->A00_[i][j];
+            }
+
+            for (int j=0; j<nDofs1; j++ ) {
+                int col = basis1_.index(*it,j);
+                hessian01[row][col] += localStiffness_->A01_[i][j];
+            }
+        }
+
+        for (int i=0; i<nDofs1; i++) {
+
+            int row = basis1_.index(*it,i);
+
+            for (int j=0; j<nDofs0; j++ ) {
+                int col = basis0_.index(*it,j);
+                hessian10[row][col] += localStiffness_->A10_[i][j];
+            }
+
+            for (int j=0; j<nDofs1; j++ ) {
+                int col = basis1_.index(*it,j);
+                hessian11[row][col] += localStiffness_->A11_[i][j];
+            }
+        }
+
+        // Add local gradient to global gradient
+        for (int i=0; i<nDofs0; i++)
+            gradient0[basis0_.index(*it,i)] += localGradient0[i];
+
+        for (int i=0; i<nDofs1; i++)
+            gradient1[basis1_.index(*it,i)] += localGradient1[i];
+    }
+
+}
+
+#if 0
+template <class Basis, class TargetSpace>
+void GeodesicFEAssembler<Basis,TargetSpace>::
+assembleGradient(const std::vector<TargetSpace>& sol,
+                 Dune::BlockVector<Dune::FieldVector<double, blocksize> >& grad) const
+{
+    if (sol.size()!=basis_.size())
+        DUNE_THROW(Dune::Exception, "Solution vector doesn't match the grid!");
+
+    grad.resize(sol.size());
+    grad = 0;
+
+    ElementIterator it    = basis_.getGridView().template begin<0,Dune::Interior_Partition>();
+    ElementIterator endIt = basis_.getGridView().template end<0,Dune::Interior_Partition>();
+
+    // Loop over all elements
+    for (; it!=endIt; ++it) {
+
+        // A 1d grid has two vertices
+        const int nDofs = basis_.getLocalFiniteElement(*it).localBasis().size();
+
+        // Extract local solution
+        std::vector<TargetSpace> localSolution(nDofs);
+
+        for (int i=0; i<nDofs; i++)
+            localSolution[i] = sol[basis_.index(*it,i)];
+
+        // Assemble local gradient
+        std::vector<Dune::FieldVector<double,blocksize> > localGradient(nDofs);
+
+        localStiffness_->assembleGradient(*it, basis_.getLocalFiniteElement(*it), localSolution, localGradient);
+
+        // Add to global gradient
+        for (int i=0; i<nDofs; i++)
+            grad[basis_.index(*it,i)] += localGradient[i];
+
+    }
+
+}
+#endif
+
+template <class Basis0, class TargetSpace0, class Basis1, class TargetSpace1>
+double MixedGFEAssembler<Basis0, TargetSpace0, Basis1, TargetSpace1>::
+computeEnergy(const std::vector<TargetSpace0>& configuration0,
+              const std::vector<TargetSpace1>& configuration1) const
+{
+    double energy = 0;
+
+    if (configuration0.size()!=basis0_.size())
+        DUNE_THROW(Dune::Exception, "Configuration vector doesn't match the grid!");
+
+    if (configuration1.size()!=basis1_.size())
+        DUNE_THROW(Dune::Exception, "Configuration vector doesn't match the grid!");
+
+    ElementIterator it    = basis0_.getGridView().template begin<0,Dune::Interior_Partition>();
+    ElementIterator endIt = basis0_.getGridView().template end<0,Dune::Interior_Partition>();
+
+    // Loop over all elements
+    for (; it!=endIt; ++it) {
+
+        // Number of degrees of freedom on this element
+        size_t nDofs0 = basis0_.getLocalFiniteElement(*it).localBasis().size();
+        size_t nDofs1 = basis1_.getLocalFiniteElement(*it).localBasis().size();
+
+        std::vector<TargetSpace0> localConfiguration0(nDofs0);
+        std::vector<TargetSpace1> localConfiguration1(nDofs1);
+
+        for (size_t i=0; i<nDofs0; i++)
+            localConfiguration0[i] = configuration0[basis0_.index(*it,i)];
+
+        for (size_t i=0; i<nDofs1; i++)
+            localConfiguration1[i] = configuration1[basis1_.index(*it,i)];
+
+        energy += localStiffness_->energy(*it,
+                                          basis0_.getLocalFiniteElement(*it), localConfiguration0,
+                                          basis1_.getLocalFiniteElement(*it), localConfiguration1);
+
+    }
+
+    return energy;
+
+}
+
+#endif
+
diff --git a/dune/gfe/mixedlocalgeodesicfestiffness.hh b/dune/gfe/mixedlocalgeodesicfestiffness.hh
new file mode 100644
index 00000000..82a90357
--- /dev/null
+++ b/dune/gfe/mixedlocalgeodesicfestiffness.hh
@@ -0,0 +1,80 @@
+#ifndef DUNE_GFE_MIXEDLOCALGEODESICFESTIFFNESS_HH
+#define DUNE_GFE_MIXEDLOCALGEODESICFESTIFFNESS_HH
+
+#include "omp.h"
+
+#include <dune/common/fmatrix.hh>
+#include <dune/istl/matrix.hh>
+
+
+template<class GridView,
+         class DeformationLocalFiniteElement, class DeformationTargetSpace,
+         class OrientationLocalFiniteElement, class OrientationTargetSpace>
+class MixedLocalGeodesicFEStiffness
+{
+    // grid types
+    typedef typename GridView::Grid::ctype DT;
+    typedef typename DeformationTargetSpace::ctype RT;
+    typedef typename GridView::template Codim<0>::Entity Entity;
+
+    // some other sizes
+    enum {gridDim=GridView::dimension};
+
+public:
+
+    //! Dimension of a tangent space
+    enum { deformationBlocksize = DeformationTargetSpace::TangentVector::dimension };
+    enum { orientationBlocksize = OrientationTargetSpace::TangentVector::dimension };
+
+    /** \brief Assemble the local stiffness matrix at the current position
+
+    This default implementation used finite-difference approximations to compute the second derivatives
+
+    The formula for the Riemannian Hessian has been taken from Absil, Mahony, Sepulchre:
+    'Optimization algorithms on matrix manifolds', page 107.  There it says that
+    \f[
+        \langle Hess f(x)[\xi], \eta \rangle
+            = \frac 12 \frac{d^2}{dt^2} \Big(f(\exp_x(t(\xi + \eta))) - f(\exp_x(t\xi)) - f(\exp_x(t\eta))\Big)\Big|_{t=0}.
+    \f]
+    We compute that using a finite difference approximation.
+
+    */
+    virtual void assembleGradientAndHessian(const Entity& e,
+                                            const DeformationLocalFiniteElement& displacementLocalFiniteElement,
+                                            const std::vector<DeformationTargetSpace>& localDisplacementConfiguration,
+                                            const OrientationLocalFiniteElement& orientationLocalFiniteElement,
+                                            const std::vector<OrientationTargetSpace>& localOrientationConfiguration,
+                                            std::vector<typename DeformationTargetSpace::TangentVector>& localDeformationGradient,
+                                            std::vector<typename OrientationTargetSpace::TangentVector>& localOrientationGradient)
+    {
+      DUNE_THROW(Dune::NotImplemented, "!");
+    }
+
+    /** \brief Compute the energy at the current configuration */
+    virtual RT energy (const Entity& e,
+                       const DeformationLocalFiniteElement& deformationLocalFiniteElement,
+                       const std::vector<DeformationTargetSpace>& localDeformationConfiguration,
+                       const OrientationLocalFiniteElement& orientationLocalFiniteElement,
+                       const std::vector<OrientationTargetSpace>& localOrientationConfiguration) const = 0;
+#if 0
+    /** \brief Assemble the element gradient of the energy functional
+
+    The default implementation in this class uses a finite difference approximation */
+    virtual void assembleGradient(const Entity& element,
+                                  const LocalFiniteElement& localFiniteElement,
+                                  const std::vector<TargetSpace>& solution,
+                                  std::vector<typename TargetSpace::TangentVector>& gradient) const;
+    {
+      DUNE_THROW(Dune::NotImplemented, "!");
+    }
+#endif
+    // assembled data
+    Dune::Matrix<Dune::FieldMatrix<RT, deformationBlocksize, deformationBlocksize> > A00_;
+    Dune::Matrix<Dune::FieldMatrix<RT, deformationBlocksize, orientationBlocksize> > A01_;
+    Dune::Matrix<Dune::FieldMatrix<RT, orientationBlocksize, deformationBlocksize> > A10_;
+    Dune::Matrix<Dune::FieldMatrix<RT, orientationBlocksize, orientationBlocksize> > A11_;
+
+};
+
+#endif
+
diff --git a/dune/gfe/mixedlocalgfeadolcstiffness.hh b/dune/gfe/mixedlocalgfeadolcstiffness.hh
new file mode 100644
index 00000000..f6056774
--- /dev/null
+++ b/dune/gfe/mixedlocalgfeadolcstiffness.hh
@@ -0,0 +1,523 @@
+#ifndef DUNE_GFE_MIXEDLOCALGFEADOLCSTIFFNESS_HH
+#define DUNE_GFE_MIXEDLOCALGFEADOLCSTIFFNESS_HH
+
+#include <adolc/adouble.h>            // use of active doubles
+#include <adolc/drivers/drivers.h>    // use of "Easy to Use" drivers
+// gradient(.) and hessian(.)
+#include <adolc/interfaces.h>    // use of "Easy to Use" drivers
+#include <adolc/taping.h>             // use of taping
+
+#include <dune/gfe/adolcnamespaceinjections.hh>
+
+#include <dune/common/fmatrix.hh>
+#include <dune/istl/matrix.hh>
+
+#include <dune/gfe/mixedlocalgeodesicfestiffness.hh>
+
+#define ADOLC_VECTOR_MODE
+
+/** \brief Assembles energy gradient and Hessian with ADOL-C (automatic differentiation)
+ */
+template<class GridView,
+         class LocalFiniteElement0, class TargetSpace0,
+         class LocalFiniteElement1, class TargetSpace1>
+class MixedLocalGFEADOLCStiffness
+    : public MixedLocalGeodesicFEStiffness<GridView,
+                                           LocalFiniteElement0,TargetSpace0,
+                                           LocalFiniteElement1,TargetSpace1>
+{
+    // grid types
+    typedef typename GridView::Grid::ctype DT;
+    typedef typename TargetSpace0::ctype RT;
+    typedef typename GridView::template Codim<0>::Entity Entity;
+
+    // The 'active' target spaces, i.e., the number type is replaced by adouble
+    typedef typename TargetSpace0::template rebind<adouble>::other ATargetSpace0;
+    typedef typename TargetSpace1::template rebind<adouble>::other ATargetSpace1;
+
+    // some other sizes
+    enum {gridDim=GridView::dimension};
+
+public:
+
+    //! Dimension of a tangent space
+    enum { blocksize0 = TargetSpace0::TangentVector::dimension };
+    enum { blocksize1 = TargetSpace1::TangentVector::dimension };
+
+    //! Dimension of the embedding space
+    enum { embeddedBlocksize0 = TargetSpace0::EmbeddedTangentVector::dimension };
+    enum { embeddedBlocksize1 = TargetSpace1::EmbeddedTangentVector::dimension };
+
+    MixedLocalGFEADOLCStiffness(const MixedLocalGeodesicFEStiffness<GridView,
+                                                               LocalFiniteElement0, ATargetSpace0,
+                                                               LocalFiniteElement1, ATargetSpace1>* energy)
+    : localEnergy_(energy)
+    {}
+
+    /** \brief Compute the energy at the current configuration */
+    virtual RT energy (const Entity& e,
+                       const LocalFiniteElement0& localFiniteElement0,
+                       const std::vector<TargetSpace0>& localConfiguration0,
+                       const LocalFiniteElement1& localFiniteElement1,
+                       const std::vector<TargetSpace1>& localConfiguration1) const;
+#if 0
+    /** \brief Assemble the element gradient of the energy functional
+
+    This uses the automatic differentiation toolbox ADOL_C.
+    */
+    virtual void assembleGradient(const Entity& element,
+                          const LocalFiniteElement& localFiniteElement,
+                          const std::vector<TargetSpace>& solution,
+                          std::vector<typename TargetSpace::TangentVector>& gradient) const;
+#endif
+    /** \brief Assemble the local stiffness matrix at the current position
+
+    This uses the automatic differentiation toolbox ADOL_C.
+    */
+    virtual void assembleGradientAndHessian(const Entity& e,
+                                            const LocalFiniteElement0& localFiniteElement0,
+                                            const std::vector<TargetSpace0>& localConfiguration0,
+                                            const LocalFiniteElement1& localFiniteElement1,
+                                            const std::vector<TargetSpace1>& localConfiguration1,
+                                            std::vector<typename TargetSpace0::TangentVector>& localGradient0,
+                                            std::vector<typename TargetSpace1::TangentVector>& localGradient1);
+
+    const MixedLocalGeodesicFEStiffness<GridView, LocalFiniteElement0, ATargetSpace0, LocalFiniteElement1, ATargetSpace1>* localEnergy_;
+
+};
+
+
+template <class GridView, class LocalFiniteElement0, class TargetSpace0, class LocalFiniteElement1, class TargetSpace1>
+typename MixedLocalGFEADOLCStiffness<GridView, LocalFiniteElement0, TargetSpace0, LocalFiniteElement1, TargetSpace1>::RT
+MixedLocalGFEADOLCStiffness<GridView, LocalFiniteElement0, TargetSpace0, LocalFiniteElement1, TargetSpace1>::
+energy(const Entity& element,
+       const LocalFiniteElement0& localFiniteElement0,
+       const std::vector<TargetSpace0>& localConfiguration0,
+       const LocalFiniteElement1& localFiniteElement1,
+       const std::vector<TargetSpace1>& localConfiguration1) const
+{
+    double pureEnergy;
+
+    std::vector<ATargetSpace0> localAConfiguration0(localConfiguration0.size());
+    std::vector<ATargetSpace1> localAConfiguration1(localConfiguration1.size());
+
+    trace_on(1);
+
+    adouble energy = 0;
+
+    // The following loop is not quite intuitive: we copy the localSolution into an
+    // array of FieldVector<double>, go from there to FieldVector<adouble> and
+    // only then to ATargetSpace.
+    // Rationale: The constructor/assignment-from-vector of TargetSpace frequently
+    // contains a projection onto the manifold from the surrounding Euclidean space.
+    // ADOL-C needs a function on the whole Euclidean space, hence that projection
+    // is part of the function and needs to be taped.
+
+    // The following variable cannot be declared inside of the loop, or ADOL-C will report wrong results
+    // (Presumably because several independent variables use the same memory location.)
+    std::vector<typename ATargetSpace0::CoordinateType> aRaw0(localConfiguration0.size());
+    for (size_t i=0; i<localConfiguration0.size(); i++) {
+      typename TargetSpace0::CoordinateType raw = localConfiguration0[i].globalCoordinates();
+      for (size_t j=0; j<raw.size(); j++)
+        aRaw0[i][j] <<= raw[j];
+      localAConfiguration0[i] = aRaw0[i];  // may contain a projection onto M -- needs to be done in adouble
+    }
+
+    std::vector<typename ATargetSpace1::CoordinateType> aRaw1(localConfiguration1.size());
+    for (size_t i=0; i<localConfiguration1.size(); i++) {
+      typename TargetSpace1::CoordinateType raw = localConfiguration1[i].globalCoordinates();
+      for (size_t j=0; j<raw.size(); j++)
+        aRaw1[i][j] <<= raw[j];
+      localAConfiguration1[i] = aRaw1[i];  // may contain a projection onto M -- needs to be done in adouble
+    }
+
+    energy = localEnergy_->energy(element,
+                                  localFiniteElement0,localAConfiguration0,
+                                  localFiniteElement1,localAConfiguration1);
+
+    energy >>= pureEnergy;
+
+    trace_off();
+#if 0
+    size_t tape_stats[STAT_SIZE];
+    tapestats(1,tape_stats);             // reading of tape statistics
+    cout<<"maxlive "<<tape_stats[NUM_MAX_LIVES]<<"\n";
+    cout<<"tay_stack_size "<<tape_stats[TAY_STACK_SIZE]<<"\n";
+    cout<<"total number of operations "<<tape_stats[NUM_OPERATIONS]<<"\n";
+    // ..... print other tape stats
+#endif
+    return pureEnergy;
+}
+
+#if 0
+template <class GridView, class LocalFiniteElement, class TargetSpace>
+void LocalGeodesicFEADOLCStiffness<GridView, LocalFiniteElement, TargetSpace>::
+assembleGradient(const Entity& element,
+                 const LocalFiniteElement& localFiniteElement,
+                 const std::vector<TargetSpace>& localSolution,
+                 std::vector<typename TargetSpace::TangentVector>& localGradient) const
+{
+    // Tape energy computation.  We may not have to do this every time, but it's comparatively cheap.
+    energy(element, localFiniteElement, localSolution);
+
+    // Compute the actual gradient
+    size_t nDofs = localSolution.size();
+    size_t nDoubles = nDofs*embeddedBlocksize;
+    std::vector<double> xp(nDoubles);
+    int idx=0;
+    for (size_t i=0; i<nDofs; i++)
+        for (size_t j=0; j<embeddedBlocksize; j++)
+            xp[idx++] = localSolution[i].globalCoordinates()[j];
+
+  // Compute gradient
+    std::vector<double> g(nDoubles);
+    gradient(1,nDoubles,xp.data(),g.data());                  // gradient evaluation
+
+    // Copy into Dune type
+    std::vector<typename TargetSpace::EmbeddedTangentVector> localEmbeddedGradient(localSolution.size());
+
+    idx=0;
+    for (size_t i=0; i<nDofs; i++)
+        for (size_t j=0; j<embeddedBlocksize; j++)
+            localEmbeddedGradient[i][j] = g[idx++];
+
+//     std::cout << "localEmbeddedGradient:\n";
+//     for (size_t i=0; i<nDofs; i++)
+//       std::cout << localEmbeddedGradient[i] << std::endl;
+
+    // Express gradient in local coordinate system
+    for (size_t i=0; i<nDofs; i++) {
+        Dune::FieldMatrix<RT,blocksize,embeddedBlocksize> orthonormalFrame = localSolution[i].orthonormalFrame();
+        orthonormalFrame.mv(localEmbeddedGradient[i],localGradient[i]);
+    }
+}
+#endif
+
+// ///////////////////////////////////////////////////////////
+//   Compute gradient and Hessian together
+//   To compute the Hessian we need to compute the gradient anyway, so we may
+//   as well return it.  This saves assembly time.
+// ///////////////////////////////////////////////////////////
+template <class GridType, class LocalFiniteElement0, class TargetSpace0, class LocalFiniteElement1, class TargetSpace1>
+void MixedLocalGFEADOLCStiffness<GridType, LocalFiniteElement0, TargetSpace0, LocalFiniteElement1, TargetSpace1>::
+assembleGradientAndHessian(const Entity& element,
+                           const LocalFiniteElement0& localFiniteElement0,
+                           const std::vector<TargetSpace0>& localConfiguration0,
+                           const LocalFiniteElement1& localFiniteElement1,
+                           const std::vector<TargetSpace1>& localConfiguration1,
+                           std::vector<typename TargetSpace0::TangentVector>& localGradient0,
+                           std::vector<typename TargetSpace1::TangentVector>& localGradient1)
+{
+    // Tape energy computation.  We may not have to do this every time, but it's comparatively cheap.
+    energy(element, localFiniteElement0, localConfiguration0, localFiniteElement1, localConfiguration1);
+
+    /////////////////////////////////////////////////////////////////
+    // Compute the gradient.  It is needed to transform the Hessian
+    // into the correct coordinates.
+    /////////////////////////////////////////////////////////////////
+
+    // Compute the actual gradient
+    size_t nDofs0 = localConfiguration0.size();
+    size_t nDofs1 = localConfiguration1.size();
+    size_t nDoubles = nDofs0*embeddedBlocksize0 + nDofs1*embeddedBlocksize1;
+
+    std::vector<double> xp(nDoubles);
+    int idx=0;
+    for (size_t i=0; i<localConfiguration0.size(); i++)
+        for (size_t j=0; j<embeddedBlocksize0; j++)
+            xp[idx++] = localConfiguration0[i].globalCoordinates()[j];
+
+    for (size_t i=0; i<localConfiguration1.size(); i++)
+        for (size_t j=0; j<embeddedBlocksize1; j++)
+            xp[idx++] = localConfiguration1[i].globalCoordinates()[j];
+
+  // Compute gradient
+    std::vector<double> g(nDoubles);
+    gradient(1,nDoubles,xp.data(),g.data());                  // gradient evaluation
+
+    // Copy into Dune type
+    std::vector<typename TargetSpace0::EmbeddedTangentVector> localEmbeddedGradient0(localConfiguration0.size());
+    std::vector<typename TargetSpace1::EmbeddedTangentVector> localEmbeddedGradient1(localConfiguration1.size());
+
+    idx=0;
+    for (size_t i=0; i<localConfiguration0.size(); i++) {
+        for (size_t j=0; j<embeddedBlocksize0; j++)
+            localEmbeddedGradient0[i][j] = g[idx++];
+
+        // Express gradient in local coordinate system
+        localConfiguration0[i].orthonormalFrame().mv(localEmbeddedGradient0[i],localGradient0[i]);
+    }
+
+    for (size_t i=0; i<localConfiguration1.size(); i++) {
+        for (size_t j=0; j<embeddedBlocksize1; j++)
+            localEmbeddedGradient1[i][j] = g[idx++];
+
+        // Express gradient in local coordinate system
+        localConfiguration1[i].orthonormalFrame().mv(localEmbeddedGradient1[i],localGradient1[i]);
+    }
+
+    /////////////////////////////////////////////////////////////////
+    // Compute Hessian
+    /////////////////////////////////////////////////////////////////
+
+    // We compute the Hessian of the energy functional using the ADOL-C system.
+    // Since ADOL-C does not know about nonlinear spaces, what we get is actually
+    // the Hessian of a prolongation of the energy functional into the surrounding
+    // Euclidean space.  To obtain the Riemannian Hessian from this we apply the
+    // formula described in Absil, Mahoney, Trumpf, "An extrinsic look at the Riemannian Hessian".
+    // This formula consists of two steps:
+    // 1) Remove all entries of the Hessian pertaining to the normal space of the
+    //    manifold.  In the aforementioned paper this is done by projection onto the
+    //    tangent space.  Since we want a matrix that is really smaller (but full rank again),
+    //    we can achieve the same effect by multiplying the embedded Hessian from the left
+    //    and from the right by the matrix of orthonormal frames.
+    // 2) Add a correction involving the Weingarten map.
+    //
+    // This works, and is easy to implement using the ADOL-C "hessian" driver.
+    // However, here we implement a small shortcut.  Computing the embedded Hessian and
+    // multiplying one side by the orthonormal frame is the same as evaluating the Hessian
+    // (seen as an operator from R^n to R^n) in the directions of the vectors of the
+    // orthonormal frame.  By luck, ADOL-C can compute the evaluations of the Hessian in
+    // a given direction directly (in fact, this is also how the "hessian" driver works).
+    // Since there are less frame vectors than the dimension of the embedding space,
+    // this reinterpretation allows to reduce the number of calls to ADOL-C.
+    // In my Cosserat shell tests this reduced assembly time by about 10%.
+
+    std::vector<Dune::FieldMatrix<RT,blocksize0,embeddedBlocksize0> > orthonormalFrame0(nDofs0);
+
+    for (size_t i=0; i<nDofs0; i++)
+        orthonormalFrame0[i] = localConfiguration0[i].orthonormalFrame();
+
+    std::vector<Dune::FieldMatrix<RT,blocksize1,embeddedBlocksize1> > orthonormalFrame1(nDofs1);
+
+    for (size_t i=0; i<nDofs1; i++)
+        orthonormalFrame1[i] = localConfiguration1[i].orthonormalFrame();
+
+    Dune::Matrix<Dune::FieldMatrix<double,blocksize0, embeddedBlocksize0> > embeddedHessian00(nDofs0,nDofs0);
+    Dune::Matrix<Dune::FieldMatrix<double,blocksize0, embeddedBlocksize1> > embeddedHessian01(nDofs0,nDofs1);
+    Dune::Matrix<Dune::FieldMatrix<double,blocksize1, embeddedBlocksize0> > embeddedHessian10(nDofs1,nDofs0);
+    Dune::Matrix<Dune::FieldMatrix<double,blocksize1, embeddedBlocksize1> > embeddedHessian11(nDofs1,nDofs1);
+
+#ifndef ADOLC_VECTOR_MODE
+#error ADOL-C scalar mode not implemented
+#if 0
+    std::vector<double> v(nDoubles);
+    std::vector<double> w(nDoubles);
+
+    std::fill(v.begin(), v.end(), 0.0);
+
+    for (int i=0; i<nDofs; i++)
+      for (int ii=0; ii<blocksize; ii++)
+      {
+        // Evaluate Hessian in the direction of each vector of the orthonormal frame
+        for (size_t k=0; k<embeddedBlocksize; k++)
+          v[i*embeddedBlocksize + k] = orthonormalFrame[i][ii][k];
+
+        int rc= 3;
+        MINDEC(rc, hess_vec(1, nDoubles, xp.data(), v.data(), w.data()));
+        if (rc < 0)
+          DUNE_THROW(Dune::Exception, "ADOL-C has returned with error code " << rc << "!");
+
+        for (int j=0; j<nDoubles; j++)
+          embeddedHessian[i][j/embeddedBlocksize][ii][j%embeddedBlocksize] = w[j];
+
+        // Make v the null vector again
+        std::fill(&v[i*embeddedBlocksize], &v[(i+1)*embeddedBlocksize], 0.0);
+      }
+#endif
+#else
+    int n = nDoubles;
+    int nDirections = nDofs0 * blocksize0 + nDofs1 * blocksize1;
+    double* tangent[nDoubles];
+    for(size_t i=0; i<nDoubles; i++)
+        tangent[i] = (double*)malloc(nDirections*sizeof(double));
+
+    double* rawHessian[nDoubles];
+    for(size_t i=0; i<nDoubles; i++)
+        rawHessian[i] = (double*)malloc(nDirections*sizeof(double));
+
+    // Initialize directions field with zeros
+    for (int j=0; j<nDirections; j++)
+      for (int i=0; i<n; i++)
+        tangent[i][j] = 0.0;
+
+    for (int j=0; j<nDofs0*blocksize0; j++)
+      for (int i=0; i<embeddedBlocksize0; i++)
+        tangent[(j/blocksize0)*embeddedBlocksize0+i][j] = orthonormalFrame0[j/blocksize0][j%blocksize0][i];
+
+    for (int j=0; j<nDofs1*blocksize1; j++)
+      for (int i=0; i<embeddedBlocksize1; i++)
+        tangent[nDofs0*embeddedBlocksize0 + (j/blocksize1)*embeddedBlocksize1+i][nDofs0*blocksize0 + j] = orthonormalFrame1[j/blocksize1][j%blocksize1][i];
+
+    hess_mat(1,nDoubles,nDirections,xp.data(),tangent,rawHessian);
+
+    // Copy Hessian into Dune data type
+    size_t offset0 = nDofs0*embeddedBlocksize0;
+    size_t offset1 = nDofs0*blocksize0;
+
+    // upper left block
+    for(size_t i=0; i<nDofs0*embeddedBlocksize0; i++)
+      for (size_t j=0; j<nDofs0*blocksize0; j++)
+        embeddedHessian00[j/blocksize0][i/embeddedBlocksize0][j%blocksize0][i%embeddedBlocksize0] = rawHessian[i][j];
+
+    // upper right block
+    for(size_t i=0; i<nDofs1*embeddedBlocksize1; i++)
+      for (size_t j=0; j<nDofs0*blocksize0; j++)
+        embeddedHessian01[j/blocksize0][i/embeddedBlocksize1][j%blocksize0][i%embeddedBlocksize1] = rawHessian[offset0+i][j];
+
+    // lower left block
+    for(size_t i=0; i<nDofs0*embeddedBlocksize0; i++)
+      for (size_t j=0; j<nDofs1*blocksize1; j++)
+        embeddedHessian10[j/blocksize1][i/embeddedBlocksize0][j%blocksize1][i%embeddedBlocksize0] = rawHessian[i][offset1+j];
+
+    // lower right block
+    for(size_t i=0; i<nDofs1*embeddedBlocksize1; i++)
+      for (size_t j=0; j<nDofs1*blocksize1; j++)
+        embeddedHessian11[j/blocksize1][i/embeddedBlocksize1][j%blocksize1][i%embeddedBlocksize1] = rawHessian[offset0+i][offset1+j];
+
+    for(size_t i=0; i<nDoubles; i++) {
+        free(rawHessian[i]);
+        free(tangent[i]);
+    }
+#endif
+
+    // From this, compute the Hessian with respect to the manifold (which we assume here is embedded
+    // isometrically in a Euclidean space.
+    // For the detailed explanation of the following see: Absil, Mahoney, Trumpf, "An extrinsic look
+    // at the Riemannian Hessian".
+
+    this->A00_.setSize(nDofs0,nDofs0);
+
+    for (size_t col=0; col<nDofs0; col++) {
+
+        for (size_t subCol=0; subCol<blocksize0; subCol++) {
+
+            typename TargetSpace0::EmbeddedTangentVector z = orthonormalFrame0[col][subCol];
+
+            // P_x \partial^2 f z
+            for (size_t row=0; row<nDofs0; row++) {
+                typename TargetSpace0::TangentVector semiEmbeddedProduct;
+                embeddedHessian00[row][col].mv(z,semiEmbeddedProduct);
+
+                for (int subRow=0; subRow<blocksize0; subRow++)
+                    this->A00_[row][col][subRow][subCol] = semiEmbeddedProduct[subRow];
+            }
+
+        }
+
+    }
+
+    this->A01_.setSize(nDofs0,nDofs1);
+
+    for (size_t col=0; col<nDofs1; col++) {
+
+        for (size_t subCol=0; subCol<blocksize1; subCol++) {
+
+            typename TargetSpace0::EmbeddedTangentVector z = orthonormalFrame0[col][subCol];
+
+            // P_x \partial^2 f z
+            for (size_t row=0; row<nDofs0; row++) {
+                typename TargetSpace0::TangentVector semiEmbeddedProduct;
+                embeddedHessian01[row][col].mv(z,semiEmbeddedProduct);
+
+                for (int subRow=0; subRow<blocksize0; subRow++)
+                    this->A01_[row][col][subRow][subCol] = semiEmbeddedProduct[subRow];
+            }
+
+        }
+
+    }
+
+    this->A10_.setSize(nDofs1,nDofs0);
+
+    for (size_t col=0; col<nDofs0; col++) {
+
+        for (size_t subCol=0; subCol<blocksize0; subCol++) {
+
+            typename TargetSpace1::EmbeddedTangentVector z = orthonormalFrame1[col][subCol];
+
+            // P_x \partial^2 f z
+            for (size_t row=0; row<nDofs1; row++) {
+                typename TargetSpace1::TangentVector semiEmbeddedProduct;
+                embeddedHessian10[row][col].mv(z,semiEmbeddedProduct);
+
+                for (int subRow=0; subRow<blocksize1; subRow++)
+                    this->A10_[row][col][subRow][subCol] = semiEmbeddedProduct[subRow];
+            }
+
+        }
+
+    }
+
+    this->A11_.setSize(nDofs1,nDofs1);
+
+    for (size_t col=0; col<nDofs1; col++) {
+
+        for (size_t subCol=0; subCol<blocksize1; subCol++) {
+
+            typename TargetSpace1::EmbeddedTangentVector z = orthonormalFrame1[col][subCol];
+
+            // P_x \partial^2 f z
+            for (size_t row=0; row<nDofs1; row++) {
+                typename TargetSpace1::TangentVector semiEmbeddedProduct;
+                embeddedHessian11[row][col].mv(z,semiEmbeddedProduct);
+
+                for (int subRow=0; subRow<blocksize1; subRow++)
+                    this->A11_[row][col][subRow][subCol] = semiEmbeddedProduct[subRow];
+            }
+
+        }
+
+    }
+
+    //////////////////////////////////////////////////////////////////////////////////////
+    //  Further correction due to non-planar configuration space
+    //  + \mathfrak{A}_x(z,P^\orth_x \partial f)
+    //////////////////////////////////////////////////////////////////////////////////////
+
+    // Project embedded gradient onto normal space
+    std::vector<typename TargetSpace0::EmbeddedTangentVector> projectedGradient0(nDofs0);
+    for (size_t i=0; i<nDofs0; i++)
+      projectedGradient0[i] = localConfiguration0[i].projectOntoNormalSpace(localEmbeddedGradient0[i]);
+
+    std::vector<typename TargetSpace1::EmbeddedTangentVector> projectedGradient1(nDofs1);
+    for (size_t i=0; i<nDofs1; i++)
+      projectedGradient1[i] = localConfiguration1[i].projectOntoNormalSpace(localEmbeddedGradient1[i]);
+
+    // The Weingarten map has only diagonal entries
+    for (size_t row=0; row<nDofs0; row++) {
+
+      for (size_t subRow=0; subRow<blocksize0; subRow++) {
+
+        typename TargetSpace0::EmbeddedTangentVector z = orthonormalFrame0[row][subRow];
+        typename TargetSpace0::EmbeddedTangentVector tmp1 = localConfiguration0[row].weingarten(z,projectedGradient0[row]);
+
+        typename TargetSpace0::TangentVector tmp2;
+        orthonormalFrame0[row].mv(tmp1,tmp2);
+
+        this->A00_[row][row][subRow] += tmp2;
+      }
+
+    }
+
+    for (size_t row=0; row<nDofs1; row++) {
+
+      for (size_t subRow=0; subRow<blocksize1; subRow++) {
+
+        typename TargetSpace1::EmbeddedTangentVector z = orthonormalFrame1[row][subRow];
+        typename TargetSpace1::EmbeddedTangentVector tmp1 = localConfiguration1[row].weingarten(z,projectedGradient1[row]);
+
+        typename TargetSpace1::TangentVector tmp2;
+        orthonormalFrame1[row].mv(tmp1,tmp2);
+
+        this->A11_[row][row][subRow] += tmp2;
+      }
+
+    }
+
+//     std::cout << "ADOL-C stiffness:\n";
+//     printmatrix(std::cout, this->A_, "foo", "--");
+}
+
+#endif
+
diff --git a/dune/gfe/mixedriemanniantrsolver.cc b/dune/gfe/mixedriemanniantrsolver.cc
new file mode 100644
index 00000000..7a86d524
--- /dev/null
+++ b/dune/gfe/mixedriemanniantrsolver.cc
@@ -0,0 +1,595 @@
+#include "omp.h"
+
+#include <dune/common/bitsetvector.hh>
+#include <dune/common/timer.hh>
+#include <dune/common/parallel/mpihelper.hh>
+
+#include <dune/istl/io.hh>
+
+#include <dune/fufem/functionspacebases/p1nodalbasis.hh>
+#include <dune/fufem/assemblers/operatorassembler.hh>
+#include <dune/fufem/assemblers/localassemblers/laplaceassembler.hh>
+#include <dune/fufem/assemblers/localassemblers/massassembler.hh>
+
+// Using a monotone multigrid as the inner solver
+#include <dune/solvers/iterationsteps/trustregiongsstep.hh>
+#include <dune/solvers/iterationsteps/mmgstep.hh>
+#include <dune/solvers/transferoperators/truncatedcompressedmgtransfer.hh>
+#if defined THIRD_ORDER || defined SECOND_ORDER
+#include <dune/gfe/pktop1mgtransfer.hh>
+#endif
+#include <dune/solvers/transferoperators/mandelobsrestrictor.hh>
+#include <dune/solvers/solvers/iterativesolver.hh>
+#include "maxnormtrustregion.hh"
+
+#include <dune/solvers/norms/twonorm.hh>
+#include <dune/solvers/norms/h1seminorm.hh>
+
+#include <dune/gfe/parallel/matrixcommunicator.hh>
+#include <dune/gfe/parallel/vectorcommunicator.hh>
+
+#include <dune/gfe/cosseratvtkwriter.hh>
+
+template <class GridType,
+          class Basis0, class TargetSpace0,
+          class Basis1, class TargetSpace1>
+void MixedRiemannianTrustRegionSolver<GridType,Basis0,TargetSpace0,Basis1,TargetSpace1>::
+setup(const GridType& grid,
+      const MixedGFEAssembler<Basis0, TargetSpace0, Basis1, TargetSpace1>* assembler,
+         const SolutionType0& x0,
+         const SolutionType1& x1,
+         const Dune::BitSetVector<blocksize0>& dirichletNodes0,
+         const Dune::BitSetVector<blocksize1>& dirichletNodes1,
+         double tolerance,
+         int maxTrustRegionSteps,
+         double initialTrustRegionRadius,
+         int multigridIterations,
+         double mgTolerance,
+         int mu,
+         int nu1,
+         int nu2,
+         int baseIterations,
+         double baseTolerance,
+         bool instrumented)
+{
+    int rank = grid.comm().rank();
+
+    grid_                     = &grid;
+    assembler_                = assembler;
+    x0_                       = x0;
+    x1_                       = x1;
+    tolerance_                = tolerance;
+    maxTrustRegionSteps_      = maxTrustRegionSteps;
+    initialTrustRegionRadius_ = initialTrustRegionRadius;
+    innerIterations_          = multigridIterations;
+    innerTolerance_           = mgTolerance;
+    ignoreNodes0_             = &dirichletNodes0;
+    ignoreNodes1_             = &dirichletNodes1;
+
+    int numLevels = grid_->maxLevel()+1;
+
+    //////////////////////////////////////////////////////////////////
+    //  Create global numbering for matrix and vector transfer
+    //////////////////////////////////////////////////////////////////
+#if 0
+    guIndex_ = std::unique_ptr<GUIndex>(new GUIndex(grid_->leafGridView()));
+#endif
+    // ////////////////////////////////
+    //   Create a multigrid solver
+    // ////////////////////////////////
+
+    // First create a Gauss-seidel base solver
+    TrustRegionGSStep<MatrixType00, CorrectionType0>* baseSolverStep0 = new TrustRegionGSStep<MatrixType00, CorrectionType0>;
+    TrustRegionGSStep<MatrixType11, CorrectionType1>* baseSolverStep1 = new TrustRegionGSStep<MatrixType11, CorrectionType1>;
+
+    // Hack: the two-norm may not scale all that well, but it is fast!
+    TwoNorm<CorrectionType0>* baseNorm0 = new TwoNorm<CorrectionType0>;
+    TwoNorm<CorrectionType1>* baseNorm1 = new TwoNorm<CorrectionType1>;
+
+    ::LoopSolver<CorrectionType0>* baseSolver0 = new ::LoopSolver<CorrectionType0>(baseSolverStep0,
+                                                                                   baseIterations,
+                                                                                   baseTolerance,
+                                                                                   baseNorm0,
+                                                                                   Solver::QUIET);
+
+    ::LoopSolver<CorrectionType0>* baseSolver1 = new ::LoopSolver<CorrectionType1>(baseSolverStep1,
+                                                                                   baseIterations,
+                                                                                   baseTolerance,
+                                                                                   baseNorm1,
+                                                                                   Solver::QUIET);
+
+    // Transfer all Dirichlet data to the master processor
+#if 0
+    VectorCommunicator<GUIndex, Dune::BitSetVector<blocksize> > vectorComm(*guIndex_, 0);
+    Dune::BitSetVector<blocksize>* globalDirichletNodes = NULL;
+    globalDirichletNodes = new Dune::BitSetVector<blocksize>(vectorComm.reduceCopy(dirichletNodes));
+#endif
+    Dune::BitSetVector<blocksize0>* globalDirichletNodes0 = NULL;
+    globalDirichletNodes0 = new Dune::BitSetVector<blocksize0>(dirichletNodes0);
+    Dune::BitSetVector<blocksize1>* globalDirichletNodes1 = NULL;
+    globalDirichletNodes1 = new Dune::BitSetVector<blocksize1>(dirichletNodes1);
+
+
+    // Make pre and postsmoothers
+    TrustRegionGSStep<MatrixType00, CorrectionType0>* presmoother0  = new TrustRegionGSStep<MatrixType00, CorrectionType0>;
+    TrustRegionGSStep<MatrixType00, CorrectionType0>* postsmoother0 = new TrustRegionGSStep<MatrixType00, CorrectionType0>;
+
+    mmgStep0 = new MonotoneMGStep<MatrixType00, CorrectionType0>;
+
+    mmgStep0->setMGType(mu, nu1, nu2);
+    mmgStep0->ignoreNodes_ = globalDirichletNodes0;
+    mmgStep0->basesolver_        = baseSolver0;
+    mmgStep0->setSmoother(presmoother0, postsmoother0);
+    mmgStep0->obstacleRestrictor_= new MandelObstacleRestrictor<CorrectionType0>();
+    mmgStep0->verbosity_         = Solver::FULL;
+
+    TrustRegionGSStep<MatrixType11, CorrectionType1>* presmoother1  = new TrustRegionGSStep<MatrixType11, CorrectionType1>;
+    TrustRegionGSStep<MatrixType11, CorrectionType1>* postsmoother1 = new TrustRegionGSStep<MatrixType11, CorrectionType1>;
+
+    mmgStep1 = new MonotoneMGStep<MatrixType11, CorrectionType1>;
+
+    mmgStep1->setMGType(mu, nu1, nu2);
+    mmgStep1->ignoreNodes_ = globalDirichletNodes1;
+    mmgStep1->basesolver_        = baseSolver1;
+    mmgStep1->setSmoother(presmoother1, postsmoother1);
+    mmgStep1->obstacleRestrictor_= new MandelObstacleRestrictor<CorrectionType1>();
+    mmgStep1->verbosity_         = Solver::FULL;
+
+#if 0
+    // //////////////////////////////////////////////////////////////////////////////////////
+    //   Assemble a Laplace matrix to create a norm that's equivalent to the H1-norm
+    // //////////////////////////////////////////////////////////////////////////////////////
+
+    BasisType basis(grid.leafGridView());
+    OperatorAssembler<BasisType,BasisType> operatorAssembler(basis, basis);
+
+    LaplaceAssembler<GridType, typename BasisType::LocalFiniteElement, typename BasisType::LocalFiniteElement> laplaceStiffness;
+    typedef Dune::BCRSMatrix<Dune::FieldMatrix<double,1,1> > ScalarMatrixType;
+    ScalarMatrixType localA;
+
+    operatorAssembler.assemble(laplaceStiffness, localA);
+
+    if (h1SemiNorm_)
+        delete h1SemiNorm_;
+
+
+    MatrixCommunicator<GUIndex, ScalarMatrixType> matrixComm(*guIndex_, 0);
+    ScalarMatrixType* A = new ScalarMatrixType(matrixComm.reduceAdd(localA));
+
+    h1SemiNorm_ = new H1SemiNorm<CorrectionType>(*A);
+
+    innerSolver_ = std::shared_ptr<LoopSolver<CorrectionType> >(new ::LoopSolver<CorrectionType>(mmgStep,
+                                                                                                   innerIterations_,
+                                                                                                   innerTolerance_,
+                                                                                                   h1SemiNorm_,
+                                                                                                 Solver::FULL));
+#endif
+
+    // ////////////////////////////////////////////////////////////
+    //    Create Hessian matrix and its occupation structure
+    // ////////////////////////////////////////////////////////////
+
+    // \todo Why are the hessianMatrix objects class members at all, and not local to 'solve'?
+
+    hessianMatrix00_ = std::auto_ptr<MatrixType00>(new MatrixType00);
+    hessianMatrix01_ = std::auto_ptr<MatrixType01>(new MatrixType01);
+    hessianMatrix10_ = std::auto_ptr<MatrixType10>(new MatrixType10);
+    hessianMatrix11_ = std::auto_ptr<MatrixType11>(new MatrixType11);
+
+    // ////////////////////////////////////
+    //   Create the transfer operators
+    // ////////////////////////////////////
+
+    for (size_t k=0; k<mmgStep0->mgTransfer_.size(); k++)
+        delete(mmgStep0->mgTransfer_[k]);
+
+    for (size_t k=0; k<mmgStep1->mgTransfer_.size(); k++)
+        delete(mmgStep1->mgTransfer_[k]);
+
+    mmgStep0->mgTransfer_.resize(numLevels-1);
+    mmgStep1->mgTransfer_.resize(numLevels-1);
+
+    if (assembler->basis0_.getLocalFiniteElement(*grid.leafGridView().template begin<0>()).localBasis().order() > 1)
+    {
+      if (numLevels>1) {
+        P1NodalBasis<typename GridType::LeafGridView,double> p1Basis(grid_->leafGridView());
+
+        PKtoP1MGTransfer<CorrectionType0>* topTransferOp0 = new PKtoP1MGTransfer<CorrectionType0>;
+        topTransferOp0->setup(assembler->basis0_,p1Basis);
+        #if 1
+        mmgStep0->mgTransfer_.back() = topTransferOp0;
+        #else
+        // If we are on more than 1 processors, join all local transfer matrices on rank 0,
+        // and construct a single global transfer operator there.
+        typedef GlobalUniqueIndex<typename GridType::LeafGridView, gridDim> LeafP1GUIndex;
+        LeafP1GUIndex p1Index(grid_->leafGridView());
+
+        typedef typename TruncatedCompressedMGTransfer<CorrectionType>::TransferOperatorType TransferOperatorType;
+        MatrixCommunicator<GUIndex, TransferOperatorType, LeafP1GUIndex> matrixComm(*guIndex_, p1Index, 0);
+
+        mmgStep->mgTransfer_.back() = new PKtoP1MGTransfer<CorrectionType>
+        (Dune::make_shared<TransferOperatorType>(matrixComm.reduceCopy(topTransferOp->getMatrix())));
+        #endif
+        for (int i=0; i<mmgStep0->mgTransfer_.size()-1; i++){
+          // Construct the local multigrid transfer matrix
+          TruncatedCompressedMGTransfer<CorrectionType0>* newTransferOp0 = new TruncatedCompressedMGTransfer<CorrectionType0>;
+          newTransferOp0->setup(*grid_,i+1,i+2);
+
+          #if 1
+          mmgStep0->mgTransfer_[i] = newTransferOp0;
+          #else
+          // If we are on more than 1 processors, join all local transfer matrices on rank 0,
+          // and construct a single global transfer operator there.
+          typedef GlobalUniqueIndex<typename GridType::LevelGridView, gridDim> LevelGUIndex;
+          LevelGUIndex fineGUIndex(grid_->levelGridView(i+2));
+          LevelGUIndex coarseGUIndex(grid_->levelGridView(i+1));
+
+          typedef typename TruncatedCompressedMGTransfer<CorrectionType>::TransferOperatorType TransferOperatorType;
+          MatrixCommunicator<LevelGUIndex, TransferOperatorType> matrixComm(fineGUIndex, coarseGUIndex, 0);
+
+          mmgStep->mgTransfer_[i] = new TruncatedCompressedMGTransfer<CorrectionType>
+          (Dune::make_shared<TransferOperatorType>(matrixComm.reduceCopy(newTransferOp->getMatrix())));
+          #endif
+        }
+
+      }
+
+    }
+    else  // order == 1
+    {
+
+      for (size_t i=0; i<mmgStep0->mgTransfer_.size(); i++){
+
+        // Construct the local multigrid transfer matrix
+        TruncatedCompressedMGTransfer<CorrectionType0>* newTransferOp0 = new TruncatedCompressedMGTransfer<CorrectionType0>;
+        newTransferOp0->setup(*grid_,i,i+1);
+
+        #if 1
+        mmgStep0->mgTransfer_[i] = newTransferOp0;
+        #else
+        // If we are on more than 1 processors, join all local transfer matrices on rank 0,
+        // and construct a single global transfer operator there.
+        typedef GlobalUniqueIndex<typename GridType::LevelGridView, gridDim> LevelGUIndex;
+        LevelGUIndex fineGUIndex(grid_->levelGridView(i+1));
+        LevelGUIndex coarseGUIndex(grid_->levelGridView(i));
+
+        typedef typename TruncatedCompressedMGTransfer<CorrectionType>::TransferOperatorType TransferOperatorType;
+        MatrixCommunicator<LevelGUIndex, TransferOperatorType> matrixComm(fineGUIndex, coarseGUIndex, 0);
+
+        mmgStep->mgTransfer_[i] = new TruncatedCompressedMGTransfer<CorrectionType>
+        (Dune::make_shared<TransferOperatorType>(matrixComm.reduceCopy(newTransferOp->getMatrix())));
+        #endif
+      }
+
+    }
+
+    if (assembler->basis1_.getLocalFiniteElement(*grid.leafGridView().template begin<0>()).localBasis().order() > 1)
+    {
+      if (numLevels>1) {
+        P1NodalBasis<typename GridType::LeafGridView,double> p1Basis(grid_->leafGridView());
+
+        PKtoP1MGTransfer<CorrectionType1>* topTransferOp1 = new PKtoP1MGTransfer<CorrectionType1>;
+        topTransferOp1->setup(assembler->basis1_,p1Basis);
+        mmgStep1->mgTransfer_.back() = topTransferOp1;
+
+        for (int i=0; i<mmgStep1->mgTransfer_.size()-1; i++){
+          // Construct the local multigrid transfer matrix
+          TruncatedCompressedMGTransfer<CorrectionType1>* newTransferOp1 = new TruncatedCompressedMGTransfer<CorrectionType1>;
+          newTransferOp1->setup(*grid_,i+1,i+2);
+          mmgStep1->mgTransfer_[i] = newTransferOp1;
+        }
+
+      }
+
+    }
+    else  // order == 1
+    {
+
+      for (size_t i=0; i<mmgStep1->mgTransfer_.size(); i++){
+
+        // Construct the local multigrid transfer matrix
+        TruncatedCompressedMGTransfer<CorrectionType1>* newTransferOp = new TruncatedCompressedMGTransfer<CorrectionType1>;
+        newTransferOp->setup(*grid_,i,i+1);
+        mmgStep1->mgTransfer_[i] = newTransferOp;
+      }
+    }
+
+    // //////////////////////////////////////////////////////////
+    //   Create obstacles
+    // //////////////////////////////////////////////////////////
+
+    if (rank==0)
+    {
+      #if 0
+      hasObstacle0_.resize(guIndex_->nGlobalEntity(), true);
+      #else
+      hasObstacle0_.resize(assembler->basis0_.size(), true);
+      hasObstacle1_.resize(assembler->basis1_.size(), true);
+      #endif
+      mmgStep0->hasObstacle_ = &hasObstacle0_;
+      mmgStep1->hasObstacle_ = &hasObstacle1_;
+    }
+
+  }
+
+
+template <class GridType,
+          class Basis0, class TargetSpace0,
+          class Basis1, class TargetSpace1>
+void MixedRiemannianTrustRegionSolver<GridType,Basis0,TargetSpace0,Basis1,TargetSpace1>::solve()
+{
+    int argc = 0;
+    char** argv;
+    Dune::MPIHelper& mpiHelper = Dune::MPIHelper::instance(argc,argv);
+    int rank = grid_->comm().rank();
+
+    MonotoneMGStep<MatrixType00,CorrectionType0>* mgStep0 = nullptr;
+
+    // if the inner solver is a monotone multigrid set up a max-norm trust-region
+    if (dynamic_cast<LoopSolver<CorrectionType0>*>(innerSolver_.get()))
+        mgStep0 = dynamic_cast<MonotoneMGStep<MatrixType00,CorrectionType0>*>(dynamic_cast<LoopSolver<CorrectionType0>*>(innerSolver_.get())->iterationStep_);
+
+    // \todo Use global index set instead of basis for parallel computations
+    MaxNormTrustRegion<blocksize0> trustRegion0(assembler_->basis0_.size(), initialTrustRegionRadius_);
+    MaxNormTrustRegion<blocksize1> trustRegion1(assembler_->basis1_.size(), initialTrustRegionRadius_);
+
+    std::vector<BoxConstraint<field_type,blocksize0> > trustRegionObstacles0;
+    std::vector<BoxConstraint<field_type,blocksize1> > trustRegionObstacles1;
+
+    // /////////////////////////////////////////////////////
+    //   Trust-Region Solver
+    // /////////////////////////////////////////////////////
+
+    double oldEnergy = assembler_->computeEnergy(x0_, x1_);
+    oldEnergy = mpiHelper.getCollectiveCommunication().sum(oldEnergy);
+
+    bool recomputeGradientHessian = true;
+    CorrectionType0 rhs0;
+    CorrectionType1 rhs1;
+    MatrixType00 stiffnessMatrix00;
+    MatrixType01 stiffnessMatrix01;
+    MatrixType10 stiffnessMatrix10;
+    MatrixType11 stiffnessMatrix11;
+    CorrectionType0 rhs_global0;
+    CorrectionType1 rhs_global1;
+#if 0
+    VectorCommunicator<GUIndex, CorrectionType> vectorComm(*guIndex_, 0);
+    MatrixCommunicator<GUIndex, MatrixType> matrixComm(*guIndex_, 0);
+#endif
+
+    for (int i=0; i<maxTrustRegionSteps_; i++) {
+
+        Dune::Timer totalTimer;
+        if (this->verbosity_ == Solver::FULL and rank==0) {
+            std::cout << "----------------------------------------------------" << std::endl;
+            std::cout << "      Trust-Region Step Number: " << i
+                      << ",     radius: " << trustRegion0.radius()
+                      << ",     energy: " << oldEnergy << std::endl;
+            std::cout << "----------------------------------------------------" << std::endl;
+        }
+
+        CorrectionType0 corr0(x0_.size());
+        corr0 = 0;
+        CorrectionType1 corr1(x1_.size());
+        corr1 = 0;
+
+        Dune::Timer assemblyTimer;
+
+        if (recomputeGradientHessian) {
+
+            assembler_->assembleGradientAndHessian(x0_,
+                                                   x1_,
+                                                   rhs0,
+                                                   rhs1,
+                                                   *hessianMatrix00_,
+                                                   *hessianMatrix01_,
+                                                   *hessianMatrix10_,
+                                                   *hessianMatrix11_,
+                                                   i==0    // assemble occupation pattern only for the first call
+                                                   );
+
+            rhs0 *= -1;        // The right hand side is the _negative_ gradient
+            rhs1 *= -1;
+
+            if (this->verbosity_ == Solver::FULL)
+              std::cout << "Assembly took " << assemblyTimer.elapsed() << " sec." << std::endl;
+
+            // Transfer matrix data
+#if 0
+            stiffnessMatrix00 = matrixComm.reduceAdd(*hessianMatrix00_);
+            stiffnessMatrix01 = matrixComm.reduceAdd(*hessianMatrix01_);
+            stiffnessMatrix10 = matrixComm.reduceAdd(*hessianMatrix10_);
+            stiffnessMatrix11 = matrixComm.reduceAdd(*hessianMatrix11_);
+#endif
+            stiffnessMatrix00 = *hessianMatrix00_;
+            stiffnessMatrix01 = *hessianMatrix01_;
+            stiffnessMatrix10 = *hessianMatrix10_;
+            stiffnessMatrix11 = *hessianMatrix11_;
+
+            // Transfer vector data
+#if 0
+            rhs_global0 = vectorComm.reduceAdd(rhs0);
+            rhs_global1 = vectorComm.reduceAdd(rhs1);
+#endif
+            rhs_global0 = rhs0;
+            rhs_global1 = rhs1;
+
+            recomputeGradientHessian = false;
+
+        }
+
+        CorrectionType0 corr_global0(rhs_global0.size());
+        corr_global0 = 0;
+        CorrectionType1 corr_global1(rhs_global1.size());
+        corr_global1 = 0;
+
+        if (rank==0)
+        {
+            ///////////////////////////////
+            //    Solve !
+            ///////////////////////////////
+
+            std::cout << "Solve quadratic problem..." << std::endl;
+            Dune::Timer solutionTimer;
+            for (int ii=0; ii<200; ii++)
+            {
+              std::cout << "Iteration " << ii << std::endl;
+              CorrectionType0 residual0 = rhs_global0;
+              CorrectionType1 residual1 = rhs_global1;
+
+              stiffnessMatrix01.mmv(corr_global1, residual0);
+              mmgStep0->setProblem(stiffnessMatrix00, corr_global0, residual0);
+              trustRegionObstacles0 = trustRegion0.obstacles();
+              mmgStep0->obstacles_ = &trustRegionObstacles0;
+
+              mmgStep0->preprocess();
+              mmgStep0->iterate();
+
+              stiffnessMatrix10.mmv(corr_global0, residual1);
+              mmgStep1->setProblem(stiffnessMatrix11, corr_global1, residual1);
+              trustRegionObstacles1 = trustRegion1.obstacles();
+              mmgStep1->obstacles_ = &trustRegionObstacles1;
+
+              mmgStep1->preprocess();
+              mmgStep1->iterate();
+
+            }
+
+            std::cout << "Solving the quadratic problem took " << solutionTimer.elapsed() << " seconds." << std::endl;
+
+            //std::cout << "Correction: " << std::endl << corr_global << std::endl;
+
+        }
+
+        // Distribute solution
+        if (mpiHelper.size()>1 and rank==0)
+            std::cout << "Transfer solution back to root process ..." << std::endl;
+
+        //corr = vectorComm.scatter(corr_global);
+        corr0 = corr_global0;
+        corr1 = corr_global1;
+
+        if (this->verbosity_ == NumProc::FULL)
+            std::cout << "Infinity norm of the correction: " << std::max(corr0.infinity_norm(), corr1.infinity_norm()) << std::endl;
+
+        if (corr_global0.infinity_norm() < tolerance_ and corr_global1.infinity_norm() < tolerance_) {
+            if (verbosity_ == NumProc::FULL and rank==0)
+                std::cout << "CORRECTION IS SMALL ENOUGH" << std::endl;
+
+            if (verbosity_ != NumProc::QUIET and rank==0)
+                std::cout << i+1 << " trust-region steps were taken." << std::endl;
+            break;
+        }
+
+        // ////////////////////////////////////////////////////
+        //   Check whether trust-region step can be accepted
+        // ////////////////////////////////////////////////////
+
+        SolutionType0 newIterate0 = x0_;
+        for (size_t j=0; j<newIterate0.size(); j++)
+            newIterate0[j] = TargetSpace0::exp(newIterate0[j], corr0[j]);
+
+        SolutionType1 newIterate1 = x1_;
+        for (size_t j=0; j<newIterate1.size(); j++)
+            newIterate1[j] = TargetSpace1::exp(newIterate1[j], corr1[j]);
+
+        double energy    = assembler_->computeEnergy(newIterate0,newIterate1);
+        energy = mpiHelper.getCollectiveCommunication().sum(energy);
+
+        // compute the model decrease
+        // It is $ m(x) - m(x+s) = -<g,s> - 0.5 <s, Hs>
+        // Note that rhs = -g
+        CorrectionType0 tmp0(corr0.size());
+        tmp0 = 0;
+        hessianMatrix00_->umv(corr0, tmp0);
+        hessianMatrix01_->umv(corr1, tmp0);
+
+        CorrectionType1 tmp1(corr1.size());
+        tmp1 = 0;
+        hessianMatrix10_->umv(corr0, tmp1);
+        hessianMatrix11_->umv(corr1, tmp1);
+
+        double modelDecrease = (rhs0*corr0+rhs1*corr1) - 0.5 * (corr0*tmp0+corr1*tmp1);
+        modelDecrease = mpiHelper.getCollectiveCommunication().sum(modelDecrease);
+
+        double relativeModelDecrease = modelDecrease / std::fabs(energy);
+
+        if (verbosity_ == NumProc::FULL and rank==0) {
+            std::cout << "Absolute model decrease: " << modelDecrease
+                      << ",  functional decrease: " << oldEnergy - energy << std::endl;
+            std::cout << "Relative model decrease: " << relativeModelDecrease
+                      << ",  functional decrease: " << (oldEnergy - energy)/energy << std::endl;
+        }
+
+        assert(modelDecrease >= 0);
+
+        if (energy >= oldEnergy and rank==0) {
+            if (this->verbosity_ == NumProc::FULL)
+                printf("Richtung ist keine Abstiegsrichtung!\n");
+        }
+
+        if (energy >= oldEnergy &&
+            (std::abs((oldEnergy-energy)/energy) < 1e-9 || relativeModelDecrease < 1e-9)) {
+            if (this->verbosity_ == NumProc::FULL and rank==0)
+                std::cout << "Suspecting rounding problems" << std::endl;
+
+            if (this->verbosity_ != NumProc::QUIET and rank==0)
+                std::cout << i+1 << " trust-region steps were taken." << std::endl;
+
+            x0_ = newIterate0;
+            x1_ = newIterate1;
+            break;
+        }
+
+        // //////////////////////////////////////////////
+        //   Check for acceptance of the step
+        // //////////////////////////////////////////////
+        if ( (oldEnergy-energy) / modelDecrease > 0.9) {
+            // very successful iteration
+
+            x0_ = newIterate0;
+            x1_ = newIterate1;
+            trustRegion0.scale(2);
+            trustRegion1.scale(2);
+
+            // current energy becomes 'oldEnergy' for the next iteration
+            oldEnergy = energy;
+
+            recomputeGradientHessian = true;
+
+        } else if ( (oldEnergy-energy) / modelDecrease > 0.01
+                    || std::abs(oldEnergy-energy) < 1e-12) {
+            // successful iteration
+            x0_ = newIterate0;
+            x1_ = newIterate1;
+
+            // current energy becomes 'oldEnergy' for the next iteration
+            oldEnergy = energy;
+
+            recomputeGradientHessian = true;
+
+        } else {
+
+            // unsuccessful iteration
+
+            // Decrease the trust-region radius
+            trustRegion0.scale(0.5);
+            trustRegion1.scale(0.5);
+
+            if (this->verbosity_ == NumProc::FULL and rank==0)
+                std::cout << "Unsuccessful iteration!" << std::endl;
+        }
+
+        // Output each iterate, to better understand what the algorithm does
+        std::stringstream iAsAscii;
+        iAsAscii << i+1;
+        CosseratVTKWriter<GridType>::template writeMixed<Basis0,Basis1>(assembler_->basis0_,x0_,
+                                                                        assembler_->basis1_,x1_,
+                                                                        "cosserat_iterate_" + iAsAscii.str());
+
+        if (rank==0)
+            std::cout << "iteration took " << totalTimer.elapsed() << " sec." << std::endl;
+
+    }
+
+}
diff --git a/dune/gfe/mixedriemanniantrsolver.hh b/dune/gfe/mixedriemanniantrsolver.hh
new file mode 100644
index 00000000..bb2ecf4b
--- /dev/null
+++ b/dune/gfe/mixedriemanniantrsolver.hh
@@ -0,0 +1,158 @@
+#ifndef DUNE_GFE_MIXED_RIEMANNIAN_TRUST_REGION_SOLVER_HH
+#define DUNE_GFE_MIXED_RIEMANNIAN_TRUST_REGION_SOLVER_HH
+
+#include <vector>
+
+#include <dune/common/bitsetvector.hh>
+
+#include <dune/istl/bcrsmatrix.hh>
+#include <dune/istl/bvector.hh>
+
+#include <dune/solvers/common/boxconstraint.hh>
+#include <dune/solvers/norms/h1seminorm.hh>
+#include <dune/solvers/solvers/iterativesolver.hh>
+#include <dune/solvers/solvers/loopsolver.hh>
+#include <dune/solvers/iterationsteps/mmgstep.hh>
+
+#include <dune/fufem/functionspacebases/p1nodalbasis.hh>
+#include <dune/fufem/functionspacebases/p2nodalbasis.hh>
+#include <dune/fufem/functionspacebases/p3nodalbasis.hh>
+
+#include <dune/gfe/mixedgfeassembler.hh>
+#include <dune/gfe/parallel/globalindex.hh>
+
+/** \brief Riemannian trust-region solver for geodesic finite-element problems */
+template <class GridType,
+          class Basis0, class TargetSpace0,
+          class Basis1, class TargetSpace1>
+class MixedRiemannianTrustRegionSolver
+    : public NumProc
+{
+    const static int blocksize0 = TargetSpace0::TangentVector::dimension;
+    const static int blocksize1 = TargetSpace1::TangentVector::dimension;
+
+    const static int gridDim = GridType::dimension;
+
+    // Centralize the field type here
+    typedef double field_type;
+
+    // Some types that I need
+    typedef Dune::BCRSMatrix<Dune::FieldMatrix<field_type, blocksize0, blocksize0> > MatrixType00;
+    typedef Dune::BCRSMatrix<Dune::FieldMatrix<field_type, blocksize0, blocksize1> > MatrixType01;
+    typedef Dune::BCRSMatrix<Dune::FieldMatrix<field_type, blocksize1, blocksize0> > MatrixType10;
+    typedef Dune::BCRSMatrix<Dune::FieldMatrix<field_type, blocksize1, blocksize1> > MatrixType11;
+    typedef Dune::BlockVector<Dune::FieldVector<field_type, blocksize0> >             CorrectionType0;
+    typedef Dune::BlockVector<Dune::FieldVector<field_type, blocksize1> >             CorrectionType1;
+    typedef std::vector<TargetSpace0>                                                SolutionType0;
+    typedef std::vector<TargetSpace1>                                                SolutionType1;
+
+#if 0
+#ifdef SECOND_ORDER
+    typedef Dune::GlobalP2Mapper<typename GridType::LeafGridView> GUIndex;
+#else
+    typedef GlobalUniqueIndex<typename GridType::LeafGridView, gridDim> GUIndex;
+#endif
+#endif
+
+public:
+
+    MixedRiemannianTrustRegionSolver()
+        : NumProc(NumProc::FULL),
+
+          h1SemiNorm0_(nullptr), h1SemiNorm1_(nullptr)
+    {}
+
+    /** \brief Set up the solver using a monotone multigrid method as the inner solver */
+    void setup(const GridType& grid,
+               const MixedGFEAssembler<Basis0, TargetSpace0, Basis1, TargetSpace1>* assembler,
+               const SolutionType0& x0,
+               const SolutionType1& x1,
+               const Dune::BitSetVector<blocksize0>& dirichletNodes0,
+               const Dune::BitSetVector<blocksize1>& dirichletNodes1,
+               double tolerance,
+               int maxTrustRegionSteps,
+               double initialTrustRegionRadius,
+               int multigridIterations,
+               double mgTolerance,
+               int mu,
+               int nu1,
+               int nu2,
+               int baseIterations,
+               double baseTolerance,
+               bool instrumented);
+#if 0
+    void setIgnoreNodes(const Dune::BitSetVector<blocksize0>& ignoreNodes)
+    {
+        ignoreNodes_ = &ignoreNodes;
+        Dune::shared_ptr<LoopSolver<CorrectionType> > loopSolver = std::dynamic_pointer_cast<LoopSolver<CorrectionType> >(innerSolver_);
+        assert(loopSolver);
+        loopSolver->iterationStep_->ignoreNodes_ = ignoreNodes_;
+    }
+#endif
+    void solve();
+
+    void setInitialIterate(const SolutionType0& x0,
+                           const SolutionType1& x1)
+    {
+        x0_ = x0;
+        x1_ = x1;
+    }
+
+    //SolutionType getSol() const {return x_;}
+
+protected:
+#if 0
+    std::unique_ptr<GUIndex> guIndex_;
+#endif
+    /** \brief The grid */
+    const GridType* grid_;
+
+    /** \brief The solution vectors */
+    SolutionType0 x0_;
+    SolutionType1 x1_;
+
+    /** \brief The initial trust-region radius in the maximum-norm */
+    double initialTrustRegionRadius_;
+
+    /** \brief Maximum number of trust-region steps */
+    int maxTrustRegionSteps_;
+
+    /** \brief Maximum number of multigrid iterations */
+    int innerIterations_;
+
+    /** \brief Error tolerance of the multigrid QP solver */
+    double innerTolerance_;
+
+    /** \brief Hessian matrix */
+    std::auto_ptr<MatrixType00> hessianMatrix00_;
+    std::auto_ptr<MatrixType01> hessianMatrix01_;
+    std::auto_ptr<MatrixType10> hessianMatrix10_;
+    std::auto_ptr<MatrixType11> hessianMatrix11_;
+
+    /** \brief The assembler for the material law */
+    const MixedGFEAssembler<Basis0, TargetSpace0, Basis1, TargetSpace1>* assembler_;
+
+    /** \brief The solver for the quadratic inner problems */
+    std::shared_ptr<Solver> innerSolver_;
+
+    MonotoneMGStep<MatrixType00, CorrectionType0>* mmgStep0;
+    MonotoneMGStep<MatrixType11, CorrectionType1>* mmgStep1;
+
+    double tolerance_;
+
+    /** \brief Contains 'true' everywhere -- the trust-region is bounded */
+    Dune::BitSetVector<blocksize0> hasObstacle0_;
+    Dune::BitSetVector<blocksize1> hasObstacle1_;
+
+    /** \brief The Dirichlet nodes */
+    const Dune::BitSetVector<blocksize0>* ignoreNodes0_;
+    const Dune::BitSetVector<blocksize1>* ignoreNodes1_;
+
+    /** \brief The norm used to measure multigrid convergence */
+    H1SemiNorm<CorrectionType0>* h1SemiNorm0_;
+    H1SemiNorm<CorrectionType1>* h1SemiNorm1_;
+};
+
+#include "mixedriemanniantrsolver.cc"
+
+#endif
diff --git a/mixed-cosserat-continuum.cc b/mixed-cosserat-continuum.cc
new file mode 100644
index 00000000..8b6a05f3
--- /dev/null
+++ b/mixed-cosserat-continuum.cc
@@ -0,0 +1,472 @@
+#include <config.h>
+
+#define SECOND_ORDER
+
+#include <fenv.h>
+
+// Includes for the ADOL-C automatic differentiation library
+// Need to come before (almost) all others.
+#include <adolc/adouble.h>
+#include <adolc/drivers/drivers.h>    // use of "Easy to Use" drivers
+#include <adolc/taping.h>
+
+#include <dune/gfe/adolcnamespaceinjections.hh>
+#include <dune/common/bitsetvector.hh>
+#include <dune/common/parametertree.hh>
+#include <dune/common/parametertreeparser.hh>
+
+#include <dune/grid/uggrid.hh>
+#include <dune/grid/onedgrid.hh>
+#include <dune/grid/geometrygrid.hh>
+#include <dune/grid/utility/structuredgridfactory.hh>
+
+#include <dune/grid/io/file/gmshreader.hh>
+
+#include <dune/fufem/boundarypatch.hh>
+#include <dune/fufem/functiontools/boundarydofs.hh>
+#include <dune/fufem/functiontools/basisinterpolator.hh>
+#include <dune/fufem/functionspacebases/p1nodalbasis.hh>
+#include <dune/fufem/functionspacebases/p2nodalbasis.hh>
+#include <dune/fufem/dunepython.hh>
+
+#include <dune/solvers/solvers/iterativesolver.hh>
+#include <dune/solvers/norms/energynorm.hh>
+
+#include <dune/gfe/rigidbodymotion.hh>
+#include <dune/gfe/mixedlocalgfeadolcstiffness.hh>
+#include <dune/gfe/mixedcosseratenergy.hh>
+#include <dune/gfe/cosseratvtkwriter.hh>
+#include <dune/gfe/mixedgfeassembler.hh>
+#include <dune/gfe/mixedriemanniantrsolver.hh>
+
+// grid dimension
+const int dim = 2;
+
+using namespace Dune;
+
+// Dirichlet boundary data for the shear/wrinkling example
+class WrinklingDirichletValues
+: public Dune::VirtualFunction<FieldVector<double,dim>, FieldVector<double,3> >
+{
+  FieldVector<double,2> upper_;
+  double homotopy_;
+public:
+  WrinklingDirichletValues(FieldVector<double,2> upper, double homotopy)
+  : upper_(upper), homotopy_(homotopy)
+  {}
+
+  void evaluate(const FieldVector<double,dim>& in, FieldVector<double,3>& out) const
+  {
+    out = 0;
+    for (int i=0; i<dim; i++)
+      out[i] = in[i];
+
+    //     if (out[1] > 1-1e-3)
+    if (out[1] > upper_[1]-1e-4)
+      out[0] += 0.003*homotopy_;
+  }
+};
+
+class WongPellegrinoDirichletValuesPythonWriter
+{
+  FieldVector<double,2> upper_;
+  double homotopy_;
+public:
+
+  WongPellegrinoDirichletValuesPythonWriter(FieldVector<double,2> upper, double homotopy)
+  : upper_(upper), homotopy_(homotopy)
+  {}
+
+  void writeDeformation()
+  {
+    Python::runStream()
+        << std::endl << "def deformationDirichletValues(x):"
+        << std::endl << "    out = [x[0], x[1], 0]"
+        << std::endl << "    if x[1] > " << upper_[1]-1e-4 << ":"
+        << std::endl << "        out[0] += 0.003 * " << homotopy_
+        << std::endl << "    return out";
+  }
+
+  void writeOrientation()
+  {
+    Python::runStream()
+        << std::endl << "def orientationDirichletValues(x):"
+        << std::endl << "    rotation = [[1,0,0], [0, 1, 0], [0, 0, 1]]"
+        << std::endl << "    return rotation";
+  }
+
+};
+
+
+class TwistedStripDirichletValuesPythonWriter
+{
+  FieldVector<double,2> upper_;
+  double homotopy_;
+  double totalAngle_;
+public:
+
+  TwistedStripDirichletValuesPythonWriter(FieldVector<double,2> upper, double homotopy)
+  : upper_(upper), homotopy_(homotopy)
+  {
+    totalAngle_ = 6*M_PI;
+  }
+
+  void writeDeformation()
+  {
+    Python::runStream()
+        << std::endl << "def deformationDirichletValues(x):"
+        << std::endl << "    upper = [" << upper_[0] << ", " << upper_[1] << "]"
+        << std::endl << "    homotopy = " << homotopy_
+        << std::endl << "    angle = " << totalAngle_ << " * x[0]/upper[0]"
+        << std::endl << "    angle *= homotopy"
+
+        // center of rotation
+        << std::endl << "    center = [0, 0, 0]"
+        << std::endl << "    center[1] = upper[1]/2.0"
+
+        << std::endl << "    rotation = numpy.array([[1,0,0], [0, math.cos(angle), -math.sin(angle)], [0, math.sin(angle), math.cos(angle)]])"
+
+        << std::endl << "    inEmbedded = [x[0]-center[0], x[1]-center[1], 0-center[2]]"
+
+        // Matrix-vector product
+        << std::endl << "    out = [rotation[0][0]*inEmbedded[0]+rotation[0][1]*inEmbedded[1], rotation[1][0]*inEmbedded[0]+rotation[1][1]*inEmbedded[1], rotation[2][0]*inEmbedded[0]+rotation[2][1]*inEmbedded[1]]"
+
+        << std::endl << "    out = [out[0]+center[0], out[1]+center[1], out[2]+center[2]]"
+        << std::endl << "    return out";
+  }
+
+  void writeOrientation()
+  {
+    Python::runStream()
+        << std::endl << "def orientationDirichletValues(x):"
+        << std::endl << "    upper = [" << upper_[0] << ", " << upper_[1] << "]"
+        << std::endl << "    angle = " << totalAngle_ << " * x[0]/upper[0];"
+        << std::endl << "    angle *= " << homotopy_
+
+        << std::endl << "    rotation = numpy.array([[1,0,0], [0, math.cos(angle), -math.sin(angle)], [0, math.sin(angle), math.cos(angle)]])"
+        << std::endl << "    return rotation";
+  }
+
+};
+
+
+/** \brief A constant vector-valued function, for simple Neumann boundary values */
+struct NeumannFunction
+    : public Dune::VirtualFunction<FieldVector<double,dim>, FieldVector<double,3> >
+{
+    NeumannFunction(const FieldVector<double,3> values,
+                    double homotopyParameter)
+    : values_(values),
+      homotopyParameter_(homotopyParameter)
+    {}
+
+    void evaluate(const FieldVector<double, dim>& x, FieldVector<double,3>& out) const {
+        out = 0;
+        out.axpy(homotopyParameter_, values_);
+    }
+
+    FieldVector<double,3> values_;
+    double homotopyParameter_;
+};
+
+
+int main (int argc, char *argv[]) try
+{
+    // initialize MPI, finalize is done automatically on exit
+    Dune::MPIHelper& mpiHelper = MPIHelper::instance(argc, argv);
+
+    // Start Python interpreter
+    Python::start();
+    Python::Reference main = Python::import("__main__");
+    Python::run("import math");
+
+    //feenableexcept(FE_INVALID);
+
+    typedef std::vector<RealTuple<double,3> > DeformationSolutionType;
+    typedef std::vector<Rotation<double,3> >  OrientationSolutionType;
+
+    // parse data file
+    ParameterTree parameterSet;
+    if (argc != 2)
+      DUNE_THROW(Exception, "Usage: ./mixed-cosserat-continuum <parameter file>");
+
+    ParameterTreeParser::readINITree(argv[1], parameterSet);
+
+    ParameterTreeParser::readOptions(argc, argv, parameterSet);
+
+    // read solver settings
+    const int numLevels                   = parameterSet.get<int>("numLevels");
+    int numHomotopySteps                  = parameterSet.get<int>("numHomotopySteps");
+    const double tolerance                = parameterSet.get<double>("tolerance");
+    const int maxTrustRegionSteps         = parameterSet.get<int>("maxTrustRegionSteps");
+    const double initialTrustRegionRadius = parameterSet.get<double>("initialTrustRegionRadius");
+    const int multigridIterations         = parameterSet.get<int>("numIt");
+    const int nu1                         = parameterSet.get<int>("nu1");
+    const int nu2                         = parameterSet.get<int>("nu2");
+    const int mu                          = parameterSet.get<int>("mu");
+    const int baseIterations              = parameterSet.get<int>("baseIt");
+    const double mgTolerance              = parameterSet.get<double>("mgTolerance");
+    const double baseTolerance            = parameterSet.get<double>("baseTolerance");
+    const bool instrumented               = parameterSet.get<bool>("instrumented");
+    std::string resultPath                = parameterSet.get("resultPath", "");
+
+    // ///////////////////////////////////////
+    //    Create the grid
+    // ///////////////////////////////////////
+    typedef std::conditional<dim==1,OneDGrid,UGGrid<dim> >::type GridType;
+
+    shared_ptr<GridType> grid;
+
+    FieldVector<double,dim> lower(0), upper(1);
+
+    if (parameterSet.get<bool>("structuredGrid")) {
+
+        lower = parameterSet.get<FieldVector<double,dim> >("lower");
+        upper = parameterSet.get<FieldVector<double,dim> >("upper");
+
+        array<unsigned int,dim> elements = parameterSet.get<array<unsigned int,dim> >("elements");
+        grid = StructuredGridFactory<GridType>::createCubeGrid(lower, upper, elements);
+
+    } else {
+        std::string path                = parameterSet.get<std::string>("path");
+        std::string gridFile            = parameterSet.get<std::string>("gridFile");
+        grid = shared_ptr<GridType>(GmshReader<GridType>::read(path + "/" + gridFile));
+    }
+
+    grid->globalRefine(numLevels-1);
+
+    grid->loadBalance();
+
+    if (mpiHelper.rank()==0)
+      std::cout << "There are " << grid->leafGridView().comm().size() << " processes" << std::endl;
+
+    typedef GridType::LeafGridView GridView;
+    GridView gridView = grid->leafGridView();
+
+    typedef P2NodalBasis<GridView,double> DeformationFEBasis;
+    typedef P2NodalBasis<GridView,double> OrientationFEBasis;
+
+    DeformationFEBasis deformationFEBasis(gridView);
+    OrientationFEBasis orientationFEBasis(gridView);
+
+    // /////////////////////////////////////////
+    //   Read Dirichlet values
+    // /////////////////////////////////////////
+
+    BitSetVector<1> dirichletVertices(gridView.size(dim), false);
+    BitSetVector<1> neumannVertices(gridView.size(dim), false);
+
+    GridType::Codim<dim>::LeafIterator vIt    = gridView.begin<dim>();
+    GridType::Codim<dim>::LeafIterator vEndIt = gridView.end<dim>();
+
+    const GridView::IndexSet& indexSet = gridView.indexSet();
+
+    // Make Python function that computes which vertices are on the Dirichlet boundary,
+    // based on the vertex positions.
+    std::string lambda = std::string("lambda x: (") + parameterSet.get<std::string>("dirichletVerticesPredicate") + std::string(")");
+    PythonFunction<FieldVector<double,dim>, bool> pythonDirichletVertices(Python::evaluate(lambda));
+
+    // Same for the Neumann boundary
+    lambda = std::string("lambda x: (") + parameterSet.get<std::string>("neumannVerticesPredicate", "0") + std::string(")");
+    PythonFunction<FieldVector<double,dim>, bool> pythonNeumannVertices(Python::evaluate(lambda));
+
+    for (; vIt!=vEndIt; ++vIt) {
+
+        bool isDirichlet;
+        pythonDirichletVertices.evaluate(vIt->geometry().corner(0), isDirichlet);
+        dirichletVertices[indexSet.index(*vIt)] = isDirichlet;
+
+        bool isNeumann;
+        pythonNeumannVertices.evaluate(vIt->geometry().corner(0), isNeumann);
+        neumannVertices[indexSet.index(*vIt)] = isNeumann;
+
+    }
+
+    BoundaryPatch<GridView> dirichletBoundary(gridView, dirichletVertices);
+    BoundaryPatch<GridView> neumannBoundary(gridView, neumannVertices);
+
+    if (mpiHelper.rank()==0)
+      std::cout << "Neumann boundary has " << neumannBoundary.numFaces() << " faces\n";
+
+
+    BitSetVector<1> deformationDirichletNodes(deformationFEBasis.size(), false);
+    constructBoundaryDofs(dirichletBoundary,deformationFEBasis,deformationDirichletNodes);
+
+    BitSetVector<3> deformationDirichletDofs(deformationFEBasis.size(), false);
+    for (size_t i=0; i<deformationFEBasis.size(); i++)
+      if (deformationDirichletNodes[i][0])
+        for (int j=0; j<3; j++)
+          deformationDirichletDofs[i][j] = true;
+
+    BitSetVector<1> orientationDirichletNodes(orientationFEBasis.size(), false);
+    constructBoundaryDofs(dirichletBoundary,orientationFEBasis,orientationDirichletNodes);
+
+    BitSetVector<3> orientationDirichletDofs(orientationFEBasis.size(), false);
+    for (size_t i=0; i<orientationFEBasis.size(); i++)
+      if (orientationDirichletNodes[i][0])
+        for (int j=0; j<3; j++)
+          orientationDirichletDofs[i][j] = true;
+
+    // //////////////////////////
+    //   Initial iterate
+    // //////////////////////////
+
+    DeformationSolutionType xDisp(deformationFEBasis.size());
+
+    lambda = std::string("lambda x: (") + parameterSet.get<std::string>("initialDeformation") + std::string(")");
+    PythonFunction<FieldVector<double,dim>, FieldVector<double,3> > pythonInitialDeformation(Python::evaluate(lambda));
+
+    std::vector<FieldVector<double,3> > v;
+    Functions::interpolate(deformationFEBasis, v, pythonInitialDeformation);
+
+    for (size_t i=0; i<xDisp.size(); i++)
+      xDisp[i] = v[i];
+
+    OrientationSolutionType xOrient(orientationFEBasis.size());
+#if 0
+    lambda = std::string("lambda x: (") + parameterSet.get<std::string>("initialDeformation") + std::string(")");
+    PythonFunction<FieldVector<double,dim>, FieldVector<double,3> > pythonInitialDeformation(Python::evaluate(lambda));
+
+    std::vector<FieldVector<double,3> > v;
+    Functions::interpolate(feBasis, v, pythonInitialDeformation);
+
+    for (size_t i=0; i<x.size(); i++)
+      xDisp[i] = v[i];
+#endif
+    ////////////////////////////////////////////////////////
+    //   Main homotopy loop
+    ////////////////////////////////////////////////////////
+
+    // Output initial iterate (of homotopy loop)
+    CosseratVTKWriter<GridType>::writeMixed<DeformationFEBasis,OrientationFEBasis>(deformationFEBasis,xDisp,
+                                                                                   orientationFEBasis,xOrient,
+                                                                                   resultPath + "cosserat_homotopy_0");
+
+    for (int i=0; i<numHomotopySteps; i++) {
+
+        double homotopyParameter = (i+1)*(1.0/numHomotopySteps);
+        if (mpiHelper.rank()==0)
+            std::cout << "Homotopy step: " << i << ",    parameter: " << homotopyParameter << std::endl;
+
+
+    // ////////////////////////////////////////////////////////////
+    //   Create an assembler for the energy functional
+    // ////////////////////////////////////////////////////////////
+
+    const ParameterTree& materialParameters = parameterSet.sub("materialParameters");
+
+    shared_ptr<NeumannFunction> neumannFunction;
+    if (parameterSet.hasKey("neumannValues"))
+        neumannFunction = make_shared<NeumannFunction>(parameterSet.get<FieldVector<double,3> >("neumannValues"),
+                                                       homotopyParameter);
+
+
+        if (mpiHelper.rank() == 0) {
+            std::cout << "Material parameters:" << std::endl;
+            materialParameters.report();
+        }
+
+    // Assembler using ADOL-C
+    MixedCosseratEnergy<GridView,
+                        DeformationFEBasis::LocalFiniteElement,
+                        OrientationFEBasis::LocalFiniteElement,
+                        3,adouble> cosseratEnergyADOLCLocalStiffness(materialParameters,
+                                                                     &neumannBoundary,
+                                                                     neumannFunction.get());
+
+    MixedLocalGFEADOLCStiffness<GridView,
+                                DeformationFEBasis::LocalFiniteElement,
+                                RealTuple<double,3>,
+                                OrientationFEBasis::LocalFiniteElement,
+                                Rotation<double,3> > localGFEADOLCStiffness(&cosseratEnergyADOLCLocalStiffness);
+
+    MixedGFEAssembler<DeformationFEBasis,
+                      RealTuple<double,3>,
+                      OrientationFEBasis,
+                      Rotation<double,3> > assembler(deformationFEBasis, orientationFEBasis, &localGFEADOLCStiffness);
+
+    // /////////////////////////////////////////////////
+    //   Create a Riemannian trust-region solver
+    // /////////////////////////////////////////////////
+
+    MixedRiemannianTrustRegionSolver<GridType,
+                                     DeformationFEBasis, RealTuple<double,3>,
+                                     OrientationFEBasis, Rotation<double,3> > solver;
+    solver.setup(*grid,
+                 &assembler,
+                 xDisp,
+                 xOrient,
+                 deformationDirichletDofs,
+                 orientationDirichletDofs,
+                 tolerance,
+                 maxTrustRegionSteps,
+                 initialTrustRegionRadius,
+                 multigridIterations,
+                 mgTolerance,
+                 mu, nu1, nu2,
+                 baseIterations,
+                 baseTolerance,
+                 instrumented);
+
+        ////////////////////////////////////////////////////////
+        //   Set Dirichlet values
+        ////////////////////////////////////////////////////////
+
+        if (parameterSet.get<std::string>("problem") == "twisted-strip")
+        {
+          TwistedStripDirichletValuesPythonWriter twistedStripDirichletValuesPythonWriter(upper, homotopyParameter);
+          twistedStripDirichletValuesPythonWriter.writeDeformation();
+          twistedStripDirichletValuesPythonWriter.writeOrientation();
+
+        } else if (parameterSet.get<std::string>("problem") == "wong-pellegrino")
+        {
+          WongPellegrinoDirichletValuesPythonWriter wongPellegrinoDirichletValuesPythonWriter(upper, homotopyParameter);
+          wongPellegrinoDirichletValuesPythonWriter.writeDeformation();
+          wongPellegrinoDirichletValuesPythonWriter.writeOrientation();
+
+        } else if (parameterSet.get<std::string>("problem") == "wriggers-L-shape")
+        {
+
+        } else
+          DUNE_THROW(Exception, "Unknown problem type");
+
+        PythonFunction<FieldVector<double,dim>, FieldVector<double,3> >   deformationDirichletValues(main.get("deformationDirichletValues"));
+        PythonFunction<FieldVector<double,dim>, FieldMatrix<double,3,3> > orientationDirichletValues(main.get("orientationDirichletValues"));
+
+        std::vector<FieldVector<double,3> > ddV;
+        Functions::interpolate(deformationFEBasis, ddV, deformationDirichletValues, deformationDirichletDofs);
+
+        std::vector<FieldMatrix<double,3,3> > dOV;
+        Functions::interpolate(orientationFEBasis, dOV, orientationDirichletValues, orientationDirichletDofs);
+
+        for (size_t j=0; j<xDisp.size(); j++)
+          if (deformationDirichletNodes[j][0])
+            xDisp[j] = ddV[j];
+
+        for (size_t j=0; j<xOrient.size(); j++)
+          if (orientationDirichletNodes[j][0])
+            xOrient[j].set(dOV[j]);
+
+        // /////////////////////////////////////////////////////
+        //   Solve!
+        // /////////////////////////////////////////////////////
+
+        solver.setInitialIterate(xDisp,xOrient);
+        solver.solve();
+
+        //x = solver.getSol();
+
+        // Output result of each homotopy step
+        std::stringstream iAsAscii;
+        iAsAscii << i+1;
+        CosseratVTKWriter<GridType>::writeMixed<DeformationFEBasis,OrientationFEBasis>(deformationFEBasis,xDisp,
+                                                                                       orientationFEBasis,xOrient,
+                                                                                       resultPath + "cosserat_homotopy_" + iAsAscii.str());
+
+    }
+
+ } catch (Exception e) {
+
+    std::cout << e << std::endl;
+
+ }
-- 
GitLab