diff --git a/dune/gfe/riemannianpnsolver.cc b/dune/gfe/riemannianpnsolver.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3e79b4d236568e43210231abb2a73e7518b3dda3
--- /dev/null
+++ b/dune/gfe/riemannianpnsolver.cc
@@ -0,0 +1,527 @@
+#include <cmath>
+#include <cstdio>
+
+#include <dune/common/bitsetvector.hh>
+#include <dune/common/timer.hh>
+#include <dune/common/version.hh>
+
+#include <dune/istl/io.hh>
+
+#include <dune/grid/common/mcmgmapper.hh>
+
+#include <dune/fufem/functionspacebases/dunefunctionsbasis.hh>
+#include <dune/fufem/assemblers/operatorassembler.hh>
+#include <dune/fufem/assemblers/localassemblers/laplaceassembler.hh>
+#include <dune/fufem/assemblers/localassemblers/massassembler.hh>
+#include <dune/fufem/assemblers/basisinterpolationmatrixassembler.hh>
+
+// Using a cholmod solver as the inner solver
+#include <dune/solvers/solvers/cholmodsolver.hh>
+
+#include <dune/solvers/norms/twonorm.hh>
+#include <dune/solvers/norms/h1seminorm.hh>
+
+#if HAVE_MPI
+#include <dune/gfe/parallel/matrixcommunicator.hh>
+#include <dune/gfe/parallel/vectorcommunicator.hh>
+#endif
+
+template <class Basis, class TargetSpace>
+void RiemannianProximalNewtonSolver<Basis,TargetSpace>::
+setup(const GridType& grid,
+      const GeodesicFEAssembler<Basis, TargetSpace>* assembler,
+      const SolutionType& x,
+      const Dune::BitSetVector<blocksize>& dirichletNodes,
+      double tolerance,
+      int maxProximalNewtonSteps,
+      double initialRegularization,
+      bool instrumented)
+{
+    int rank = grid.comm().rank();
+
+    grid_                     = &grid;
+    assembler_                = assembler;
+    x_                        = x;
+    this->tolerance_          = tolerance;
+    maxProximalNewtonSteps_   = maxProximalNewtonSteps;
+    initialRegularization_    = initialRegularization;
+    instrumented_             = instrumented;
+    ignoreNodes_              = &dirichletNodes;
+
+    //////////////////////////////////////////////////////////////////
+    //  Create global numbering for matrix and vector transfer
+    //////////////////////////////////////////////////////////////////
+
+#if HAVE_MPI
+    globalMapper_ = std::make_unique<GlobalMapper>(grid_->leafGridView());
+#endif
+    
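+    // The Dirichlet bit field is allocated on the heap and deliberately never freed
+    // here: the inner solver is expected to keep only a reference to its ignore
+    // field, so the object has to outlive this setup() call.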
+#if HAVE_MPI
+    // Transfer all Dirichlet data to the master processor
+    VectorCommunicator<GlobalMapper, typename GridType::LeafGridView::CollectiveCommunication, Dune::BitSetVector<blocksize> > vectorComm(*globalMapper_,
+                                                                                                                                     grid_->leafGridView().comm(),
+                                                                                                                                     0);
+    auto globalDirichletNodes = new Dune::BitSetVector<blocksize>(vectorComm.reduceCopy(dirichletNodes));
+#else
+    auto globalDirichletNodes = new Dune::BitSetVector<blocksize>(dirichletNodes);
+#endif
+
+    // //////////////////////////////////////////////////////////////////////////////////////
+    //   Assemble a Laplace matrix to create a norm that's equivalent to the H1-norm
+    // //////////////////////////////////////////////////////////////////////////////////////
+
+    typedef DuneFunctionsBasis<Basis> FufemBasis;
+    FufemBasis basis(assembler_->basis_);
+    OperatorAssembler<FufemBasis,FufemBasis> operatorAssembler(basis, basis);
+
+    LaplaceAssembler<GridType, typename FufemBasis::LocalFiniteElement, typename FufemBasis::LocalFiniteElement> laplaceStiffness;
+    typedef Dune::BCRSMatrix<Dune::FieldMatrix<double,1,1> > ScalarMatrixType;
+    ScalarMatrixType localA;
+
+    operatorAssembler.assemble(laplaceStiffness, localA);
+
+#if HAVE_MPI
+    LocalMapper localMapper = MapperFactory<Basis>::createLocalMapper(grid_->leafGridView());
+
+    MatrixCommunicator<GlobalMapper,
+                       typename GridType::LeafGridView,
+                       typename GridType::LeafGridView,
+                       ScalarMatrixType,
+                       LocalMapper,
+                       LocalMapper> matrixComm(*globalMapper_, grid_->leafGridView(), localMapper, localMapper, 0);
+#endif
+
+    if (instrumented_) {
+        // With dune < 2.7 the H1SemiNorm is given a plain matrix reference, so the matrix
+        // is kept alive in the class member 'A'; with newer versions it is handed a
+        // shared_ptr and a local variable suffices.
+#if !DUNE_VERSION_LT(DUNE_GEOMETRY, 2, 7)
+        auto
+#endif
+#if HAVE_MPI
+        A = std::make_shared<ScalarMatrixType>(matrixComm.reduceAdd(localA));
+#else
+        A = std::make_shared<ScalarMatrixType>(localA);
+#endif
+#if DUNE_VERSION_LT(DUNE_GEOMETRY, 2, 7)
+        h1SemiNorm_ = std::make_shared<H1SemiNorm<CorrectionType> >(*A);
+#else
+        h1SemiNorm_ = std::make_shared<H1SemiNorm<CorrectionType> >(A);
+#endif
+    }
+    //////////////////////////////////////////////////////////////////
+    //   Create the inner solver: a CHOLMOD sparse direct solver
+    //////////////////////////////////////////////////////////////////
+
+    innerSolver_ = std::make_shared<Dune::Solvers::CholmodSolver<MatrixType,CorrectionType> >();
+    innerSolver_->setIgnore(*globalDirichletNodes);
+
+    // //////////////////////////////////////////////////////////////////////////////////////
+    //   Assemble a mass matrix to create a norm that's equivalent to the L2-norm
+    //   This will be used to monitor the gradient
+    // //////////////////////////////////////////////////////////////////////////////////////
+
+    MassAssembler<GridType, typename Basis::LocalView::Tree::FiniteElement, typename Basis::LocalView::Tree::FiniteElement> massStiffness;
+    ScalarMatrixType localMassMatrix;
+
+    operatorAssembler.assemble(massStiffness, localMassMatrix);
+
+#if !DUNE_VERSION_LT(DUNE_GEOMETRY, 2, 7)
+    auto
+#endif
+#if HAVE_MPI
+    massMatrix = std::make_shared<ScalarMatrixType>(matrixComm.reduceAdd(localMassMatrix));
+#else
+    massMatrix = std::make_shared<ScalarMatrixType>(localMassMatrix);
+#endif
+#if DUNE_VERSION_LT(DUNE_GEOMETRY, 2, 7)
+    l2Norm_ = std::make_shared<H1SemiNorm<CorrectionType> >(*massMatrix);
+#else
+    l2Norm_ = std::make_shared<H1SemiNorm<CorrectionType> >(massMatrix);
+#endif
+
+    // Write all intermediate solutions, if requested
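+    // (this only takes effect if the inner solver is an IterativeSolver;
+    // the CHOLMOD direct solver used here does not keep an iteration history)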
+    if (instrumented_
+        && dynamic_cast<IterativeSolver<CorrectionType>*>(innerSolver_.get()))
+        dynamic_cast<IterativeSolver<CorrectionType>*>(innerSolver_.get())->historyBuffer_ = "tmp/mgHistory";
+
+    // ////////////////////////////////////////////////////////////
+    //    Create Hessian matrix and its occupation structure
+    // ////////////////////////////////////////////////////////////
+
+    hessianMatrix_ = std::make_unique<MatrixType>();
+    Dune::MatrixIndexSet indices(grid_->size(1), grid_->size(1));
+    assembler_->getNeighborsPerVertex(indices);
+    indices.exportIdx(*hessianMatrix_);
+}
+
+
+template <class Basis, class TargetSpace>
+void RiemannianProximalNewtonSolver<Basis,TargetSpace>::solve()
+{
+    int rank = grid_->comm().rank();
+
+    // /////////////////////////////////////////////////////
+    //   Set up the log file, if requested
+    // /////////////////////////////////////////////////////
+    FILE* fp = nullptr;
+    if (instrumented_) {
+
+        fp = fopen("statistics", "w");
+        if (!fp)
+            DUNE_THROW(Dune::IOError, "Couldn't open statistics file for writing!");
+
+    }
+
+    // /////////////////////////////////////////////////////
+    //   Proximal Newton Solver
+    // /////////////////////////////////////////////////////
+
+    Dune::Timer energyTimer;
+    double oldEnergy = assembler_->computeEnergy(x_);
+    if (this->verbosity_ == Solver::FULL)
+        std::cout << "Energy computation took " << energyTimer.elapsed() << " sec." << std::endl;
+
+
+    oldEnergy = grid_->comm().sum(oldEnergy);
+
+    bool recomputeGradientHessian = true;
+    CorrectionType rhs, rhs_global;
+    MatrixType stiffnessMatrix;
+#if HAVE_MPI
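+    // The assembled gradient and Hessian are reduced (summed up) onto rank 0,
+    // the regularized Newton system is solved there, and the resulting correction
+    // is scattered back to all processes.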
+    VectorCommunicator<GlobalMapper, typename GridType::LeafGridView::CollectiveCommunication, CorrectionType> vectorComm(*globalMapper_,
+                                                                                                                     grid_->leafGridView().comm(),
+                                                                                                                     0);
+    LocalMapper localMapper = MapperFactory<Basis>::createLocalMapper(grid_->leafGridView());
+    MatrixCommunicator<GlobalMapper,
+                       typename GridType::LeafGridView,
+                       typename GridType::LeafGridView,
+                       MatrixType,
+                       LocalMapper,
+                       LocalMapper> matrixComm(*globalMapper_,
+                                               grid_->leafGridView(),
+                                               localMapper,
+                                               localMapper,
+                                               0);
+#endif
+    auto& i = statistics_.finalIteration;
+    double totalAssemblyTime = 0.0;
+    double totalSolverTime = 0.0;
+    double regularization = initialRegularization_;
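+
+    // Each proximal Newton step assembles the Riemannian gradient and Hessian at the
+    // current iterate (unless the previous step was rejected, in which case they are
+    // reused), adds the regularization to the Hessian, solves the resulting quadratic
+    // problem for a correction, updates the iterate via the exponential map, and
+    // finally accepts or rejects the step depending on how well the quadratic model
+    // predicted the actual energy decrease.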
+    for (i=0; i<maxProximalNewtonSteps_; i++) {
+
+        Dune::Timer totalTimer;
+        if (this->verbosity_ == Solver::FULL and rank==0) {
+            std::cout << "----------------------------------------------------" << std::endl;
+            std::cout << "      Proximal Newton Step Number: " << i
+                      << ",     regularization parameter: " << regularization
+                      << ",     energy: " << oldEnergy << std::endl;
+            std::cout << "----------------------------------------------------" << std::endl;
+        }
+
+        CorrectionType corr(x_.size());
+        corr = 0;
+
+        Dune::Timer gradientTimer;
+
+        if (recomputeGradientHessian) {
+
+            assembler_->assembleGradientAndHessian(x_,
+                                                   rhs,
+                                                   *hessianMatrix_,
+                                                   i==0    // assemble occupation pattern only for the first call
+                                                   );
+
+            rhs *= -1;        // The right hand side is the _negative_ gradient
+
+            // Transfer vector data
+#if HAVE_MPI
+            rhs_global = vectorComm.reduceAdd(rhs);
+#else
+            rhs_global = rhs;
+#endif
+            CorrectionType gradient = rhs_global;
+            for (size_t j=0; j<gradient.size(); j++)
+              for (size_t k=0; k<gradient[j].size(); k++)
+                if ((innerSolver_->ignore())[j][k])  // global Dirichlet nodes set
+                    gradient[j][k] = 0; 
+
+            if (this->verbosity_ == Solver::FULL and rank==0)
+              std::cout << "Gradient norm: " << l2Norm_->operator()(gradient) << std::endl;
+
+            if (this->verbosity_ == Solver::FULL)
+              std::cout << "Assembly took " << gradientTimer.elapsed() << " sec." << std::endl;
+            totalAssemblyTime += gradientTimer.elapsed();
+
+            // Transfer matrix data
+#if HAVE_MPI
+            stiffnessMatrix = matrixComm.reduceAdd(*hessianMatrix_);
+#else
+            stiffnessMatrix = *hessianMatrix_;
+#endif
+            recomputeGradientHessian = false;
+
+        }
+
+        CorrectionType corr_global(rhs_global.size());
+        corr_global = 0;
+        bool solved = true;
+
+        if (rank==0)
+        {
+            // Add the regularization: for now simply a multiple of the identity matrix
+            for (size_t row=0; row<stiffnessMatrix.N(); row++)
+              for (int j=0; j<blocksize; j++)
+                stiffnessMatrix[row][row][j][j] += regularization;
+
+            innerSolver_->setProblem(stiffnessMatrix,corr_global,rhs_global);
+            innerSolver_->preprocess();
+
+            ///////////////////////////////
+            //    Solve !
+            ///////////////////////////////
+
+            std::cout << "Solve quadratic problem using cholmod solver..." << std::endl;
+
+            Dune::Timer solutionTimer;
+            try {
+                innerSolver_->solve();
+            } catch (Dune::Exception &e) {
+                std::cerr << "Error while solving: " << e << std::endl;
+                solved = false;
+                corr_global = 0;
+            }
+            std::cout << "Solving the quadratic problem took " << solutionTimer.elapsed() << " seconds." << std::endl;
+            totalSolverTime += solutionTimer.elapsed();
+        }
+
+        // Distribute the solution to all processes
+        if (grid_->comm().size()>1 and rank==0)
+            std::cout << "Transfer solution from the root process to all other processes ..." << std::endl;
+
+#if HAVE_MPI
+        solved = grid_->comm().min(solved);
+        if (solved) {
+            corr = vectorComm.scatter(corr_global);
+        } else  {
+            corr_global = 0;
+            corr = 0;
+        }
+#else
+        corr = corr_global;
+#endif
+
+        // Make the infinity norm of the correction known on all processes
+        double corrNorm = corr.infinity_norm();
+        double corrGlobalInfinityNorm = grid_->comm().max(corrNorm);
+
+        if (std::isnan(corrGlobalInfinityNorm))
+            solved = false;
+
+        if (instrumented_) {
+
+            fprintf(fp, "Proximal newton step: %ld, regularization parameter: %g\n",
+                    i, regularization);
+
+            // ///////////////////////////////////////////////////////////////
+            //   Compute and measure progress against the exact solution
+            //   for each proximal Newton step
+            // ///////////////////////////////////////////////////////////////
+
+            CorrectionType exactSolution = corr;
+
+            // Start from 0
+            double oldError = 0;
+            double totalConvRate = 1;
+            double convRate = 1;
+
+            // Write statistics of the initial solution
+            // Compute the energy norm
+            oldError = h1SemiNorm_->operator()(exactSolution);
+
+            for (int j=0; j<innerIterations_; j++) {
+
+                // read iteration from file
+                CorrectionType intermediateSol(grid_->size(gridDim));
+                intermediateSol = 0;
+                char iSolFilename[100];
+                snprintf(iSolFilename, sizeof(iSolFilename), "tmp/mgHistory/intermediatesolution_%04d", j);
+
+                FILE* fpInt = fopen(iSolFilename, "rb");
+                if (!fpInt)
+                    DUNE_THROW(Dune::IOError, "Couldn't open intermediate solution");
+                for (size_t k=0; k<intermediateSol.size(); k++)
+                    for (int l=0; l<blocksize; l++)
+                        if (fread(&intermediateSol[k][l], sizeof(double), 1, fpInt) != 1)
+                            DUNE_THROW(Dune::IOError, "Error while reading intermediate solution from " << iSolFilename);
+
+                fclose(fpInt);
+                //std::cout << "intermediateSol\n" << intermediateSol << std::endl;
+
+                // Compute errors
+                intermediateSol -= exactSolution;
+
+                //std::cout << "error\n" << intermediateSol << std::endl;
+
+                // Compute the H1 norm
+                double error = h1SemiNorm_->operator()(intermediateSol);
+
+                convRate = error / oldError;
+                totalConvRate *= convRate;
+
+                if (error < 1e-12)
+                    break;
+
+                std::cout << "Iteration: " << j << "  ";
+                std::cout << "Errors:  error " << error << ", convergence rate: " << convRate
+                          << ",  total conv rate " << pow(totalConvRate, 1/((double)j+1)) << std::endl;
+
+
+                fprintf(fp, "%d %g %g %g\n", j+1, error, convRate, pow(totalConvRate, 1/((double)j+1)));
+
+
+                oldError = error;
+
+            }
+
+
+        }
+        double energy = 0;
+        double modelDecrease = 0;
+        SolutionType newIterate = x_;
+        if (i == maxProximalNewtonSteps_ - 1)
+            std::cout << i+1 << " proximal newton steps were taken, the maximum was reached." << std::endl << "Total solver time: " << totalSolverTime << " sec., total assembly time: " << totalAssemblyTime << " sec." << std::endl;
+
+        if (solved) {
+            if (this->verbosity_ == NumProc::FULL)
+                std::cout << "Infinity norm of the correction: " << corrGlobalInfinityNorm << std::endl;
+
+            if (corrGlobalInfinityNorm < this->tolerance_) {
+                if (this->verbosity_ == NumProc::FULL and rank==0)
+                    std::cout << "CORRECTION IS SMALL ENOUGH" << std::endl;
+
+                if (this->verbosity_ != NumProc::QUIET and rank==0)
+                    std::cout << i+1 << " proximal Newton steps were taken" << std::endl << "Total solver time: " << totalSolverTime << " sec., total assembly time: " << totalAssemblyTime << " sec." << std::endl;
+                break;
+            }
+
+            // ////////////////////////////////////////////////////
+            //   Check whether the proximal Newton step can be accepted
+            // ////////////////////////////////////////////////////
+
+            for (size_t j=0; j<newIterate.size(); j++)
+                newIterate[j] = TargetSpace::exp(newIterate[j], corr[j]);
+            try {
+                energy  = assembler_->computeEnergy(newIterate);
+            } catch (Dune::Exception &e) {
+                std::cerr << "Error while computing the energy of the new Iterate: " << e << std::endl;
+                std::cerr << "Redoing proximal newton step with higher regularization parameter ..." << std::endl;
+                solved = false;
+            }
+            solved = grid_->comm().min(solved);
+
+            if (!solved) {
+                energy = oldEnergy;
+                newIterate = x_;
+            } else {
+                energy = grid_->comm().sum(energy);
+
+                // Compute the model decrease
+                //   m(x) - m(x+s) = -<g,s> - 0.5 <s,Hs>
+                // Since rhs = -g, this equals <rhs,s> - 0.5 <s,Hs>
+                CorrectionType tmp(corr.size());
+                tmp = 0;
+                hessianMatrix_->umv(corr, tmp);
+                modelDecrease = (rhs*corr) - 0.5 * (corr*tmp);
+                modelDecrease = grid_->comm().sum(modelDecrease);
+
+                double relativeModelDecrease = modelDecrease / std::fabs(energy);
+
+                if (this->verbosity_ == NumProc::FULL and rank==0) {
+                    std::cout << "Absolute model decrease: " << modelDecrease
+                              << ",  functional decrease: " << oldEnergy - energy << std::endl;
+                    std::cout << "Relative model decrease: " << relativeModelDecrease
+                              << ",  functional decrease: " << (oldEnergy - energy)/energy << std::endl;
+                }
+                assert(modelDecrease >= 0);
+
+
+                if (energy >= oldEnergy and rank==0) {
+                    if (this->verbosity_ == NumProc::FULL)
+                        std::cout << "The correction is not a descent direction!" << std::endl;
+                }
+
+                if (energy >= oldEnergy &&
+                    (std::abs((oldEnergy-energy)/energy) < 1e-9 || relativeModelDecrease < 1e-9)) {
+                    if (this->verbosity_ == NumProc::FULL and rank==0)
+                        std::cout << "Suspecting rounding problems" << std::endl;
+
+                    if (this->verbosity_ != NumProc::QUIET and rank==0)
+                        std::cout << i+1 << " proximal Newton steps were taken." << std::endl;
+
+                    x_ = newIterate;
+                    break;
+                }
+            }
+        }
+
+        // //////////////////////////////////////////////
+        //   Check for acceptance of the step
+        // //////////////////////////////////////////////
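+        // The ratio (oldEnergy-energy)/modelDecrease compares the actual energy
+        // decrease with the decrease predicted by the quadratic model:
+        //   ratio > 0.9  : very successful -> accept the step, halve the regularization
+        //   ratio > 0.01 : successful      -> accept the step, keep the regularization
+        //   otherwise    : unsuccessful    -> reject the step, double the regularization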
+        if (solved && (oldEnergy-energy) / modelDecrease > 0.9) {
+            // very successful iteration
+
+            x_ = newIterate;
+            regularization *= 0.5;
+
+            // current energy becomes 'oldEnergy' for the next iteration
+            oldEnergy = energy;
+
+            recomputeGradientHessian = true;
+
+        } else if (solved && ((oldEnergy-energy) / modelDecrease > 0.01
+                    || std::abs(oldEnergy-energy) < 1e-12)) {
+            // successful iteration
+            x_ = newIterate;
+
+            // current energy becomes 'oldEnergy' for the next iteration
+            oldEnergy = energy;
+
+            recomputeGradientHessian = true;
+
+        } else {
+
+            // unsuccessful iteration
+
+            // Increase the regularization parameter
+            regularization *= 2;
+
+            if (this->verbosity_ == NumProc::FULL and rank==0)
+                std::cout << "Unsuccessful iteration!" << std::endl;
+        }
+
+        // /////////////////////////////////////////////////////////////////////
+        //   Write the iterate to disk for later convergence rate measurement
+        // /////////////////////////////////////////////////////////////////////
+
+        if (instrumented_) {
+
+            char iFilename[100];
+            snprintf(iFilename, sizeof(iFilename), "tmp/intermediateSolution_%04zu", i);
+
+            FILE* fpIterate = fopen(iFilename, "wb");
+            if (!fpIterate)
+                DUNE_THROW(SolverError, "Couldn't open file " << iFilename << " for writing");
+
+            for (size_t j=0; j<x_.size(); j++)
+                fwrite(&x_[j], sizeof(TargetSpace), 1, fpIterate);
+
+            fclose(fpIterate);
+
+        }
+
+        if (rank==0)
+            std::cout << "iteration took " << totalTimer.elapsed() << " sec." << std::endl;
+    }
+
+    // //////////////////////////////////////////////
+    //   Close logfile
+    // //////////////////////////////////////////////
+    if (instrumented_)
+        fclose(fp);
+
+    statistics_.finalEnergy = oldEnergy;
+}
diff --git a/dune/gfe/riemannianpnsolver.hh b/dune/gfe/riemannianpnsolver.hh
new file mode 100644
index 0000000000000000000000000000000000000000..22fa156408dc4e1d7371b98e5bb663d666cb16c9
--- /dev/null
+++ b/dune/gfe/riemannianpnsolver.hh
@@ -0,0 +1,158 @@
+#ifndef RIEMANNIAN_PROXIMAL_NEWTON_SOLVER_HH
+#define RIEMANNIAN_PROXIMAL_NEWTON_SOLVER_HH
+
+#include <vector>
+
+#include <dune/common/bitsetvector.hh>
+#include <dune/common/deprecated.hh>
+#include <dune/common/version.hh>
+
+#include <dune/istl/bcrsmatrix.hh>
+#include <dune/istl/bvector.hh>
+
+#include <dune/functions/functionspacebases/lagrangebasis.hh>
+
+#include <dune/solvers/common/boxconstraint.hh>
+#include <dune/solvers/norms/h1seminorm.hh>
+#include <dune/solvers/solvers/iterativesolver.hh>
+#include <dune/solvers/solvers/cholmodsolver.hh>
+
+#include <dune/gfe/periodic1dpq1nodalbasis.hh>
+
+#include "geodesicfeassembler.hh"
+#include "riemanniantrsolver.hh"
+#include <dune/grid/utility/globalindexset.hh>
+#include <dune/gfe/parallel/globalmapper.hh>
+#include <dune/gfe/parallel/globalp1mapper.hh>
+#include <dune/gfe/parallel/globalp2mapper.hh>
+#include <dune/gfe/parallel/p2mapper.hh>
+
+/** \brief Riemannian proximal Newton solver for geodesic finite-element problems */
+template <class Basis, class TargetSpace>
+class RiemannianProximalNewtonSolver
+    : public IterativeSolver<std::vector<TargetSpace>,
+                             Dune::BitSetVector<TargetSpace::TangentVector::dimension> >
+{
+    typedef typename Basis::GridView::Grid GridType;
+
+    const static int blocksize = TargetSpace::TangentVector::dimension;
+
+    const static int gridDim = GridType::dimension;
+
+    // Centralize the field type here
+    typedef double field_type;
+
+    // Some types that I need
+    typedef Dune::BCRSMatrix<Dune::FieldMatrix<field_type, blocksize, blocksize> > MatrixType;
+    typedef Dune::BlockVector<Dune::FieldVector<field_type, blocksize> >           CorrectionType;
+    typedef std::vector<TargetSpace>                                               SolutionType;
+
+#if HAVE_MPI
+    typedef typename MapperFactory<Basis>::GlobalMapper GlobalMapper;
+    typedef typename MapperFactory<Basis>::LocalMapper LocalMapper;
+#endif
+
+    /** \brief Records information about the last run of the RiemannianProximalNewtonSolver
+     *
+     * This is used primarily for unit testing.
+     */
+    struct Statistics
+    {
+      std::size_t finalIteration;
+
+      field_type finalEnergy;
+    };
+
+public:
+
+    RiemannianProximalNewtonSolver()
+        : IterativeSolver<std::vector<TargetSpace>, Dune::BitSetVector<blocksize> >(0,100,NumProc::FULL),
+          innerIterations_(0), hessianMatrix_(nullptr), h1SemiNorm_(nullptr)
+    {}
+
+    /** \brief Set up the solver, using a CHOLMOD sparse direct solver as the inner solver */
+    void setup(const GridType& grid,
+               const GeodesicFEAssembler<Basis, TargetSpace>* assembler,
+               const SolutionType& x,
+               const Dune::BitSetVector<blocksize>& dirichletNodes,
+               double tolerance,
+               int maxProximalNewtonSteps,
+               double initialRegularization,
+               bool instrumented);
+
+    void setIgnoreNodes(const Dune::BitSetVector<blocksize>& ignoreNodes)
+    {
+        ignoreNodes_ = &ignoreNodes;
+        innerSolver_->ignoreNodes_ = ignoreNodes_;
+    }
+
+    void solve();
+
+    void setInitialSolution(const SolutionType& x) DUNE_DEPRECATED {
+        x_ = x;
+    }
+
+    void setInitialIterate(const SolutionType& x) {
+        x_ = x;
+    }
+
+    SolutionType getSol() const {return x_;}
+
+    const Statistics& getStatistics() const {return statistics_;}
+
+protected:
+
+#if HAVE_MPI
+    std::unique_ptr<GlobalMapper> globalMapper_;
+#endif
+
+    /** \brief The grid */
+    const GridType* grid_;
+
+    /** \brief The solution vector */
+    SolutionType x_;
+
+    /** \brief The initial regularization parameter for the proximal Newton step */
+    double initialRegularization_;
+
+    /** \brief Maximum number of proximal Newton steps */
+    std::size_t maxProximalNewtonSteps_;
+
+    /** \brief Maximum number of iterations of the inner solver */
+    int innerIterations_;
+
+    /** \brief Error tolerance of the inner quadratic solver */
+    double innerTolerance_;
+
+    /** \brief Hessian matrix */
+    std::unique_ptr<MatrixType> hessianMatrix_;
+
+    /** \brief The assembler for the material law */
+    const GeodesicFEAssembler<Basis, TargetSpace>* assembler_;
+
+    /** \brief The solver for the quadratic inner problems */
+    std::shared_ptr<typename Dune::Solvers::CholmodSolver<MatrixType,CorrectionType>> innerSolver_;
+
+    /** \brief The Dirichlet nodes */
+    const Dune::BitSetVector<blocksize>* ignoreNodes_;
+
+    /** \brief The H1 seminorm used for the error measurements in instrumented mode */
+    std::shared_ptr<H1SemiNorm<CorrectionType> > h1SemiNorm_;
+
+    /** \brief An L2-norm, really.  The H1SemiNorm class is badly named */
+    std::shared_ptr<H1SemiNorm<CorrectionType> > l2Norm_;
+
+    /** \brief If set to true we log convergence speed and other stuff */
+    bool instrumented_;
+
+    /** \brief Store information about solver runs for unit testing */
+    Statistics statistics_;
+
+#if DUNE_VERSION_LT(DUNE_GEOMETRY, 2, 7)
+    std::shared_ptr<Dune::BCRSMatrix<Dune::FieldMatrix<double,1,1> > > A;
+    std::shared_ptr<Dune::BCRSMatrix<Dune::FieldMatrix<double,1,1> > > massMatrix;
+#endif
+
+};
+
+#include "riemannianpnsolver.cc"
+
+#endif
diff --git a/src/film-on-substrate.cc b/src/film-on-substrate.cc
index 724f869dc1c5279bf2ed0b58aa60852a5dcdb4c4..1789c9fb777b7e46991cefc0d2e3dce791d6ce55 100644
--- a/src/film-on-substrate.cc
+++ b/src/film-on-substrate.cc
@@ -60,6 +60,7 @@
 #include <dune/gfe/localgeodesicfeadolcstiffness.hh>
 #include <dune/gfe/cosseratvtkwriter.hh>
 #include <dune/gfe/geodesicfeassembler.hh>
+#include <dune/gfe/riemannianpnsolver.hh>
 #include <dune/gfe/riemanniantrsolver.hh>
 #include <dune/gfe/vertexnormals.hh>
 #include <dune/gfe/surfacecosseratenergy.hh>
@@ -152,8 +153,9 @@ int main (int argc, char *argv[]) try
   int numLevels                         = parameterSet.get<int>("numLevels");
   int numHomotopySteps                  = parameterSet.get<int>("numHomotopySteps");
   const double tolerance                = parameterSet.get<double>("tolerance");
-  const int maxTrustRegionSteps         = parameterSet.get<int>("maxTrustRegionSteps");
+  const int maxSolverSteps              = parameterSet.get<int>("maxSolverSteps");
   const double initialTrustRegionRadius = parameterSet.get<double>("initialTrustRegionRadius");
+  const double initialRegularization    = parameterSet.get<double>("initialRegularization");
   const int multigridIterations         = parameterSet.get<int>("numIt");
   const int nu1                         = parameterSet.get<int>("nu1");
   const int nu2                         = parameterSet.get<int>("nu2");
@@ -577,26 +579,6 @@ int main (int argc, char *argv[]) try
 
     GeodesicFEAssembler<FEBasis,TargetSpace> assembler(gridView, &localGFEADOLCStiffness);
 
-    // /////////////////////////////////////////////////
-    //   Create a Riemannian trust-region solver
-    // /////////////////////////////////////////////////
-
-    RiemannianTrustRegionSolver<FEBasis,TargetSpace> solver;
-    solver.setup(*grid,
-                 &assembler,
-                 x,
-                 dirichletDofs,
-                 tolerance,
-                 maxTrustRegionSteps,
-                 initialTrustRegionRadius,
-                 multigridIterations,
-                 mgTolerance,
-                 mu, nu1, nu2,
-                 baseIterations,
-                 baseTolerance,
-                 instrumented);
-
-    solver.setScaling(parameterSet.get<FieldVector<double,6> >("trustRegionScaling"));
 
     ////////////////////////////////////////////////////////
     //   Set Dirichlet values
@@ -625,16 +607,50 @@ int main (int argc, char *argv[]) try
         x[j].r = ddV[j];
         x[j].q.set(dOV[j]);
       }
+    // /////////////////////////////////////////////////
+    //   Create the solver and solve
+    // /////////////////////////////////////////////////
 
+    if (parameterSet.get<std::string>("solvertype") == "multigrid") {
+      RiemannianTrustRegionSolver<FEBasis,TargetSpace> solver;
+      solver.setup(*grid,
+                   &assembler,
+                   x,
+                   dirichletDofs,
+                   tolerance,
+                   maxSolverSteps,
+                   initialTrustRegionRadius,
+                   multigridIterations,
+                   mgTolerance,
+                   mu, nu1, nu2,
+                   baseIterations,
+                   baseTolerance,
+                   instrumented);
+
+      solver.setScaling(parameterSet.get<FieldVector<double,6> >("trustRegionScaling"));
+      solver.setInitialIterate(x);
+      solver.solve();
+
+      x = solver.getSol();
+    } else { //parameterSet.get<std::string>("solvertype") == "cholmod"
+      RiemannianProximalNewtonSolver<FEBasis,TargetSpace> solver;
+      solver.setup(*grid,
+                   &assembler,
+                   x,
+                   dirichletDofs,
+                   tolerance,
+                   maxSolverSteps,
+                   initialRegularization,
+                   instrumented);
+      solver.setInitialIterate(x);
+      solver.solve();
+
+      x = solver.getSol();
+    }
     // /////////////////////////////////////////////////////
     //   Solve!
     // /////////////////////////////////////////////////////
 
-    solver.setInitialIterate(x);
-    solver.solve();
-
-    x = solver.getSol();
-
     std::cout << "Overall calculation took " << overallTimer.elapsed() << " sec." << std::endl;
 
     /////////////////////////////////
diff --git a/src/film-on-substrate.parset b/src/film-on-substrate.parset
index 645c408bf9efca144da5cdc2c0dfe1b101901442..d594ef8332c1ae5cc702956c01a95ac384bf8f80 100644
--- a/src/film-on-substrate.parset
+++ b/src/film-on-substrate.parset
@@ -15,7 +15,7 @@ elements = 10 5 5
 numLevels = 2
 
 # When starting from a file, the stress-free configuration of the surfaceShell is read from a file, this file needs to match the *finest* grid level!
-startFromFile = true
+startFromFile = false
 pathToGridDeformationFile = ./
 gridDeformationFile = deformation
 
@@ -50,14 +50,31 @@ initialDeformation = "x"
 #  Solver parameters
 #############################################
 
+# Solver type: cholmod (proximal Newton solver) or multigrid (trust-region solver)
+solvertype = cholmod
+
 # Number of homotopy steps for the Dirichlet boundary conditions
 numHomotopySteps = 1
 
-# Tolerance of the trust region solver
+# Tolerance of the solver
 tolerance = 1e-3
 
-# Max number of steps of the trust region solver
-maxTrustRegionSteps = 20
+# Max number of solver steps
+maxSolverSteps = 300
+
+# Measure convergence
+instrumented = 0
+
+#############################################
+#  Solver parameters specific to the proximal Newton solver using cholmod
+#############################################
+
+# Initial regularization parameter
+initialRegularization = 10000000000
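+# (halved by the solver after very successful steps, doubled after rejected steps)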
+
+#############################################
+#  Solver parameters specific to the trust-region solver using multigrid
+#############################################
 
 trustRegionScaling = 1 1 1 0.01 0.01 0.01
 
@@ -85,9 +102,6 @@ mgTolerance = 1e-5
 # Tolerance of the base grid solver
 baseTolerance = 1e-8
 
-# Measure convergence
-instrumented = 0
-
 ############################
 #   Material parameters
 ############################