diff --git a/src/amdis/Assembler.inc.hpp b/src/amdis/Assembler.inc.hpp
index 1cf4a4eaf39f721d145601816fee68c248095c87..05501f75e98c37487a374f2bf227eab6e15bf3a0 100644
--- a/src/amdis/Assembler.inc.hpp
+++ b/src/amdis/Assembler.inc.hpp
@@ -45,12 +45,12 @@ void Assembler<Traits>::assemble(
       rowLocalView.bind(element);
 
       auto& rhsOp = rhsOperators_[rowNode];
-      if (rhsOp.assemble(asmVector) && !rhsOp.empty()) {
+      if (rhsOp.doAssemble(asmVector) && !rhsOp.empty()) {
         rhsOp.bind(element, geometry);
 
         auto vecAssembler = [&](auto const& context, auto& operator_list) {
           for (auto scaled : operator_list)
-            scaled.op->assemble(context, elementVector, rowLocalView.tree());
+            scaled.op->assemble(context, rowLocalView.tree(), elementVector);
         };
 
         this->assembleElementOperators(element, rhsOp, vecAssembler);
@@ -59,7 +59,7 @@ void Assembler<Traits>::assemble(
       AMDiS::forEachNode_(localView.tree(), [&,this](auto const& colNode, auto colTreePath)
       {
         auto& matOp = matrixOperators_[rowNode][colNode];
-        if (matOp.assemble(asmMatrix) && !matOp.empty()) {
+        if (matOp.doAssemble(asmMatrix) && !matOp.empty()) {
           auto colBasis = Dune::Functions::subspaceBasis(globalBasis_, colTreePath);
           auto colLocalView = colBasis.localView();
           colLocalView.bind(element);
@@ -68,7 +68,7 @@ void Assembler<Traits>::assemble(
 
           auto matAssembler = [&](auto const& context, auto& operator_list) {
             for (auto scaled : operator_list)
-              scaled.op->assemble(context, elementMatrix, rowLocalView.tree(), colLocalView.tree());
+              scaled.op->assemble(context, rowLocalView.tree(), colLocalView.tree(), elementMatrix);
           };
 
           this->assembleElementOperators(element, matOp, matAssembler);
@@ -121,10 +121,10 @@ template <class Traits>
 void Assembler<Traits>::assembleElementOperators(
     Element const& element,
     Operators& operators,
-    ElementAssembler const& elementAssembler) const
+    ElementAssembler const& localAssembler) const
 {
   // assemble element operators
-  elementAssembler(element, operators.element);
+  localAssembler(element, operators.element);
 
   // assemble intersection operators
   if (!operators.intersection.empty()
@@ -132,9 +132,9 @@ void Assembler<Traits>::assembleElementOperators(
   {
     for (auto const& intersection : intersections(globalBasis_.gridView(), element)) {
       if (intersection.boundary())
-        elementAssembler(intersection, operators.boundary);
+        localAssembler(intersection, operators.boundary);
       else
-        elementAssembler(intersection, operators.intersection);
+        localAssembler(intersection, operators.intersection);
     }
   }
 }
@@ -191,14 +191,14 @@ std::size_t Assembler<Traits>::finishMatrixVector(
   {
     auto rowBasis = Dune::Functions::subspaceBasis(globalBasis_, rowTreePath);
     auto& rhsOp = rhsOperators_[rowNode];
-    if (rhsOp.assemble(asmVector))
+    if (rhsOp.doAssemble(asmVector))
       rhsOp.assembled = true;
 
     AMDiS::forEachNode_(localView.tree(), [&,this](auto const& colNode, auto colTreePath)
     {
       auto colBasis = Dune::Functions::subspaceBasis(globalBasis_, colTreePath);
       auto& matOp = matrixOperators_[rowNode][colNode];
-      if (matOp.assemble(asmMatrix))
+      if (matOp.doAssemble(asmMatrix))
         matOp.assembled = true;
 
       // finish boundary condition
diff --git a/src/amdis/ContextGeometry.hpp b/src/amdis/ContextGeometry.hpp
index d679715fe4ea90c48f418aa509b0ee0adf68143f..0d5659bc0efad5389900ec6902f1d3212adce531 100644
--- a/src/amdis/ContextGeometry.hpp
+++ b/src/amdis/ContextGeometry.hpp
@@ -1,65 +1,157 @@
 #pragma once
 
+#include <type_traits>
+
+#include <dune/common/typetraits.hh>
+#include <dune/common/std/optional.hh>
+#include <dune/geometry/type.hh>
+
 namespace AMDiS
 {
-  template <class LocalContext, class Geometry, class LocalGeometry>
+  namespace Impl
+  {
+    template <class E, class = Dune::void_t<>>
+    struct ContextTypes
+    {
+      using Entity = E;
+      using LocalGeometry = typename E::Geometry;
+    };
+
+    // specialization for intersections
+    template <class I>
+    struct ContextTypes<I, Dune::void_t<decltype(std::declval<I>().inside())>>
+    {
+      using Entity = typename I::Entity;
+      using LocalGeometry = typename I::LocalGeometry;
+    };
+
+  } // end namespace Impl
+
+
+  /// \brief Wrapper class for element and geometry
+  /**
+   * A LocalContext can be either a grid entity of codim 0 (called an element)
+   * or an intersection of elements. The element and its geometry may be stored
+   * externally and can be passed along with the localContext object.
+   * Since an intersection has a geometry (and localGeometry) different from the
+   * geometry (and localGeometry) of the entity it belongs to, these objects
+   * are provided as well.
+   **/
+  template <class LocalContextType>
   struct ContextGeometry
   {
+  public:
+
+    using LocalContext = LocalContextType;
+    using Element = typename Impl::ContextTypes<LocalContext>::Entity;
+    using Geometry = typename Element::Geometry;
+    using LocalGeometry = typename Impl::ContextTypes<LocalContext>::LocalGeometry;
+
+    using IsEntity = std::is_same<Element, LocalContext>;
+
     enum {
       dim = Geometry::mydimension,    //< the dimension of the grid element
       dow = Geometry::coorddimension  //< the dimension of the world
     };
 
-    LocalContext const& localContext;
-    Geometry const& geometry;
-    LocalGeometry const& localGeometry;
-
-    ContextGeometry(LocalContext const& localContext,
-                    Geometry const& geometry,
-                    LocalGeometry const& localGeometry)
-      : localContext(localContext)
-      , geometry(geometry)
-      , localGeometry(localGeometry)
+    /// Constructor. Stores pointer to localContext, element, and geometry.
+    ContextGeometry(LocalContext const& localContext, Element const& element, Geometry const& geometry)
+      : localContext_(&localContext)
+      , element_(&element)
+      , geometry_(&geometry)
     {}
 
-    /// Coordinate `p` given in `localGeometry`, transformed to coordinate in `geometry`.
-    template <class Coordinate>
-    decltype(auto) position(Coordinate const& p) const
+  public:
+
+    Element const& element() const
     {
-      return position(p, std::is_same<Geometry, LocalGeometry>{});
+      return *element_;
     }
 
-    /// The integration element from the `localGeometry`, the quadrature points are
-    /// defined in.
+    LocalContext const& localContext() const
+    {
+      return *localContext_;
+    }
+
+    Geometry const& geometry() const
+    {
+      return *geometry_;
+    }
+
+    LocalGeometry const& localGeometry() const
+    {
+      return localGeometryImpl(IsEntity{});
+    }
+
+
+  public:
+
+    /// Coordinate `p` given in `localGeometry`, transformed to coordinate in `geometry`.
     template <class Coordinate>
-    auto integrationElement(Coordinate const& p) const
+    decltype(auto) local(Coordinate const& p) const
     {
-      return localGeometry.integrationElement(p);
+      return localImpl(p, IsEntity{});
     }
 
     /// Transformation of coordinate `p` given in `localGeometry` to world space coordinates.
     template <class Coordinate>
     decltype(auto) global(Coordinate const& p) const
     {
-      return geometry.global(p);
+      return geometry_->global(p);
+    }
+
+    /// Return the geometry-type of the localContext
+    Dune::GeometryType type() const
+    {
+      return localContext_->type();
+    }
+
+    /// The integration element from the `localGeometry`, the quadrature points are
+    /// defined in.
+    template <class Coordinate>
+    auto integrationElement(Coordinate const& p) const
+    {
+      return localGeometry().integrationElement(p);
     }
 
   private: // implementation detail
 
     // position for elements
     template <class Coordinate>
-    Coordinate const& position(Coordinate const& p, std::true_type) const
+    Coordinate const& localImpl(Coordinate const& p, std::true_type) const
     {
       return p;
     }
 
     // position for intersection
     template <class Coordinate>
-    auto position(Coordinate const& p, std::false_type) const
+    auto localImpl(Coordinate const& p, std::false_type) const
+    {
+      return localGeometry().global(p);
+    }
+
+    // local-geometry is the same as geometry
+    Geometry const& localGeometryImpl(std::true_type) const
+    {
+      return *geometry_;
+    }
+
+    // local-geometry of intersection in inside element
+    LocalGeometry const& localGeometryImpl(std::false_type) const
     {
-      return localGeometry.global(p);
+      if (!localGeometry_)
+        localGeometry_.emplace(localContext_->geometryInInside());
+
+      return *localGeometry_;
     }
 
+  private:
+    LocalContext const* localContext_;
+    Element const* element_;
+    Geometry const* geometry_;
+
+    // The localGeometry may be constructed only if needed
+    Dune::Std::optional<LocalGeometry> localGeometry_;
   };
 
 } // end namespace AMDiS
diff --git a/src/amdis/GridFunctionOperator.hpp b/src/amdis/GridFunctionOperator.hpp
index 9b828ff797e29843d5daa253c6e69464cc616bec..61c7e019f1e91ca9c707bbb08110613bb9e58d94 100644
--- a/src/amdis/GridFunctionOperator.hpp
+++ b/src/amdis/GridFunctionOperator.hpp
@@ -4,6 +4,8 @@
 #include <type_traits>
 
 #include <amdis/GridFunctions.hpp>
+#include <amdis/LocalOperator.hpp>
+#include <amdis/QuadratureFactory.hpp>
 #include <amdis/common/Utility.hpp>
 #include <amdis/utility/FiniteElementType.hpp>
 
@@ -33,8 +35,9 @@ namespace AMDiS
    *   degree of the (bi)linear-form. The argument passed to `F` is the polynomial
    *   order of the GridFunction.
    **/
-  template <class GridFunction, class QuadratureCreator>
+  template <class Derived, class GridFunction>
   class GridFunctionOperatorBase
+      : public LocalOperator<Derived>
   {
     using LocalFunction = decltype(localFunction(std::declval<GridFunction>()));
     using Element = typename GridFunction::EntitySet::Element;
@@ -42,6 +45,8 @@ namespace AMDiS
 
     using LocalCoordinate = typename GridFunction::EntitySet::LocalCoordinate;
 
+    using QuadFactory = QuadratureFactory<typename Geometry::ctype, Element::dimension, LocalFunction>;
+
   public:
     /// \brief Constructor. Stores a copy of `expr`.
     /**
@@ -49,27 +54,24 @@ namespace AMDiS
      * \ref ExpressionBase, and stores a copy. Additionally, it gets the
      * differentiation order, to calculate the quadrature degree in \ref getDegree.
      **/
-    GridFunctionOperatorBase(GridFunction const& gridFct, QuadratureCreator creator, int order)
+    GridFunctionOperatorBase(GridFunction const& gridFct, int termOrder)
       : gridFct_(gridFct)
       , localFct_(localFunction(gridFct_))
-      , quadCreator_(creator)
-      , order_(order)
+      , termOrder_(termOrder)
     {}
 
     // Copy constructor
     GridFunctionOperatorBase(GridFunctionOperatorBase const& that)
       : gridFct_(that.gridFct_)
       , localFct_(localFunction(gridFct_)) // recreate localFct_ on new gridFct_
-      , quadCreator_(that.quadCreator_)
-      , order_(that.order_)
+      , termOrder_(that.termOrder_)
     {}
 
     // Move constructor
     GridFunctionOperatorBase(GridFunctionOperatorBase&& that)
       : gridFct_(std::move(that.gridFct_))
       , localFct_(localFunction(gridFct_)) // recreate localFct_ on new gridFct_
-      , quadCreator_(std::move(that.quadCreator_))
-      , order_(std::move(that.order_))
+      , termOrder_(std::move(that.termOrder_))
     {}
 
     /// \brief Binds operator to `element` and `geometry`.
@@ -80,303 +82,158 @@ namespace AMDiS
      *
      * By default, it binds the \ref localFct_ to the `element`.
      **/
-    void bind(Element const& element, Geometry const& geometry)
+    void bind_impl(Element const& element, Geometry const& geometry)
     {
-      if (!bound_) {
+      assert( bool(quadFactory_) );
+      if (!this->bound_) {
         localFct_.bind(element);
-        isSimplex_ = geometry.type().isSimplex();
-        isAffine_ = geometry.affine();
-        bound_ = true;
+        quadFactory_->bind(localFct_);
       }
     }
 
     /// Unbinds operator from element.
-    void unbind()
+    void unbind_impl()
     {
-      bound_ = false;
       localFct_.unbind();
     }
 
+    template <class PreQuadFactory>
+    void setQuadFactory(PreQuadFactory const& pre)
+    {
+      quadFactory_ = makeQuadratureFactoryPtr<typename Geometry::ctype, Element::dimension, LocalFunction>(pre);
+    }
+
+  protected:
     /// Return expression value at LocalCoordinates
     auto coefficient(LocalCoordinate const& local) const
     {
-      assert( bound_ );
+      assert( this->bound_ );
       return localFct_(local);
     }
 
-
     /// Create a quadrature rule using the \ref QuadratureCreator by calculating the
     /// quadrature order necessary to integrate the (bi)linear-form accurately.
     template <class... Nodes>
-    decltype(auto) getQuadratureRule(Dune::GeometryType type, Nodes const&... nodes) const
-    {
-      auto degreeFunctor = [&,this](int order) -> int { return this->getDegree(order, nodes...); };
-      return quadCreator_(type, localFct_, degreeFunctor);
-    }
-
-  protected:
-    /// \brief Return the quadrature degree for a vector operator.
-    /**
-     * The quadrature degree that is necessary, to integrate the expression
-     * multiplied with (possibly derivatives of) shape functions. Thus it needs
-     * the order of derivatives, this operator implements.
-     **/
-    template <class Node>
-    int getDegree(int coeffDegree, Node const& node) const
-    {
-      assert( bound_ );
-
-      int psiDegree = polynomialDegree(node);
-
-      int degree = psiDegree + coeffDegree;
-      if (isSimplex_)
-        degree -= order_;
-      if (isAffine_)
-        degree += 1; // oder += (order+1)
-
-      return degree;
-    }
-
-
-    /// \brief Return the quadrature degree for a matrix operator.
-    /**
-     * The quadrature degree that is necessary, to integrate the expression
-     * multiplied with (possibly derivatives of) shape functions. Thus it needs
-     * the order of derivatives, this operator implements.
-     **/
-    template <class RowNode, class ColNode>
-    int getDegree(int coeffDegree, RowNode const& rowNode, ColNode const& colNode) const
+    auto const& getQuadratureRule(Dune::GeometryType type, Nodes const&... nodes) const
     {
-      assert( bound_ );
-
-      int psiDegree = polynomialDegree(rowNode);
-      int phiDegree = polynomialDegree(colNode);
-
-      int degree = psiDegree + phiDegree + coeffDegree;
-      if (isSimplex_)
-        degree -= order_;
-      if (isAffine_)
-        degree += 1; // oder += (order+1)
-
-      return degree;
+      assert( bool(quadFactory_) );
+      int quadDegree = this->getDegree(termOrder_, quadFactory_->order(), nodes...);
+      return quadFactory_->rule(type, quadDegree);
     }
 
   private:
+    /// The gridFunction to be used within the operator
     GridFunction gridFct_;
+
+    /// localFunction associated with gridFunction. Mus be updated whenever gridFunction changes.
     LocalFunction localFct_;
 
-    QuadratureCreator quadCreator_; //< a creator to provide a quadrature rule
-    int order_;                     //< the derivative order of this operator
+    /// Assign each element type a quadrature rule
+    std::shared_ptr<QuadFactory> quadFactory_ = std::make_shared<QuadFactory>();
 
-    bool isSimplex_ = false;    //< the bound element is a simplex
-    bool isAffine_ = false;     //< the bound geometry is affine
-    bool bound_ = false;        //< an element is bound to the operator
+    int termOrder_ = 0;    //< the derivative order of this operator
   };
 
 
-  /// A default implementation of an GridFunctionOperator if no specialization is available.
-  /**
-   * An operator must implement either \ref calculateElementVector, or
-   * \ref calculateElementMatrix, if it is a vector or matrix operator,
-   * respectively.
-   **/
-  template <class Tag, class GridFct, class QuadratureCreator>
-  class GridFunctionOperator
-      : public GridFunctionOperatorBase<GridFct, QuadratureCreator>
+  template <class Derived, class Transposed>
+  class GridFunctionOperatorTransposed
+      : public LocalOperator<Derived>
   {
+    template <class T, class... Args>
+    using Constructable = decltype( new T(std::declval<Args>()...) );
+
   public:
-    GridFunctionOperator(Tag, GridFct const& gridFct, QuadratureCreator const& quadCreator)
-      : GridFunctionOperatorBase<GridFct, QuadratureCreator>(gridFct, 0, quadCreator)
+    template <class... Args,
+      std::enable_if_t<Dune::Std::is_detected<Constructable, Transposed, Args...>::value, int> = 0>
+    GridFunctionOperatorTransposed(Args&&... args)
+      : transposedOp_(std::forward<Args>(args)...)
     {}
 
-    /// \brief Assemble a local element vector on the element that is bound.
-    /**
-     * \param contextGeometry Container for context related data
-     * \param quad A quadrature formula
-     * \param elementVector The output element vector
-     * \param node The node of the test-basis
-     **/
-    template <class Context, class QuadratureRule,
-              class ElementVector, class Node>
-    void calculateElementVector(Context const& contextGeometry,
-                                QuadratureRule const& quad,
-                                ElementVector& elementVector,
-                                Node const& node)
+    template <class Element, class Geometry>
+    void bind_impl(Element const& element, Geometry const& geometry)
     {
-      error_exit("Needs to be implemented!");
+      transposedOp_.bind(element, geometry);
     }
 
-    /// \brief Assemble a local element matrix on the element that is bound.
-    /**
-     * \param contextGeometry Container for context related data
-     * \param quad A quadrature formula
-     * \param elementMatrix The output element matrix
-     * \param rowNode The node of the test-basis
-     * \param colNode The node of the trial-basis
-     * \param flag1 finiteelement is the same for test and trial
-     * \param flag2 nodes are the same in the basis-tree
-     **/
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode, bool sameFE, bool sameNode>
-    void calculateElementMatrix(Context const& contextGeometry,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& rowNode,
-                                ColNode const& colNode,
-                                std::integral_constant<bool, sameFE> flag1,
-                                std::integral_constant<bool, sameNode> flag2)
+    void unbind_impl()
     {
-      error_exit("Needs to be implemented!");
+      transposedOp_.unbind();
     }
-  };
-
 
-#ifndef DOXYGEN
-  namespace Concepts
-  {
-    namespace Definition
+    template <class PreQuadFactory>
+    void setQuadFactory(PreQuadFactory const& pre)
     {
-      struct HasGridFunctionOrder
-      {
-        template <class F, class GridView>
-        auto requires_(F&& f, GridView const& gridView)
-          -> decltype( order(localFunction(makeGridFunction(f, gridView))) );
-      };
+      transposedOp_.setQuadFactory(pre);
     }
 
-    template <class F, class GridView = Dune::YaspGrid<2>::LeafGridView>
-    constexpr bool HasGridFunctionOrder = models<Definition::HasGridFunctionOrder(F, GridView)>;
-
-  } // end namespace Concepts
-
-
-  namespace tag
-  {
-    struct deduce {};
-    struct order {};
-    struct rule {};
-
-  } // end namespace tag
+    template <class Context, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrix(Context const& context,
+                          RowNode const& rowNode,
+                          ColNode const& colNode,
+                          ElementMatrix& elementMatrix)
+    {
+      auto elementMatrixTransposed = trans(elementMatrix);
+      transposedOp_.getElementMatrix(
+        context, colNode, rowNode, elementMatrixTransposed);
+    }
 
+  private:
+    Transposed transposedOp_;
+  };
 
-  template <class Tag, class Expr, class QuadTag, class Rule = void>
-  struct ExpressionPreOperator;
 
-  template <class Tag, class Expr>
-  struct ExpressionPreOperator<Tag, Expr, tag::deduce>
-  {
-    Tag tag;
-    Expr expr;
-  };
+  /// A default implementation of an GridFunctionOperator if no specialization is available.
+  /**
+   * An operator must implement either \ref getElementVector, or
+   * \ref getElementMatrix, if it is a vector or matrix operator,
+   * respectively.
+   **/
+  template <class Tag, class GridFct>
+  class GridFunctionOperator;
 
-  template <class Tag, class Expr>
-  struct ExpressionPreOperator<Tag, Expr, tag::order>
-  {
-    Tag tag;
-    Expr expr;
-    int order;
-    Dune::QuadratureType::Enum qt;
-  };
 
-  template <class Tag, class Expr, class Rule>
-  struct ExpressionPreOperator<Tag, Expr, tag::rule, Rule>
+  template <class Tag, class PreGridFct, class PreQuadFactory>
+  struct PreGridFunctionOperator
   {
     Tag tag;
-    Expr expr;
-    Rule const& rule;
+    PreGridFct expr;
+    PreQuadFactory quadFactory;
   };
-#endif
 
 
   /// Store tag and expression to create a \ref GridFunctionOperator
-  template <class Tag, class Expr>
-  auto makeOperator(Tag tag, Expr const& expr)
+  template <class Tag, class PreGridFct, class... QuadratureArgs>
+  auto makeOperator(Tag tag, PreGridFct const& expr, QuadratureArgs&&... args)
   {
-    using RawExpr = Underlying_t<Expr>;
-    static_assert(Concepts::HasGridFunctionOrder<RawExpr>,
-      "Polynomial degree of expression can not be deduced. You need to provide a polynomial order or a quadrature rule in 'makeOperator()'.");
-
-    return Dune::Hybrid::ifElse(bool_<Concepts::HasGridFunctionOrder<RawExpr>>,
-      [&](auto) { return ExpressionPreOperator<Tag, Expr, tag::deduce>{tag, expr}; },
-      [&](auto) { return ExpressionPreOperator<Tag, double, tag::deduce>{tag, 0.0}; });
-  }
-
-  /// Store tag and expression and polynomial order of expression to create a \ref GridFunctionOperator
-  template <class Tag, class Expr>
-  auto makeOperator(Tag tag, Expr const& expr, int order,
-                    Dune::QuadratureType::Enum qt = Dune::QuadratureType::GaussLegendre)
-  {
-    return ExpressionPreOperator<Tag, Expr, tag::order>{tag, expr, order, qt};
-  }
-
-  /// Store tag and expression and a quadrature rule to create a \ref GridFunctionOperator
-  template <class Tag, class Expr, class ctype, int dim>
-  auto makeOperator(Tag tag, Expr const& expr, Dune::QuadratureRule<ctype,dim> const& rule)
-  {
-    return ExpressionPreOperator<Tag, Expr, tag::rule, Dune::QuadratureRule<ctype,dim>>{tag, expr, rule};
+    auto preQuadFactory = makePreQuadratureFactory(std::forward<QuadratureArgs>(args)...);
+    return PreGridFunctionOperator<Tag, PreGridFct, decltype(preQuadFactory)>{tag, expr, preQuadFactory};
   }
 
   /** @} **/
 
 #ifndef DOXYGEN
 
-  namespace Impl
-  {
-    // Deduce polynomial order of expression automatically. Create standard quadrature rule with degree
-    // build up of operator derivative order, and polynomial degrees of trial/test functions.
-    template <class Tag, class Expr, class GridView>
-    auto makeQuadCreator(ExpressionPreOperator<Tag, Expr, tag::deduce> const& /*op*/, GridView const& /*gridView*/)
-    {
-      using QuadratureRules = Dune::QuadratureRules<typename GridView::ctype, GridView::dimension>;
-      return [](auto type, auto&& localFct, auto getDegree) -> auto const&
-      {
-        return QuadratureRules::rule(type, getDegree(order(localFct)));
-      };
-    }
-
-    // Provide polynomial order of expression explicitly
-    template <class Tag, class Expr, class GridView>
-    auto makeQuadCreator(ExpressionPreOperator<Tag, Expr, tag::order> const& op, GridView const& /*gridView*/)
-    {
-      using QuadratureRules = Dune::QuadratureRules<typename GridView::ctype, GridView::dimension>;
-      return [order=op.order,qt=op.qt](auto type, auto&& /*localFct*/, auto getDegree) -> auto const&
-      {
-        return QuadratureRules::rule(type, getDegree(order), qt);
-      };
-    }
-
-    // Provide quadrature rule explicitly
-    template <class Tag, class Expr, class Rule, class GridView>
-    auto makeQuadCreator(ExpressionPreOperator<Tag, Expr, tag::rule, Rule> const& op, GridView const& /*gridView*/)
-    {
-      return [&rule=op.rule](auto&&...) -> auto const&
-      {
-        return rule;
-      };
-    }
-
-  } // end namespace Impl
-
-
   /// Generate an \ref GridFunctionOperator from a PreOperator (tag, expr).
   /// @{
   template <class Tag, class... Args, class GridView>
-  auto makeGridOperator(ExpressionPreOperator<Tag, Args...> const& op, GridView const& gridView)
+  auto makeLocalOperator(PreGridFunctionOperator<Tag, Args...> const& op, GridView const& gridView)
   {
     auto gridFct = makeGridFunction(op.expr, gridView);
-    auto quadCreator = Impl::makeQuadCreator(op, gridView);
-    using GridFctOp = GridFunctionOperator<Tag, decltype(gridFct), decltype(quadCreator)>;
-    return GridFctOp{op.tag, gridFct, quadCreator};
+    using GridFctOp = GridFunctionOperator<Tag, decltype(gridFct)>;
+    GridFctOp localOperator{op.tag, gridFct};
+    localOperator.setQuadFactory(op.quadFactory);
+    return localOperator;
   }
 
   template <class Tag, class... Args, class GridView>
-  auto makeGridOperator(std::reference_wrapper<ExpressionPreOperator<Tag, Args...>> op, GridView const& gridView)
+  auto makeLocalOperator(std::reference_wrapper<PreGridFunctionOperator<Tag, Args...>> op, GridView const& gridView)
   {
-    ExpressionPreOperator<Tag, Args...> const& op_ref = op;
+    PreGridFunctionOperator<Tag, Args...> const& op_ref = op;
     auto gridFct = makeGridFunction(std::ref(op_ref.expr), gridView);
-    auto quadCreator = Impl::makeQuadCreator(op_ref, gridView);
-    using GridFctOp = GridFunctionOperator<Tag, decltype(gridFct), decltype(quadCreator)>;
-    return GridFctOp{op_ref.tag, gridFct, quadCreator};
+    using GridFctOp = GridFunctionOperator<Tag, decltype(gridFct)>;
+    GridFctOp localOperator{op_ref.tag, gridFct};
+    localOperator.setQuadFactory(op_ref.quadFactory);
+    return localOperator;
   }
   /// @}
 
@@ -384,22 +241,24 @@ namespace AMDiS
   /// Generate a shared_ptr to \ref GridFunctionOperator from a PreOperator (tag, expr).
   /// @{
   template <class Tag, class... Args, class GridView>
-  auto makeGridOperatorPtr(ExpressionPreOperator<Tag, Args...> const& op, GridView const& gridView)
+  auto makeLocalOperatorPtr(PreGridFunctionOperator<Tag, Args...> const& op, GridView const& gridView)
   {
     auto gridFct = makeGridFunction(op.expr, gridView);
-    auto quadCreator = Impl::makeQuadCreator(op, gridView);
-    using GridFctOp = GridFunctionOperator<Tag, decltype(gridFct), decltype(quadCreator)>;
-    return std::make_shared<GridFctOp>(op.tag, gridFct, quadCreator);
+    using GridFctOp = GridFunctionOperator<Tag, decltype(gridFct)>;
+    auto localOperator = std::make_shared<GridFctOp>(op.tag, gridFct);
+    localOperator->setQuadFactory(op.quadFactory);
+    return localOperator;
   }
 
   template <class Tag, class... Args, class GridView>
-  auto makeGridOperatorPtr(std::reference_wrapper<ExpressionPreOperator<Tag, Args...>> op, GridView const& gridView)
+  auto makeLocalOperatorPtr(std::reference_wrapper<PreGridFunctionOperator<Tag, Args...>> op, GridView const& gridView)
   {
-    ExpressionPreOperator<Tag, Args...> const& op_ref = op;
+    PreGridFunctionOperator<Tag, Args...> const& op_ref = op;
     auto gridFct = makeGridFunction(std::ref(op_ref.expr), gridView);
-    auto quadCreator = Impl::makeQuadCreator(op_ref, gridView);
-    using GridFctOp = GridFunctionOperator<Tag, decltype(gridFct), decltype(quadCreator)>;
-    return std::make_shared<GridFctOp>(op_ref.tag, gridFct, quadCreator);
+    using GridFctOp = GridFunctionOperator<Tag, decltype(gridFct)>;
+    auto localOperator = std::make_shared<GridFctOp>(op_ref.tag, gridFct);
+    localOperator->setQuadFactory(op_ref.quadFactory);
+    return localOperator;
   }
   /// @}
 
diff --git a/src/amdis/LocalAssembler.hpp b/src/amdis/LocalAssembler.hpp
index 424cc4560b42df6a9a4e98f5dc69757fbb0ea4ef..85a1db5a3a684e38036232ef576562082b5580f2 100644
--- a/src/amdis/LocalAssembler.hpp
+++ b/src/amdis/LocalAssembler.hpp
@@ -23,12 +23,8 @@ namespace AMDiS
   private:
 
     using Element = typename Super::Element;
-    using ElementMatrixVector = typename Super::ElementMatrixVector;
-
     using Geometry = typename Super::Geometry;
-    using LocalGeometry = typename Super::LocalGeometry;
-
-    using Context = ContextGeometry<LocalContext, Geometry, LocalGeometry>;
+    using ElementMatrixVector = typename Super::ElementMatrixVector;
 
   public:
 
@@ -69,26 +65,17 @@ namespace AMDiS
 
     /// Implementation of \ref LocalAssemblerBase::assemble
     /**
-     * Provides a quadrature formula with necessary degree to integrate the operator,
-     * stores geometry and localGeometry and a \ref ContextGeometry and calls
+     * Stores geometry and localGeometry and a \ref ContextGeometry and calls
      * \ref calculateElementVector or \ref calculateElementMatrix on the
      * vector or matrix operator, respectively.
-     *
-     * The call to operator passes two optimization flags:
-     * 1. RowNode and ColNode implement the same local finiteelement
-     * 2. RowNode and ColNode are the same node in the globalBasis tree.
      **/
-    virtual bool assemble(
+    virtual void assemble(
         LocalContext const& localContext,
-        ElementMatrixVector& elementMatrixVector,
-        Nodes const&... nodes) final
+        Nodes const&... nodes,
+        ElementMatrixVector& elementMatrixVector) final
     {
-      auto&& localGeometry = getLocalGeometry(localContext);
-      auto&& quad = op_.getQuadratureRule(localGeometry.type(), nodes...);
-
-      Context data{localContext, getGeometry(), localGeometry};
-      assembleImpl(data, std::forward<decltype(quad)>(quad), elementMatrixVector, nodes...);
-      return true;
+      ContextGeometry<LocalContext> context{localContext, getElement(), getGeometry()};
+      assembleImpl(context, nodes..., elementMatrixVector);
     }
 
 
@@ -97,92 +84,41 @@ namespace AMDiS
   protected: // implementation detail
 
     // matrix assembling
-    template <class QuadratureRule, class ElementMatrix, class RowNode, class ColNode>
-    void assembleImpl(Context const& context,
-                      QuadratureRule const& quad,
-                      ElementMatrix& elementMatrix,
-                      RowNode const& rowNode, ColNode const& colNode)
-    {
-      assembleImpl(context, quad, elementMatrix, rowNode, colNode,
-        std::is_same<FiniteElementType_t<RowNode>, FiniteElementType_t<ColNode>>{});
-    }
-
-    // local finite elements are the same for rowNode and colNode
-    template <class QuadratureRule, class ElementMatrix, class RowNode, class ColNode>
-    void assembleImpl(Context const& context,
-                      QuadratureRule const& quad,
-                      ElementMatrix& elementMatrix,
-                      RowNode const& rowNode, ColNode const& colNode,
-                      std::true_type /*sameFE*/)
-    {
-      if (rowNode.treeIndex() == colNode.treeIndex())
-        op_.calculateElementMatrix(
-          context, quad, elementMatrix, rowNode, colNode, std::true_type{}, std::true_type{});
-      else
-        op_.calculateElementMatrix(
-          context, quad, elementMatrix, rowNode, colNode, std::true_type{}, std::false_type{});
-    }
-
-    // local finite elements are different for rowNode and colNode
-    template <class QuadratureRule, class ElementMatrix, class RowNode, class ColNode>
+    template <class Context, class RowNode, class ColNode, class ElementMatrix>
     void assembleImpl(Context const& context,
-                      QuadratureRule const& quad,
-                      ElementMatrix& elementMatrix,
                       RowNode const& rowNode, ColNode const& colNode,
-                      std::false_type /*sameFE*/)
+                      ElementMatrix& elementMatrix)
     {
-      op_.calculateElementMatrix(
-        context, quad, elementMatrix, rowNode, colNode, std::false_type{}, std::false_type{});
+      op_.calculateElementMatrix(context, rowNode, colNode, elementMatrix);
     }
 
-
     // vector assembling
-    template <class QuadratureRule, class ElementVector, class Node>
+    template <class Context, class Node, class ElementVector>
     void assembleImpl(Context const& context,
-                      QuadratureRule const& quad,
-                      ElementVector& elementVector,
-                      Node const& node)
+                      Node const& node,
+                      ElementVector& elementVector)
     {
-      op_.calculateElementVector(context, quad, elementVector, node);
+      op_.calculateElementVector(context, node, elementVector);
     }
 
 #endif // DOXYGEN
 
-
   public:
 
+    /// return the bound entity (of codim 0)
     Element const& getElement() const
     {
       assert( element_ );
       return *element_;
     }
 
+    /// return the geometry of the bound element
     Geometry const& getGeometry() const
     {
       assert( geometry_ );
       return *geometry_;
     }
 
-    decltype(auto) getLocalGeometry(LocalContext const& context) const
-    {
-      return getLocalGeometry(context, std::is_same<LocalContext, Element>{});
-    }
-
-
-  private: // implementation detail
-
-    // localGeometry for Entities
-    Geometry const& getLocalGeometry(Element const& element, std::true_type) const
-    {
-      return *geometry_;
-    }
-
-    // localGeometry for intersections
-    LocalGeometry getLocalGeometry(LocalContext const& intersection, std::false_type) const
-    {
-      return intersection.geometryInInside();
-    }
-
 
   private:
 
diff --git a/src/amdis/LocalAssemblerBase.hpp b/src/amdis/LocalAssemblerBase.hpp
index 8ea0af9b1dc1d9bb082d0cd199d4b7d7a9d628c9..50892963a99b8884b41bee3a595cd7ab8826f675 100644
--- a/src/amdis/LocalAssemblerBase.hpp
+++ b/src/amdis/LocalAssemblerBase.hpp
@@ -4,41 +4,21 @@
 #include <memory>
 #include <type_traits>
 
+#include <amdis/ContextGeometry.hpp>
 #include <amdis/common/ConceptsBase.hpp>
 #include <amdis/common/TypeDefs.hpp>
 #include <amdis/utility/TreeData.hpp>
 
 namespace AMDiS
 {
-  namespace Impl
-  {
-    template <class E, class = Void_t<>>
-    struct ContextImpl
-    {
-      using Entity = E;
-      using Geometry = typename E::Geometry;
-    };
-
-    // specialization for intersections
-    template <class I>
-    struct ContextImpl<I, Void_t<decltype(std::declval<I>().inside())>>
-    {
-      using Entity = typename I::Entity;
-      using Geometry = typename I::LocalGeometry;
-    };
-
-  } // end namespace Impl
-
-
   /// Abstract base-class of a \ref LocalAssembler
   template <class LocalContext, class... Nodes>
   class LocalAssemblerBase
   {
   public:
 
-    using Element = typename Impl::ContextImpl<LocalContext>::Entity;
+    using Element = typename Impl::ContextTypes<LocalContext>::Entity;
     using Geometry = typename Element::Geometry;
-    using LocalGeometry = typename Impl::ContextImpl<LocalContext>::Geometry;
 
     static constexpr int numNodes = sizeof...(Nodes);
     static_assert( numNodes == 1 || numNodes == 2,
@@ -57,9 +37,9 @@ namespace AMDiS
     virtual void unbind() = 0;
 
     /// Assemble an element matrix or element vector on the test- (and trial-) function node(s)
-    virtual bool assemble(LocalContext const& localContext,
-                          ElementMatrixVector& elementMatrixVector,
-                          Nodes const&... nodes) = 0;
+    virtual void assemble(LocalContext const& localContext,
+                          Nodes const&... nodes,
+                          ElementMatrixVector& elementMatrixVector) = 0;
   };
 
 
@@ -95,7 +75,7 @@ namespace AMDiS
         return element.empty() && boundary.empty() && intersection.empty();
       }
 
-      bool assemble(bool flag) const
+      bool doAssemble(bool flag) const
       {
         return flag && (!assembled || changing);
       }
diff --git a/src/amdis/LocalOperator.hpp b/src/amdis/LocalOperator.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..5f4ac34cdbdb34b2dbd3249f4e0cc8858eb3b101
--- /dev/null
+++ b/src/amdis/LocalOperator.hpp
@@ -0,0 +1,197 @@
+#pragma once
+
+#include <cassert>
+#include <type_traits>
+
+#include <amdis/GridFunctions.hpp>
+#include <amdis/common/Utility.hpp>
+#include <amdis/utility/FiniteElementType.hpp>
+
+namespace AMDiS
+{
+  /**
+   * \addtogroup operators
+   * @{
+   **/
+
+  /// \brief The main implementation of an operator to be used in a \ref LocalAssembler.
+  /**
+   * An Operator that takes the element it is assembled on.
+   **/
+  template <class Derived>
+  class LocalOperator
+  {
+  public:
+
+    /// Initialize the local operator on the current gridView
+    template <class GridView>
+    void init(GridView const& gridView)
+    {
+      derived().init_impl(gridView);
+    }
+
+    /// \brief Binds operator to `element` and `geometry`.
+    /**
+     * Binding an operator to the currently visited element in grid traversal.
+     * Since all operators need geometry information, the `Element::Geometry`
+     * object `geometry` is created once, during grid traversal, and passed in.
+     *
+     * By default, it binds the \ref localFct_ to the `element`.
+     **/
+    template <class Element, class Geometry>
+    void bind(Element const& element, Geometry const& geometry)
+    {
+      if (!bound_) {
+        isSimplex_ = geometry.type().isSimplex();
+        isAffine_ = geometry.affine();
+        bound_ = true;
+      }
+
+      derived().bind_impl(element, geometry);
+    }
+
+    /// Unbinds operator from element.
+    void unbind()
+    {
+      derived().unbind_impl();
+      bound_ = false;
+    }
+
+    /// \brief Assemble a local element matrix on the element that is bound.
+    /**
+     * \param context Container for geometry related data, \ref ContextGeometry
+     * \param rowNode The node of the test-basis
+     * \param colNode The node of the trial-basis
+     * \param elementMatrix The output element matrix
+     **/
+    template <class Context, class RowNode, class ColNode, class ElementMatrix>
+    void calculateElementMatrix(Context const& context,
+                                RowNode const& rowNode,
+                                ColNode const& colNode,
+                                ElementMatrix& elementMatrix)
+    {
+      derived().getElementMatrix(context, rowNode, colNode, elementMatrix);
+    }
+
+    /// \brief Assemble a local element vector on the element that is bound.
+    /**
+     * \param context Container for geometry related data \ref ContextGeometry
+     * \param node The node of the test-basis
+     * \param elementVector The output element vector
+     **/
+    template <class Context, class Node, class ElementVector>
+    void calculateElementVector(Context const& context,
+                                Node const& node,
+                                ElementVector& elementVector)
+    {
+      derived().getElementVector(context, node, elementVector);
+    }
+
+
+    Derived      & derived()       { return static_cast<Derived&>(*this); }
+    Derived const& derived() const { return static_cast<Derived const&>(*this); }
+
+
+  protected:
+
+    // default implementation. Can be overridden in the derived classes
+    template <class GridView>
+    void init_impl(GridView const& /*gridView*/) {}
+
+    // default implementation. Can be overridden in the derived classes
+    template <class Element, class Geometry>
+    void bind_impl(Element const& /*element*/, Geometry const& /*geometry*/) {}
+
+    // default implementation. Can be overridden in the derived classes
+    void unbind_impl() {}
+
+
+    template <class Context, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrix(Context const& context,
+                          RowNode const& rowNode,
+                          ColNode const& colNode,
+                          ElementMatrix& elementMatrix)
+    {
+      error_exit("Needs to be implemented by derived class!");
+    }
+
+    template <class Context, class Node, class ElementVector>
+    void getElementVector(Context const& contextGeometry,
+                          Node const& node,
+                          ElementVector& elementVector)
+    {
+      error_exit("Needs to be implemented by derived class!");
+    }
+
+    /// \brief Return the quadrature degree for a matrix operator.
+    /**
+     * The quadrature degree that is necessary, to integrate the expression
+     * multiplied with (possibly derivatives of) shape functions. Thus it needs
+     * the order of derivatives, this operator implements.
+     **/
+    template <class RowNode, class ColNode>
+    int getDegree(int order, int coeffDegree, RowNode const& rowNode, ColNode const& colNode) const
+    {
+      assert( bound_ );
+      test_warning(coeffDegree >= 0,
+        "polynomial order of coefficient function not determined. Use order 4 by default.");
+
+      int psiDegree = polynomialDegree(rowNode);
+      int phiDegree = polynomialDegree(colNode);
+
+      int degree = psiDegree + phiDegree + (coeffDegree >= 0 ? coeffDegree : 4);
+      if (isSimplex_)
+        degree -= order;
+      if (isAffine_)
+        degree += 1; // oder += (order+1)
+
+      return degree;
+    }
+
+    /// \brief Return the quadrature degree for a vector operator.
+    /**
+     * The quadrature degree that is necessary, to integrate the expression
+     * multiplied with (possibly derivatives of) shape functions. Thus it needs
+     * the order of derivatives, this operator implements.
+     **/
+    template <class Node>
+    int getDegree(int order, int coeffDegree, Node const& node) const
+    {
+      assert( bound_ );
+      test_warning(coeffDegree >= 0,
+        "polynomial order of coefficient function not determined. Use order 4 by default.");
+
+      int psiDegree = polynomialDegree(node);
+
+      int degree = psiDegree + (coeffDegree >= 0 ? coeffDegree : 4);
+      if (isSimplex_)
+        degree -= order;
+      if (isAffine_)
+        degree += 1; // oder += (order+1)
+
+      return degree;
+    }
+
+  protected:
+
+    bool isSimplex_ = false;    //< the bound element is a simplex
+    bool isAffine_ = false;     //< the bound geometry is affine
+    bool bound_ = false;        //< an element is bound to the operator
+  };
+
+
+  /// Generate an \ref GridFunctionOperator from a PreOperator.
+  template <class Derived, class GridView>
+  auto makeLocalOperator(LocalOperator<Derived> const& localOp, GridView const& /*gridView*/)
+  {
+    return localOp.derived();
+  }
+
+  /// Generate a shared_ptr to \ref GridFunctionOperator from a PreOperator.
+  template <class Derived, class GridView>
+  auto makeLocalOperatorPtr(LocalOperator<Derived> const& localOp, GridView const& /*gridView*/)
+  {
+    return std::make_shared<Derived>(localOp.derived());
+  }
+
+} // end namespace AMDiS
diff --git a/src/amdis/ProblemStat.inc.hpp b/src/amdis/ProblemStat.inc.hpp
index 263b5db3d35308019eb2b2eb6146853381c79815..37d72c9472bfcf4a7536291307b9f6ac7fae6a9e 100644
--- a/src/amdis/ProblemStat.inc.hpp
+++ b/src/amdis/ProblemStat.inc.hpp
@@ -176,7 +176,7 @@ void ProblemStat<Traits>::addMatrixOperator(
   auto i = child(localView_->tree(), makeTreePath(row));
   auto j = child(localView_->tree(), makeTreePath(col));
 
-  auto op = makeGridOperator(preOp, globalBasis_->gridView());
+  auto op = makeLocalOperator(preOp, globalBasis_->gridView());
   auto localAssembler = makeLocalAssemblerPtr<Element>(std::move(op), i, j);
 
   matrixOperators_[i][j].element.push_back({localAssembler, factor, estFactor});
@@ -201,7 +201,7 @@ void ProblemStat<Traits>::addMatrixOperator(
   auto j = child(localView_->tree(), makeTreePath(col));
   using Intersection = typename GridView::Intersection;
 
-  auto op = makeGridOperator(preOp, globalBasis_->gridView());
+  auto op = makeLocalOperator(preOp, globalBasis_->gridView());
   auto localAssembler = makeLocalAssemblerPtr<Intersection>(std::move(op), i, j);
 
   matrixOperators_[i][j].boundary.push_back({localAssembler, factor, estFactor, b});
@@ -221,7 +221,7 @@ void ProblemStat<Traits>::addVectorOperator(
 
   auto i = child(localView_->tree(), makeTreePath(path));
 
-  auto op = makeGridOperator(preOp, globalBasis_->gridView());
+  auto op = makeLocalOperator(preOp, globalBasis_->gridView());
   auto localAssembler = makeLocalAssemblerPtr<Element>(std::move(op), i);
 
   rhsOperators_[i].element.push_back({localAssembler, factor, estFactor});
@@ -243,7 +243,7 @@ void ProblemStat<Traits>::addVectorOperator(
   auto i = child(localView_->tree(), makeTreePath(path));
   using Intersection = typename GridView::Intersection;
 
-  auto op = makeGridOperator(preOp, globalBasis_->gridView());
+  auto op = makeLocalOperator(preOp, globalBasis_->gridView());
   auto localAssembler = makeLocalAssemblerPtr<Intersection>(std::move(op), i);
 
   rhsOperators_[i].boundary.push_back({localAssembler, factor, estFactor, b});
diff --git a/src/amdis/QuadratureFactory.hpp b/src/amdis/QuadratureFactory.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..2de45d30e36863b6e87d45ad18a62c2d04c856e0
--- /dev/null
+++ b/src/amdis/QuadratureFactory.hpp
@@ -0,0 +1,197 @@
+#pragma once
+
+#include <dune/common/hybridutilities.hh>
+#include <dune/common/std/type_traits.hh>
+#include <dune/geometry/quadraturerules.hh>
+#include <dune/geometry/type.hh>
+
+#include <amdis/GridFunctions.hpp>
+
+namespace AMDiS
+{
+  /// Base class for quadrature factories for localFunctions
+  template <class ctype, int dimension, class LocalFunction>
+  class QuadratureFactory
+  {
+  protected:
+    using QuadratureRule = Dune::QuadratureRule<ctype, dimension>;
+
+  public:
+    virtual ~QuadratureFactory() {}
+
+    /// Bind the rule to a localFunction
+    virtual void bind(LocalFunction const& localFct)
+    {}
+
+    /// Return the quadrature order of the coefficient relates to the localFunction
+    virtual int order() const
+    {
+      return 0;
+    }
+
+    /// Construct a quadrature rule, based on the local geometry-type and the
+    /// polynomial degree of the integrand function.
+    virtual QuadratureRule const& rule(Dune::GeometryType const& type, int degree) const
+    {
+      using QuadratureRules = Dune::QuadratureRules<ctype, dimension>;
+      return QuadratureRules::rule(type, degree);
+    }
+  };
+
+
+  template <class LF>
+  using HasLocalFunctionOrder = decltype( order(std::declval<LF>()) );
+
+
+  /// \brief Factory for quadrature rule, that calculates the coefficient order from
+  /// a localFunction passed to the bind method.
+  template <class ctype, int dimension, class LocalFunction>
+  class QuadFactoryFromLocalFunction
+      : public QuadratureFactory<ctype, dimension, LocalFunction>
+  {
+    using Concept = Dune::Std::is_detected<HasLocalFunctionOrder, LocalFunction>;
+    static_assert(Concept::value,
+      "Polynomial order of GridFunction can not be extracted. Provide an explicit order parameter instead.");
+
+  public:
+    virtual void bind(LocalFunction const& localFct) final
+    {
+      order_ = Dune::Hybrid::ifElse(Concept{},
+        [&](auto id) { return AMDiS::order(id(localFct)); },
+        [] (auto)    { return -1; });
+    }
+
+    virtual int order() const final { return order_; }
+
+  private:
+    int order_ = -1;
+  };
+
+  struct PreQuadFactoryFromLocalFunction {};
+
+  auto makePreQuadratureFactory()
+  {
+    return PreQuadFactoryFromLocalFunction{};
+  }
+
+  /// Generator for \ref QuadFactoryFromLocalFunction. \relates QuadFactoryFromLocalFunction
+  template <class ctype, int dimension, class LocalFunction>
+  auto makeQuadratureFactory(PreQuadFactoryFromLocalFunction const& /*pre*/)
+  {
+    return QuadFactoryFromLocalFunction<ctype, dimension, LocalFunction>{};
+  }
+
+  template <class ctype, int dimension, class LocalFunction>
+  auto makeQuadratureFactoryPtr(PreQuadFactoryFromLocalFunction const& /*pre*/)
+  {
+    using Factory = QuadFactoryFromLocalFunction<ctype, dimension, LocalFunction>;
+    return std::shared_ptr<Factory>(new Factory{});
+  }
+
+
+
+  /// \brief Factory for quadrature rule, that takes to order of the localFunction
+  /// and a quadrature-type
+  template <class ctype, int dimension, class LocalFunction>
+  class QuadFactoryFromOrder
+      : public QuadratureFactory<ctype, dimension, LocalFunction>
+  {
+    using Super = QuadratureFactory<ctype, dimension, LocalFunction>;
+    using QuadratureRule = typename Super::QuadratureRule;
+
+  public:
+    /// Constructor. Takes the order of the localCoefficient and a quadrature-type
+    QuadFactoryFromOrder(int order, Dune::QuadratureType::Enum qt)
+      : order_(order)
+      , qt_(qt)
+    {}
+
+    virtual int order() const final { return order_; }
+
+    virtual QuadratureRule const& rule(Dune::GeometryType const& type, int degree) const final
+    {
+      using QuadratureRules = Dune::QuadratureRules<ctype, dimension>;
+      return QuadratureRules::rule(type, degree, qt_);
+    }
+
+  private:
+    int order_;
+    Dune::QuadratureType::Enum qt_;
+  };
+
+  struct PreQuadFactoryFromOrder
+  {
+    int order;
+    Dune::QuadratureType::Enum qt;
+  };
+
+  auto makePreQuadratureFactory(int order, Dune::QuadratureType::Enum qt = Dune::QuadratureType::GaussLegendre)
+  {
+    return PreQuadFactoryFromOrder{order, qt};
+  }
+
+  /// Generator for \ref QuadFactoryFromOrder. \relates QuadFactoryFromOrder
+  template <class ctype, int dimension, class LocalFunction>
+  auto makeQuadratureFactory(PreQuadFactoryFromOrder const& pre)
+  {
+    return QuadFactoryFromOrder<ctype, dimension, LocalFunction>{pre.order, pre.qt};
+  }
+
+  template <class ctype, int dimension, class LocalFunction>
+  auto makeQuadratureFactoryPtr(PreQuadFactoryFromOrder const& pre)
+  {
+    using Factory = QuadFactoryFromOrder<ctype, dimension, LocalFunction>;
+    return std::make_shared<Factory>(pre.order, pre.qt);
+  }
+
+
+
+  /// \brief Factory for quadrature rule, that is based on an existing rule
+  template <class ctype, int dimension, class LocalFunction>
+  class QuadFactoryFromRule
+      : public QuadratureFactory<ctype, dimension, LocalFunction>
+  {
+    using Super = QuadratureFactory<ctype, dimension, LocalFunction>;
+    using QuadratureRule = typename Super::QuadratureRule;
+
+  public:
+    QuadFactoryFromRule(QuadratureRule const& rule)
+      : rule_(rule)
+    {}
+
+    virtual QuadratureRule const& rule(Dune::GeometryType const& /*type*/, int /*degree*/) const final
+    {
+      return rule_;
+    }
+
+  private:
+    QuadratureRule const& rule_;
+  };
+
+  template <class QuadRule>
+  struct PreQuadFactoryFromRule
+  {
+    QuadRule const& rule;
+  };
+
+  template <class ctype, int dimension>
+  auto makePreQuadratureFactory(Dune::QuadratureRule<ctype, dimension> const& rule)
+  {
+    return PreQuadFactoryFromRule<Dune::QuadratureRule<ctype, dimension>>{rule};
+  }
+
+  /// Generator for \ref QuadFactoryFromRule. \relates QuadFactoryFromRule
+  template <class ctype, int dimension, class LocalFunction, class QuadRule>
+  auto makeQuadratureFactory(PreQuadFactoryFromRule<QuadRule> const& pre)
+  {
+    return QuadFactoryFromRule<ctype, dimension, LocalFunction>{pre.rule};
+  }
+
+  template <class ctype, int dimension, class LocalFunction, class QuadRule>
+  auto makeQuadratureFactoryPtr(PreQuadFactoryFromRule<QuadRule> const& pre)
+  {
+    using Factory = QuadFactoryFromRule<ctype, dimension, LocalFunction>;
+    return std::make_shared<Factory>(pre.rule);
+  }
+
+} // end namespace AMDiS
diff --git a/src/amdis/assembler/ConvectionDiffusionOperator.hpp b/src/amdis/assembler/ConvectionDiffusionOperator.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..78477656f758cac2bf71a5176af80dc14aca0fa3
--- /dev/null
+++ b/src/amdis/assembler/ConvectionDiffusionOperator.hpp
@@ -0,0 +1,207 @@
+#pragma once
+
+#include <type_traits>
+
+#include <amdis/LocalOperator.hpp>
+#include <amdis/Output.hpp>
+#include <amdis/common/Utility.hpp>
+#include <amdis/common/ValueCategory.hpp>
+
+namespace AMDiS
+{
+  /**
+   * \addtogroup operators
+   * @{
+   **/
+
+  /// convection-diffusion operator
+  /// <A*grad(u),grad(v)> - <b*u, grad(v)> + <c*u, v> = <f, v> (conserving) or
+  /// <A*grad(u),grad(v)> + <b*grad(u), v> + <c*u, v> = <f, v> (non conserving)
+  template <class GridFctA, class GridFctB, class GridFctC, class GridFctF, bool conserving = true>
+  class ConvectionDiffusionOperator
+      : public LocalOperatorBase<ConvectionDiffusionOperator<GridFctA, GridFctB, GridFctC, GridFctF, conserving>>
+  {
+    using A_range_type = typename GridFctA::Range;
+    static_assert( Category::Scalar<A_range_type> || Category::Matrix<A_range_type>,
+      "Expression A(x) must be of scalar or matrix type." );
+    using b_range_type = typename GridFctB::Range;
+    static_assert( Category::Scalar<b_range_type> || Category::Vector<b_range_type>,
+      "Expression b(x) must be of scalar or vector type." );
+    using c_range_type = typename GridFctC::Range;
+    static_assert( Category::Scalar<c_range_type>,
+      "Expression c(x) must be of scalar type." );
+    using f_range_type = typename GridFctF::Range;
+    static_assert( Category::Scalar<f_range_type>,
+      "Expression f(x) must be of scalar type." );
+
+  public:
+    ConvectionDiffusionOperator(GridFctA const& gridFctA, GridFctB const& gridFctB,
+                                GridFctC const& gridFctC, GridFctF const& gridFctF,
+                                bool_t<conserving> = {})
+      : gridFctA_(gridFctA)
+      , gridFctB_(gridFctB)
+      , gridFctC_(gridFctC)
+      , gridFctF_(gridFctF)
+    {}
+
+    template <class Context, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrix(Context const& context,
+                          RowNode const& rowNode, ColNode const& colNode,
+                          ElementMatrix& elementMatrix)
+    {
+      static_assert(RowNode::isLeaf && ColNode::isLeaf,
+        "Operator can be applied to Leaf-Nodes only." );
+
+      static_assert(std::is_same<FiniteElementType_t<RowNode>, FiniteElementType_t<ColNode>>{},
+        "Galerkin operator requires equal finite elements for test and trial space." );
+
+      using LocalBasisType = typename FiniteElementType_t<RowNode>::Traits::LocalBasisType;
+
+      using RangeType = typename LocalBasisType::Traits::RangeType;
+      using RangeFieldType = typename LocalBasisType::Traits::RangeFieldType;
+      using JacobianType = typename LocalBasisType::Traits::JacobianType;
+
+      auto localFctA = localFunction(gridFctA_); localFctA.bind(context.element());
+      auto localFctB = localFunction(gridFctB_); localFctB.bind(context.element());
+      auto localFctC = localFunction(gridFctC_); localFctC.bind(context.element());
+
+      auto const& localFE = rowNode.finiteElement();
+
+      int quadDeg = std::max({this->getDegree(2,coeffOrder(localFctA),rowNode,colNode),
+                              this->getDegree(1,coeffOrder(localFctB),rowNode,colNode),
+                              this->getDegree(0,coeffOrder(localFctC),rowNode,colNode)});
+
+      using QuadratureRules = Dune::QuadratureRules<typename Context::Geometry::ctype, Context::dimension>;
+      auto const& quad = QuadratureRules::rule(context.type(), quadDeg);
+
+      for (auto const& qp : quad) {
+        // Position of the current quadrature point in the reference element
+        decltype(auto) local = context.local(qp.position());
+
+        // The transposed inverse Jacobian of the map from the reference element to the element
+        const auto jacobian = context.geometry().jacobianInverseTransposed(local);
+
+        // The multiplicative factor in the integral transformation formula
+        const auto factor = context.integrationElement(qp.position()) * qp.weight();
+
+        // the values of the shape functions on the reference element at the quadrature point
+        thread_local std::vector<RangeType> shapeValues;
+        localFE.localBasis().evaluateFunction(local, shapeValues);
+
+        // The gradients of the shape functions on the reference element
+        thread_local std::vector<JacobianType> shapeGradients;
+        localFE.localBasis().evaluateJacobian(local, shapeGradients);
+
+        // Compute the shape function gradients on the real element
+        thread_local std::vector<Dune::FieldVector<RangeFieldType,Context::dow> > gradients(shapeGradients.size());
+
+        for (std::size_t i = 0; i < gradients.size(); ++i)
+          jacobian.mv(shapeGradients[i][0], gradients[i]);
+
+        const auto A = localFctA(local);
+        const auto b = localFctB(local);
+        const auto c = localFctC(local);
+
+        IF_CONSTEXPR(conserving) {
+          for (std::size_t i = 0; i < localFE.size(); ++i) {
+            const int local_i = rowNode.localIndex(i);
+            const auto gradAi = A * gradients[i];
+            const auto gradBi = b * gradients[i];
+            for (std::size_t j = 0; j < localFE.size(); ++j) {
+              const int local_j = colNode.localIndex(j);
+              elementMatrix[local_i][local_j] += (dot(gradAi, gradients[j]) + (c * shapeValues[i] - gradBi) * shapeValues[j]) * factor;
+            }
+          }
+        } else {
+          for (std::size_t i = 0; i < localFE.size(); ++i) {
+            const int local_i = rowNode.localIndex(i);
+            const auto grad_i = A * gradients[i];
+            grad_i += b * shapeValues[i];
+            for (std::size_t j = 0; j < localFE.size(); ++j) {
+              const int local_j = colNode.localIndex(j);
+              elementMatrix[local_i][local_j] += (dot(grad_i, gradients[j]) + c * shapeValues[i] * shapeValues[j]) * factor;
+            }
+          }
+        }
+      }
+
+      localFctA.unbind();
+      localFctB.unbind();
+      localFctC.unbind();
+    }
+
+
+    template <class Context, class Node, class ElementVector>
+    void getElementVector(Context const& context,
+                          Node const& node,
+                          ElementVector& elementVector)
+    {
+      static_assert(Node::isLeaf
+        "Operator can be applied to Leaf-Nodes only." );
+
+      using RangeType = typename FiniteElementType_t<RowNode>::Traits::LocalBasisType::Traits::RangeType;
+
+      auto localFctF = localFunction(gridFctF_); localFctF.bind(context.element());
+
+      auto const& localFE = node.finiteElement();
+
+      int quad_order = this->getDegree(0,coeffOrder(localFctF),node);
+
+      using QuadratureRules = Dune::QuadratureRules<typename Context::Geometry::ctype, Context::dimension>;
+      auto const& quad = QuadratureRules::rule(context.type(), quad_order);
+
+      for (auto const& qp : quad) {
+        // Position of the current quadrature point in the reference element
+        decltype(auto) local = context.local(qp.position());
+
+        // The multiplicative factor in the integral transformation formula
+        const auto factor = context.integrationElement(qp.position()) * qp.weight();
+
+        // the values of the shape functions on the reference element at the quadrature point
+        std::vector<RangeType> shapeValues;
+        localFE.localBasis().evaluateFunction(local, shapeValues);
+
+        const auto f = localFctF(local);
+
+        for (std::size_t i = 0; i < localFE.size(); ++i) {
+          const int local_i = rowNode.localIndex(i);
+          elementVector[local_i][local_j] += f * shapeValues[i] * factor;
+        }
+      }
+
+      localFctF.unbind();
+    }
+
+  private:
+
+    template <class LF>
+    using HasLocalFunctionOrder = decltype( order(std::declval<LF>()) );
+
+    template <class LocalFct>
+    int coeffOrder(LocalFct const& localFct)
+    {
+      using Concept = Dune::Std::is_detected<HasLocalFunctionOrder, LocalFunction>;
+      return Dune::Hybrid::ifElse(Concept{},
+        [&](auto id) { return order(id(localFct)); },
+        [] (auto)    { return 0; });
+    }
+
+  private:
+    GridFctA gridFctA;
+    GridFctB gridFctB;
+    GridFctC gridFctC;
+    GridFctF gridFctF;
+  };
+
+
+  template <class GridFctA, class GridFctB, class GridFctC, class GridFctF, bool conserving = true>
+  auto convectionDiffusion(GridFctA const& gridFctA, GridFctB const& gridFctB,
+                           GridFctC const& gridFctC, GridFctF const& gridFctF,
+                           bool_t<conserving> = {})
+  {
+    return ConvectionDiffusionOperator<GridFctA, GridFctB, GridFctC, GridFctF, conserving>{gridFctA, gridFctB, gridFctC, gridFctF};
+  }
+
+  /** @} **/
+
+} // end namespace AMDiS
diff --git a/src/amdis/assembler/FirstOrderDivTestvecTrial.hpp b/src/amdis/assembler/FirstOrderDivTestvecTrial.hpp
index ecdcc7377f3b5094bd67d5fb738e97cc257029cb..f4524fd0457525b489e43b8045fc9bf33cd4cd60 100644
--- a/src/amdis/assembler/FirstOrderDivTestvecTrial.hpp
+++ b/src/amdis/assembler/FirstOrderDivTestvecTrial.hpp
@@ -18,34 +18,19 @@ namespace AMDiS
 
 
   /// first-order operator \f$ \langle\nabla\cdot\Psi, c\,\phi\rangle \f$
-  template <class GridFct, class QuadCreator>
-  class GridFunctionOperator<tag::divtestvec_trial, GridFct, QuadCreator>
-      : public GridFunctionOperator<tag::test_divtrialvec, GridFct, QuadCreator>
+  template <class GridFct>
+  class GridFunctionOperator<tag::divtestvec_trial, GridFct>
+      : public GridFunctionOperatorTransposed<GridFunctionOperator<tag::divtestvec_trial, GridFct>,
+                                              GridFunctionOperator<tag::test_divtrialvec, GridFct>>
   {
-    using Transposed = GridFunctionOperator<tag::test_divtrialvec, GridFct, QuadCreator>;
+    using Self = GridFunctionOperator;
+    using Transposed = GridFunctionOperator<tag::test_divtrialvec, GridFct>;
+    using Super = GridFunctionOperatorTransposed<Self, Transposed>;
 
   public:
-    GridFunctionOperator(tag::divtestvec_trial, GridFct const& expr, QuadCreator const& quadCreator)
-      : Transposed(tag::test_divtrialvec{}, expr, quadCreator)
+    GridFunctionOperator(tag::divtestvec_trial, GridFct const& expr)
+      : Super(tag::test_divtrialvec{}, expr)
     {}
-
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode, bool sameFE, bool sameNode>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& rowNode,
-                                ColNode const& colNode,
-                                std::integral_constant<bool, sameFE> flagSameFE,
-                                std::integral_constant<bool, sameNode> flagSameNode)
-    {
-      static_assert(RowNode::isPower && ColNode::isLeaf,
-        "ColNode must be Leaf-Node and RowNode must be a Power-Node.");
-
-      auto elementMatrixTransposed = trans(elementMatrix);
-      Transposed::calculateElementMatrix(
-        context, quad, elementMatrixTransposed, colNode, rowNode, flagSameFE, flagSameNode);
-    }
   };
 
   /** @} **/
diff --git a/src/amdis/assembler/FirstOrderGradTestTrial.hpp b/src/amdis/assembler/FirstOrderGradTestTrial.hpp
index 1ca7e92a1d81e7651ebb98657170df40933e495f..eee21e9d8e3e5cc57e1d48ab5ca1629978b2f91c 100644
--- a/src/amdis/assembler/FirstOrderGradTestTrial.hpp
+++ b/src/amdis/assembler/FirstOrderGradTestTrial.hpp
@@ -18,31 +18,19 @@ namespace AMDiS
 
 
   /// first-order operator \f$ \langle\nabla\psi, \mathbf{b}\,\phi\rangle \f$
-  template <class GridFct, class QuadCreator>
-  class GridFunctionOperator<tag::gradtest_trial, GridFct, QuadCreator>
-      : public GridFunctionOperator<tag::test_gradtrial, GridFct, QuadCreator>
+  template <class GridFct>
+  class GridFunctionOperator<tag::gradtest_trial, GridFct>
+      : public GridFunctionOperatorTransposed<GridFunctionOperator<tag::gradtest_trial, GridFct>,
+                                              GridFunctionOperator<tag::test_gradtrial, GridFct>>
   {
-    using Transposed = GridFunctionOperator<tag::test_gradtrial, GridFct, QuadCreator>;
+    using Self = GridFunctionOperator;
+    using Transposed = GridFunctionOperator<tag::test_gradtrial, GridFct>;
+    using Super = GridFunctionOperatorTransposed<Self, Transposed>;
 
   public:
-    GridFunctionOperator(tag::gradtest_trial, GridFct const& expr, QuadCreator const& quadCreator)
-      : Transposed(tag::test_gradtrial{}, expr, quadCreator)
+    GridFunctionOperator(tag::gradtest_trial, GridFct const& expr)
+      : Super(tag::test_gradtrial{}, expr)
     {}
-
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode, bool sameFE, bool sameNode>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& rowNode,
-                                ColNode const& colNode,
-                                std::integral_constant<bool, sameFE> flagSameFE,
-                                std::integral_constant<bool, sameNode> flagSameNode)
-    {
-      auto elementMatrixTransposed = trans(elementMatrix);
-      Transposed::calculateElementMatrix(
-        context, quad, elementMatrixTransposed, colNode, rowNode, flagSameFE, flagSameNode);
-    }
   };
 
   /** @} **/
diff --git a/src/amdis/assembler/FirstOrderGradTestTrialvec.hpp b/src/amdis/assembler/FirstOrderGradTestTrialvec.hpp
index 4d270a11bdbdc14dca7fe3bc8774435ae5228ca4..e1cf8d08a0f4c50c2f504a46bcb61840dd2e0db7 100644
--- a/src/amdis/assembler/FirstOrderGradTestTrialvec.hpp
+++ b/src/amdis/assembler/FirstOrderGradTestTrialvec.hpp
@@ -18,31 +18,19 @@ namespace AMDiS
 
 
   /// first-order operator \f$ \langle\nabla\psi, c\,\Phi\rangle \f$
-  template <class GridFct, class QuadCreator>
-  class GridFunctionOperator<tag::gradtest_trialvec, GridFct, QuadCreator>
-      : public GridFunctionOperator<tag::testvec_gradtrial, GridFct, QuadCreator>
+  template <class GridFct>
+  class GridFunctionOperator<tag::gradtest_trialvec, GridFct>
+      : public GridFunctionOperatorTransposed<GridFunctionOperator<tag::gradtest_trialvec, GridFct>,
+                                              GridFunctionOperator<tag::testvec_gradtrial, GridFct>>
   {
-    using Transposed = GridFunctionOperator<tag::testvec_gradtrial, GridFct, QuadCreator>;
+    using Self = GridFunctionOperator;
+    using Transposed = GridFunctionOperator<tag::testvec_gradtrial, GridFct>;
+    using Super = GridFunctionOperatorTransposed<Self, Transposed>;
 
   public:
-    GridFunctionOperator(tag::gradtest_trialvec, GridFct const& expr, QuadCreator const& quadCreator)
-      : Transposed(tag::testvec_gradtrial{}, expr, quadCreator)
+    GridFunctionOperator(tag::gradtest_trialvec, GridFct const& expr)
+      : Super(tag::testvec_gradtrial{}, expr)
     {}
-
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode, bool sameFE, bool sameNode>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& rowNode,
-                                ColNode const& colNode,
-                                std::integral_constant<bool, sameFE> flagSameFE,
-                                std::integral_constant<bool, sameNode> flagSameNode)
-    {
-      auto elementMatrixTransposed = trans(elementMatrix);
-      Transposed::calculateElementMatrix(
-        context, quad, elementMatrixTransposed, colNode, rowNode, flagSameFE, flagSameNode);
-    }
   };
 
   /** @} **/
diff --git a/src/amdis/assembler/FirstOrderPartialTestTrial.hpp b/src/amdis/assembler/FirstOrderPartialTestTrial.hpp
index a37fc0437933a01237f3be7ee5d712965b48e0ae..14f9a892476197566a0aaf138648e0b19b32ae1a 100644
--- a/src/amdis/assembler/FirstOrderPartialTestTrial.hpp
+++ b/src/amdis/assembler/FirstOrderPartialTestTrial.hpp
@@ -21,31 +21,19 @@ namespace AMDiS
 
 
   /// first-order operator \f$ \langle\partial_i\psi, c\,\phi\rangle \f$
-  template <class GridFct, class QuadCreator>
-  class GridFunctionOperator<tag::partialtest_trial, GridFct, QuadCreator>
-      : public GridFunctionOperator<tag::test_partialtrial, GridFct, QuadCreator>
+  template <class GridFct>
+  class GridFunctionOperator<tag::partialtest_trial, GridFct>
+      : public GridFunctionOperatorTransposed<GridFunctionOperator<tag::partialtest_trial, GridFct>,
+                                              GridFunctionOperator<tag::test_partialtrial, GridFct>>
   {
-    using Transposed = GridFunctionOperator<tag::test_partialtrial, GridFct, QuadCreator>;
+    using Self = GridFunctionOperator;
+    using Transposed = GridFunctionOperator<tag::test_partialtrial, GridFct>;
+    using Super = GridFunctionOperatorTransposed<Self, Transposed>;
 
   public:
-    GridFunctionOperator(tag::partialtest_trial tag, GridFct const& expr, QuadCreator const& quadCreator)
-      : Transposed(tag::test_partialtrial{tag.comp}, expr, quadCreator)
+    GridFunctionOperator(tag::partialtest_trial tag, GridFct const& expr)
+      : Super(tag::test_partialtrial{tag.comp}, expr)
     {}
-
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode, bool sameFE, bool sameNode>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& rowNode,
-                                ColNode const& colNode,
-                                std::integral_constant<bool, sameFE> flagSameFE,
-                                std::integral_constant<bool, sameNode> flagSameNode)
-    {
-      auto elementMatrixTransposed = trans(elementMatrix);
-      Transposed::calculateElementMatrix(
-        context, quad, elementMatrixTransposed, colNode, rowNode, flagSameFE, flagSameNode);
-    }
   };
 
   /** @} **/
diff --git a/src/amdis/assembler/FirstOrderTestDivTrialvec.hpp b/src/amdis/assembler/FirstOrderTestDivTrialvec.hpp
index d571babf97040af02165c1f6674f6dd1467b64a8..8c067d84ef35d96619a42d3ffca15717395ab73e 100644
--- a/src/amdis/assembler/FirstOrderTestDivTrialvec.hpp
+++ b/src/amdis/assembler/FirstOrderTestDivTrialvec.hpp
@@ -20,28 +20,25 @@ namespace AMDiS
 
 
   /// first-order operator \f$ \langle\psi, c\,\nabla\cdot\Phi\rangle \f$
-  template <class GridFct, class QuadCreator>
-  class GridFunctionOperator<tag::test_divtrialvec, GridFct, QuadCreator>
-      : public GridFunctionOperatorBase<GridFct, QuadCreator>
+  template <class GridFct>
+  class GridFunctionOperator<tag::test_divtrialvec, GridFct>
+      : public GridFunctionOperatorBase<GridFunctionOperator<tag::test_divtrialvec, GridFct>, GridFct>
   {
-    using Super = GridFunctionOperatorBase<GridFct, QuadCreator>;
+    using Self = GridFunctionOperator;
+    using Super = GridFunctionOperatorBase<Self, GridFct>;
 
     static_assert( Category::Scalar<typename GridFct::Range>, "Expression must be of scalar type." );
 
   public:
-    GridFunctionOperator(tag::test_divtrialvec, GridFct const& expr, QuadCreator const& quadCreator)
-      : Super(expr, quadCreator, 1)
+    GridFunctionOperator(tag::test_divtrialvec, GridFct const& expr)
+      : Super(expr, 1)
     {}
 
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode, bool sameFE, bool sameNode>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& rowNode,
-                                ColNode const& colNode,
-                                std::integral_constant<bool, sameFE>,
-                                std::integral_constant<bool, sameNode>)
+    template <class Context, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrix(Context const& context,
+                          RowNode const& rowNode,
+                          ColNode const& colNode,
+                          ElementMatrix& elementMatrix)
     {
       static_assert(RowNode::isLeaf && ColNode::isPower,
         "RowNode must be Leaf-Node and ColNode must be a Power-Node.");
@@ -52,35 +49,43 @@ namespace AMDiS
       auto const& rowLocalFE = rowNode.finiteElement();
       auto const& colLocalFE = colNode.child(0).finiteElement();
 
-      std::vector<Dune::FieldMatrix<double,1,Context::dim> > referenceGradients;
+      using RowLocalBasisType = typename std::decay_t<decltype(rowLocalFE)>::Traits::LocalBasisType;
+      using ColLocalBasisType = typename std::decay_t<decltype(colLocalFE)>::Traits::LocalBasisType;
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      using RangeType = typename RowLocalBasisType::Traits::RangeType;
+      using RangeFieldType = typename ColLocalBasisType::Traits::RangeFieldType;
+      using JacobianType = typename ColLocalBasisType::Traits::JacobianType;
+
+      auto const& quad = this->getQuadratureRule(context.type(), rowNode, colNode);
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The transposed inverse Jacobian of the map from the reference element to the element
-        const auto jacobian = context.geometry.jacobianInverseTransposed(local);
+        const auto jacobian = context.geometry().jacobianInverseTransposed(local);
 
         // The multiplicative factor in the integral transformation formula
-        double factor = Super::coefficient(local) * context.integrationElement(quad[iq].position()) * quad[iq].weight();
+        const auto factor = Super::coefficient(local) * context.integrationElement(qp.position()) * qp.weight();
 
-        rowLocalFE.localBasis().evaluateFunction(local, rowShapeValues_);
+        thread_local std::vector<RangeType> shapeValues;
+        rowLocalFE.localBasis().evaluateFunction(local, shapeValues);
 
         // The gradients of the shape functions on the reference element
-        colLocalFE.localBasis().evaluateJacobian(local, referenceGradients);
+        thread_local std::vector<JacobianType> shapeGradients;
+        colLocalFE.localBasis().evaluateJacobian(local, shapeGradients);
 
         // Compute the shape function gradients on the real element
-        std::vector<Dune::FieldVector<double,Context::dow> > colGradients(referenceGradients.size());
+        thread_local std::vector<Dune::FieldVector<RangeFieldType,Context::dow> > colGradients(shapeGradients.size());
 
         for (std::size_t i = 0; i < colGradients.size(); ++i)
-          jacobian.mv(referenceGradients[i][0], colGradients[i]);
+          jacobian.mv(shapeGradients[i][0], colGradients[i]);
 
         for (std::size_t i = 0; i < rowLocalFE.size(); ++i) {
-          const int local_i = rowNode.localIndex(i);
-          double value = factor * rowShapeValues_[i];
+          const auto local_i = rowNode.localIndex(i);
+          const auto value = factor * shapeValues[i];
           for (std::size_t j = 0; j < colLocalFE.size(); ++j) {
             for (std::size_t k = 0; k < CHILDREN; ++k) {
-              const int local_kj = colNode.child(k).localIndex(j);
+              const auto local_kj = colNode.child(k).localIndex(j);
               elementMatrix[local_i][local_kj] += value * colGradients[j][k];
             }
           }
@@ -88,9 +93,6 @@ namespace AMDiS
       }
 
     }
-
-  private:
-    std::vector<Dune::FieldVector<double,1>> rowShapeValues_;
   };
 
   /** @} **/
diff --git a/src/amdis/assembler/FirstOrderTestGradTrial.hpp b/src/amdis/assembler/FirstOrderTestGradTrial.hpp
index a4ae5383cef98820a238a28ea2d6cc659c1ecf5e..017f69341836fed3038dc8074d0d0d3869f117fa 100644
--- a/src/amdis/assembler/FirstOrderTestGradTrial.hpp
+++ b/src/amdis/assembler/FirstOrderTestGradTrial.hpp
@@ -20,28 +20,25 @@ namespace AMDiS
 
 
   /// first-order operator \f$ \langle\psi, \mathbf{b}\cdot\nabla\phi\rangle \f$
-  template <class GridFct, class QuadCreator>
-  class GridFunctionOperator<tag::test_gradtrial, GridFct, QuadCreator>
-      : public GridFunctionOperatorBase<GridFct, QuadCreator>
+  template <class GridFct>
+  class GridFunctionOperator<tag::test_gradtrial, GridFct>
+      : public GridFunctionOperatorBase<GridFunctionOperator<tag::test_gradtrial, GridFct>, GridFct>
   {
-    using Super = GridFunctionOperatorBase<GridFct, QuadCreator>;
+    using Self = GridFunctionOperator;
+    using Super = GridFunctionOperatorBase<Self, GridFct>;
 
     static_assert( Category::Vector<typename GridFct::Range>, "Expression must be of vector type." );
 
   public:
-    GridFunctionOperator(tag::test_gradtrial, GridFct const& expr, QuadCreator const& quadCreator)
-      : Super(expr, quadCreator, 1)
+    GridFunctionOperator(tag::test_gradtrial, GridFct const& expr)
+      : Super(expr, 1)
     {}
 
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode, bool sameFE, bool sameNode>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& rowNode,
-                                ColNode const& colNode,
-                                std::integral_constant<bool, sameFE>,
-                                std::integral_constant<bool, sameNode>)
+    template <class Context, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrix(Context const& context,
+                          RowNode const& rowNode,
+                          ColNode const& colNode,
+                          ElementMatrix& elementMatrix)
     {
       static_assert(RowNode::isLeaf && ColNode::isLeaf,
         "Operator can be applied to Leaf-Nodes only.");
@@ -49,44 +46,48 @@ namespace AMDiS
       auto const& rowLocalFE = rowNode.finiteElement();
       auto const& colLocalFE = colNode.finiteElement();
 
-      std::vector<Dune::FieldMatrix<double,1,Context::dim> > referenceGradients;
+      using RowLocalBasisType = typename std::decay_t<decltype(rowLocalFE)>::Traits::LocalBasisType;
+      using ColLocalBasisType = typename std::decay_t<decltype(colLocalFE)>::Traits::LocalBasisType;
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      using RangeType = typename RowLocalBasisType::Traits::RangeType;
+      using RangeFieldType = typename ColLocalBasisType::Traits::RangeFieldType;
+      using JacobianType = typename ColLocalBasisType::Traits::JacobianType;
+
+      auto const& quad = this->getQuadratureRule(context.type(), rowNode, colNode);
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The transposed inverse Jacobian of the map from the reference element to the element
-        const auto jacobian = context.geometry.jacobianInverseTransposed(local);
+        const auto jacobian = context.geometry().jacobianInverseTransposed(local);
 
         // The multiplicative factor in the integral transformation formula
-        double factor = context.integrationElement(quad[iq].position()) * quad[iq].weight();
+        const auto factor = context.integrationElement(qp.position()) * qp.weight();
         const auto b = Super::coefficient(local);
 
-        rowLocalFE.localBasis().evaluateFunction(local, rowShapeValues_);
+        thread_local std::vector<RangeType> shapeValues;
+        rowLocalFE.localBasis().evaluateFunction(local, shapeValues);
 
         // The gradients of the shape functions on the reference element
-        colLocalFE.localBasis().evaluateJacobian(local, referenceGradients);
+        thread_local std::vector<JacobianType> shapeGradients;
+        colLocalFE.localBasis().evaluateJacobian(local, shapeGradients);
 
         // Compute the shape function gradients on the real element
-        std::vector<Dune::FieldVector<double,Context::dow> > colGradients(referenceGradients.size());
-
+        thread_local std::vector<Dune::FieldVector<RangeFieldType,Context::dow> > colGradients(shapeGradients.size());
         for (std::size_t i = 0; i < colGradients.size(); ++i)
-          jacobian.mv(referenceGradients[i][0], colGradients[i]);
+          jacobian.mv(shapeGradients[i][0], colGradients[i]);
 
         for (std::size_t j = 0; j < colLocalFE.size(); ++j) {
-          const int local_j = colNode.localIndex(j);
-          double value = factor * (b * colGradients[j]);
+          const auto local_j = colNode.localIndex(j);
+          const auto value = factor * (b * colGradients[j]);
           for (std::size_t i = 0; i < rowLocalFE.size(); ++i) {
-            const int local_i = rowNode.localIndex(i);
-            elementMatrix[local_i][local_j] += value * rowShapeValues_[i];
+            const auto local_i = rowNode.localIndex(i);
+            elementMatrix[local_i][local_j] += value * shapeValues[i];
           }
         }
       }
 
     }
-
-  private:
-    std::vector<Dune::FieldVector<double,1>> rowShapeValues_;
   };
 
   /** @} **/
diff --git a/src/amdis/assembler/FirstOrderTestPartialTrial.hpp b/src/amdis/assembler/FirstOrderTestPartialTrial.hpp
index 2d900f5dbb17a585903486e9ca9275a9136c29e8..6595956af6261ec4a5cacff34235ba507d643a36 100644
--- a/src/amdis/assembler/FirstOrderTestPartialTrial.hpp
+++ b/src/amdis/assembler/FirstOrderTestPartialTrial.hpp
@@ -23,29 +23,26 @@ namespace AMDiS
 
 
   /// first-order operator \f$ \langle\psi, c\,\partial_i\phi\rangle \f$
-  template <class GridFct, class QuadCreator>
-  class GridFunctionOperator<tag::test_partialtrial, GridFct, QuadCreator>
-      : public GridFunctionOperatorBase<GridFct, QuadCreator>
+  template <class GridFct>
+  class GridFunctionOperator<tag::test_partialtrial, GridFct>
+      : public GridFunctionOperatorBase<GridFunctionOperator<tag::test_partialtrial, GridFct>, GridFct>
   {
-    using Super = GridFunctionOperatorBase<GridFct, QuadCreator>;
+    using Self = GridFunctionOperator;
+    using Super = GridFunctionOperatorBase<Self, GridFct>;
 
     static_assert( Category::Scalar<typename GridFct::Range>, "Expression must be of scalar type." );
 
   public:
-    GridFunctionOperator(tag::test_partialtrial tag, GridFct const& expr, QuadCreator const& quadCreator)
-      : Super(expr, quadCreator, 1)
+    GridFunctionOperator(tag::test_partialtrial tag, GridFct const& expr)
+      : Super(expr, 1)
       , comp_(tag.comp)
     {}
 
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode, bool sameFE, bool sameNode>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& rowNode,
-                                ColNode const& colNode,
-                                std::integral_constant<bool, sameFE>,
-                                std::integral_constant<bool, sameNode>)
+    template <class Context, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrix(Context const& context,
+                          RowNode const& rowNode,
+                          ColNode const& colNode,
+                          ElementMatrix& elementMatrix)
     {
       static_assert(RowNode::isLeaf && ColNode::isLeaf,
         "Operator can be applied to Leaf-Nodes only.");
@@ -53,48 +50,54 @@ namespace AMDiS
       auto const& rowLocalFE = rowNode.finiteElement();
       auto const& colLocalFE = colNode.finiteElement();
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      using RowLocalBasisType = typename std::decay_t<decltype(rowLocalFE)>::Traits::LocalBasisType;
+      using ColLocalBasisType = typename std::decay_t<decltype(colLocalFE)>::Traits::LocalBasisType;
+
+      using RangeType = typename RowLocalBasisType::Traits::RangeType;
+      using RangeFieldType = typename ColLocalBasisType::Traits::RangeFieldType;
+      using JacobianType = typename ColLocalBasisType::Traits::JacobianType;
+
+      auto const& quad = this->getQuadratureRule(context.type(), rowNode, colNode);
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The transposed inverse Jacobian of the map from the reference element to the element
-        const auto jacobian = context.geometry.jacobianInverseTransposed(local);
+        const auto jacobian = context.geometry().jacobianInverseTransposed(local);
         assert(jacobian.M() == Context::dim);
 
         // The multiplicative factor in the integral transformation formula
-        double factor = context.integrationElement(quad[iq].position()) * quad[iq].weight();
-        double c = Super::coefficient(local);
+        const auto factor = context.integrationElement(qp.position()) * qp.weight();
+        const auto c = Super::coefficient(local);
 
-        rowLocalFE.localBasis().evaluateFunction(local, rowShapeValues_);
+        thread_local std::vector<RangeType> shapeValues;
+        rowLocalFE.localBasis().evaluateFunction(local, shapeValues);
 
         // The gradients of the shape functions on the reference element
-        std::vector<Dune::FieldMatrix<double,1,Context::dim> > referenceGradients;
-        colLocalFE.localBasis().evaluateJacobian(local, referenceGradients);
+        thread_local std::vector<JacobianType> shapeGradients;
+        colLocalFE.localBasis().evaluateJacobian(local, shapeGradients);
 
         // Compute the shape function gradients on the real element
-        std::vector<double> colPartial(referenceGradients.size());
-
+        thread_local std::vector<RangeFieldType> colPartial(shapeGradients.size());
         for (std::size_t i = 0; i < colPartial.size(); ++i) {
-          colPartial[i] = jacobian[comp_][0] * referenceGradients[i][0][0];
+          colPartial[i] = jacobian[comp_][0] * shapeGradients[i][0][0];
           for (std::size_t j = 1; j < jacobian.M(); ++j)
-            colPartial[i] += jacobian[comp_][j] * referenceGradients[i][0][j];
+            colPartial[i] += jacobian[comp_][j] * shapeGradients[i][0][j];
         }
 
         for (std::size_t j = 0; j < colLocalFE.size(); ++j) {
-          const int local_j = colNode.localIndex(j);
-          double value = factor * (c * colPartial[j]);
+          const auto local_j = colNode.localIndex(j);
+          const auto value = factor * (c * colPartial[j]);
           for (std::size_t i = 0; i < rowLocalFE.size(); ++i) {
-            const int local_i = rowNode.localIndex(i);
-            elementMatrix[local_i][local_j] += value * rowShapeValues_[i];
+            const auto local_i = rowNode.localIndex(i);
+            elementMatrix[local_i][local_j] += value * shapeValues[i];
           }
         }
       }
-
     }
 
   private:
     std::size_t comp_;
-    std::vector<Dune::FieldVector<double,1>> rowShapeValues_;
   };
 
   /** @} **/
diff --git a/src/amdis/assembler/FirstOrderTestvecGradTrial.hpp b/src/amdis/assembler/FirstOrderTestvecGradTrial.hpp
index 2bfd6d52d1bbda560b094803ea272d6325dd4de4..729edec4d6965d8458147e27627e435a6b4139d3 100644
--- a/src/amdis/assembler/FirstOrderTestvecGradTrial.hpp
+++ b/src/amdis/assembler/FirstOrderTestvecGradTrial.hpp
@@ -20,28 +20,25 @@ namespace AMDiS
 
 
   /// first-order operator \f$ \langle\Psi, c\,\nabla\phi\rangle \f$
-  template <class GridFct, class QuadCreator>
-  class GridFunctionOperator<tag::testvec_gradtrial, GridFct, QuadCreator>
-      : public GridFunctionOperatorBase<GridFct, QuadCreator>
+  template <class GridFct>
+  class GridFunctionOperator<tag::testvec_gradtrial, GridFct>
+      : public GridFunctionOperatorBase<GridFunctionOperator<tag::testvec_gradtrial, GridFct>, GridFct>
   {
-    using Super = GridFunctionOperatorBase<GridFct, QuadCreator>;
+    using Self = GridFunctionOperator;
+    using Super = GridFunctionOperatorBase<Self, GridFct>;
 
     static_assert( Category::Scalar<typename GridFct::Range>, "Expression must be of scalar type." );
 
   public:
-    GridFunctionOperator(tag::testvec_gradtrial, GridFct const& expr, QuadCreator const& quadCreator)
-      : Super(expr, quadCreator, 1)
+    GridFunctionOperator(tag::testvec_gradtrial, GridFct const& expr)
+      : Super(expr, 1)
     {}
 
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode, bool sameFE, bool sameNode>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& rowNode,
-                                ColNode const& colNode,
-                                std::integral_constant<bool, sameFE>,
-                                std::integral_constant<bool, sameNode>)
+    template <class Context, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrix(Context const& context,
+                          RowNode const& rowNode,
+                          ColNode const& colNode,
+                          ElementMatrix& elementMatrix)
     {
       static_assert(RowNode::isPower && ColNode::isLeaf,
         "ColNode must be Leaf-Node and RowNode must be a Power-Node.");
@@ -52,35 +49,42 @@ namespace AMDiS
       auto const& rowLocalFE = rowNode.child(0).finiteElement();
       auto const& colLocalFE = colNode.finiteElement();
 
-      std::vector<Dune::FieldMatrix<double,1,Context::dim> > referenceGradients;
+      using RowLocalBasisType = typename std::decay_t<decltype(rowLocalFE)>::Traits::LocalBasisType;
+      using ColLocalBasisType = typename std::decay_t<decltype(colLocalFE)>::Traits::LocalBasisType;
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      using RangeType = typename RowLocalBasisType::Traits::RangeType;
+      using RangeFieldType = typename ColLocalBasisType::Traits::RangeFieldType;
+      using JacobianType = typename ColLocalBasisType::Traits::JacobianType;
+
+      auto const& quad = this->getQuadratureRule(context.type(), rowNode, colNode);
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The transposed inverse Jacobian of the map from the reference element to the element
-        const auto jacobian = context.geometry.jacobianInverseTransposed(local);
+        const auto jacobian = context.geometry().jacobianInverseTransposed(local);
 
         // The multiplicative factor in the integral transformation formula
-        double factor = Super::coefficient(local) * context.integrationElement(quad[iq].position()) * quad[iq].weight();
+        const auto factor = Super::coefficient(local) * context.integrationElement(qp.position()) * qp.weight();
 
-        rowLocalFE.localBasis().evaluateFunction(local, rowShapeValues_);
+        thread_local std::vector<RangeType> shapeValues;
+        rowLocalFE.localBasis().evaluateFunction(local, shapeValues);
 
         // The gradients of the shape functions on the reference element
-        colLocalFE.localBasis().evaluateJacobian(local, referenceGradients);
+        thread_local std::vector<JacobianType> shapeGradients;
+        colLocalFE.localBasis().evaluateJacobian(local, shapeGradients);
 
         // Compute the shape function gradients on the real element
-        std::vector<Dune::FieldVector<double,Context::dow> > colGradients(referenceGradients.size());
-
+        thread_local std::vector<Dune::FieldVector<RangeFieldType,Context::dow> > colGradients(shapeGradients.size());
         for (std::size_t i = 0; i < colGradients.size(); ++i)
-          jacobian.mv(referenceGradients[i][0], colGradients[i]);
+          jacobian.mv(shapeGradients[i][0], colGradients[i]);
 
         for (std::size_t i = 0; i < rowLocalFE.size(); ++i) {
-          double value = factor * rowShapeValues_[i];
+          const auto value = factor * shapeValues[i];
           for (std::size_t j = 0; j < colLocalFE.size(); ++j) {
-            const int local_j = colNode.localIndex(j);
+            const auto local_j = colNode.localIndex(j);
             for (std::size_t k = 0; k < CHILDREN; ++k) {
-              const int local_ki = rowNode.child(k).localIndex(i);
+              const auto local_ki = rowNode.child(k).localIndex(i);
               elementMatrix[local_ki][local_j] += value * colGradients[j][k];
             }
           }
@@ -88,9 +92,6 @@ namespace AMDiS
       }
 
     }
-
-  private:
-    std::vector<Dune::FieldVector<double,1>> rowShapeValues_;
   };
 
   /** @} **/
diff --git a/src/amdis/assembler/SecondOrderDivTestvecDivTrialvec.hpp b/src/amdis/assembler/SecondOrderDivTestvecDivTrialvec.hpp
index 6a4a62726ae5190576326379eabe0b658732ace1..350bc58fc39cb091cd9ad8cd28e893bda4ee318c 100644
--- a/src/amdis/assembler/SecondOrderDivTestvecDivTrialvec.hpp
+++ b/src/amdis/assembler/SecondOrderDivTestvecDivTrialvec.hpp
@@ -20,70 +20,92 @@ namespace AMDiS
 
 
   /// second-order operator \f$ \langle\nabla\cdot\Psi, c\,\nabla\cdot\Phi\rangle \f$
-  template <class GridFct, class QuadCreator>
-  class GridFunctionOperator<tag::divtestvec_divtrialvec, GridFct, QuadCreator>
-      : public GridFunctionOperatorBase<GridFct, QuadCreator>
+  template <class GridFct>
+  class GridFunctionOperator<tag::divtestvec_divtrialvec, GridFct>
+      : public GridFunctionOperatorBase<GridFunctionOperator<tag::divtestvec_divtrialvec, GridFct>, GridFct>
   {
-    using Super = GridFunctionOperatorBase<GridFct, QuadCreator>;
+    using Self = GridFunctionOperator;
+    using Super = GridFunctionOperatorBase<Self, GridFct>;
 
     static_assert( Category::Scalar<typename GridFct::Range>, "Expression must be of scalar type." );
 
   public:
-    GridFunctionOperator(tag::divtestvec_divtrialvec, GridFct const& expr, QuadCreator const& quadCreator)
-      : Super(expr, quadCreator, 2)
+    GridFunctionOperator(tag::divtestvec_divtrialvec, GridFct const& expr)
+      : Super(expr, 2)
     {}
 
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode, bool sameFE, bool sameNode>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& rowNode,
-                                ColNode const& colNode,
-                                std::integral_constant<bool, sameFE>,
-                                std::integral_constant<bool, sameNode>)
+    template <class Context, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrix(Context const& context,
+                          RowNode const& rowNode,
+                          ColNode const& colNode,
+                          ElementMatrix& elementMatrix)
     {
       static_assert(RowNode::isPower && ColNode::isPower,
         "Operator can be applied to Power-Nodes only.");
 
+      const bool sameFE = std::is_same<FiniteElementType_t<RowNode>, FiniteElementType_t<ColNode>>::value;
+      const bool sameNode = rowNode.treeIndex() == colNode.treeIndex();
+
+      auto const& quad = this->getQuadratureRule(context.type(), rowNode, colNode);
+      if (sameFE && sameNode)
+        getElementMatrixOptimized(context, quad, rowNode, colNode, elementMatrix);
+      else
+        getElementMatrixStandard(context, quad, rowNode, colNode, elementMatrix);
+    }
+
+  protected:
+    template <class Context, class QuadratureRule, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrixStandard(Context const& context,
+                                  QuadratureRule const& quad,
+                                  RowNode const& rowNode,
+                                  ColNode const& colNode,
+                                  ElementMatrix& elementMatrix)
+    {
       static const std::size_t CHILDREN = RowNode::CHILDREN;
       static_assert( RowNode::CHILDREN == ColNode::CHILDREN, "" );
 
       auto const& rowLocalFE = rowNode.child(0).finiteElement();
       auto const& colLocalFE = colNode.child(0).finiteElement();
 
-      std::vector<Dune::FieldMatrix<double,1,Context::dim> > rowReferenceGradients;
-      std::vector<Dune::FieldMatrix<double,1,Context::dim> > colReferenceGradients;
+      using RowLocalBasisType = typename std::decay_t<decltype(rowLocalFE)>::Traits::LocalBasisType;
+      using ColLocalBasisType = typename std::decay_t<decltype(colLocalFE)>::Traits::LocalBasisType;
+
+      using RowFieldType = typename RowLocalBasisType::Traits::RangeFieldType;
+      using ColFieldType = typename ColLocalBasisType::Traits::RangeFieldType;
+      using RowJacobianType = typename RowLocalBasisType::Traits::JacobianType;
+      using ColJacobianType = typename ColLocalBasisType::Traits::JacobianType;
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The transposed inverse Jacobian of the map from the reference element to the element
-        const auto jacobian = context.geometry.jacobianInverseTransposed(local);
+        const auto jacobian = context.geometry().jacobianInverseTransposed(local);
 
         // The multiplicative factor in the integral transformation formula
-        double factor = Super::coefficient(local) * context.integrationElement(quad[iq].position()) * quad[iq].weight();
+        const auto factor = Super::coefficient(local) * context.integrationElement(qp.position()) * qp.weight();
 
         // The gradients of the shape functions on the reference element
-        rowLocalFE.localBasis().evaluateJacobian(local, rowReferenceGradients);
-        colLocalFE.localBasis().evaluateJacobian(local, colReferenceGradients);
+        thread_local std::vector<RowJacobianType> rowShapeGradients;
+        rowLocalFE.localBasis().evaluateJacobian(local, rowShapeGradients);
 
-        // Compute the shape function gradients on the real element
-        std::vector<Dune::FieldVector<double,Context::dow> > rowGradients(rowReferenceGradients.size());
-        std::vector<Dune::FieldVector<double,Context::dow> > colGradients(colReferenceGradients.size());
+        thread_local std::vector<ColJacobianType> colShapeGradients;
+        colLocalFE.localBasis().evaluateJacobian(local, colShapeGradients);
 
+        // Compute the shape function gradients on the real element
+        thread_local std::vector<Dune::FieldVector<RowFieldType,Context::dow> > rowGradients(rowShapeGradients.size());
         for (std::size_t i = 0; i < rowGradients.size(); ++i)
-          jacobian.mv(rowReferenceGradients[i][0], rowGradients[i]);
+          jacobian.mv(rowShapeGradients[i][0], rowGradients[i]);
 
+        thread_local std::vector<Dune::FieldVector<ColFieldType,Context::dow> > colGradients(colShapeGradients.size());
         for (std::size_t i = 0; i < colGradients.size(); ++i)
-          jacobian.mv(colReferenceGradients[i][0], colGradients[i]);
+          jacobian.mv(colShapeGradients[i][0], colGradients[i]);
 
         for (std::size_t i = 0; i < rowLocalFE.size(); ++i) {
           for (std::size_t j = 0; j < colLocalFE.size(); ++j) {
             for (std::size_t k = 0; k < CHILDREN; ++k) {
-              const int local_ki = rowNode.child(k).localIndex(i);
-              const int local_kj = colNode.child(k).localIndex(j);
+              const auto local_ki = rowNode.child(k).localIndex(i);
+              const auto local_kj = colNode.child(k).localIndex(j);
               elementMatrix[local_ki][local_kj] += factor * rowGradients[i][k] * colGradients[j][k];
             }
           }
@@ -92,52 +114,48 @@ namespace AMDiS
     }
 
 
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& node,
-                                ColNode const& /*colNode*/,
-                                std::true_type /*sameFE*/,
-                                std::true_type /*sameNode*/)
+    template <class Context, class QuadratureRule, class Node, class ElementMatrix>
+    void getElementMatrixOptimized(Context const& context,
+                                   QuadratureRule const& quad,
+                                   Node const& node,
+                                   Node const& /*colNode*/,
+                                   ElementMatrix& elementMatrix)
     {
-      static_assert( std::is_same<typename RowNode::NodeTag, Dune::TypeTree::PowerNodeTag>::value &&
-                     std::is_same<typename ColNode::NodeTag, Dune::TypeTree::PowerNodeTag>::value, "" );
-
-      static const std::size_t CHILDREN = RowNode::CHILDREN;
+      static const std::size_t CHILDREN = Node::CHILDREN;
 
       auto const& localFE = node.child(0).finiteElement();
 
-      std::vector<Dune::FieldMatrix<double,1,Context::dim> > referenceGradients;
+      using LocalBasisType = typename std::decay_t<decltype(localFE)>::Traits::LocalBasisType;
+      using RangeFieldType = typename LocalBasisType::Traits::RangeFieldType;
+      using JacobianType = typename LocalBasisType::Traits::JacobianType;
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The transposed inverse Jacobian of the map from the reference element to the element
-        const auto jacobian = context.geometry.jacobianInverseTransposed(local);
+        const auto jacobian = context.geometry().jacobianInverseTransposed(local);
 
         // The multiplicative factor in the integral transformation formula
-        double factor = Super::coefficient(local) * context.integrationElement(quad[iq].position()) * quad[iq].weight();
+        const auto factor = Super::coefficient(local) * context.integrationElement(qp.position()) * qp.weight();
 
         // The gradients of the shape functions on the reference element
-        localFE.localBasis().evaluateJacobian(local, referenceGradients);
+        thread_local std::vector<JacobianType> shapeGradients;
+        localFE.localBasis().evaluateJacobian(local, shapeGradients);
 
         // Compute the shape function gradients on the real element
-        std::vector<Dune::FieldVector<double,Context::dow> > gradients(referenceGradients.size());
-
+        thread_local std::vector<Dune::FieldVector<RangeFieldType,Context::dow> > gradients(shapeGradients.size());
         for (std::size_t i = 0; i < gradients.size(); ++i)
-          jacobian.mv(referenceGradients[i][0], gradients[i]);
+          jacobian.mv(shapeGradients[i][0], gradients[i]);
 
         for (std::size_t k = 0; k < CHILDREN; ++k) {
           for (std::size_t i = 0; i < localFE.size(); ++i) {
-            const int local_ki = node.child(k).localIndex(i);
-            double value = factor * gradients[i][k];
+            const auto local_ki = node.child(k).localIndex(i);
+            const auto value = factor * gradients[i][k];
             elementMatrix[local_ki][local_ki] += value * gradients[i][k];
 
             for (std::size_t j = i+1; j < localFE.size(); ++j) {
-              const int local_kj = node.child(k).localIndex(j);
+              const auto local_kj = node.child(k).localIndex(j);
               elementMatrix[local_ki][local_kj] += value * gradients[j][k];
               elementMatrix[local_kj][local_ki] += value * gradients[j][k];
             }
diff --git a/src/amdis/assembler/SecondOrderGradTestGradTrial.hpp b/src/amdis/assembler/SecondOrderGradTestGradTrial.hpp
index bfb3943d2eedc12cdbb29491b398e3ac190b9dc8..30c46b1b55ef6704fee4e3629b838b9008b9d7f5 100644
--- a/src/amdis/assembler/SecondOrderGradTestGradTrial.hpp
+++ b/src/amdis/assembler/SecondOrderGradTestGradTrial.hpp
@@ -20,128 +20,130 @@ namespace AMDiS
 
 
   /// second-order operator \f$ \langle\nabla\psi, c\,\nabla\phi\rangle \f$, or \f$ \langle\nabla\psi, A\,\nabla\phi\rangle \f$
-  template <class GridFct, class QuadCreator>
-  class GridFunctionOperator<tag::gradtest_gradtrial, GridFct, QuadCreator>
-      : public GridFunctionOperatorBase<GridFct, QuadCreator>
+  template <class GridFct>
+  class GridFunctionOperator<tag::gradtest_gradtrial, GridFct>
+      : public GridFunctionOperatorBase<GridFunctionOperator<tag::gradtest_gradtrial, GridFct>, GridFct>
   {
-    using Super = GridFunctionOperatorBase<GridFct, QuadCreator>;
+    using Self = GridFunctionOperator;
+    using Super = GridFunctionOperatorBase<Self, GridFct>;
 
     using expr_value_type = typename GridFct::Range;
     static_assert( Category::Scalar<expr_value_type> || Category::Matrix<expr_value_type>,
       "Expression must be of scalar or matrix type." );
 
   public:
-    GridFunctionOperator(tag::gradtest_gradtrial, GridFct const& expr, QuadCreator const& quadCreator)
-      : Super(expr, quadCreator, 2)
+    GridFunctionOperator(tag::gradtest_gradtrial, GridFct const& expr)
+      : Super(expr, 2)
     {}
 
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode, bool sameFE>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& rowNode,
-                                ColNode const& colNode,
-                                std::integral_constant<bool, sameFE>,
-                                std::false_type /*sameNode*/)
+    template <class Context, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrix(Context const& context,
+                          RowNode const& rowNode,
+                          ColNode const& colNode,
+                          ElementMatrix& elementMatrix)
     {
       static_assert(RowNode::isLeaf && ColNode::isLeaf,
         "Operator can be applied to Leaf-Nodes only.");
 
-      auto const& rowLocalFE = rowNode.finiteElement();
-      auto const& colLocalFE = colNode.finiteElement();
+      const bool sameFE = std::is_same<FiniteElementType_t<RowNode>, FiniteElementType_t<ColNode>>::value;
+      const bool sameNode = rowNode.treeIndex() == colNode.treeIndex();
+      using Category = ValueCategory_t<typename GridFct::Range>;
+
+      auto const& quad = this->getQuadratureRule(context.type(), rowNode, colNode);
+      if (sameFE && sameNode)
+        getElementMatrixOptimized(context, quad, rowNode, colNode, elementMatrix, Category{});
+      else if (sameFE)
+        getElementMatrixStandard(context, quad, rowNode, colNode, elementMatrix);
+      else
+        error_exit("Not implemented: currently only the implementation for equal fespaces available");
+    }
+
+  protected:
+
+    template <class Context, class QuadratureRule, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrixStandard(Context const& context,
+                                  QuadratureRule const& quad,
+                                  RowNode const& rowNode,
+                                  ColNode const& colNode,
+                                  ElementMatrix& elementMatrix)
+    {
+      auto const& localFE = rowNode.finiteElement();
 
-      test_exit( sameFE, "Not implemented: currently only the implementation for equal fespaces available" );
+      using LocalBasisType = typename std::decay_t<decltype(localFE)>::Traits::LocalBasisType;
+      using RangeFieldType = typename LocalBasisType::Traits::RangeFieldType;
+      using JacobianType = typename LocalBasisType::Traits::JacobianType;
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The transposed inverse Jacobian of the map from the reference element to the element
-        const auto jacobian = context.geometry.jacobianInverseTransposed(local);
+        const auto jacobian = context.geometry().jacobianInverseTransposed(local);
 
         // The multiplicative factor in the integral transformation formula
-        double factor = context.integrationElement(quad[iq].position()) * quad[iq].weight();
+        const auto factor = context.integrationElement(qp.position()) * qp.weight();
         const auto exprValue = Super::coefficient(local);
 
         // The gradients of the shape functions on the reference element
-        thread_local std::vector<Dune::FieldMatrix<double,1,Context::dim> > referenceGradients;
-        rowLocalFE.localBasis().evaluateJacobian(local, referenceGradients);
+        thread_local std::vector<JacobianType> shapeGradients;
+        localFE.localBasis().evaluateJacobian(local, shapeGradients);
 
         // Compute the shape function gradients on the real element
-        thread_local std::vector<Dune::FieldVector<double,Context::dow> > gradients(referenceGradients.size());
-
+        thread_local std::vector<Dune::FieldVector<RangeFieldType,Context::dow> > gradients(shapeGradients.size());
         for (std::size_t i = 0; i < gradients.size(); ++i)
-          jacobian.mv(referenceGradients[i][0], gradients[i]);
+          jacobian.mv(shapeGradients[i][0], gradients[i]);
 
-        for (std::size_t i = 0; i < rowLocalFE.size(); ++i) {
-          const int local_i = rowNode.localIndex(i);
-          for (std::size_t j = 0; j < colLocalFE.size(); ++j) {
-            const int local_j = colNode.localIndex(j);
+        for (std::size_t i = 0; i < localFE.size(); ++i) {
+          const auto local_i = rowNode.localIndex(i);
+          for (std::size_t j = 0; j < localFE.size(); ++j) {
+            const auto local_j = colNode.localIndex(j);
             elementMatrix[local_i][local_j] += eval(exprValue, factor, gradients[i], gradients[j]);
           }
         }
       }
     }
 
-
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& node,
-                                ColNode const& /*colNode*/,
-                                std::true_type /*sameFE*/,
-                                std::true_type /*sameNode*/)
-    {
-      static_assert(RowNode::isLeaf && ColNode::isLeaf,
-        "Operator can be applied to Leaf-Nodes only.");
-
-      using Category = ValueCategory_t<typename GridFct::Range>;
-      calcElementMatrix(context, quad, elementMatrix, node, Category{});
-    }
-
-  protected:
-
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode>
-    void calcElementMatrix(Context const& context,
-                           QuadratureRule const& quad,
-                           ElementMatrix& elementMatrix,
-                           RowNode const& node,
-                           tag::scalar)
+    template <class Context, class QuadratureRule, class Node, class ElementMatrix>
+    void getElementMatrixOptimized(Context const& context,
+                                   QuadratureRule const& quad,
+                                   Node const& node,
+                                   Node const& /*colNode*/,
+                                   ElementMatrix& elementMatrix,
+                                   tag::scalar)
     {
       auto const& localFE = node.finiteElement();
-      thread_local std::vector<Dune::FieldMatrix<double,1,Context::dim> > referenceGradients;
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      using LocalBasisType = typename std::decay_t<decltype(localFE)>::Traits::LocalBasisType;
+      using RangeFieldType = typename LocalBasisType::Traits::RangeFieldType;
+      using JacobianType = typename LocalBasisType::Traits::JacobianType;
+
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The transposed inverse Jacobian of the map from the reference element to the element
-        const auto jacobian = context.geometry.jacobianInverseTransposed(local);
+        const auto jacobian = context.geometry().jacobianInverseTransposed(local);
 
         // The multiplicative factor in the integral transformation formula
-        double factor = Super::coefficient(local) * context.integrationElement(quad[iq].position()) * quad[iq].weight();
+        const auto factor = Super::coefficient(local) * context.integrationElement(qp.position()) * qp.weight();
 
         // The gradients of the shape functions on the reference element
-        localFE.localBasis().evaluateJacobian(local, referenceGradients);
+        thread_local std::vector<JacobianType> shapeGradients;
+        localFE.localBasis().evaluateJacobian(local, shapeGradients);
 
         // Compute the shape function gradients on the real element
-        thread_local std::vector<Dune::FieldVector<double,Context::dow> > gradients(referenceGradients.size());
-
+        thread_local std::vector<Dune::FieldVector<RangeFieldType,Context::dow> > gradients(shapeGradients.size());
         for (std::size_t i = 0; i < gradients.size(); ++i)
-          jacobian.mv(referenceGradients[i][0], gradients[i]);
+          jacobian.mv(shapeGradients[i][0], gradients[i]);
 
         for (std::size_t i = 0; i < localFE.size(); ++i) {
-          const int local_i = node.localIndex(i);
+          const auto local_i = node.localIndex(i);
 
           elementMatrix[local_i][local_i] += factor * (gradients[i] * gradients[i]);
 
           for (std::size_t j = i+1; j < localFE.size(); ++j) {
-            const int local_j = node.localIndex(j);
-            double value = factor * (gradients[i] * gradients[j]);
+            const auto local_j = node.localIndex(j);
+            const auto value = factor * (gradients[i] * gradients[j]);
 
             elementMatrix[local_i][local_j] += value;
             elementMatrix[local_j][local_i] += value;
@@ -150,42 +152,44 @@ namespace AMDiS
       }
     }
 
-
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode>
-    void calcElementMatrix(Context const& context,
-                           QuadratureRule const& quad,
-                           ElementMatrix& elementMatrix,
-                           RowNode const& node,
-                           tag::matrix)
+    template <class Context, class QuadratureRule, class Node, class ElementMatrix>
+    void getElementMatrixOptimized(Context const& context,
+                                   QuadratureRule const& quad,
+                                   Node const& node,
+                                   Node const& /*colNode*/,
+                                   ElementMatrix& elementMatrix,
+                                   tag::matrix)
     {
       auto const& localFE = node.finiteElement();
-      thread_local std::vector<Dune::FieldMatrix<double,1,Context::dim> > referenceGradients;
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      using LocalBasisType = typename std::decay_t<decltype(localFE)>::Traits::LocalBasisType;
+      using RangeFieldType = typename LocalBasisType::Traits::RangeFieldType;
+      using JacobianType = typename LocalBasisType::Traits::JacobianType;
+
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The transposed inverse Jacobian of the map from the reference element to the element
-        const auto jacobian = context.geometry.jacobianInverseTransposed(local);
+        const auto jacobian = context.geometry().jacobianInverseTransposed(local);
 
         // The multiplicative factor in the integral transformation formula
-        double factor = context.integrationElement(quad[iq].position()) * quad[iq].weight();
+        const auto factor = context.integrationElement(qp.position()) * qp.weight();
         const auto exprValue = Super::coefficient(local);
 
         // The gradients of the shape functions on the reference element
-        localFE.localBasis().evaluateJacobian(local, referenceGradients);
+        thread_local std::vector<JacobianType> shapeGradients;
+        localFE.localBasis().evaluateJacobian(local, shapeGradients);
 
         // Compute the shape function gradients on the real element
-        thread_local std::vector<Dune::FieldVector<double,Context::dow> > gradients(referenceGradients.size());
-
+        thread_local std::vector<Dune::FieldVector<RangeFieldType,Context::dow> > gradients(shapeGradients.size());
         for (std::size_t i = 0; i < gradients.size(); ++i)
-          jacobian.mv(referenceGradients[i][0], gradients[i]);
+          jacobian.mv(shapeGradients[i][0], gradients[i]);
 
         for (std::size_t i = 0; i < localFE.size(); ++i) {
-          const int local_i = node.localIndex(i);
+          const auto local_i = node.localIndex(i);
           for (std::size_t j = 0; j < localFE.size(); ++j) {
-            const int local_j = node.localIndex(j);
+            const auto local_j = node.localIndex(j);
             elementMatrix[local_i][local_j] += eval(exprValue, factor, gradients[i], gradients[j]);
           }
         }
@@ -194,20 +198,22 @@ namespace AMDiS
 
   protected:
 
-    template <int dow>
-    double eval(Dune::FieldVector<double,1> const& scalar, double factor,
-                Dune::FieldVector<double,dow> const& grad_test,
-                Dune::FieldVector<double,dow> const& grad_trial) const
+    template <class S, class F, class T, int dow,
+      std::enable_if_t<Category::Scalar<S>,int> = 0>
+    T eval(S const& scalar, F factor,
+           Dune::FieldVector<T,dow> const& grad_test,
+           Dune::FieldVector<T,dow> const& grad_trial) const
     {
       return (scalar * factor) * (grad_test * grad_trial);
     }
 
-    template <int dow>
-    double eval(Dune::FieldMatrix<double,dow,dow> const& M, double factor,
-                Dune::FieldVector<double,dow> const& grad_test,
-                Dune::FieldVector<double,dow> const& grad_trial) const
+    template <class M, class F, class T, int dow,
+      std::enable_if_t<Category::Matrix<M>,int> = 0>
+    T eval(M const& mat, F factor,
+           Dune::FieldVector<T,dow> const& grad_test,
+           Dune::FieldVector<T,dow> const& grad_trial) const
     {
-      return factor * (grad_test * (M * grad_trial));
+      return factor * (grad_test * (mat * grad_trial));
     }
   };
 
diff --git a/src/amdis/assembler/SecondOrderPartialTestPartialTrial.hpp b/src/amdis/assembler/SecondOrderPartialTestPartialTrial.hpp
index f5c548add90cd01e37592c8e94de651665caa58b..bf964bbcbf86776d744be86887042af8c7ed6281 100644
--- a/src/amdis/assembler/SecondOrderPartialTestPartialTrial.hpp
+++ b/src/amdis/assembler/SecondOrderPartialTestPartialTrial.hpp
@@ -24,30 +24,27 @@ namespace AMDiS
 
 
   /// second-order operator \f$ \langle\partial_i\psi, c\,\partial_j\phi\rangle \f$
-  template <class GridFct, class QuadCreator>
-  class GridFunctionOperator<tag::partialtest_partialtrial, GridFct, QuadCreator>
-      : public GridFunctionOperatorBase<GridFct, QuadCreator>
+  template <class GridFct>
+  class GridFunctionOperator<tag::partialtest_partialtrial, GridFct>
+      : public GridFunctionOperatorBase<GridFunctionOperator<tag::partialtest_partialtrial, GridFct>, GridFct>
   {
-    using Super = GridFunctionOperatorBase<GridFct, QuadCreator>;
+    using Self = GridFunctionOperator;
+    using Super = GridFunctionOperatorBase<Self, GridFct>;
 
     static_assert( Category::Scalar<typename GridFct::Range>, "Expression must be of scalar type." );
 
   public:
-    GridFunctionOperator(tag::partialtest_partialtrial tag, GridFct const& expr, QuadCreator const& quadCreator)
-      : Super(expr, quadCreator, 2)
+    GridFunctionOperator(tag::partialtest_partialtrial tag, GridFct const& expr)
+      : Super(expr, 2)
       , compTest_(tag.comp_test)
       , compTrial_(tag.comp_trial)
     {}
 
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode, bool sameFE, bool sameNode>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& rowNode,
-                                ColNode const& colNode,
-                                std::integral_constant<bool, sameFE>,
-                                std::integral_constant<bool, sameNode>)
+    template <class Context, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrix(Context const& context,
+                          RowNode const& rowNode,
+                          ColNode const& colNode,
+                          ElementMatrix& elementMatrix)
     {
       static_assert(RowNode::isLeaf && ColNode::isLeaf,
         "Operator can be applied to Leaf-Nodes only.");
@@ -55,46 +52,52 @@ namespace AMDiS
       auto const& rowLocalFE = rowNode.finiteElement();
       auto const& colLocalFE = colNode.finiteElement();
 
-      std::vector<Dune::FieldMatrix<double,1,Context::dim> > rowReferenceGradients;
-      std::vector<Dune::FieldMatrix<double,1,Context::dim> > colReferenceGradients;
+      using RowLocalBasisType = typename std::decay_t<decltype(rowLocalFE)>::Traits::LocalBasisType;
+      using ColLocalBasisType = typename std::decay_t<decltype(colLocalFE)>::Traits::LocalBasisType;
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      using RowFieldType = typename RowLocalBasisType::Traits::RangeFieldType;
+      using ColFieldType = typename ColLocalBasisType::Traits::RangeFieldType;
+      using RowJacobianType = typename RowLocalBasisType::Traits::JacobianType;
+      using ColJacobianType = typename ColLocalBasisType::Traits::JacobianType;
+
+      auto const& quad = this->getQuadratureRule(context.type(), rowNode, colNode);
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The transposed inverse Jacobian of the map from the reference element to the element
-        const auto jacobian = context.geometry.jacobianInverseTransposed(local);
+        const auto jacobian = context.geometry().jacobianInverseTransposed(local);
 
         // The multiplicative factor in the integral transformation formula
-        double factor = context.integrationElement(quad[iq].position()) * quad[iq].weight();
-        double c = Super::coefficient(local);
+        const auto factor = context.integrationElement(qp.position()) * qp.weight();
+        const auto c = Super::coefficient(local);
 
         // The gradients of the shape functions on the reference element
-        rowLocalFE.localBasis().evaluateJacobian(local, rowReferenceGradients);
-        colLocalFE.localBasis().evaluateJacobian(local, colReferenceGradients);
+        thread_local std::vector<RowJacobianType> rowShapeGradients;
+        rowLocalFE.localBasis().evaluateJacobian(local, rowShapeGradients);
+        thread_local std::vector<ColJacobianType> colShapeGradients;
+        colLocalFE.localBasis().evaluateJacobian(local, colShapeGradients);
 
         // Compute the shape function gradients on the real element
-        std::vector<double> rowPartial(rowReferenceGradients.size());
-        std::vector<double> colPartial(colReferenceGradients.size());
-
-        // transform gradients to global element
+        thread_local std::vector<RowFieldType> rowPartial(rowShapeGradients.size());
         for (std::size_t i = 0; i < rowPartial.size(); ++i) {
-          rowPartial[i] = jacobian[compTest_][0] * rowReferenceGradients[i][0][0];
+          rowPartial[i] = jacobian[compTest_][0] * rowShapeGradients[i][0][0];
           for (std::size_t j = 1; j < jacobian.cols(); ++j)
-            rowPartial[i] += jacobian[compTest_][j] * rowReferenceGradients[i][0][j];
+            rowPartial[i] += jacobian[compTest_][j] * rowShapeGradients[i][0][j];
         }
 
+        thread_local std::vector<ColFieldType> colPartial(colShapeGradients.size());
         for (std::size_t i = 0; i < colPartial.size(); ++i) {
-          colPartial[i] = jacobian[compTrial_][0] * colReferenceGradients[i][0][0];
+          colPartial[i] = jacobian[compTrial_][0] * colShapeGradients[i][0][0];
           for (std::size_t j = 1; j < jacobian.cols(); ++j)
-            colPartial[i] += jacobian[compTrial_][j] * colReferenceGradients[i][0][j];
+            colPartial[i] += jacobian[compTrial_][j] * colShapeGradients[i][0][j];
         }
 
         for (std::size_t j = 0; j < colLocalFE.size(); ++j) {
-          const int local_j = colNode.localIndex(j);
-          double value = factor * (c * colPartial[j]);
+          const auto local_j = colNode.localIndex(j);
+          const auto value = factor * (c * colPartial[j]);
           for (std::size_t i = 0; i < rowLocalFE.size(); ++i) {
-            const int local_i = rowNode.localIndex(i);
+            const auto local_i = rowNode.localIndex(i);
             elementMatrix[local_i][local_j] += value * rowPartial[i];
           }
         }
diff --git a/src/amdis/assembler/StokesOperator.hpp b/src/amdis/assembler/StokesOperator.hpp
index 46c191a66bfd1d42c3d558a71695fe714781cde0..8b6aa6a5b751dbaa6829b6c21c05d5ac4391846e 100644
--- a/src/amdis/assembler/StokesOperator.hpp
+++ b/src/amdis/assembler/StokesOperator.hpp
@@ -20,27 +20,25 @@ namespace AMDiS
 
 
   /// Stokes operator \f$ \langle\nabla\mathbf{v}, \nu \nabla\mathbf{u}\rangle + \langle\nabla\cdot\mathbf{v}, p\rangle + \langle q, \langle\nabla\cdot\mathbf{u}\rangle \f$
-  template <class ViscosityExpr, class QuadCreator>
-  class GridFunctionOperator<tag::stokes, ViscosityExpr, QuadCreator>
-      : public GridFunctionOperatorBase<ViscosityExpr, QuadCreator>
+  template <class ViscosityExpr>
+  class GridFunctionOperator<tag::stokes, ViscosityExpr>
+      : public GridFunctionOperatorBase<GridFunctionOperator<tag::stokes, ViscosityExpr>, ViscosityExpr>
   {
-    using Super = GridFunctionOperatorBase<ViscosityExpr, QuadCreator>;
+    using Self = GridFunctionOperator;
+    using Super = GridFunctionOperatorBase<Self, ViscosityExpr>;
 
     static_assert( Category::Scalar<typename ViscosityExpr::Range>, "Viscosity must be of scalar type." );
 
   public:
-    GridFunctionOperator(tag::stokes, ViscosityExpr const& expr, QuadCreator const& quadCreator)
-      : Super(expr, quadCreator, 1)
+    GridFunctionOperator(tag::stokes, ViscosityExpr const& expr)
+      : Super(expr, 1)
     {}
 
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class Node, class Flag1, class Flag2>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                Node const& tree,
-                                Node const& /*colNode*/,
-                                Flag1, Flag2)
+    template <class Context, class Node, class ElementMatrix>
+    void getElementMatrix(Context const& context,
+                          Node const& tree,
+                          Node const& /*colNode*/,
+                          ElementMatrix& elementMatrix)
     {
       using VelocityNode = typename Node::template Child<0>::type;
       using PressureNode = typename Node::template Child<1>::type;
@@ -51,44 +49,50 @@ namespace AMDiS
       auto const& velocityLocalFE = tree.child(_0,0).finiteElement();
       auto const& pressureLocalFE = tree.child(_1).finiteElement();
 
-      std::vector<Dune::FieldVector<double,1>> pressureValues;
-      std::vector<Dune::FieldMatrix<double,1,Context::dim> > referenceGradients;
+      using VelocityLocalBasisType = typename std::decay_t<decltype(velocityLocalFE)>::Traits::LocalBasisType;
+      using PressureLocalBasisType = typename std::decay_t<decltype(pressureLocalFE)>::Traits::LocalBasisType;
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      using PressureRange = typename PressureLocalBasisType::Traits::RangeType;
+      using RangeFieldType = typename VelocityLocalBasisType::Traits::RangeFieldType;
+      using VelocityJacobian = typename VelocityLocalBasisType::Traits::JacobianType;
+
+      auto const& quad = this->getQuadratureRule(context.type(), tree, tree);
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The transposed inverse Jacobian of the map from the reference element to the element
-        const auto jacobian = context.geometry.jacobianInverseTransposed(local);
+        const auto jacobian = context.geometry().jacobianInverseTransposed(local);
 
         // The multiplicative factor in the integral transformation formula
-        const double factor = context.integrationElement(quad[iq].position()) * quad[iq].weight();
-        const double vel_factor = Super::coefficient(local) * factor;
+        const auto factor = context.integrationElement(qp.position()) * qp.weight();
+        const auto vel_factor = Super::coefficient(local) * factor;
 
         // The gradients of the shape functions on the reference element
-        velocityLocalFE.localBasis().evaluateJacobian(local, referenceGradients);
+        thread_local std::vector<VelocityJacobian> shapeGradients;
+        velocityLocalFE.localBasis().evaluateJacobian(local, shapeGradients);
 
         // Compute the shape function gradients on the real element
-        std::vector<Dune::FieldVector<double,Context::dow> > gradients(referenceGradients.size());
-
+        thread_local std::vector<Dune::FieldVector<RangeFieldType,Context::dow> > gradients(shapeGradients.size());
         for (std::size_t i = 0; i < gradients.size(); ++i)
-          jacobian.mv(referenceGradients[i][0], gradients[i]);
+          jacobian.mv(shapeGradients[i][0], gradients[i]);
 
+        thread_local std::vector<PressureRange> pressureValues;
         pressureLocalFE.localBasis().evaluateFunction(local, pressureValues);
 
         // <viscosity * grad(u), grad(v)>
         for (std::size_t i = 0; i < velocityLocalFE.size(); ++i) {
-          auto value_ii = vel_factor * (gradients[i] * gradients[i]);
+          const auto value_ii = vel_factor * (gradients[i] * gradients[i]);
           for (std::size_t k = 0; k < Context::dow; ++k) {
-            std::size_t local_ki = tree.child(_0,k).localIndex(i);
+            const auto local_ki = tree.child(_0,k).localIndex(i);
             elementMatrix[local_ki][local_ki] += value_ii;
           }
 
           for (std::size_t j = i+1; j < velocityLocalFE.size(); ++j) {
-            auto value = vel_factor * (gradients[i] * gradients[j]);
+            const auto value = vel_factor * (gradients[i] * gradients[j]);
             for (std::size_t k = 0; k < Context::dow; ++k) {
-              std::size_t local_ki = tree.child(_0,k).localIndex(i);
-              std::size_t local_kj = tree.child(_0,k).localIndex(j);
+              const auto local_ki = tree.child(_0,k).localIndex(i);
+              const auto local_kj = tree.child(_0,k).localIndex(j);
               elementMatrix[local_ki][local_kj] += value;
               elementMatrix[local_kj][local_ki] += value;
             }
@@ -98,10 +102,10 @@ namespace AMDiS
         // <p, div(v)> + <div(u), q>
         for (std::size_t i = 0; i < velocityLocalFE.size(); ++i) {
           for (std::size_t j = 0; j < pressureLocalFE.size(); ++j) {
-            auto value = factor * pressureValues[j];
+            const auto value = factor * pressureValues[j];
             for (std::size_t k = 0; k < Context::dow; ++k) {
-              std::size_t local_v = tree.child(_0,k).localIndex(i);
-              std::size_t local_p = tree.child(_1).localIndex(j);
+              const auto local_v = tree.child(_0,k).localIndex(i);
+              const auto local_p = tree.child(_1).localIndex(j);
 
               elementMatrix[local_v][local_p] += gradients[i][k] * value;
               elementMatrix[local_p][local_v] += gradients[i][k] * value;
diff --git a/src/amdis/assembler/ZeroOrderTest.hpp b/src/amdis/assembler/ZeroOrderTest.hpp
index fd6f6786d6379430cff6b018b3df8d758c6c2588..39dbdb1f7d62c0e51fcd9b23dad923ba903c7197 100644
--- a/src/amdis/assembler/ZeroOrderTest.hpp
+++ b/src/amdis/assembler/ZeroOrderTest.hpp
@@ -19,48 +19,49 @@ namespace AMDiS
 
 
   /// zero-order vector-operator \f$ (c\, \psi) \f$
-  template <class GridFct, class QuadCreator>
-  class GridFunctionOperator<tag::test, GridFct, QuadCreator>
-      : public GridFunctionOperatorBase<GridFct, QuadCreator>
+  template <class GridFct>
+  class GridFunctionOperator<tag::test, GridFct>
+      : public GridFunctionOperatorBase<GridFunctionOperator<tag::test, GridFct>, GridFct>
   {
-    using Super = GridFunctionOperatorBase<GridFct, QuadCreator>;
+    using Self = GridFunctionOperator;
+    using Super = GridFunctionOperatorBase<Self, GridFct>;
 
     static_assert( Category::Scalar<typename GridFct::Range>, "Expression must be of scalar type." );
 
   public:
-    GridFunctionOperator(tag::test, GridFct const& expr, QuadCreator const& quadCreator)
-      : Super(expr, quadCreator, 0)
+    GridFunctionOperator(tag::test, GridFct const& expr)
+      : Super(expr, 0)
     {}
 
-    template <class Context, class QuadratureRule,
-              class ElementVector, class Node>
-    void calculateElementVector(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementVector& elementVector,
-                                Node const& node)
+    template <class Context, class Node, class ElementVector>
+    void getElementVector(Context const& context,
+                          Node const& node,
+                          ElementVector& elementVector)
     {
       static_assert(Node::isLeaf, "Operator can be applied to Leaf-Nodes only");
 
       auto const& localFE = node.finiteElement();
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      using LocalBasisType = typename std::decay_t<decltype(localFE)>::Traits::LocalBasisType;
+      using RangeType = typename LocalBasisType::Traits::RangeType;
+
+      auto const& quad = this->getQuadratureRule(context.type(), node);
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The multiplicative factor in the integral transformation formula
-        double factor = Super::coefficient(local) * context.integrationElement(quad[iq].position()) * quad[iq].weight();
+        const auto factor = Super::coefficient(local) * context.integrationElement(qp.position()) * qp.weight();
 
-        localFE.localBasis().evaluateFunction(local, shapeValues_);
+        thread_local std::vector<RangeType> shapeValues;
+        localFE.localBasis().evaluateFunction(local, shapeValues);
 
         for (std::size_t i = 0; i < localFE.size(); ++i) {
-          const int local_i = node.localIndex(i);
-          elementVector[local_i] += factor * shapeValues_[i];
+          const auto local_i = node.localIndex(i);
+          elementVector[local_i] += factor * shapeValues[i];
         }
       }
     }
-
-  private:
-    std::vector<Dune::FieldVector<double,1>> shapeValues_;
   };
 
   /** @} **/
diff --git a/src/amdis/assembler/ZeroOrderTestTrial.hpp b/src/amdis/assembler/ZeroOrderTestTrial.hpp
index d7df01aacf32518142e2c33126a8a22f17d46bb4..2315ea92b8d0fcc886e68f768f0ccda719e36c9f 100644
--- a/src/amdis/assembler/ZeroOrderTestTrial.hpp
+++ b/src/amdis/assembler/ZeroOrderTestTrial.hpp
@@ -19,28 +19,43 @@ namespace AMDiS
 
 
   /// zero-order operator \f$ \langle\psi, c\,\phi\rangle \f$
-  template <class GridFct, class QuadCreator>
-  class GridFunctionOperator<tag::test_trial, GridFct, QuadCreator>
-      : public GridFunctionOperatorBase<GridFct, QuadCreator>
+  template <class GridFct>
+  class GridFunctionOperator<tag::test_trial, GridFct>
+      : public GridFunctionOperatorBase<GridFunctionOperator<tag::test_trial, GridFct>, GridFct>
   {
-    using Super = GridFunctionOperatorBase<GridFct, QuadCreator>;
+    using Self = GridFunctionOperator;
+    using Super = GridFunctionOperatorBase<Self, GridFct>;
 
     static_assert( Category::Scalar<typename GridFct::Range>, "Expression must be of scalar type." );
 
   public:
-    GridFunctionOperator(tag::test_trial, GridFct const& expr, QuadCreator const& quadCreator)
-      : Super(expr, quadCreator, 0)
+    GridFunctionOperator(tag::test_trial, GridFct const& expr)
+      : Super(expr, 0)
     {}
 
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode, bool sameFE>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& rowNode,
-                                ColNode const& colNode,
-                                std::integral_constant<bool, sameFE>,
-                                std::false_type /*sameNode*/)
+    template <class Context, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrix(Context const& context,
+                          RowNode const& rowNode,
+                          ColNode const& colNode,
+                          ElementMatrix& elementMatrix)
+    {
+      const bool sameFE = std::is_same<FiniteElementType_t<RowNode>, FiniteElementType_t<ColNode>>::value;
+      const bool sameNode = rowNode.treeIndex() == colNode.treeIndex();
+
+      auto const& quad = this->getQuadratureRule(context.type(), rowNode, colNode);
+      if (sameFE && sameNode)
+        getElementMatrixOptimized(context, quad, rowNode, colNode, elementMatrix);
+      else
+        getElementMatrixStandard(context, quad, rowNode, colNode, elementMatrix);
+    }
+
+  protected:
+    template <class Context, class QuadratureRule, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrixStandard(Context const& context,
+                                  QuadratureRule const& quad,
+                                  RowNode const& rowNode,
+                                  ColNode const& colNode,
+                                  ElementMatrix& elementMatrix)
     {
       static_assert(RowNode::isLeaf && ColNode::isLeaf,
         "Operator can be applied to Leaf-Nodes only.");
@@ -48,26 +63,35 @@ namespace AMDiS
       auto const& rowLocalFE = rowNode.finiteElement();
       auto const& colLocalFE = colNode.finiteElement();
 
-      auto* rowShapeValues = &rowShapeValues_;
-      auto* colShapeValues = (sameFE ? &rowShapeValues_ : &colShapeValues_);
+      using RowLocalBasisType = typename std::decay_t<decltype(rowLocalFE)>::Traits::LocalBasisType;
+      using ColLocalBasisType = typename std::decay_t<decltype(colLocalFE)>::Traits::LocalBasisType;
+      using RowRangeType = typename RowLocalBasisType::Traits::RangeType;
+      using ColRangeType = typename ColLocalBasisType::Traits::RangeType;
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      thread_local std::vector<RowRangeType> rowShapeValueBuffer;
+      thread_local std::vector<ColRangeType> colShapeValueBuffer;
+
+      const bool sameFE = std::is_same<decltype(rowLocalFE),decltype(colLocalFE)>::value;
+      auto* rowShapeValues = &rowShapeValueBuffer;
+      auto* colShapeValues = (sameFE ? &rowShapeValueBuffer : &colShapeValueBuffer);
+
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The multiplicative factor in the integral transformation formula
-        double factor = Super::coefficient(local) * context.integrationElement(quad[iq].position()) * quad[iq].weight();
+        const auto factor = Super::coefficient(local) * context.integrationElement(qp.position()) * qp.weight();
 
-        rowLocalFE.localBasis().evaluateFunction(local, rowShapeValues_);
+        rowLocalFE.localBasis().evaluateFunction(local, rowShapeValueBuffer);
         if (!sameFE)
-          colLocalFE.localBasis().evaluateFunction(local, colShapeValues_);
+          colLocalFE.localBasis().evaluateFunction(local, colShapeValueBuffer);
 
         for (std::size_t i = 0; i < rowLocalFE.size(); ++i) {
-          const int local_i = rowNode.localIndex(i);
-          double value = factor * (*rowShapeValues)[i];
+          const auto local_i = rowNode.localIndex(i);
+          const auto value = factor * (*rowShapeValues)[i];
 
           for (std::size_t j = 0; j < colLocalFE.size(); ++j) {
-            const int local_j = colNode.localIndex(j);
+            const auto local_j = colNode.localIndex(j);
             elementMatrix[local_i][local_j] += value * (*colShapeValues)[j];
           }
         }
@@ -76,38 +100,39 @@ namespace AMDiS
 
 
     template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& node,
-                                ColNode const& /*colNode*/,
-                                std::true_type /*sameFE*/,
-                                std::true_type /*sameNode*/)
+              class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrixOptimized(Context const& context,
+                                   QuadratureRule const& quad,
+                                   RowNode const& node,
+                                   ColNode const& /*colNode*/,
+                                   ElementMatrix& elementMatrix)
     {
       static_assert(RowNode::isLeaf && ColNode::isLeaf,
         "Operator can be applied to Leaf-Nodes only.");
 
       auto const& localFE = node.finiteElement();
-      auto& shapeValues = rowShapeValues_;
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      using LocalBasisType = typename std::decay_t<decltype(localFE)>::Traits::LocalBasisType;
+      using RangeType = typename LocalBasisType::Traits::RangeType;
+
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The multiplicative factor in the integral transformation formula
-        const double factor = Super::coefficient(local) * context.integrationElement(quad[iq].position()) * quad[iq].weight();
+        const auto factor = Super::coefficient(local) * context.integrationElement(qp.position()) * qp.weight();
 
+        thread_local std::vector<RangeType> shapeValues;
         localFE.localBasis().evaluateFunction(local, shapeValues);
 
         for (std::size_t i = 0; i < localFE.size(); ++i) {
-          const int local_i = node.localIndex(i);
+          const auto local_i = node.localIndex(i);
 
-          double value = factor * shapeValues[i];
+          const auto value = factor * shapeValues[i];
           elementMatrix[local_i][local_i] += value * shapeValues[i];
 
           for (std::size_t j = i+1; j < localFE.size(); ++j) {
-            const int local_j = node.localIndex(j);
+            const auto local_j = node.localIndex(j);
 
             elementMatrix[local_i][local_j] += value * shapeValues[j];
             elementMatrix[local_j][local_i] += value * shapeValues[j];
@@ -115,10 +140,6 @@ namespace AMDiS
         }
       }
     }
-
-  private:
-    std::vector<Dune::FieldVector<double,1>> rowShapeValues_;
-    std::vector<Dune::FieldVector<double,1>> colShapeValues_;
   };
 
   /** @} **/
diff --git a/src/amdis/assembler/ZeroOrderTestTrialvec.hpp b/src/amdis/assembler/ZeroOrderTestTrialvec.hpp
index fbe2f3575f1ff39d7e940c500ca12b5fc22098cb..a4dbec12faf19910b049af8a46c3aa802e44511e 100644
--- a/src/amdis/assembler/ZeroOrderTestTrialvec.hpp
+++ b/src/amdis/assembler/ZeroOrderTestTrialvec.hpp
@@ -19,28 +19,25 @@ namespace AMDiS
 
 
   /// zero-order operator \f$ \langle\psi, \mathbf{b}\cdot\Phi\rangle \f$
-  template <class GridFct, class QuadCreator>
-  class GridFunctionOperator<tag::test_trialvec, GridFct, QuadCreator>
-      : public GridFunctionOperatorBase<GridFct, QuadCreator>
+  template <class GridFct>
+  class GridFunctionOperator<tag::test_trialvec, GridFct>
+      : public GridFunctionOperatorBase<GridFunctionOperator<tag::test_trialvec, GridFct>, GridFct>
   {
-    using Super = GridFunctionOperatorBase<GridFct, QuadCreator>;
+    using Self = GridFunctionOperator;
+    using Super = GridFunctionOperatorBase<Self, GridFct>;
 
     static_assert( Category::Vector<typename GridFct::Range>, "Expression must be of vector type." );
 
   public:
-    GridFunctionOperator(tag::test_trialvec, GridFct const& expr, QuadCreator const& quadCreator)
-      : Super(expr, quadCreator, 0)
+    GridFunctionOperator(tag::test_trialvec, GridFct const& expr)
+      : Super(expr, 0)
     {}
 
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode, bool sameFE, bool sameNode>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& rowNode,
-                                ColNode const& colNode,
-                                std::integral_constant<bool, sameFE>,
-                                std::integral_constant<bool, sameNode>)
+    template <class Context, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrix(Context const& context,
+                          RowNode const& rowNode,
+                          ColNode const& colNode,
+                          ElementMatrix& elementMatrix)
     {
       static_assert(RowNode::isLeaf && ColNode::isPower,
         "RowNode must be Leaf-Node and ColNode must be a Power-Node.");
@@ -51,39 +48,45 @@ namespace AMDiS
       auto const& rowLocalFE = rowNode.finiteElement();
       auto const& colLocalFE = colNode.child(0).finiteElement();
 
-      auto* rowShapeValues = &rowShapeValues_;
-      auto* colShapeValues = (sameFE ? &rowShapeValues_ : &colShapeValues_);
+      using RowLocalBasisType = typename std::decay_t<decltype(rowLocalFE)>::Traits::LocalBasisType;
+      using ColLocalBasisType = typename std::decay_t<decltype(colLocalFE)>::Traits::LocalBasisType;
+      using RowRangeType = typename RowLocalBasisType::Traits::RangeType;
+      using ColRangeType = typename ColLocalBasisType::Traits::RangeType;
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      thread_local std::vector<RowRangeType> rowShapeValueBuffer;
+      thread_local std::vector<ColRangeType> colShapeValueBuffer;
+
+      const bool sameFE = std::is_same<decltype(rowLocalFE),decltype(colLocalFE)>::value;
+      auto* rowShapeValues = &rowShapeValueBuffer;
+      auto* colShapeValues = (sameFE ? &rowShapeValueBuffer : &colShapeValueBuffer);
+
+      auto const& quad = this->getQuadratureRule(context.type(), rowNode, colNode);
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The multiplicative factor in the integral transformation formula
-        double factor = context.integrationElement(quad[iq].position()) * quad[iq].weight();
+        const auto factor = context.integrationElement(qp.position()) * qp.weight();
         const auto b = Super::coefficient(local);
 
-        rowLocalFE.localBasis().evaluateFunction(local, rowShapeValues_);
+        rowLocalFE.localBasis().evaluateFunction(local, rowShapeValueBuffer);
         if (!sameFE)
-          colLocalFE.localBasis().evaluateFunction(local, colShapeValues_);
+          colLocalFE.localBasis().evaluateFunction(local, colShapeValueBuffer);
 
         for (std::size_t i = 0; i < rowLocalFE.size(); ++i) {
-          const int local_i = rowNode.localIndex(i);
+          const auto local_i = rowNode.localIndex(i);
 
           for (std::size_t j = 0; j < colLocalFE.size(); ++j) {
-            auto value = b * (factor * (*rowShapeValues)[i] * (*colShapeValues)[j]);
+            const auto value = b * (factor * (*rowShapeValues)[i] * (*colShapeValues)[j]);
 
             for (std::size_t k = 0; k < CHILDREN; ++k) {
-              const int local_kj = colNode.child(k).localIndex(j);
+              const auto local_kj = colNode.child(k).localIndex(j);
               elementMatrix[local_i][local_kj] += value[k];
             }
           }
         }
       }
     }
-
-  private:
-    std::vector<Dune::FieldVector<double,1>> rowShapeValues_;
-    std::vector<Dune::FieldVector<double,1>> colShapeValues_;
   };
 
   /** @} **/
diff --git a/src/amdis/assembler/ZeroOrderTestvec.hpp b/src/amdis/assembler/ZeroOrderTestvec.hpp
index 49a5d40a90568993957cc2de0a16aa413178d96a..68a3fe3f9922ef51fce5e2250633d3952658bc7b 100644
--- a/src/amdis/assembler/ZeroOrderTestvec.hpp
+++ b/src/amdis/assembler/ZeroOrderTestvec.hpp
@@ -19,25 +19,24 @@ namespace AMDiS
 
 
   /// zero-order vector-operator \f$ (\mathbf{b}\cdot\Psi) \f$
-  template <class GridFct, class QuadCreator>
-  class GridFunctionOperator<tag::testvec, GridFct, QuadCreator>
-      : public GridFunctionOperatorBase<GridFct, QuadCreator>
+  template <class GridFct>
+  class GridFunctionOperator<tag::testvec, GridFct>
+      : public GridFunctionOperatorBase<GridFunctionOperator<tag::testvec, GridFct>, GridFct>
   {
-    using Super = GridFunctionOperatorBase<GridFct, QuadCreator>;
+    using Self = GridFunctionOperator;
+    using Super = GridFunctionOperatorBase<Self, GridFct>;
 
     static_assert( Category::Vector<typename GridFct::Range>, "Expression must be of vector type." );
 
   public:
-    GridFunctionOperator(tag::testvec, GridFct const& expr, QuadCreator const& quadCreator)
-      : Super(expr, quadCreator, 0)
+    GridFunctionOperator(tag::testvec, GridFct const& expr)
+      : Super(expr, 0)
     {}
 
-    template <class Context, class QuadratureRule,
-              class ElementVector, class Node>
-    void calculateElementVector(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementVector& elementVector,
-                                Node const& node)
+    template <class Context, class Node, class ElementVector>
+    void getElementVector(Context const& context,
+                          Node const& node,
+                          ElementVector& elementVector)
     {
       static_assert(Node::isPower,
         "Operator can be applied to Power-Nodes only.");
@@ -46,28 +45,30 @@ namespace AMDiS
 
       auto const& localFE = node.child(0).finiteElement();
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      using LocalBasisType = typename std::decay_t<decltype(localFE)>::Traits::LocalBasisType;
+      using RangeType = typename LocalBasisType::Traits::RangeType;
+
+      auto const& quad = this->getQuadratureRule(context.type(), node);
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The multiplicative factor in the integral transformation formula
-        double factor = context.integrationElement(quad[iq].position()) * quad[iq].weight();
+        const auto factor = context.integrationElement(qp.position()) * qp.weight();
         const auto exprValue = Super::coefficient(local);
 
-        localFE.localBasis().evaluateFunction(local, shapeValues_);
+        thread_local std::vector<RangeType> shapeValues;
+        localFE.localBasis().evaluateFunction(local, shapeValues);
 
         for (std::size_t i = 0; i < localFE.size(); ++i) {
-          auto value = exprValue * (factor * shapeValues_[i]);
+          const auto value = exprValue * (factor * shapeValues[i]);
           for (std::size_t k = 0; k < CHILDREN; ++k) {
-            const int local_ki = node.child(k).localIndex(i);
+            const auto local_ki = node.child(k).localIndex(i);
             elementVector[local_ki] += value[k];
           }
         }
       }
     }
-
-  private:
-    std::vector<Dune::FieldVector<double,1>> shapeValues_;
   };
 
   /** @} **/
diff --git a/src/amdis/assembler/ZeroOrderTestvecTrial.hpp b/src/amdis/assembler/ZeroOrderTestvecTrial.hpp
index d6083d1ae911f0751411b9b10e7f7b8c4012dd46..ad67fa578977a89dd2b5a4a60977feb6526eab4f 100644
--- a/src/amdis/assembler/ZeroOrderTestvecTrial.hpp
+++ b/src/amdis/assembler/ZeroOrderTestvecTrial.hpp
@@ -18,31 +18,19 @@ namespace AMDiS
 
 
   /// zero-order operator \f$ \langle\Psi, \mathbf{b}\,\phi\rangle \f$
-  template <class GridFct, class QuadCreator>
-  class GridFunctionOperator<tag::testvec_trial, GridFct, QuadCreator>
-      : public GridFunctionOperator<tag::test_trialvec, GridFct, QuadCreator>
+  template <class GridFct>
+  class GridFunctionOperator<tag::testvec_trial, GridFct>
+      : public GridFunctionOperatorTransposed<GridFunctionOperator<tag::testvec_trial, GridFct>,
+                                              GridFunctionOperator<tag::test_trialvec, GridFct>>
   {
-    using Transposed = GridFunctionOperator<tag::test_trialvec, GridFct, QuadCreator>;
+    using Self = GridFunctionOperator;
+    using Transposed = GridFunctionOperator<tag::test_trialvec, GridFct>;
+    using Super = GridFunctionOperatorTransposed<Self, Transposed>;
 
   public:
-    GridFunctionOperator(tag::testvec_trial, GridFct const& expr, QuadCreator const& quadCreator)
-      : Transposed(tag::test_trialvec{}, expr, quadCreator)
+    GridFunctionOperator(tag::testvec_trial, GridFct const& expr)
+      : Super(tag::test_trialvec{}, expr)
     {}
-
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode, bool sameFE, bool sameNode>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& rowNode,
-                                ColNode const& colNode,
-                                std::integral_constant<bool, sameFE> flagSameFE,
-                                std::integral_constant<bool, sameNode> flagSameNode)
-    {
-      auto elementMatrixTransposed = trans(elementMatrix);
-      Transposed::calculateElementMatrix(
-        context, quad, elementMatrixTransposed, colNode, rowNode, flagSameFE, flagSameNode);
-    }
   };
 
   /** @} **/
diff --git a/src/amdis/assembler/ZeroOrderTestvecTrialvec.hpp b/src/amdis/assembler/ZeroOrderTestvecTrialvec.hpp
index 77a478e8685d749c9e69a451988fc83b585c0732..fefccfbe0427f8351076a6e8f5375b0d519404bd 100644
--- a/src/amdis/assembler/ZeroOrderTestvecTrialvec.hpp
+++ b/src/amdis/assembler/ZeroOrderTestvecTrialvec.hpp
@@ -19,30 +19,27 @@ namespace AMDiS
 
 
   /// zero-order operator \f$ \langle\Psi, c\,\Phi\rangle \f$, or \f$ \langle\Psi, A\,\Phi\rangle \f$
-  template <class GridFct, class QuadCreator>
-  class GridFunctionOperator<tag::testvec_trialvec, GridFct, QuadCreator>
-      : public GridFunctionOperatorBase<GridFct, QuadCreator>
+  template <class GridFct>
+  class GridFunctionOperator<tag::testvec_trialvec, GridFct>
+      : public GridFunctionOperatorBase<GridFunctionOperator<tag::testvec_trialvec, GridFct>, GridFct>
   {
-    using Super = GridFunctionOperatorBase<GridFct, QuadCreator>;
+    using Self = GridFunctionOperator;
+    using Super = GridFunctionOperatorBase<Self, GridFct>;
 
     using expr_value_type = typename GridFct::Range;
     static_assert( Category::Scalar<expr_value_type> || Category::Matrix<expr_value_type>,
       "Expression must be of scalar or matrix type." );
 
   public:
-    GridFunctionOperator(tag::testvec_trialvec, GridFct const& expr, QuadCreator const& quadCreator)
-      : Super(expr, quadCreator, 0)
+    GridFunctionOperator(tag::testvec_trialvec, GridFct const& expr)
+      : Super(expr, 0)
     {}
 
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode, bool sameFE, bool sameNode>
-    void calculateElementMatrix(Context const& context,
-                                QuadratureRule const& quad,
-                                ElementMatrix& elementMatrix,
-                                RowNode const& rowNode,
-                                ColNode const& colNode,
-                                bool_t<sameFE>,
-                                bool_t<sameNode>)
+    template <class Context, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrix(Context const& context,
+                          RowNode const& rowNode,
+                          ColNode const& colNode,
+                          ElementMatrix& elementMatrix)
     {
       static_assert(RowNode::isPower && ColNode::isPower,
         "Operator can be applied to Power-Nodes only.");
@@ -50,54 +47,65 @@ namespace AMDiS
       static_assert( (RowNode::CHILDREN == ColNode::CHILDREN), "" );
       // theoretically the restriction of the same nr of childs would not be necessary
 
+      const bool sameFE = std::is_same<FiniteElementType_t<RowNode>, FiniteElementType_t<ColNode>>::value;
+      const bool sameNode = rowNode.treeIndex() == colNode.treeIndex();
+
       using Category = ValueCategory_t<expr_value_type>;
-      calcElementMatrix(context, quad, elementMatrix, rowNode, colNode,
-        bool_<sameFE>,
-        bool_<sameNode>,
-        Category{});
+
+      auto const& quad = this->getQuadratureRule(context.type(), rowNode, colNode);
+      if (sameFE && sameNode && std::is_same<Category,tag::scalar>::value)
+        getElementMatrixOptimized(context, quad, rowNode, colNode, elementMatrix, Category{});
+      else
+        getElementMatrixStandard(context, quad, rowNode, colNode, elementMatrix, Category{});
     }
 
   protected: // specialization for different value-categories
 
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode, bool sameFE>
-    void calcElementMatrix(Context const& context,
-                           QuadratureRule const& quad,
-                           ElementMatrix& elementMatrix,
-                           RowNode const& rowNode,
-                           ColNode const& colNode,
-                           bool_t<sameFE>,
-                           std::false_type /*sameNode*/,
-                           tag::scalar)
+    template <class Context, class QuadratureRule, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrixStandard(Context const& context,
+                                  QuadratureRule const& quad,
+                                  RowNode const& rowNode,
+                                  ColNode const& colNode,
+                                  ElementMatrix& elementMatrix,
+                                  tag::scalar)
     {
       static const std::size_t CHILDREN = RowNode::CHILDREN;
 
       auto const& rowLocalFE = rowNode.child(0).finiteElement();
       auto const& colLocalFE = colNode.child(0).finiteElement();
 
-      auto* rowShapeValues = &rowShapeValues_;
-      auto* colShapeValues = (sameFE ? &rowShapeValues_ : &colShapeValues_);
+      using RowLocalBasisType = typename std::decay_t<decltype(rowLocalFE)>::Traits::LocalBasisType;
+      using ColLocalBasisType = typename std::decay_t<decltype(colLocalFE)>::Traits::LocalBasisType;
+      using RowRangeType = typename RowLocalBasisType::Traits::RangeType;
+      using ColRangeType = typename ColLocalBasisType::Traits::RangeType;
+
+      thread_local std::vector<RowRangeType> rowShapeValueBuffer;
+      thread_local std::vector<ColRangeType> colShapeValueBuffer;
+
+      const bool sameFE = std::is_same<decltype(rowLocalFE),decltype(colLocalFE)>::value;
+      auto* rowShapeValues = &rowShapeValueBuffer;
+      auto* colShapeValues = (sameFE ? &rowShapeValueBuffer : &colShapeValueBuffer);
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The multiplicative factor in the integral transformation formula
-        double factor = Super::coefficient(local) * context.integrationElement(quad[iq].position()) * quad[iq].weight();
+        const auto factor = Super::coefficient(local) * context.integrationElement(qp.position()) * qp.weight();
 
-        rowLocalFE.localBasis().evaluateFunction(local, rowShapeValues_);
+        rowLocalFE.localBasis().evaluateFunction(local, rowShapeValueBuffer);
         if (!sameFE)
-          colLocalFE.localBasis().evaluateFunction(local, colShapeValues_);
+          colLocalFE.localBasis().evaluateFunction(local, colShapeValueBuffer);
 
         for (std::size_t i = 0; i < rowLocalFE.size(); ++i) {
-          double value = factor * (*rowShapeValues)[i];
+          const auto value = factor * (*rowShapeValues)[i];
 
           for (std::size_t j = 0; j < colLocalFE.size(); ++j) {
-            double value0 = value * (*colShapeValues)[j];
+            const auto value0 = value * (*colShapeValues)[j];
 
             for (std::size_t k = 0; k < CHILDREN; ++k) {
-              const int local_ki = rowNode.child(k).localIndex(i);
-              const int local_kj = colNode.child(k).localIndex(j);
+              const auto local_ki = rowNode.child(k).localIndex(i);
+              const auto local_kj = colNode.child(k).localIndex(j);
 
               elementMatrix[local_ki][local_kj] += value0;
             }
@@ -107,46 +115,45 @@ namespace AMDiS
     }
 
 
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode>
-    void calcElementMatrix(Context const& context,
-                           QuadratureRule const& quad,
-                           ElementMatrix& elementMatrix,
-                           RowNode const& node,
-                           ColNode const& /*colNode*/,
-                           std::true_type /*sameFE*/,
-                           std::true_type /*sameNode*/,
-                           tag::scalar)
+    template <class Context, class QuadratureRule, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrixOptimized(Context const& context,
+                                   QuadratureRule const& quad,
+                                   RowNode const& node,
+                                   ColNode const& /*colNode*/,
+                                   ElementMatrix& elementMatrix,
+                                   tag::scalar)
     {
       static const std::size_t CHILDREN = RowNode::CHILDREN;
 
       auto const& localFE = node.child(0).finiteElement();
 
-      auto& shapeValues = rowShapeValues_;
+      using LocalBasisType = typename std::decay_t<decltype(localFE)>::Traits::LocalBasisType;
+      using RangeType = typename LocalBasisType::Traits::RangeType;
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The multiplicative factor in the integral transformation formula
-        double factor = Super::coefficient(local) * context.integrationElement(quad[iq].position()) * quad[iq].weight();
+        const auto factor = Super::coefficient(local) * context.integrationElement(qp.position()) * qp.weight();
 
+        thread_local std::vector<RangeType> shapeValues;
         localFE.localBasis().evaluateFunction(local, shapeValues);
 
         for (std::size_t i = 0; i < localFE.size(); ++i) {
-          double value = factor * shapeValues[i];
+          const auto value = factor * shapeValues[i];
 
           for (std::size_t k = 0; k < CHILDREN; ++k) {
-            const int local_ki = node.child(k).localIndex(i);
+            const auto local_ki = node.child(k).localIndex(i);
             elementMatrix[local_ki][local_ki] += value * shapeValues[i];
           }
 
           for (std::size_t j = i+1; j < localFE.size(); ++j) {
-            double value0 = value * shapeValues[j];
+            const auto value0 = value * shapeValues[j];
 
             for (std::size_t k = 0; k < CHILDREN; ++k) {
-              const int local_ki = node.child(k).localIndex(i);
-              const int local_kj = node.child(k).localIndex(j);
+              const auto local_ki = node.child(k).localIndex(i);
+              const auto local_kj = node.child(k).localIndex(j);
 
               elementMatrix[local_ki][local_kj] += value0;
               elementMatrix[local_kj][local_ki] += value0;
@@ -156,16 +163,13 @@ namespace AMDiS
       }
     }
 
-    template <class Context, class QuadratureRule,
-              class ElementMatrix, class RowNode, class ColNode, bool sameFE, bool sameNode>
-    void calcElementMatrix(Context const& context,
-                           QuadratureRule const& quad,
-                           ElementMatrix& elementMatrix,
-                           RowNode const& rowNode,
-                           ColNode const& colNode,
-                           bool_t<sameFE>,
-                           bool_t<sameNode>,
-                           tag::matrix)
+    template <class Context, class QuadratureRule, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrixStandard(Context const& context,
+                                  QuadratureRule const& quad,
+                                  RowNode const& rowNode,
+                                  ColNode const& colNode,
+                                  ElementMatrix& elementMatrix,
+                                  tag::matrix)
     {
       static const std::size_t CHILDREN = RowNode::CHILDREN;
       static_assert( std::is_same<expr_value_type, Dune::FieldMatrix<double, CHILDREN, CHILDREN>>::value, "" );
@@ -173,32 +177,41 @@ namespace AMDiS
       auto const& rowLocalFE = rowNode.child(0).finiteElement();
       auto const& colLocalFE = colNode.child(0).finiteElement();
 
-      auto* rowShapeValues = &rowShapeValues_;
-      auto* colShapeValues = (sameFE ? &rowShapeValues_ : &colShapeValues_);
+      using RowLocalBasisType = typename std::decay_t<decltype(rowLocalFE)>::Traits::LocalBasisType;
+      using ColLocalBasisType = typename std::decay_t<decltype(colLocalFE)>::Traits::LocalBasisType;
+      using RowRangeType = typename RowLocalBasisType::Traits::RangeType;
+      using ColRangeType = typename ColLocalBasisType::Traits::RangeType;
 
-      for (std::size_t iq = 0; iq < quad.size(); ++iq) {
+      thread_local std::vector<RowRangeType> rowShapeValueBuffer;
+      thread_local std::vector<ColRangeType> colShapeValueBuffer;
+
+      const bool sameFE = std::is_same<decltype(rowLocalFE),decltype(colLocalFE)>::value;
+      auto* rowShapeValues = &rowShapeValueBuffer;
+      auto* colShapeValues = (sameFE ? &rowShapeValueBuffer : &colShapeValueBuffer);
+
+      for (auto const& qp : quad) {
         // Position of the current quadrature point in the reference element
-        decltype(auto) local = context.position(quad[iq].position());
+        decltype(auto) local = context.local(qp.position());
 
         // The multiplicative factor in the integral transformation formula
-        double factor = context.integrationElement(quad[iq].position()) * quad[iq].weight();
+        const auto factor = context.integrationElement(qp.position()) * qp.weight();
         const Dune::FieldMatrix<double, CHILDREN, CHILDREN> exprValue = Super::coefficient(local);
 
-        rowLocalFE.localBasis().evaluateFunction(local, rowShapeValues_);
+        rowLocalFE.localBasis().evaluateFunction(local, rowShapeValueBuffer);
         if (!sameFE)
-          colLocalFE.localBasis().evaluateFunction(local, colShapeValues_);
+          colLocalFE.localBasis().evaluateFunction(local, colShapeValueBuffer);
 
         for (std::size_t i = 0; i < rowLocalFE.size(); ++i) {
-          double value0 = factor * (*rowShapeValues)[i];
+          const auto value0 = factor * (*rowShapeValues)[i];
 
           for (std::size_t j = 0; j < colLocalFE.size(); ++j) {
-            double value = value0 * (*colShapeValues)[j];
-            auto mat = exprValue * value;
+            const auto value = value0 * (*colShapeValues)[j];
+            const auto mat = exprValue * value;
 
             for (std::size_t k0 = 0; k0 < CHILDREN; ++k0) {
-              const int local_ki = rowNode.child(k0).localIndex(i);
+              const auto local_ki = rowNode.child(k0).localIndex(i);
               for (std::size_t k1 = 0; k1 < CHILDREN; ++k1) {
-                const int local_kj = colNode.child(k1).localIndex(j);
+                const auto local_kj = colNode.child(k1).localIndex(j);
 
                 elementMatrix[local_ki][local_kj] += mat[k0][k1];
               }
@@ -208,9 +221,16 @@ namespace AMDiS
       }
     }
 
-  private:
-    std::vector<Dune::FieldVector<double,1>> rowShapeValues_;
-    std::vector<Dune::FieldVector<double,1>> colShapeValues_;
+    template <class Context, class QuadratureRule, class RowNode, class ColNode, class ElementMatrix>
+    void getElementMatrixOptimized(Context const& context,
+                                   QuadratureRule const& quad,
+                                   RowNode const& node,
+                                   ColNode const& /*colNode*/,
+                                   ElementMatrix& elementMatrix,
+                                   tag::matrix)
+    {
+      assert(false && "Not implemented.");
+    }
   };
 
   /** @} **/
diff --git a/src/amdis/common/FieldMatVec.hpp b/src/amdis/common/FieldMatVec.hpp
index b8fac3891e22d708fb6fba10616bd42cf935897d..3786be2b455a22901b47b05685c7db605397c1dd 100644
--- a/src/amdis/common/FieldMatVec.hpp
+++ b/src/amdis/common/FieldMatVec.hpp
@@ -217,6 +217,12 @@ namespace AMDiS
   template <class T, int N, int M>
   FieldVector<T,N> operator*(FieldMatrix<T,N,M> const& mat, FieldVector<T,M> const& vec);
 
+  template <class T, int N, int M>
+  FieldMatrix<T,N,M> operator*(FieldMatrix<T,N,M> mat, FieldVector<T,1> const& scalar);
+
+  template <class T, int N>
+  FieldMatrix<T,N,1> operator*(FieldMatrix<T,N,1> mat, FieldVector<T,1> const& scalar);
+
 
 
   template <class T, int M, int N, int L>
diff --git a/src/amdis/common/FieldMatVec.inc.hpp b/src/amdis/common/FieldMatVec.inc.hpp
index 8353b618d2725aef034be1640605f2c59a985dff..83254c16d7663a36a2df2d4135f127650eae0ebf 100644
--- a/src/amdis/common/FieldMatVec.inc.hpp
+++ b/src/amdis/common/FieldMatVec.inc.hpp
@@ -404,6 +404,18 @@ FieldVector<T,N> operator*(FieldMatrix<T,N,M> const& mat, FieldVector<T,M> const
   return Dune::FMatrixHelp::mult(mat, vec);
 }
 
+template <class T, int N, int M>
+FieldMatrix<T,N,M> operator*(FieldMatrix<T,N,M> mat, FieldVector<T,1> const& scalar)
+{
+  return mat *= scalar[0];
+}
+
+template <class T, int N>
+FieldMatrix<T,N,1> operator*(FieldMatrix<T,N,1> mat, FieldVector<T,1> const& scalar)
+{
+  return mat *= scalar[0];
+}
+
 
 
 template <class T, int M, int N, int L>
diff --git a/src/amdis/common/Utility.hpp b/src/amdis/common/Utility.hpp
index dd897b88aeb70d0833899abd705c9c5909490c7a..08f9683e07d22ad3440048b47f44bff570b8d876 100644
--- a/src/amdis/common/Utility.hpp
+++ b/src/amdis/common/Utility.hpp
@@ -4,6 +4,12 @@
 #include <type_traits>
 #include <vector>
 
+#if AMDIS_HAS_CXX_CONSTEXPR_IF
+#define IF_CONSTEXPR if constexpr
+#else
+#define IF_CONSTEXPR if
+#endif
+
 namespace AMDiS
 {
   namespace Impl
@@ -46,6 +52,23 @@ namespace AMDiS
 
   // ---------------------------------------------------------------------------
 
+  // base template for tuple size, must implement a static value member
+  template <class TupleType>
+  struct TupleSize;
+
+  // utility constant returning the value of the tuple size
+  template <class TupleType>
+  constexpr std::size_t TupleSize_v = TupleSize<TupleType>::value;
+
+
+  // base template for retrieving the tuple element type, must implement the type member `type`
+  template <size_t I, class TupleType>
+  struct TupleElement;
+
+  // base template for retrieving the tuple element type
+  template <size_t I, class TupleType>
+  using TupleElement_t = typename TupleElement<I, TupleType>::type;
+
 
   /// A variadic type list
   template <class... Ts>
@@ -55,6 +78,23 @@ namespace AMDiS
   using Types_t = Types<std::decay_t<Ts>...>;
 
 
+  template <class... T>
+  struct TupleSize<Types<T...>>
+    : std::integral_constant<std::size_t, sizeof...(T)> {};
+
+  template <size_t I, class Head, class... Tail>
+  struct TupleElement<I, Types<Head, Tail...>>
+  {
+    using type = typename TupleElement<I-1, Types<Tail...>>::type;
+  };
+
+  template <class Head, class... Tail>
+  struct TupleElement<0, Types<Head, Tail...>>
+  {
+    using type = Head;
+  };
+
+
   /// Alias that indicates ownership of resources
   template <class T>
   using owner = T;
diff --git a/src/amdis/gridfunctions/AnalyticGridFunction.hpp b/src/amdis/gridfunctions/AnalyticGridFunction.hpp
index 995a07c552c7f687524bd69e0d6f086edabc5536..0faaa28dba1fef64e0ae68923e6c167625d39ec2 100644
--- a/src/amdis/gridfunctions/AnalyticGridFunction.hpp
+++ b/src/amdis/gridfunctions/AnalyticGridFunction.hpp
@@ -118,9 +118,9 @@ namespace AMDiS
     }
 
     /// \brief Return the LocalFunction of the AnalyticGridFunction.
-    friend LocalFunction localFunction(AnalyticGridFunction const& gf)
+    LocalFunction localFunction() const
     {
-      return LocalFunction{gf.fct_};
+      return {fct_};
     }
 
 
@@ -138,6 +138,12 @@ namespace AMDiS
   };
 
 
+  template <class F, class GV>
+  auto localFunction(AnalyticGridFunction<F,GV> const& gf)
+  {
+    return gf.localFunction();
+  }
+
   /// \brief Return a GridFunction representing the derivative of a functor.
   // [expects: Functor f has free function derivative(f)]
   template <class F, class GV>
diff --git a/src/amdis/gridfunctions/ConstantGridFunction.hpp b/src/amdis/gridfunctions/ConstantGridFunction.hpp
index d139d0f51a1890d94fb5f012d69bd4dc85fec736..978f8a5b3baade57e88ebeb50dbfcffdeba959de 100644
--- a/src/amdis/gridfunctions/ConstantGridFunction.hpp
+++ b/src/amdis/gridfunctions/ConstantGridFunction.hpp
@@ -41,8 +41,15 @@ namespace AMDiS
       return value_;
     }
 
-    /// \relates ConstantLocalFunction
-    friend int order(ConstantLocalFunction const& /*lf*/)
+    auto derivative() const
+    {
+      using RawSignature = typename Dune::Functions::SignatureTraits<R(D)>::RawSignature;
+      using DerivativeRange = typename Dune::Functions::DefaultDerivativeTraits<RawSignature>::Range;
+      DerivativeRange diff(0);
+      return ConstantLocalFunction<DerivativeRange(D),LocalContext,DerivativeRange>{diff};
+    }
+
+    int order() const
     {
       return 0;
     }
@@ -55,12 +62,17 @@ namespace AMDiS
   template <class R, class D, class LocalContext, class T>
   auto derivative(ConstantLocalFunction<R(D), LocalContext, T> const& lf)
   {
-    using RawSignature = typename Dune::Functions::SignatureTraits<R(D)>::RawSignature;
-    using DerivativeRange = typename Dune::Functions::DefaultDerivativeTraits<RawSignature>::Range;
-    DerivativeRange diff(0);
-    return ConstantLocalFunction<DerivativeRange(D),LocalContext,DerivativeRange>{diff};
+    return lf.derivative();
   }
 
+  /// \relates ConstantLocalFunction
+  template <class R, class D, class LocalContext, class T>
+  int order(ConstantLocalFunction<R(D), LocalContext, T> const& lf)
+  {
+    return lf.order();
+  }
+
+
 
   /**
     * \addtogroup GridFunctions
@@ -106,7 +118,10 @@ namespace AMDiS
       return entitySet_;
     }
 
-    T const& value() const { return value_; }
+    LocalFunction localFunction() const
+    {
+      return {value_};
+    }
 
   private:
     T value_;
@@ -115,9 +130,9 @@ namespace AMDiS
 
   /// Return the LocalFunction of the \ref ConstantGridFunction. \relates ConstantGridFunction
   template <class T, class GV>
-  typename ConstantGridFunction<T,GV>::LocalFunction localFunction(ConstantGridFunction<T,GV> const& gf)
+  auto localFunction(ConstantGridFunction<T,GV> const& gf)
   {
-    return {gf.value()};
+    return gf.localFunction();
   }
 
   /** @} **/
diff --git a/src/amdis/gridfunctions/FunctorGridFunction.hpp b/src/amdis/gridfunctions/FunctorGridFunction.hpp
index c7905acd403c91df98836a5e006694d51f56b8e9..26653e29be5f67a7c87397a44183b88d126dfbeb 100644
--- a/src/amdis/gridfunctions/FunctorGridFunction.hpp
+++ b/src/amdis/gridfunctions/FunctorGridFunction.hpp
@@ -9,6 +9,7 @@
 #include <amdis/common/IndexSeq.hpp>
 #include <amdis/common/Loops.hpp>
 #include <amdis/common/Mpl.hpp>
+#include <amdis/utility/Tuple.hpp>
 #include <amdis/gridfunctions/GridFunctionConcepts.hpp>
 
 namespace AMDiS
@@ -34,9 +35,11 @@ namespace AMDiS
   } // end namespace Impl
 
 
+  // forward declaration
   template <class Signatur, class Functor, class... LocalFunctions>
   class FunctorLocalFunction;
 
+  // implementation
   template <class R, class D, class Functor, class... LocalFunctions>
   class FunctorLocalFunction<R(D), Functor, LocalFunctions...>
   {
@@ -46,10 +49,10 @@ namespace AMDiS
 
   public:
     /// Constructor. Stores copies of the functor and localFunction(gridfunction)s.
-    template <class... LocalFcts>
-    FunctorLocalFunction(Functor const& fct, LocalFcts&&... localFcts)
+    template <class... GridFcts>
+    FunctorLocalFunction(Functor const& fct, GridFcts&&... gridFcts)
       : fct_(fct)
-      , localFcts_(std::forward<LocalFcts>(localFcts)...)
+      , localFcts_{tag::mapped_arg{}, [](auto const& gf) { return localFunction(gf); }, std::forward<GridFcts>(gridFcts)...}
     {}
 
     /// Calls \ref bind for all localFunctions
@@ -80,7 +83,7 @@ namespace AMDiS
     template <class Op, std::size_t... I>
     auto eval(Op op, Indices<I...>) const
     {
-      return fct_(op(std::get<I>(localFcts_))...);
+      return fct_(op(get<I>(localFcts_))...);
     }
 
   public:
@@ -90,14 +93,14 @@ namespace AMDiS
       return fct_;
     }
 
-    std::tuple<LocalFunctions...> const& localFcts() const
+    Tuple<LocalFunctions...> const& localFcts() const
     {
       return localFcts_;
     }
 
   private:
     Functor fct_;
-    std::tuple<LocalFunctions...> localFcts_;
+    Tuple<LocalFunctions...> localFcts_;
   };
 
   // Derivative of the LocalFunction of a FunctorGridFunction, utilizing
@@ -109,21 +112,24 @@ namespace AMDiS
     REQUIRES(Concepts::HasPartial<F>)>
   auto derivative(FunctorLocalFunction<Sig,F,LFs...> const& lf)
   {
-    auto index_seq = std::make_index_sequence<sizeof...(LFs)>{};
+    auto index_seq = std::index_sequence_for<LFs...>{};
 
     // d_i(f)[lgfs...] * lgfs_i
     auto term_i = [&](auto const _i)
     {
-      auto di_f = Dune::Std::apply([&](auto const&... lgfs) {
+      using Dune::Std::apply;
+      auto di_f = apply([&](auto const&... lgfs) {
         return makeFunctorGridFunction(partial(lf.fct(), _i), lgfs...);
       }, lf.localFcts());
 
-      auto const& lgfs_i = std::get<decltype(_i)::value>(lf.localFcts());
+      using std::get;
+      auto const& lgfs_i = get<decltype(_i)::value>(lf.localFcts());
       return makeFunctorGridFunction(Operation::Multiplies{}, di_f, derivative(lgfs_i));
     };
 
-    // sum_i [ d_i(f)[lgfs...] * derivative(lgfs_i) ]
-    auto gridFct = Dune::Std::apply([&](auto const... _i)
+    // sum_i [ d_i(f)[lgfs...] * derivative(lgfs_i)
+    using Dune::Std::apply;
+    auto gridFct = apply([&](auto const... _i)
     {
       return makeFunctorGridFunction(Operation::Plus{}, term_i(_i)...);
     }, index_seq);
@@ -139,7 +145,8 @@ namespace AMDiS
              && all_of_v<Concepts::HasOrder<LFs>...>)>
   int order(FunctorLocalFunction<Sig,F,LFs...> const& lf)
   {
-    return Dune::Std::apply([&lf](auto const&... lgfs) { return order(lf.fct(), order(lgfs)...); },
+    using Dune::Std::apply;
+    return apply([&lf](auto const&... lgfs) { return order(lf.fct(), order(lgfs)...); },
       lf.localFcts());
   }
 
@@ -200,7 +207,8 @@ namespace AMDiS
     /// Return the stored \ref EntitySet of the first GridFunction
     EntitySet const& entitySet() const
     {
-      return std::get<0>(gridFcts_).entitySet();
+      using std::get;
+      return get<0>(gridFcts_).entitySet();
     }
 
     auto const& fct() const { return fct_; }
@@ -210,7 +218,8 @@ namespace AMDiS
     template <class Outer, class Inner, std::size_t... I>
     auto eval(Outer outer, Inner inner, Indices<I...>) const
     {
-      return outer(inner(std::get<I>(gridFcts_))...);
+      using std::get;
+      return outer(inner(get<I>(gridFcts_))...);
     }
 
   private:
@@ -226,7 +235,7 @@ namespace AMDiS
     return Dune::Std::apply([&gf](auto const&... gridFcts)
     {
       using LocalFunction = typename FunctorGridFunction<F,GFs...>::LocalFunction;
-      return LocalFunction{gf.fct(), localFunction(gridFcts)...};
+      return LocalFunction{gf.fct(), gridFcts...};
     },
     gf.gridFcts());
   }
diff --git a/src/amdis/utility/Tuple.hpp b/src/amdis/utility/Tuple.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..68e47efcf9b23ae2c4eddcac93493ac92f8744b0
--- /dev/null
+++ b/src/amdis/utility/Tuple.hpp
@@ -0,0 +1,240 @@
+#pragma once
+
+#include <utility>
+
+#include <amdis/common/Mpl.hpp>
+#include <amdis/common/Utility.hpp>
+
+#include <dune/common/typetraits.hh>
+
+namespace AMDiS
+{
+  // inspired by Sasha Goldshtein's implementation,
+  // see http://blogs.microsoft.co.il/sasha/2015/01/12/implementing-tuple-part-1
+
+  namespace tag
+  {
+    struct mapped_arg {};
+  }
+
+  namespace Impl
+  {
+    template <std::size_t, class T>
+    struct TupleEntry
+    {
+      TupleEntry() = default;
+
+      explicit TupleEntry(T const& value) : value_(value) {}
+      explicit TupleEntry(T&& value) : value_(std::move(value)) {}
+
+      // create value through a map of arguments
+      template <class Map, class... Args>
+      TupleEntry(tag::mapped_arg, Map const& map, Args&&... args)
+        : value_(map(std::forward<Args>(args)...))
+      {}
+
+      T value_;
+    };
+
+    template <class Sequences, class... Types>
+    struct TupleImpl;
+
+    template <class T>
+    struct IsTupleImpl : std::false_type {};
+
+    template <class Sequences, class... Types>
+    struct IsTupleImpl<TupleImpl<Sequences,Types...>> : std::true_type {};
+
+
+    template <std::size_t... Indices, class... Types>
+    struct TupleImpl<std::index_sequence<Indices...>, Types...>
+      : TupleEntry<Indices, Types>...
+    {
+      TupleImpl() = default;
+
+      /// constructor from sequence of types, disabling this for copy and move constructor
+      template <class... OtherTypes,
+        std::enable_if_t<(sizeof...(OtherTypes) == sizeof...(Types)), int> = 0,
+        std::enable_if_t<none_of_v<IsTupleImpl<std::decay_t<OtherTypes>>::value...>, int> = 0>
+      explicit TupleImpl(OtherTypes&&... other)
+        : TupleEntry<Indices, Types>{std::forward<OtherTypes>(other)}...
+      {}
+
+      template <class Map, class... OtherTypes,
+        std::enable_if_t<(sizeof...(OtherTypes) == sizeof...(Types)), int> = 0>
+      TupleImpl(tag::mapped_arg, Map const& map, OtherTypes&&... other)
+        : TupleEntry<Indices, Types>{tag::mapped_arg{}, map, std::forward<OtherTypes>(other)}...
+      {}
+    };
+
+  } // end namespace Impl
+
+  /// \brief Implementation of tuple, that allows mapped construction
+  template <class... Types>
+  class Tuple
+    : public Impl::TupleImpl<std::index_sequence_for<Types...>, Types...>
+  {
+    using Super = Impl::TupleImpl<std::index_sequence_for<Types...>, Types...>;
+
+  public:
+    Tuple() = default;
+    Tuple(Tuple const&) = default;
+    Tuple(Tuple&&) = default;
+    Tuple& operator=(Tuple const&) = default;
+    Tuple& operator=(Tuple&&) = default;
+
+    // construct from types different from Types, but convertible
+    template <class... OtherTypes,
+      std::enable_if_t<(sizeof...(OtherTypes) == sizeof...(Types)), int> = 0>
+    explicit Tuple(OtherTypes&&... other)
+      : Super{std::forward<OtherTypes>(other)...}
+    {}
+
+    // constructor from tuple of possibly other types
+    template <class... OtherTypes,
+      std::enable_if_t<(sizeof...(OtherTypes) == sizeof...(Types)), int> = 0>
+    explicit Tuple(Tuple<OtherTypes...> const& that)
+      : Super{that}
+    {}
+
+    // constructor from tuple of possibly other types
+    template <class... OtherTypes,
+      std::enable_if_t<(sizeof...(OtherTypes) == sizeof...(Types)), int> = 0>
+    explicit Tuple(Tuple<OtherTypes...>&& that)
+      : Super{std::move(that)}
+    {}
+
+    // constructor using a map to build the elements
+    template <class Map, class... OtherTypes,
+      std::enable_if_t<(sizeof...(OtherTypes) == sizeof...(Types)), int> = 0>
+    Tuple(tag::mapped_arg, Map const& map, OtherTypes&&... other)
+      : Super{tag::mapped_arg{}, map, std::forward<OtherTypes>(other)...}
+    {}
+
+    /// Get the I'th tuple element by reference
+    template <std::size_t I>
+    auto& operator[](std::integral_constant<std::size_t,I>)
+    {
+      Impl::TupleEntry<I, TupleElement_t<I, AMDiS::Types<Types...>>>& base = *this;
+      return base.value_;
+    }
+
+    /// Get the I'th tuple element by const reference
+    template <std::size_t I>
+    auto const& operator[](std::integral_constant<std::size_t,I>) const
+    {
+      Impl::TupleEntry<I, TupleElement_t<I, AMDiS::Types<Types...>>> const& base = *this;
+      return base.value_;
+    }
+  };
+
+
+  /// Get the I'th tuple element type
+  template <std::size_t I, class... T>
+  struct TupleElement<I, Tuple<T...>>
+    : TupleElement<I, Types<T...>> {};
+
+
+  /// Get the size of the tuple
+  template <class... T>
+  struct TupleSize<Tuple<T...>>
+    : std::integral_constant<std::size_t, sizeof...(T)> {};
+
+
+  namespace Impl
+  {
+    /// Get the I'th tuple element by reference
+    template <std::size_t I, class... T>
+    auto& get(Tuple<T...>& tuple)
+    {
+      Impl::TupleEntry<I, TupleElement_t<I, Types<T...>>>& base = tuple;
+      return base.value_;
+    }
+
+    /// Get the I'th tuple element by const reference
+    template <std::size_t I, class... T>
+    auto const& get(Tuple<T...> const& tuple)
+    {
+      Impl::TupleEntry<I, TupleElement_t<I, Types<T...>>> const& base = tuple;
+      return base.value_;
+    }
+
+    /// Get the I'th tuple element by rvalue reference
+    template <std::size_t I, class... T>
+    auto&& get(Tuple<T...>&& tuple)
+    {
+      Impl::TupleEntry<I, TupleElement_t<I, Types<T...>>>&& base = tuple;
+      return std::move(base.value_);
+    }
+
+    template<class F, class TupleType, std::size_t... i>
+    decltype(auto) applyImpl(F&& f, TupleType&& args, std::index_sequence<i...>)
+    {
+      return f(get<i>(args)...);
+    }
+
+    template <class F, class... T>
+    decltype(auto) apply(F&& f, Tuple<T...> const& args)
+    {
+      auto indices = std::index_sequence_for<T...>{};
+      return applyImpl(std::forward<F>(f), args, indices);
+    }
+
+    template <class F, class... T>
+    decltype(auto) apply(F&& f, Tuple<T...>& args)
+    {
+      auto indices = std::index_sequence_for<T...>{};
+      return applyImpl(std::forward<F>(f), args, indices);
+    }
+
+  } // end namespace Impl
+
+  // inject get(Tuple) into namespace AMDiS by `using` directive
+  using Impl::get;
+
+  // inject apply(F,Tuple) into namespace AMDiS by `using` directive
+  using Impl::apply;
+
+
+  template <class... Types>
+  Tuple<Types&&...> forwardAsTuple(Types&&... elements)
+  {
+    return Tuple<Types&&...>{std::forward<Types>(elements)...};
+  }
+
+  template <class... Types>
+  Tuple<Types&...> tie(Types&... elements)
+  {
+    return Tuple<Types&...>{elements...};
+  }
+
+} // end namespace AMDiS
+
+namespace std
+{
+  template <class... T>
+  struct tuple_size<AMDiS::Tuple<T...>>
+    : AMDiS::TupleSize<AMDiS::Tuple<T...>> {};
+
+  template <std::size_t I, class... T>
+  struct tuple_element<I, AMDiS::Tuple<T...>>
+    : AMDiS::TupleElement<I, AMDiS::Tuple<T...>> {};
+
+  // inject get(AMDiS::Tuple) into namespace std by `using` directive
+  using AMDiS::Impl::get;
+
+  // inject apply(F,AMDiS::Tuple) into namespace std by `using` directive
+  using AMDiS::Impl::apply;
+
+} // end namespace std
+
+
+namespace Dune
+{
+  namespace Std
+  {
+#if ! DUNE_HAVE_CXX_APPLY && ! DUNE_HAVE_CXX_EXPERIMENTAL_APPLY
+  using AMDiS::Impl::apply;
+#endif
+  } // end namespace Std
+} // end namespace Dune
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 73701a09ed01b65438f4cf3c7bab0ad55c500955..d17857e7666b34c42e57a158b594adc94f67270f 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -4,7 +4,7 @@ dune_add_test(SOURCES AdaptInfoTest.cpp
 
 dune_add_test(SOURCES ClonablePtrTest.cpp
   LINK_LIBRARIES amdis)
-  
+
 dune_add_test(SOURCES ConceptsTest.cpp
   LINK_LIBRARIES amdis)
 
@@ -38,3 +38,6 @@ dune_add_test(SOURCES TreeDataTest.cpp
 
 dune_add_test(SOURCES TupleUtilityTest.cpp
   LINK_LIBRARIES amdis)
+
+dune_add_test(SOURCES TupleTest.cpp
+  LINK_LIBRARIES amdis)
diff --git a/test/TupleTest.cpp b/test/TupleTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..10c39b28807026f04bd3d0c59d7783fb256f0468
--- /dev/null
+++ b/test/TupleTest.cpp
@@ -0,0 +1,51 @@
+#include <tuple>
+#include <iostream>
+
+#include <amdis/utility/Tuple.hpp>
+#include <dune/common/hybridutilities.hh>
+#include <dune/common/std/apply.hh>
+
+#include "Tests.hpp"
+
+using namespace AMDiS;
+
+struct B {};
+
+struct A
+{
+  A() = delete;
+  A(A const&) = delete;
+  A(A&&) = delete;
+
+  A(B,B) {};
+};
+
+struct MakeA
+{
+  A operator()(B const& b) const
+  {
+    return {b,b};
+  }
+};
+
+
+int main()
+{
+  Tuple<double,int> u{1.3, 2};
+  Tuple<double,int> v{1, 2};
+
+  Tuple<int, double> t1;
+  get<0>(t1) = 42;
+
+  Tuple<int> t2(3);
+  Tuple<int> t3(t1);
+
+  // construct a tuple of non-copyable and non-movable types
+  Tuple<A> t4(tag::mapped_arg{}, MakeA{}, B{});
+
+  Dune::Hybrid::forEach(u, [](auto value) { std::cout << value << "\n"; });
+
+  Dune::Std::apply([](auto... values) { return std::make_tuple(values...); }, u);
+
+  return report_errors();
+}
\ No newline at end of file