diff --git a/AMDiS/CMakeLists.txt b/AMDiS/CMakeLists.txt
index 65676ce32b1aed83c32892efd37cfd2c15852401..658b61fdf0d47953483bb3b5f100f274f9d565c1 100644
--- a/AMDiS/CMakeLists.txt
+++ b/AMDiS/CMakeLists.txt
@@ -257,6 +257,7 @@ if(ENABLE_PARALLEL_DOMAIN)
 		list(APPEND PARALLEL_DOMAIN_AMDIS_SRC 
 			${SOURCE_DIR}/parallel/BddcMlSolver.cc
 			${SOURCE_DIR}/parallel/ParallelCoarseSpaceMatVec.cc
+			${SOURCE_DIR}/parallel/PetscHelper.cc
 		        ${SOURCE_DIR}/parallel/PetscMultigridPrecon.cc
 			${SOURCE_DIR}/parallel/PetscSolver.cc
 			${SOURCE_DIR}/parallel/PetscProblemStat.cc
diff --git a/AMDiS/src/parallel/PetscHelper.cc b/AMDiS/src/parallel/PetscHelper.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ad3c35c00a62130111536b2b6c92a67a3e18c301
--- /dev/null
+++ b/AMDiS/src/parallel/PetscHelper.cc
@@ -0,0 +1,75 @@
+//
+// Software License for AMDiS
+//
+// Copyright (c) 2010 Dresden University of Technology 
+// All rights reserved.
+// Authors: Simon Vey, Thomas Witkowski et al.
+//
+// This file is part of AMDiS
+//
+// See also license.opensource.txt in the distribution.
+
+
+#include "parallel/PetscHelper.h"
+
+namespace AMDiS {
+
+  namespace petsc_helper {
+
+    using namespace std;
+    
+    void getMatLocalColumn(Mat mat, PetscMatCol &matCol)
+    {
+      PetscInt firstIndex, lastIndex;
+      MatGetOwnershipRange(mat, &firstIndex, &lastIndex);
+    
+      PetscInt nCols;
+      const PetscInt *cols;
+      const PetscScalar *values;
+      
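+      // Traverse all rows owned by this rank and sort the nonzero entries
+      // of each row into per-column buckets.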
+      for (int row = firstIndex; row < lastIndex; row++) {
+	MatGetRow(mat, row, &nCols, &cols, &values);
+	
+	for (int i = 0; i < nCols; i++) {
+	  if (values[i] != 0.0) {
+	    matCol[cols[i]].first.push_back(row - firstIndex);
+	    matCol[cols[i]].second.push_back(values[i]);
+	  }
+	}
+
+	MatRestoreRow(mat, row, &nCols, &cols, &values);
+      }
+    }
+
+
+    void setMatLocalColumn(Mat mat, int column, Vec vec)
+    {
+      PetscInt firstIndex;
+      MatGetOwnershipRange(mat, &firstIndex, PETSC_NULL);
+
+      PetscInt vecSize;
+      VecGetLocalSize(vec, &vecSize);
+
+      PetscScalar *tmpValues;
+      VecGetArray(vec, &tmpValues);
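+      // Add each local vector entry into the given column of the matrix,
+      // at the rows owned by this rank.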
+      for (int i = 0; i < vecSize; i++)
+	MatSetValue(mat, 
+		    firstIndex + i,
+		    column,
+		    tmpValues[i],
+		    ADD_VALUES);
+      VecRestoreArray(vec, &tmpValues);
+    }
+
+
+    void getColumnVec(const SparseCol &matCol, Vec vec)
+    {
+      VecSet(vec, 0.0);
+      VecSetValues(vec, matCol.first.size(), 
+		   &(matCol.first[0]), &(matCol.second[0]), INSERT_VALUES);
+      VecAssemblyBegin(vec);
+      VecAssemblyEnd(vec);
+    }
+  }
+  
+}
diff --git a/AMDiS/src/parallel/PetscHelper.h b/AMDiS/src/parallel/PetscHelper.h
new file mode 100644
index 0000000000000000000000000000000000000000..01edbc1daa046603a98ee2bc626f9b62c09821b4
--- /dev/null
+++ b/AMDiS/src/parallel/PetscHelper.h
@@ -0,0 +1,76 @@
+// ============================================================================
+// ==                                                                        ==
+// == AMDiS - Adaptive multidimensional simulations                          ==
+// ==                                                                        ==
+// ==  http://www.amdis-fem.org                                              ==
+// ==                                                                        ==
+// ============================================================================
+//
+// Software License for AMDiS
+//
+// Copyright (c) 2010 Dresden University of Technology 
+// All rights reserved.
+// Authors: Simon Vey, Thomas Witkowski et al.
+//
+// This file is part of AMDiS
+//
+// See also license.opensource.txt in the distribution.
+
+
+
+/** \file PetscHelper.h */
+
+#ifndef AMDIS_PETSCHELPER_H
+#define AMDIS_PETSCHELPER_H
+
+#include <mpi.h>
+#include <map>
+#include <vector>
+#include <petsc.h>
+
+namespace AMDiS {
+
+  using namespace std;
+
+  /** \brief
+   * In this namespace, we collect several auxiliary functions for using
+   * PETSc in AMDiS. Many of these functions may be replaced by new PETSc
+   * functions in upcoming versions.
+   */
+  namespace petsc_helper {
+
+    /// A single sparse matrix column: local row indices paired with values.
+    typedef pair<vector<int>, vector<double> > SparseCol;
+
+    /// Defines a PETSc matrix column-wise: maps column indices to sparse columns.
+    typedef map<int, SparseCol> PetscMatCol;
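+    //
+    // For illustration (a hypothetical example): a local 2x2 block with
+    // nonzero entries a(0,0) = 1.0 and a(1,1) = 2.0 is stored as
+    // matCol[0] = ([0], [1.0]) and matCol[1] = ([1], [2.0]).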
+
+    /** \brief
+     * Returns, for a distributed matrix, the local matrix of each rank in a
+     * sparse column format.
+     *
+     * \param[in]   mat     PETSc distributed matrix.
+     * \param[out]  matCol  The sparse column representation of the local matrix.
+     */
+    void getMatLocalColumn(Mat mat, PetscMatCol &matCol);
+
+    /** \brief
+     * Set a local column vector in a distributed matrix. The values are
+     * added with ADD_VALUES, so the caller must assemble the matrix after
+     * all columns have been set.
+     *
+     * \param[out]  mat     Distributed matrix.
+     * \param[in]   column  Global column index.
+     * \param[in]   vec     Column vector.
+     */
+    void setMatLocalColumn(Mat mat, int column, Vec vec);
+
+    /** \brief
+     * Create a local PETSc vector representing one column of a matrix
+     * stored in \ref PetscMatCol format.
+     *
+     * \param[in]   matCol   Sparse column representation.
+     * \param[out]  vec      Vector representing one column of the matrix.
+     */
+    void getColumnVec(const SparseCol &matCol, Vec vec);
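+
+    /* A minimal usage sketch of these helpers (the names "mat", "matK",
+     * "tmpVec", and "nLocalRows" are placeholders; see the direct Schur
+     * primal solver in PetscSolverFeti.cc for the actual use):
+     *
+     * \code
+     * PetscMatCol matCols;
+     * getMatLocalColumn(mat, matCols);
+     *
+     * Vec tmpVec;
+     * VecCreateSeq(PETSC_COMM_SELF, nLocalRows, &tmpVec);
+     * for (PetscMatCol::iterator it = matCols.begin();
+     *      it != matCols.end(); ++it) {
+     *   getColumnVec(it->second, tmpVec);
+     *   // ... use tmpVec, e.g., as the rhs of a local solve ...
+     *   setMatLocalColumn(matK, it->first, tmpVec);
+     * }
+     * VecDestroy(&tmpVec);
+     *
+     * MatAssemblyBegin(matK, MAT_FINAL_ASSEMBLY);
+     * MatAssemblyEnd(matK, MAT_FINAL_ASSEMBLY);
+     * \endcode
+     */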
+  }
+
+}
+
+#endif
diff --git a/AMDiS/src/parallel/PetscSolverFeti.cc b/AMDiS/src/parallel/PetscSolverFeti.cc
index 121089df9b5c850af5140599d0dbeac0f149ba64..95bb9eb8e6f81d881a80841d802ef73be837c45e 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.cc
+++ b/AMDiS/src/parallel/PetscSolverFeti.cc
@@ -12,6 +12,7 @@
 
 #include "AMDiS.h"
 #include "MatrixVector.h"
+#include "parallel/PetscHelper.h"
 #include "parallel/PetscSolverFeti.h"
 #include "parallel/PetscSolverFetiDebug.h"
 #include "parallel/PetscSolverFetiMonitor.h"
@@ -745,6 +746,9 @@ namespace AMDiS {
 	    TEST_EXIT_DBG(isPrimal(feSpaces[i], **it) == false)
 	      ("Should not be primal!\n");
 
+	    if (dirichletRows[feSpaces[i]].count(**it))
+	      continue;
+
 	    int col = lagrangeMap.getMatIndex(i, **it);
 	    double value = 1.0;
 	    MatSetValue(mat_augmented_lagrange, rowCounter, col, value, INSERT_VALUES);
@@ -818,86 +822,68 @@ namespace AMDiS {
     } else {
       MSG("Create direct schur primal solver!\n");
 
-      TEST_EXIT(!augmentedLagrange)("Not yet supported!\n");
-
       double wtime = MPI::Wtime();
 
+      TEST_EXIT(!augmentedLagrange)("Not yet supported!\n");
       TEST_EXIT_DBG(meshLevel == 0)
 	("Does not support for multilevel, check usage of localDofMap.\n");
 
+
+      // === First, calculate matK = inv(A_BB) A_BPi:                      ===
+      // ===    - get all local column vectors from A_BPi                  ===
+      // ===    - solve with A_BB, using each column vector as the rhs     ===
+      // ===    - set the result as the corresponding column vector of     ===
+      // ===      matrix matK                                              ===
+
       int nRowsRankPrimal = primalDofMap.getRankDofs();
       int nRowsOverallPrimal = primalDofMap.getOverallDofs();
       int nRowsRankB = localDofMap.getRankDofs();
 
-      Mat matBPi;
-      MatCreateAIJ(mpiCommGlobal,
-		   nRowsRankB, nRowsRankPrimal, 
-		   nGlobalOverallInterior, nRowsOverallPrimal,
-		   150, PETSC_NULL, 150, PETSC_NULL, &matBPi);
-      MatSetOption(matBPi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
-
-
-      PetscInt nCols;
-      const PetscInt *cols;
-      const PetscScalar *values;
-
-      map<int, vector<pair<int, double> > > mat_b_primal_cols;
-
-      for (int i = 0; i < nRowsRankB; i++) {
-	PetscInt row = localDofMap.getStartDofs() + i;
-	MatGetRow(subdomain->getMatInteriorCoarse(), row, &nCols, &cols, &values);
-
-	for (int j = 0; j < nCols; j++)
-	  if (values[j] != 0.0)
-	    mat_b_primal_cols[cols[j]].push_back(make_pair(i, values[j]));
-
-	MatRestoreRow(subdomain->getMatInteriorCoarse(), row, &nCols, &cols, &values);
-      }
+      // Transform matrix A_BPi into a sparse column format.
+      petsc_helper::PetscMatCol mat_b_primal_cols;
+      petsc_helper::getMatLocalColumn(subdomain->getMatInteriorCoarse(), 
+				      mat_b_primal_cols);
 
       TEST_EXIT(static_cast<int>(mat_b_primal_cols.size()) == 
 		primalDofMap.getLocalDofs())
 	("Should not happen!\n");
 
-      for (map<int, vector<pair<int, double> > >::iterator it = mat_b_primal_cols.begin();
-	   it != mat_b_primal_cols.end(); ++it) {
-	Vec tmpVec;
-	VecCreateSeq(PETSC_COMM_SELF, nRowsRankB, &tmpVec);
-
- 	for (unsigned int i = 0; i < it->second.size(); i++) 
- 	  VecSetValue(tmpVec, 
- 		      it->second[i].first, it->second[i].second, INSERT_VALUES);
+      Vec tmpVec;
+      VecCreateSeq(PETSC_COMM_SELF, nRowsRankB, &tmpVec);
 
-	VecAssemblyBegin(tmpVec);
-	VecAssemblyEnd(tmpVec);
+      Mat matK;
+      MatCreateAIJ(mpiCommGlobal,
+		   nRowsRankB, nRowsRankPrimal, 
+		   nGlobalOverallInterior, nRowsOverallPrimal,
+		   150, PETSC_NULL, 150, PETSC_NULL, &matK);
+      MatSetOption(matK, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
 
+      // Solve for each column vector of matrix A_BPi and set the result as
+      // the corresponding column of matrix matK.
+      for (petsc_helper::PetscMatCol::iterator it = mat_b_primal_cols.begin();
+	   it != mat_b_primal_cols.end(); ++it) {
+	petsc_helper::getColumnVec(it->second, tmpVec);
 	subdomain->solve(tmpVec, tmpVec);
-
-	PetscScalar *tmpValues;
-	VecGetArray(tmpVec, &tmpValues);
-	for (int i  = 0; i < nRowsRankB; i++)
-	  MatSetValue(matBPi, 
-		      localDofMap.getStartDofs() + i,
-		      it->first,
-		      tmpValues[i],
-		      ADD_VALUES);
-	VecRestoreArray(tmpVec, &tmpValues);
-
-	VecDestroy(&tmpVec);
+	petsc_helper::setMatLocalColumn(matK, it->first, tmpVec);
       }
 
-      MatAssemblyBegin(matBPi, MAT_FINAL_ASSEMBLY);
-      MatAssemblyEnd(matBPi, MAT_FINAL_ASSEMBLY);
+      VecDestroy(&tmpVec);
 
+      MatAssemblyBegin(matK, MAT_FINAL_ASSEMBLY);
+      MatAssemblyEnd(matK, MAT_FINAL_ASSEMBLY);
+
+      // Calculate: mat_schur_primal = A_PiPi - A_PiB inv(A_BB) A_BPi
+      //                             = A_PiPi - A_PiB matK
       MatDuplicate(subdomain->getMatCoarse(), MAT_COPY_VALUES, 
 		   &mat_schur_primal);
-
       Mat matPrimal;
-      MatMatMult(subdomain->getMatCoarseInterior(), matBPi, MAT_INITIAL_MATRIX, 
+      MatMatMult(subdomain->getMatCoarseInterior(), matK, MAT_INITIAL_MATRIX, 
 		 PETSC_DEFAULT, &matPrimal);
       MatAXPY(mat_schur_primal, -1.0, matPrimal, DIFFERENT_NONZERO_PATTERN);
-
       MatDestroy(&matPrimal);
-      MatDestroy(&matBPi);
+      MatDestroy(&matK);
+
+
+      // === Create KSP solver object and set appropriate solver options. ===
 
       KSPCreate(mpiCommGlobal, &ksp_schur_primal);
       KSPSetOperators(ksp_schur_primal, mat_schur_primal, mat_schur_primal,
@@ -910,6 +896,9 @@ namespace AMDiS {
       PCFactorSetMatSolverPackage(pc_schur_primal, MATSOLVERMUMPS);
       KSPSetFromOptions(ksp_schur_primal);
 
+
+      // === And finally print timings, if required. ===
+
       if (printTimings) {
 	MPI::COMM_WORLD.Barrier();
 	MatInfo minfo;