diff --git a/AMDiS/src/DOFVector.cc b/AMDiS/src/DOFVector.cc
index abd1b4a7111041a93c642c0b81a218026079df64..50825ea6e0993ebd7d06ef5f45a82fe3b5ca407a 100644
--- a/AMDiS/src/DOFVector.cc
+++ b/AMDiS/src/DOFVector.cc
@@ -101,8 +101,14 @@ namespace AMDiS {
       for (int i = 0; i < nBasFcts; i++)
         uh[i] = operator[](localIndices[i]);
       value = basFcts->evalUh(lambda, uh);
-    } else
+    } else {
+#ifdef HAVE_PARALLEL_DOMAIN_AMDIS
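+      // In the parallel case the point may simply lie outside this rank's part
+      // of the distributed mesh, so return 0.0 instead of aborting.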
+      return 0.0;
+#else
       throw(std::runtime_error("Cannot evaluate DOFVector at point p, because the point is outside the geometry."));
+#endif
+    }
+
 
     if (oldElInfo == NULL)
       delete elInfo;
diff --git a/AMDiS/src/parallel/CheckerPartitioner.cc b/AMDiS/src/parallel/CheckerPartitioner.cc
index 3e94a91136f0650ede4b43a5adfe0d46837e9ad4..4d8c747c6e31b426608223984d21ec97b15ee6ae 100644
--- a/AMDiS/src/parallel/CheckerPartitioner.cc
+++ b/AMDiS/src/parallel/CheckerPartitioner.cc
@@ -15,7 +15,35 @@
 
 
 namespace AMDiS {
+
+  CheckerPartitioner::CheckerPartitioner(MPI::Intracomm *comm)
+    : MeshPartitioner(comm),
+      mpiRank(mpiComm->Get_rank()),
+      mpiSize(mpiComm->Get_size()),
+      mode(0),
+      multilevel(false)
+  {
+    string modestr = "";
+    Parameters::get("parallel->partitioner->mode", modestr);
+    
+    if (modestr == "x-stripes")
+      mode = 1;
+    else if (modestr == "y-stripes")
+      mode = 2;
+    else if (modestr == "z-stripes")
+      mode = 3;
+    else if (modestr == "tetrahedron-stripes")
+      mode = 4;
+    else if (modestr == "multilevel")
+      multilevel = true;
+    else {
+      if (modestr != "") {
+	ERROR_EXIT("No partitioner mode \"%s\"!\n", modestr.c_str());
+      }
+    }
+  }
   
+
   bool CheckerPartitioner::createInitialPartitioning()
   {
     FUNCNAME("CheckerPartitioner::createInitialPartitioning()");
@@ -47,6 +75,13 @@ namespace AMDiS {
       TEST_EXIT(MPI::COMM_WORLD.Get_size() == 16)
 	("Multilevel partitioning is implemented for 16 nodes only!\n");
     }
+
+
+    if (mode == 4) {
+      TEST_EXIT(mesh->getDim() == 3)("Works only in 3D!\n");
+      createTetrahedronStripes();
+    }
+
     
     int dim = mesh->getDim();
     TraverseStack stack;
@@ -114,6 +149,31 @@ namespace AMDiS {
 
 	break;
 
+      case 4:
+	// tetrahedron-stripes
+
+	{
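+	  // Assign the element to the rank whose index equals the index of the
+	  // tetrahedron stripe containing it (see createTetrahedronStripes()).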
+	  int inStripe = -1;
+	  int stripePos = -1;
+	  for (int stripe = 0; stripe < elInStripe.size(); stripe++) {
+	    for (int pos = 0; pos < elInStripe[stripe].size(); pos++) {
+	      if (elInStripe[stripe][pos] == elIndex) {
+		inStripe = stripe;
+		stripePos = pos;
+		break;
+	      }
+	    }
+	    if (inStripe >= 0)
+	      break;
+	  }
+
+	  TEST_EXIT(inStripe >= 0)("Should not happen!\n");
+
+	  elInRank = inStripe;
+	}
+
+	break;
+
       default:
 	ERROR_EXIT("Mode %d does not exists for checker based mesh partitioning!\n", 
 		   mode);
@@ -131,4 +191,115 @@ namespace AMDiS {
     return true;
   }
 
+
+  void CheckerPartitioner::createTetrahedronStripes()
+  {
+    FUNCNAME("CheckerPartitioner::createTetrahedronStripes()");
+
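+    // Build stripes of tetrahedra: every macro element with three vertices in
+    // the z = 0 plane starts a new stripe. Each stripe is then extended element
+    // by element: the vertex that agrees with one of the three current marker
+    // vertices in x and y but differs in z replaces that marker vertex, and the
+    // neighbour across the face spanned by the markers is appended, until no
+    // such neighbour exists.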
+    vector<vector<MacroElement*> > stripes;
+
+    int nElements = 0;
+    TraverseStack stack;
+    ElInfo *elInfo = 
+      stack.traverseFirst(mesh, 0, Mesh::CALL_EL_LEVEL | Mesh::FILL_COORDS);
+    while (elInfo) {
+      TEST_EXIT(elInfo->getLevel() == 0)("Should not happen!\n");
+
+      Element *el = elInfo->getElement();
+      int elIndex = el->getIndex();
+
+      int zeroCoordCounter = 0;
+      for (int i = 0; i < mesh->getGeo(VERTEX); i++)
+	if (fabs(elInfo->getCoord(i)[2]) < 1e-10)
+	  zeroCoordCounter++;      
+
+      if (zeroCoordCounter == 3) {
+	vector<MacroElement*> tmp;
+	tmp.push_back(elInfo->getMacroElement());
+	stripes.push_back(tmp);
+
+	vector<int> tmpIndex;
+	tmpIndex.push_back(elInfo->getMacroElement()->getIndex());
+	elInStripe.push_back(tmpIndex);
+      }
+
+      nElements++;
+      elInfo = stack.traverseNext(elInfo);
+    }
+    
+    TEST_EXIT(mpiSize % stripes.size() == 0)
+      ("Should not happen! mpiSize = %d but %d bottom elements found!\n",
+       mpiSize, stripes.size());
+
+    int testElementCounter = 0;
+    for (int stripe = 0; stripe < stripes.size(); stripe++) {
+      MacroElement *mel = stripes[stripe][0];
+
+      set<int> localDofs;
+      for (int i = 0; i < mesh->getGeo(VERTEX); i++)
+	if (fabs(mel->getCoord(i)[2]) < 1e-10)
+	  localDofs.insert(i);
+      TEST_EXIT(localDofs.size() == 3)("Should not happen!\n");
+
+
+      while (mel != NULL) {
+	int replaceDof = -1;
+	for (int i = 0; i < mesh->getGeo(VERTEX); i++)
+	  if (localDofs.count(i) == 0)
+	    replaceDof = i;
+	
+	bool found = false;
+	for (std::set<int>::iterator dit = localDofs.begin(); 
+	     dit != localDofs.end(); ++dit) {
+	  WorldVector<double> c0 = mel->getCoord(*dit);
+	  WorldVector<double> c1 = mel->getCoord(replaceDof);
+	  
+	  if (fabs(c0[0] - c1[0]) < 1e-10 &&
+	      fabs(c0[1] - c1[1]) < 1e-10 &&
+	      fabs(c0[2] - c1[2]) > 1e-10) {
+	    found = true;
+	    localDofs.erase(dit);
+	    localDofs.insert(replaceDof);
+	    break;
+	  }
+	}
+	TEST_EXIT(found)("Should not happen!\n");
+	
+	
+	set<DegreeOfFreedom> faceDofs;      
+	for (std::set<int>::iterator dit = localDofs.begin(); 
+	     dit != localDofs.end(); ++dit)
+	  faceDofs.insert(mel->getElement()->getDof(*dit, 0));	
+	TEST_EXIT(faceDofs.size() == 3)("Should not happen!\n");
+
+	
+	int localFace = -1;
+	for (int i = 0; i < mesh->getGeo(FACE); i++) {
+	  DofFace face = mel->getElement()->getFace(i);
+	  bool allVertexInFace = 
+	    faceDofs.count(face.get<0>()) &&
+	    faceDofs.count(face.get<1>()) &&
+	    faceDofs.count(face.get<2>());
+	  if (allVertexInFace) {
+	    localFace = i;
+	    break;
+	  }
+	}
+	TEST_EXIT(localFace >= 0)("Should not happen!\n");
+
+	MacroElement *mel_neigh = mel->getNeighbour(localFace);
+	if (mel_neigh) {
+	  stripes[stripe].push_back(mel_neigh);
+	  elInStripe[stripe].push_back(mel_neigh->getIndex());
+	}	
+	
+	mel = mel_neigh;
+      }
+
+      testElementCounter += stripes[stripe].size();
+    }
+
+    TEST_EXIT(testElementCounter == nElements)("Should not happen!\n");
+  }
+
 }
diff --git a/AMDiS/src/parallel/CheckerPartitioner.h b/AMDiS/src/parallel/CheckerPartitioner.h
index 02ad14ca061f7f158e5b516b08cc825913274d42..f1c4809a04cbaa9d4e61f2f222abd371672ef2e0 100644
--- a/AMDiS/src/parallel/CheckerPartitioner.h
+++ b/AMDiS/src/parallel/CheckerPartitioner.h
@@ -35,35 +35,14 @@ namespace AMDiS {
   class CheckerPartitioner : public MeshPartitioner
   {
   public:
-    CheckerPartitioner(MPI::Intracomm *comm)
-      : MeshPartitioner(comm),
-	mpiRank(mpiComm->Get_rank()),
-	mpiSize(mpiComm->Get_size()),
-	mode(0),
-	multilevel(false)
-    {
-      string modestr = "";
-      Parameters::get("parallel->partitioner->mode", modestr);
-
-      if (modestr == "x-stripes")
-	mode = 1;
-      else if (modestr == "y-stripes")
-	mode = 2;
-      else if (modestr == "z-stripes")
-	mode = 3;
-      else if (modestr == "multilevel")
-	multilevel = true;
-      else {
-	if (modestr != "") {
-	  ERROR_EXIT("No partitioner mode \"%s\"!\n", modestr.c_str());
-	}
-      }
-    }
+    CheckerPartitioner(MPI::Intracomm *comm);
 
     ~CheckerPartitioner() {}
 
     bool createInitialPartitioning();
 
+    void createTetrahedronStripes();
+
     /// \ref MeshPartitioner::partition
     bool partition(map<int, double> &elemWeights, PartitionMode mode = INITIAL)
     {
@@ -81,8 +60,13 @@ namespace AMDiS {
     /// 0: standard mode: each node gets one box
     /// 1: x-stripes: each node gets one x-stripe of boxes
     /// 2: y-stripes: each node gets one y-stripe of boxes
+    /// 3: z-stripes: each node gets one z-stripe of boxes
+    /// 4: tetrahedron-stripes: also known as Hieram mode :)
     int mode;
 
+    /// Only used in mode 4.
+    vector<vector<int> > elInStripe;
+
     bool multilevel;
   };
 }
diff --git a/AMDiS/src/parallel/MeshDistributor.cc b/AMDiS/src/parallel/MeshDistributor.cc
index 4f7a2a87ad7304d2adfc3edfb485bfdd331ddc77..0c65e0ac3bdee220bda8c394473e648c895de294 100644
--- a/AMDiS/src/parallel/MeshDistributor.cc
+++ b/AMDiS/src/parallel/MeshDistributor.cc
@@ -579,10 +579,8 @@ namespace AMDiS {
     vector<ParallelDofMapping*>::iterator it = 
       find(dofMaps.begin(), dofMaps.end(), &dofMap);
 
-    TEST_EXIT(it != dofMaps.end())
-      ("Cannot find Parallel DOF mapping object which should be removed!\n");
-
-    dofMaps.erase(it);
+    if (it != dofMaps.end())
+      dofMaps.erase(it);
   }
 
 
@@ -908,7 +906,7 @@ namespace AMDiS {
   {
     FUNCNAME("MeshDistributor::createMeshLevelStructure()");
 
-    int levelMode = -1;
+    int levelMode = 0;
     Parameters::get("parallel->level mode", levelMode);
 
     TEST_EXIT(levelMode >= 0)("Wrong level mode %d!\n", levelMode);
@@ -1305,7 +1303,8 @@ namespace AMDiS {
   {
     int mapSize = data.size();
     SerUtil::serialize(out, mapSize);
-    for (map<int, map<const FiniteElemSpace*, DofContainer> >::iterator it = data.begin(); it != data.end(); ++it) {
+    for (map<int, map<const FiniteElemSpace*, DofContainer> >::iterator it = data.begin(); 
+	 it != data.end(); ++it) {
       int rank = it->first;
       SerUtil::serialize(out, rank);
 
diff --git a/AMDiS/src/parallel/ParallelCoarseSpaceSolver.cc b/AMDiS/src/parallel/ParallelCoarseSpaceSolver.cc
index 486465d369b498c2cd942a4741dfe1186599eec4..a5e56b24377516959c7c526752b0cd117d09f6ff 100644
--- a/AMDiS/src/parallel/ParallelCoarseSpaceSolver.cc
+++ b/AMDiS/src/parallel/ParallelCoarseSpaceSolver.cc
@@ -69,16 +69,7 @@ namespace AMDiS {
     meshLevel = level;
     meshDistributor = md;
 
-#if 0
-    mpiCommGlobal = meshDistributor->getMpiComm(meshLevel);
-    if (meshLevel + 1 < 
-	meshDistributor->getMeshLevelData().getNumberOfLevels())
-      mpiCommLocal = meshDistributor->getMpiComm(meshLevel + 1);
-    else 
-      mpiCommLocal = MPI::COMM_SELF;
-#endif
     domainComm = meshDistributor->getMpiComm(meshLevel);
-
     if (meshLevel >= 1)
       coarseSpaceComm = meshDistributor->getMpiComm(meshLevel - 1);
   }
@@ -316,6 +307,7 @@ namespace AMDiS {
     FUNCNAME("ParallelCoarseSpaceSolver::vecDestroy()");
 
     int nVec = vecSol.size();
+    
     for (int i = 0; i < nVec; i++) {
       VecDestroy(&vecSol[i]);
       VecDestroy(&vecRhs[i]);
@@ -391,7 +383,7 @@ namespace AMDiS {
       int groupRowsInterior = 0;
       if (domainComm.Get_rank() == 0)
 	groupRowsInterior = interiorMap->getOverallDofs();
-
+      
       mpi::getDofNumbering(coarseSpaceComm, groupRowsInterior,
 			   rStartInterior, nGlobalOverallInterior);
       
@@ -400,7 +392,7 @@ namespace AMDiS {
 	tmp = rStartInterior;
       
       domainComm.Allreduce(&tmp, &rStartInterior, 1, MPI_INT, MPI_SUM);
-  }
+    }
   }
 
 }
diff --git a/AMDiS/src/parallel/ParallelCoarseSpaceSolver.h b/AMDiS/src/parallel/ParallelCoarseSpaceSolver.h
index 9b1b2ed70c48e4e975f0160ac1ef42005065c5f5..1df792a45deb42cfb0d5f758012d4519abb59915 100644
--- a/AMDiS/src/parallel/ParallelCoarseSpaceSolver.h
+++ b/AMDiS/src/parallel/ParallelCoarseSpaceSolver.h
@@ -99,6 +99,20 @@ namespace AMDiS {
     /// Destroys PETSc vector objects.
     void vecDestroy();
 
+
+    /// Gives access to the internal PETSc matrices; needed only for the
+    /// multilevel workaround ("super trick") in PetscSolverFeti.
+    vector<vector<Mat> >& getMat()
+    {
+      return mat;
+    }
+
+    /// Gives access to the internal PETSc right-hand side vectors; needed only
+    /// for the multilevel workaround ("super trick") in PetscSolverFeti.
+    vector<Vec>& getVecRhs()
+    {
+      return vecRhs;
+    }
+
+
     /// Get interior matrix.
     inline Mat& getMatInterior()
     {
@@ -243,7 +257,7 @@ namespace AMDiS {
     /// zero structure.
     bool checkMeshChange();
 
-  private:
+  protected:
     /// Matrix of PETSc matrices. mat[0][0] is the interior discretization
     /// matrix, mat[1][1] corresponds to the first coarse space and so on. 
     /// mat[i][j], with i not equal to j, are the coupling between the interior
@@ -291,7 +305,6 @@ namespace AMDiS {
     /// some phase fields.
     bool alwaysCreateNnzStructure;
 
-  protected:
     /// Prefix string for parameters in init file.
     string initFileStr;
     
diff --git a/AMDiS/src/parallel/ParallelDebug.cc b/AMDiS/src/parallel/ParallelDebug.cc
index 5152477fee8160882137eb8df912b94e1ea7c97d..a0440abbad7ac3fefcc59d60d103f0e20f73af12 100644
--- a/AMDiS/src/parallel/ParallelDebug.cc
+++ b/AMDiS/src/parallel/ParallelDebug.cc
@@ -766,10 +766,10 @@ namespace AMDiS {
   {
     FUNCNAME("ParallelDebug::printBoundaryInfo()");
 
-    // int tmp = 0;
-    // Parameters::get("parallel->debug->print boundary info", tmp);
-    // if (tmp <= 0 && force == false)
-    //   return;
+    int tmp = 0;
+    Parameters::get("parallel->debug->print boundary info", tmp);
+    if (tmp <= 0 && force == false)
+      return;
 
     MSG("Interior boundary info:\n");
 
@@ -1018,6 +1018,8 @@ namespace AMDiS {
   {
     FUNCNAME("ParallelDebug::writeCsvElementMap()");
 
+    return;
+
     MSG("writing local Element map to CSV File \n");
 
     Mesh *mesh = feSpace->getMesh();
diff --git a/AMDiS/src/parallel/ParallelDofMapping.cc b/AMDiS/src/parallel/ParallelDofMapping.cc
index e69ac7194cc72fe2f8f43c24e43714d209c211d2..587684e544f427ee1d09f5630b76b710a02e78b5 100644
--- a/AMDiS/src/parallel/ParallelDofMapping.cc
+++ b/AMDiS/src/parallel/ParallelDofMapping.cc
@@ -74,7 +74,7 @@ namespace AMDiS {
     nOverallDofs = 0;
     rStartDofs = 0;
     mpi::getDofNumbering(mpiComm, nRankDofs, rStartDofs, nOverallDofs);
-    
+
     // === If required, compute also the global indices. ===
     
     if (globalMapping) {
diff --git a/AMDiS/src/parallel/PetscHelper.cc b/AMDiS/src/parallel/PetscHelper.cc
index 3035cc2d89e39c36d3e51c2e491ba72305f8fe71..8970f73f3d823925bbb8ebde94a0f6de68b1eb4a 100644
--- a/AMDiS/src/parallel/PetscHelper.cc
+++ b/AMDiS/src/parallel/PetscHelper.cc
@@ -74,7 +74,7 @@ namespace AMDiS {
     }
 
 
-    void blockMatMatSolve(KSP ksp, Mat mat0, Mat &mat1)
+    void blockMatMatSolve(MPI::Intracomm mpiComm, KSP ksp, Mat mat0, Mat &mat1)
     {
       // === We have to  calculate mat1 = ksp mat0:                       ===
       // ===    - get all local column vectors from mat0                  ===
@@ -86,7 +86,7 @@ namespace AMDiS {
       MatGetLocalSize(mat0, &localRows, &localCols);
       MatGetSize(mat0, &globalRows, &globalCols);
 
-      MatCreateAIJ(PETSC_COMM_WORLD,
+      MatCreateAIJ(mpiComm,
 		   localRows, localCols, globalRows, globalCols,
 		   150, PETSC_NULL, 150, PETSC_NULL, &mat1);
       MatSetOption(mat1, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
@@ -253,7 +253,7 @@ namespace AMDiS {
 			 const char* kspPrefix,
 			 KSPType kspType, 
 			 PCType pcType, 
-			 MatSolverPackage matSolverPackage,
+			 const MatSolverPackage matSolverPackage,
 			 PetscReal rtol,
 			 PetscReal atol,
 			 PetscInt maxIt)
diff --git a/AMDiS/src/parallel/PetscHelper.h b/AMDiS/src/parallel/PetscHelper.h
index d86d01446676ead35c6360f85ab8ee88d3715901..db1ae080008f0f68cb93ca84848f3d025de19be6 100644
--- a/AMDiS/src/parallel/PetscHelper.h
+++ b/AMDiS/src/parallel/PetscHelper.h
@@ -77,11 +77,15 @@ namespace AMDiS {
      * task. The overall number of rows of local matrices A must be the
      * number of distributed rows in B.
      *
-     * \param[in]   ksp   inv(A) matrix given by a PETSc solver object.
-     * \param[in]   mat0  matrix B
-     * \param[out]  mat1  resulting matrix C, is created inside the function
+     * \param[in]   mpiComm  MPI Communicator object (must fit with ksp)
+     * \param[in]   ksp      inv(A) matrix given by a PETSc solver object.
+     * \param[in]   mat0     matrix B
+     * \param[out]  mat1     resulting matrix C, is created inside the function
      */
-    void blockMatMatSolve(KSP ksp, Mat mat0, Mat &mat1);
+    void blockMatMatSolve(MPI::Intracomm mpiComm, 
+			  KSP ksp, 
+			  Mat mat0, 
+			  Mat &mat1);
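+
+    // Usage sketch (the names matB/matC are only illustrative), assuming ksp
+    // wraps a factorization of A and matB is distributed on mpiComm:
+    //   Mat matC;
+    //   petsc_helper::blockMatMatSolve(mpiComm, ksp, matB, matC);  // C = inv(A) B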
 
     /** \brief
      * Converts a 2x2 nested matrix to a MATAIJ matrix (thus not nested).
@@ -92,10 +96,10 @@ namespace AMDiS {
     void matNestConvert(Mat matNest, Mat &mat);
 
     void setSolverWithLu(KSP ksp, 
-			const char* kspPrefix,
+			 const char* kspPrefix,
 			 KSPType kspType, 
 			 PCType pcType, 
-			 MatSolverPackage matSolverPackage,
+			 const MatSolverPackage matSolverPackage,
 			 PetscReal rtol = PETSC_DEFAULT,
 			 PetscReal atol = PETSC_DEFAULT,
 			 PetscInt maxIt = PETSC_DEFAULT);
diff --git a/AMDiS/src/parallel/PetscSolver.cc b/AMDiS/src/parallel/PetscSolver.cc
index b53ffdc2f981e0ba370f838fe1298f40443b7e40..247ff76baf036bfcb583de3ec0dfd55450bd57b7 100644
--- a/AMDiS/src/parallel/PetscSolver.cc
+++ b/AMDiS/src/parallel/PetscSolver.cc
@@ -23,7 +23,7 @@ namespace AMDiS {
   PetscSolver::PetscSolver(string name)
     : ParallelCoarseSpaceSolver(name),
       dofMap(FESPACE_WISE, true),
-      dofMapSd(FESPACE_WISE, true),
+      dofMapSubDomain(FESPACE_WISE, true),
       parallelDofMappingsRegistered(false),
       kspPrefix(""),
       removeRhsNullspace(false),    
@@ -73,19 +73,21 @@ namespace AMDiS {
       parallelDofMappingsRegistered = true;
 
       dofMap.init(componentSpaces, feSpaces);
-      dofMap.setMpiComm(levelData.getMpiComm(0));
-      dofMap.setDofComm(meshDistributor->getDofComm(0));
+      dofMap.setMpiComm(levelData.getMpiComm(meshLevel));
+      dofMap.setDofComm(meshDistributor->getDofComm(meshLevel));
       dofMap.clear();
       meshDistributor->registerDofMap(dofMap);     
 
-      if (nLevels > 1 && levelData.getMpiComm(1) != MPI::COMM_SELF) {
-	MSG("WARNING: MAKE GENERAL!\n");
-	dofMapSd.init(componentSpaces, feSpaces);
-	dofMapSd.setMpiComm(levelData.getMpiComm(1));
-	dofMapSd.setDofComm(meshDistributor->getDofComm(1));
-	dofMapSd.clear();
-	meshDistributor->registerDofMap(dofMapSd);
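+      // For multilevel runs, additionally create a DOF mapping for the
+      // subdomain level (meshLevel + 1), unless that level is purely
+      // rank-local (MPI::COMM_SELF).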
+      if (meshLevel + 1 < nLevels && 
+	  levelData.getMpiComm(meshLevel + 1) != MPI::COMM_SELF) {
+	dofMapSubDomain.init(componentSpaces, feSpaces);
+	dofMapSubDomain.setMpiComm(levelData.getMpiComm(meshLevel + 1));
+	dofMapSubDomain.setDofComm(meshDistributor->getDofComm(meshLevel + 1));
+	dofMapSubDomain.clear();
+	meshDistributor->registerDofMap(dofMapSubDomain);
       }
+
+      meshDistributor->updateParallelDofMappings();
     }
   }
 
@@ -114,6 +116,12 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolver::solveGlobal()");
 
+    int s, ls;
+    VecGetSize(rhs, &s);
+    VecGetLocalSize(rhs, &ls);
+
+    MSG("Solve global %d %d\n", ls, s);
+
     ERROR_EXIT("Not implemented!\n");
   }
 
diff --git a/AMDiS/src/parallel/PetscSolver.h b/AMDiS/src/parallel/PetscSolver.h
index fb0b6cc8109d36f0e028f210aa64bcaff7b64c01..ea149c3d548de213e9b620df825e8837e040f4e6 100644
--- a/AMDiS/src/parallel/PetscSolver.h
+++ b/AMDiS/src/parallel/PetscSolver.h
@@ -181,7 +181,10 @@ namespace AMDiS {
     vector<const FiniteElemSpace*> feSpaces;
 
     ///
-    ParallelDofMapping dofMap, dofMapSd;
+    ParallelDofMapping dofMap;
+
+    ///
+    ParallelDofMapping dofMapSubDomain;
 
     /// If the parallel DOF mappings of this solver are registered to the
     /// mesh distributor object, this variable is set to true to remove them
diff --git a/AMDiS/src/parallel/PetscSolverFeti.cc b/AMDiS/src/parallel/PetscSolverFeti.cc
index 97b359ff8b052df3aa609ac72a33f12573a10239..67cc8cda23da6df1ccaa4317073b520131541c72 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.cc
+++ b/AMDiS/src/parallel/PetscSolverFeti.cc
@@ -29,6 +29,7 @@ namespace AMDiS {
 
   PetscSolverFeti::PetscSolverFeti(string name)
     : PetscSolver(name),
+      fetiSolverType(EXACT),
       primalDofMap(COMPONENT_WISE),
       dualDofMap(COMPONENT_WISE),
       interfaceDofMap(COMPONENT_WISE),
@@ -39,8 +40,6 @@ namespace AMDiS {
       subDomainIsLocal(true),
       subdomain(NULL),
       massMatrixSolver(NULL),
-      rStartInterior(0),
-      nGlobalOverallInterior(0),
       printTimings(false),
       dirichletMode(0),
       stokesMode(false),
@@ -92,10 +91,17 @@ namespace AMDiS {
       MSG("WARNING: CHECK THIS HERE BEFORE GOING INTO RUNNING MULTILEVEL FETI-DP!\n");
       Parameters::get("parallel->level mode", levelMode);
     }
-
+    
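+    // Select the FETI-DP solver variant from the init file:
+    // 0 (default) = exact, 1 = inexact, 2 = inexact reduced.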
+    {
+      int tmp = 0;
+      Parameters::get(initFileStr + "->feti->inexact", tmp);
+      if (tmp == 1)
+	fetiSolverType = INEXACT;
+      if (tmp == 2)
+	fetiSolverType = INEXACT_REDUCED;
+    }
 
     Parameters::get("parallel->print timings", printTimings);
-
   }
 
 
@@ -128,6 +134,7 @@ namespace AMDiS {
       subdomain->setSymmetric(isSymmetric);
       subdomain->setHandleDirichletRows(dirichletMode == 0);
       subdomain->setMeshDistributor(meshDistributor, meshLevel + 1);
+      subdomain->init(componentSpaces, feSpaces);
 
       delete solverCreator;
     }
@@ -274,10 +281,14 @@ namespace AMDiS {
 	MSG("  nRankDuals = %d  nOverallDuals = %d\n",
 	    dualDofMap[i].nRankDofs, 
 	    dualDofMap[i].nOverallDofs);
-	
+
 	MSG("  nRankLagrange = %d  nOverallLagrange = %d\n",
 	    lagrangeMap[i].nRankDofs, 
 	    lagrangeMap[i].nOverallDofs);
+
+	MSG("  nRankLocal = %d  nOverallLocal = %d\n",
+	    localDofMap[i].nRankDofs,
+	    localDofMap[i].nOverallDofs);       
       }
     }
 
@@ -324,6 +335,9 @@ namespace AMDiS {
       if (dirichletRows[component].count(**it))
 	continue;
 
+      if (meshLevel == 1 && not (*interiorMap)[component].isSet(**it))
+	continue;
+
       if (subDomainIsLocal) {
 	primals.insert(**it);
       } else {
@@ -377,8 +391,11 @@ namespace AMDiS {
 
       if (isPrimal(component, **it))
 	continue;
+
+      if (meshLevel == 1 && not (*interiorMap)[component].isSet(**it))
+	continue;
       
-      if (subDomainIsLocal || dofMapSd[feSpace].isRankDof(**it))
+      if (subDomainIsLocal || dofMapSubDomain[feSpace].isRankDof(**it))
 	dualDofMap[component].insertRankDof(**it);
     }
   }
@@ -433,7 +450,7 @@ namespace AMDiS {
 	dofs.reserve(it.getDofs().size());
 
 	for (; !it.endDofIter(); it.nextDof()) {
-	  if (dofMapSd[feSpace].isRankDof(it.getDofIndex()))
+	  if (dofMapSubDomain[feSpace].isRankDof(it.getDofIndex()))
 	    dofs.push_back(1);
 	  else
 	    dofs.push_back(0);
@@ -500,7 +517,7 @@ namespace AMDiS {
       bool recvFromRank = false;
       for (; !it.endDofIter(); it.nextDof()) {
 	if (!isPrimal(component, it.getDofIndex())) {
- 	  if (subDomainIsLocal || dofMapSd[feSpace].isRankDof(it.getDofIndex())) {
+ 	  if (subDomainIsLocal || dofMapSubDomain[feSpace].isRankDof(it.getDofIndex())) {
 	    recvFromRank = true;
 	    break;
 	  }
@@ -518,7 +535,7 @@ namespace AMDiS {
       int i = 0;
       for (; !it.endDofIter(); it.nextDof())
 	if (!isPrimal(component, it.getDofIndex()))
- 	  if (subDomainIsLocal || dofMapSd[feSpace].isRankDof(it.getDofIndex()))
+ 	  if (subDomainIsLocal || dofMapSubDomain[feSpace].isRankDof(it.getDofIndex()))
 	    boundaryDofRanks[feSpace][it.getDofIndex()] = 
 	      stdMpi.getRecvData(it.getRank())[i++];
 	  else {
@@ -574,6 +591,9 @@ namespace AMDiS {
 	  dirichletRows[component].count(i))
 	continue;      
 
+      if (meshLevel == 1 &&  not (*interiorMap)[component].isSet(i))
+	continue;
+
       if (subDomainIsLocal) {
 	localDofMap[component].insertRankDof(i, nLocalInterior);
 	
@@ -582,7 +602,7 @@ namespace AMDiS {
 	
 	nLocalInterior++;	
       } else {
-	if (dofMapSd[feSpace].isRankDof(i))
+	if (dofMapSubDomain[feSpace].isRankDof(i))
 	  localDofMap[component].insertRankDof(i);
 	else
 	  localDofMap[component].insertNonRankDof(i);
@@ -599,7 +619,7 @@ namespace AMDiS {
       if (subDomainIsLocal) {
 	localDofMap[component].insertRankDof(it->first);
       } else {
-	if (dofMapSd[feSpace].isRankDof(it->first))
+	if (dofMapSubDomain[feSpace].isRankDof(it->first))
 	  localDofMap[component].insertRankDof(it->first);
 	else 
 	  localDofMap[component].insertNonRankDof(it->first);
@@ -613,7 +633,7 @@ namespace AMDiS {
     FUNCNAME("PetscSolverFeti::createMatLagrange()");
 
     double wtime = MPI::Wtime();
-    int mpiRank = meshDistributor->getMpiRank();
+    int mpiRank = domainComm.Get_rank();
 
     // === Create distributed matrix for Lagrange constraints. ===
 
@@ -687,12 +707,17 @@ namespace AMDiS {
     MatAssemblyEnd(mat_lagrange, MAT_FINAL_ASSEMBLY);
 
 
+#if (DEBUG != 0)
     {
       int nZeroRows = PetscSolverFetiDebug::testZeroRows(mat_lagrange);
       int m,n;
       MatGetSize(mat_lagrange, &m ,&n);
+      mpi::globalAdd(domainComm, nZeroRows);
       MSG("Lagrange matrix has %d zero rows and global size of %d %d!\n", nZeroRows, m, n);
+
+      TEST_EXIT(nZeroRows == 0)("Lagrange matrix has zero rows!\n");
     }
+#endif
 
 
     // === If required, create \ref mat_lagrange_scaled ===
@@ -700,7 +725,9 @@ namespace AMDiS {
     VecAssemblyBegin(vec_scale_lagrange);
     VecAssemblyEnd(vec_scale_lagrange);
 
-    if (fetiPreconditioner != FETI_NONE || stokesMode) {
+    if (fetiPreconditioner != FETI_NONE || 
+	fetiSolverType == INEXACT || 
+	stokesMode) {
       MatDuplicate(mat_lagrange, MAT_COPY_VALUES, &mat_lagrange_scaled);
       MatDiagonalScale(mat_lagrange_scaled, vec_scale_lagrange, PETSC_NULL);
     }
@@ -935,7 +962,7 @@ namespace AMDiS {
     FUNCNAME("PetscSolverFeti::createSchurPrimalKsp()");
 
     if (schurPrimalSolver == 0) {
-      MSG("Create iterative schur primal solver!\n");
+      MSG("Create iterative schur primal solver on level %d!\n", meshLevel);
 
       if (augmentedLagrange == false) {
 	schurPrimalData.subSolver = subdomain;
@@ -989,10 +1016,6 @@ namespace AMDiS {
 
       double wtime = MPI::Wtime();
 
-      TEST_EXIT_DBG(subDomainIsLocal)
-	("Does not support for multilevel, check usage of localDofMap.\n");
-
-
       // === Create explicit matrix representation of the Schur primal system. ===
 
       if (!augmentedLagrange)
@@ -1042,14 +1065,14 @@ namespace AMDiS {
     FUNCNAME("PetscSolverFeti::createMatExplicitSchurPrimal()");
 
     int creationMode = 0;
-    Parameters::get("parallel->feti->schur primal creation mode", creationMode);
+    Parameters::get(initFileStr + "->feti->schur primal creation mode", creationMode);
     if (creationMode == 0) {
       // matK = inv(A_BB) A_BPi
       Mat matK;
-      petsc_helper::blockMatMatSolve(subdomain->getSolver(), 
+      petsc_helper::blockMatMatSolve(domainComm,
+				     subdomain->getSolver(), 
 				     subdomain->getMatInteriorCoarse(),
 				     matK);
-     
       
       // mat_schur_primal = A_PiPi - A_PiB inv(A_BB) A_BPi
       //                  = A_PiPi - A_PiB matK
@@ -1058,14 +1081,12 @@ namespace AMDiS {
       MatAYPX(mat_schur_primal, -1.0, subdomain->getMatCoarse(), DIFFERENT_NONZERO_PATTERN);
 
       MatDestroy(&matK);
-    } else {
-      Mat tmp;
-      
+    } else {  
       schurPrimalData.subSolver = subdomain;
       
       localDofMap.createVec(schurPrimalData.tmp_vec_b, nGlobalOverallInterior);
       primalDofMap.createVec(schurPrimalData.tmp_vec_primal);
-      
+      Mat tmp;
       MatCreateShell(domainComm,
 		     primalDofMap.getRankDofs(), 
 		     primalDofMap.getRankDofs(), 
@@ -1101,7 +1122,8 @@ namespace AMDiS {
       
       // matTmp = inv(A_BB) A_BPi
       Mat matTmp;
-      petsc_helper::blockMatMatSolve(subdomain->getSolver(), 
+      petsc_helper::blockMatMatSolve(domainComm,
+				     subdomain->getSolver(), 
 				     subdomain->getMatInteriorCoarse(),
 				     matTmp);
       
@@ -1125,7 +1147,10 @@ namespace AMDiS {
       MatTranspose(mat_augmented_lagrange, MAT_INITIAL_MATRIX, &qT);     
       MatTransposeMatMult(mat_lagrange, qT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, 
 			  &jTqT);
-      petsc_helper::blockMatMatSolve(subdomain->getSolver(), jTqT, matTmp);
+      petsc_helper::blockMatMatSolve(domainComm, 
+				     subdomain->getSolver(), 
+				     jTqT, 
+				     matTmp);
       MatDestroy(&qT);
       MatDestroy(&jTqT);
       
@@ -1222,6 +1247,26 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverFeti::createFetiKsp()");
 
+    switch (fetiSolverType) {
+    case EXACT:
+      createFetiExactKsp();
+      break;
+    case INEXACT:
+      createFetiInexactKsp();
+      break;
+    case INEXACT_REDUCED:
+      createFetiInexactReducedKsp();
+      break;
+    default:
+      ERROR_EXIT("Should not happen!\n");
+    }
+  }
+
+
+  void PetscSolverFeti::createFetiExactKsp()
+  {
+    FUNCNAME("PetscSolverFeti::createFetiExactKsp()");
+
     // === Create FETI-DP solver object. ===
 
     fetiData.mat_lagrange = &mat_lagrange;
@@ -1295,177 +1340,290 @@ namespace AMDiS {
 
     switch (fetiPreconditioner) {
     case FETI_DIRICHLET:
-      TEST_EXIT(subDomainIsLocal)
-	("Check for localDofMap.getLocalMatIndex, which should not work for multilevel FETI-DP!\n");
+      KSPGetPC(ksp_feti, &precon_feti);
+      createFetiPreconDirichlet(precon_feti);    
+      break;
 
-      TEST_EXIT(!stokesMode)
-	("Stokes mode does not yet support the Dirichlet precondition!\n");
+    case FETI_LUMPED:
+      KSPGetPC(ksp_feti, &precon_feti);
+      createFetiPreconLumped(precon_feti);
+      break;
 
-      KSPCreate(PETSC_COMM_SELF, &ksp_interior);
-      KSPSetOperators(ksp_interior, mat_interior_interior, mat_interior_interior, 
-		      SAME_NONZERO_PATTERN);
-      KSPSetOptionsPrefix(ksp_interior, "precon_interior_");
-      KSPSetType(ksp_interior, KSPPREONLY);
-      PC pc_interior;
-      KSPGetPC(ksp_interior, &pc_interior);
-      if (isSymmetric) {
-	PCSetType(pc_interior, PCCHOLESKY);
-	PCFactorSetMatSolverPackage(pc_interior, MATSOLVERMUMPS);
-      } else {
-	PCSetType(pc_interior, PCLU);
-	PCFactorSetMatSolverPackage(pc_interior, MATSOLVERUMFPACK);
+    default:
+      break;
+    }
+  }
+
+  
+  void PetscSolverFeti::createFetiInexactKsp()
+  {
+    FUNCNAME("PetscSolverFeti::createFetiInexactKsp()");
+
+    // === Init solver ===
+
+    int localSize = 
+      localDofMap.getRankDofs() +
+      primalDofMap.getRankDofs() +
+      lagrangeMap.getRankDofs();
+
+    int globalSize = 
+      nGlobalOverallInterior +
+      primalDofMap.getOverallDofs() + 
+      lagrangeMap.getOverallDofs();
+
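+    // Collect the blocks of the monolithic FETI-DP operator acting on the
+    // combined (interior, primal, Lagrange) vector; the operator itself is
+    // applied matrix-free via petscMultMatFetiInexact().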
+    fetiInexactData.matBB = &(subdomain->getMatInterior());
+    fetiInexactData.matBPi = &(subdomain->getMatInteriorCoarse());
+    fetiInexactData.matPiB = &(subdomain->getMatCoarseInterior());
+    fetiInexactData.matPiPi = &(subdomain->getMatCoarse());
+    fetiInexactData.mat_lagrange = &mat_lagrange;
+
+    localDofMap.createVec(fetiInexactData.tmp_vec_b0);
+    localDofMap.createVec(fetiInexactData.tmp_vec_b1);
+
+    MatCreateShell(domainComm,
+		   localSize, localSize, globalSize, globalSize,
+		   &fetiInexactData, &mat_feti);
+
+    MatShellSetOperation(mat_feti, MATOP_MULT, 
+			 (void(*)(void))petscMultMatFetiInexact);	
+
+    KSPCreate(domainComm, &ksp_feti);
+    KSPSetOperators(ksp_feti, mat_feti, mat_feti, SAME_NONZERO_PATTERN);
+    KSPSetOptionsPrefix(ksp_feti, "feti_");
+    KSPSetType(ksp_feti, KSPGMRES);
+    KSPSetTolerances(ksp_feti, 0, 1e-8, 1e+3, 1000);
+    KSPSetFromOptions(ksp_feti);
+
+
+    // === Init preconditioner ===
+
+    fetiInexactPreconData.ksp_schur = ksp_schur_primal;
+    fetiInexactPreconData.ksp_interior = subdomain->getSolver();
+    fetiInexactPreconData.matPiB = &(subdomain->getMatCoarseInterior());
+    fetiInexactPreconData.matBPi = &(subdomain->getMatInteriorCoarse());
+    fetiInexactPreconData.mat_lagrange = &mat_lagrange;
+    localDofMap.createVec(fetiInexactPreconData.tmp_vec_b0);
+
+    KSPCreate(domainComm, &(fetiInexactPreconData.ksp_pc_feti));
+    KSPSetOperators(fetiInexactPreconData.ksp_pc_feti, 
+		    mat_lagrange, mat_lagrange, 
+		    SAME_NONZERO_PATTERN);
+    KSPGetPC(fetiInexactPreconData.ksp_pc_feti, 
+	     &(fetiInexactPreconData.pc_feti));
+    createFetiPreconLumped(fetiInexactPreconData.pc_feti);
+    PCSetUp(fetiInexactPreconData.pc_feti);
+    
+    // === Setup pc object ===  
+
+    PC pc;
+    KSPGetPC(ksp_feti, &pc);
+    PCSetType(pc, PCSHELL);
+    PCShellSetApply(pc, pcInexactFetiShell);
+    PCShellSetContext(pc, &fetiInexactPreconData);
+  }
+
+
+  void PetscSolverFeti::createFetiInexactReducedKsp()
+  {
+    FUNCNAME("PetscSolverFeti::createFetiInexactReducedKsp()");
+
+    ERROR_EXIT("Not yet implemented!\n");
+  }
+
+
+  void PetscSolverFeti::createFetiPreconLumped(PC pc)
+  {
+    FUNCNAME("PetscSolverFeti::createFetiPreconLumped()");
+
+    FetiLumpedPreconData *lumpedData = 
+      (stokesMode ? &fetiInterfaceLumpedPreconData : &fetiLumpedPreconData);
+    
+    lumpedData->mat_lagrange_scaled = &mat_lagrange_scaled;
+    lumpedData->mat_duals_duals = &mat_duals_duals;
+    
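+    // Only the scaled jump operator and the dual-dual coupling block are needed
+    // here; the preconditioner is applied matrix-free in
+    // petscApplyFetiLumpedPrecon / petscApplyFetiInterfaceLumpedPrecon.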
+    VecCreateMPI(meshDistributor->getMeshLevelData().getMpiComm(meshLevel),
+		 localDofMap.getRankDofs(),
+		 nGlobalOverallInterior, &(lumpedData->tmp_vec_b0));
+    MatGetVecs(mat_duals_duals, PETSC_NULL, 
+	       &(lumpedData->tmp_vec_duals0));
+    MatGetVecs(mat_duals_duals, PETSC_NULL, 
+	       &(lumpedData->tmp_vec_duals1));
+    
+    for (unsigned int component = 0; component < componentSpaces.size(); 
+	 component++) {
+      if (stokesMode && component == pressureComponent)
+	continue;
+      
+      DofMap &dualMap = dualDofMap[component].getMap();
+      for (DofMap::iterator it = dualMap.begin(); it != dualMap.end(); ++it) {
+	DegreeOfFreedom d = it->first;
+	int matIndexLocal = localDofMap.getLocalMatIndex(component, d);
+	int matIndexDual = dualDofMap.getLocalMatIndex(component, d);
+	lumpedData->localToDualMap[matIndexLocal] = matIndexDual;
       }
-      KSPSetFromOptions(ksp_interior);
-            
-      fetiDirichletPreconData.mat_lagrange_scaled = &mat_lagrange_scaled;
-      fetiDirichletPreconData.mat_interior_interior = &mat_interior_interior;
-      fetiDirichletPreconData.mat_duals_duals = &mat_duals_duals;
-      fetiDirichletPreconData.mat_interior_duals = &mat_interior_duals;
-      fetiDirichletPreconData.mat_duals_interior = &mat_duals_interior;
-      fetiDirichletPreconData.ksp_interior = &ksp_interior;
+    }
+    
+    if (stokesMode) {
+      // === Create mass matrix solver ===
       
-      localDofMap.createVec(fetiDirichletPreconData.tmp_vec_b, 
-			    nGlobalOverallInterior);
-      MatGetVecs(mat_duals_duals, PETSC_NULL, 
-		 &(fetiDirichletPreconData.tmp_vec_duals0));
-      MatGetVecs(mat_duals_duals, PETSC_NULL, 
-		 &(fetiDirichletPreconData.tmp_vec_duals1));
-      MatGetVecs(mat_interior_interior, PETSC_NULL, 
-		 &(fetiDirichletPreconData.tmp_vec_interior));
-
-      TEST_EXIT_DBG(subDomainIsLocal)
-	("Should not happen, check usage of localDofMap!\n");
-
-      for (unsigned int component = 0; component < componentSpaces.size(); component++) {
-	DofMap &dualMap = dualDofMap[component].getMap();
-	for (DofMap::iterator it = dualMap.begin(); it != dualMap.end(); ++it) {
-	  DegreeOfFreedom d = it->first;	  
-	  int matIndexLocal = localDofMap.getLocalMatIndex(component, d);
-	  int matIndexDual = dualDofMap.getLocalMatIndex(component, d);
-	  fetiDirichletPreconData.localToDualMap[matIndexLocal] = matIndexDual;
-	}
+      const FiniteElemSpace *pressureFeSpace = 
+	componentSpaces[pressureComponent];
+      
+      // Create parallel DOF mapping in pressure space.
+      ParallelDofMapping *massMapping = NULL;
+      if (massMatrixSolver) {
+	massMapping = massMatrixSolver->getDofMapping();
+      } else {
+	massMapping = 
+	  new ParallelDofMapping(COMPONENT_WISE, true);
+	massMapping->init(pressureFeSpace, pressureFeSpace);
+	massMapping->setDofComm(meshDistributor->getDofComm(meshLevel));
+	massMapping->setMpiComm(meshDistributor->getMeshLevelData().getMpiComm(0));
+      }	   
+      (*massMapping)[0] = interfaceDofMap[pressureComponent];
+      massMapping->update();
+      
+      DOFMatrix massMatrix(pressureFeSpace, pressureFeSpace);
+      Operator op(pressureFeSpace, pressureFeSpace);
+      Simple_ZOT zot;
+      op.addTerm(&zot);
+      massMatrix.assembleOperator(op);
+      
+      if (!massMatrixSolver) {
+	massMatrixSolver = new PetscSolverGlobalMatrix("");
+	massMatrixSolver->setKspPrefix("mass_");
+	massMatrixSolver->setMeshDistributor(meshDistributor, meshLevel);
+	massMatrixSolver->setDofMapping(massMapping);
       }
+      
+      massMatrixSolver->fillPetscMatrix(&massMatrix);
+      
+      int r, c;
+      MatGetSize(massMatrixSolver->getMatInterior(), &r, &c);
+      MatInfo info;
+      MatGetInfo(massMatrixSolver->getMatInterior(), MAT_GLOBAL_SUM, &info);
+      MSG("MASS MAT INFO:  size = %d x %d   nnz = %d\n",
+	  r, c, static_cast<int>(info.nz_used));
+      
+      fetiInterfaceLumpedPreconData.ksp_mass = massMatrixSolver->getSolver();
+      
+      
+      // === Create tmp vectors ===
+      
+      localDofMap.createVec(fetiInterfaceLumpedPreconData.tmp_vec_b1);
+      primalDofMap.createVec(fetiInterfaceLumpedPreconData.tmp_primal);
+      fetiInterfaceLumpedPreconData.subSolver = subdomain;	
+    }
 
-      KSPGetPC(ksp_feti, &precon_feti);
-      PCSetType(precon_feti, PCSHELL);
-      PCShellSetContext(precon_feti, static_cast<void*>(&fetiDirichletPreconData));
-      PCShellSetApply(precon_feti, petscApplyFetiDirichletPrecon);
+    // === Set PC object ===
 
+    PCSetType(pc, PCSHELL);    
+    if (stokesMode) {
+      PCShellSetContext(pc, static_cast<void*>(&fetiInterfaceLumpedPreconData));
+      PCShellSetApply(pc, petscApplyFetiInterfaceLumpedPrecon);
+    } else {
+      PCShellSetContext(pc, static_cast<void*>(&fetiLumpedPreconData));
+      PCShellSetApply(pc, petscApplyFetiLumpedPrecon);
+    }  
+  }
 
-      // For the case, that we want to print the timings, we force the LU 
-      // factorization of the local problems to be done here explicitly.
-      if (printTimings) {
-	double wtime = MPI::Wtime();
-	KSPSetUp(ksp_interior);
-	KSPSetUpOnBlocks(ksp_interior);
-	MPI::COMM_WORLD.Barrier();
-	MSG("FETI-DP timing 08: %.5f seconds (factorization of Dirichlet preconditoner matrices)\n",
-	    MPI::Wtime() - wtime);
-      }
-      
-      break;
 
-    case FETI_LUMPED:
-      {
-	FetiLumpedPreconData *lumpedData = 
-	  (stokesMode ? &fetiInterfaceLumpedPreconData : &fetiLumpedPreconData);
+  void PetscSolverFeti::createFetiPreconDirichlet(PC pc)
+  {
+    FUNCNAME("PetscSolverFeti::createFetiPreconDirichlet()");
 
-	lumpedData->mat_lagrange_scaled = &mat_lagrange_scaled;
-	lumpedData->mat_duals_duals = &mat_duals_duals;
-	
-	localDofMap.createVec(lumpedData->tmp_vec_b0);
-	MatGetVecs(mat_duals_duals, PETSC_NULL, 
-		   &(lumpedData->tmp_vec_duals0));
-	MatGetVecs(mat_duals_duals, PETSC_NULL, 
-		   &(lumpedData->tmp_vec_duals1));
-	
-	for (unsigned int component = 0; component < componentSpaces.size(); component++) {
-	  if (stokesMode && component == pressureComponent)
-	    continue;
-	  
-	  DofMap &dualMap = dualDofMap[component].getMap();
-	  for (DofMap::iterator it = dualMap.begin(); it != dualMap.end(); ++it) {
-	    DegreeOfFreedom d = it->first;
-	    int matIndexLocal = localDofMap.getLocalMatIndex(component, d);
-	    int matIndexDual = dualDofMap.getLocalMatIndex(component, d);
-	    lumpedData->localToDualMap[matIndexLocal] = matIndexDual;
-	  }
-	}
+    TEST_EXIT(subDomainIsLocal)
+      ("Check for localDofMap.getLocalMatIndex, which should not work for multilevel FETI-DP!\n");
+    
+    TEST_EXIT(!stokesMode)
+      ("Stokes mode does not yet support the Dirichlet precondition!\n");
+
+    KSPCreate(PETSC_COMM_SELF, &ksp_interior);
+    KSPSetOperators(ksp_interior, mat_interior_interior, mat_interior_interior, 
+		    SAME_NONZERO_PATTERN);
+    KSPSetOptionsPrefix(ksp_interior, "precon_interior_");
+    KSPSetType(ksp_interior, KSPPREONLY);
+    PC pc_interior;
+    KSPGetPC(ksp_interior, &pc_interior);
+    if (isSymmetric) {
+      PCSetType(pc_interior, PCCHOLESKY);
+      PCFactorSetMatSolverPackage(pc_interior, MATSOLVERMUMPS);
+    } else {
+      PCSetType(pc_interior, PCLU);
+      PCFactorSetMatSolverPackage(pc_interior, MATSOLVERUMFPACK);
+    }
+    KSPSetFromOptions(ksp_interior);
+    
+    fetiDirichletPreconData.mat_lagrange_scaled = &mat_lagrange_scaled;
+    fetiDirichletPreconData.mat_interior_interior = &mat_interior_interior;
+    fetiDirichletPreconData.mat_duals_duals = &mat_duals_duals;
+    fetiDirichletPreconData.mat_interior_duals = &mat_interior_duals;
+    fetiDirichletPreconData.mat_duals_interior = &mat_duals_interior;
+    fetiDirichletPreconData.ksp_interior = &ksp_interior;
+    
+    VecCreateMPI(meshDistributor->getMeshLevelData().getMpiComm(meshLevel),
+		 localDofMap.getRankDofs(),
+		 nGlobalOverallInterior, &(fetiDirichletPreconData.tmp_vec_b));
+    MatGetVecs(mat_duals_duals, PETSC_NULL, 
+	       &(fetiDirichletPreconData.tmp_vec_duals0));
+    MatGetVecs(mat_duals_duals, PETSC_NULL, 
+	       &(fetiDirichletPreconData.tmp_vec_duals1));
+    MatGetVecs(mat_interior_interior, PETSC_NULL, 
+	       &(fetiDirichletPreconData.tmp_vec_interior));
+    
+    TEST_EXIT_DBG(subDomainIsLocal)
+      ("Should not happen, check usage of localDofMap!\n");
+    
+    for (unsigned int component = 0; component < componentSpaces.size(); component++) {
+      DofMap &dualMap = dualDofMap[component].getMap();
+      for (DofMap::iterator it = dualMap.begin(); it != dualMap.end(); ++it) {
+	DegreeOfFreedom d = it->first;	  
+	int matIndexLocal = localDofMap.getLocalMatIndex(component, d);
+	int matIndexDual = dualDofMap.getLocalMatIndex(component, d);
+	fetiDirichletPreconData.localToDualMap[matIndexLocal] = matIndexDual;
+      }
+    }
 
-	if (stokesMode) {
-	  // === Create mass matrix solver ===
-
-	  const FiniteElemSpace *pressureFeSpace = 
-	    componentSpaces[pressureComponent];
-
-	  // Create parallel DOF mapping in pressure space.
-	  ParallelDofMapping *massMapping = NULL;
-	  if (massMatrixSolver) {
-	    massMapping = massMatrixSolver->getDofMapping();
-	  } else {
-	    massMapping = 
-	      new ParallelDofMapping(COMPONENT_WISE, true);
-	    massMapping->init(pressureFeSpace, pressureFeSpace);
-	    massMapping->setDofComm(meshDistributor->getDofComm(meshLevel));
-	    massMapping->setMpiComm(meshDistributor->getMeshLevelData().getMpiComm(0));
-	  }	   
-	  (*massMapping)[0] = interfaceDofMap[pressureComponent];
-	  massMapping->update();
-	  
-	  DOFMatrix massMatrix(pressureFeSpace, pressureFeSpace);
-	  Operator op(pressureFeSpace, pressureFeSpace);
-	  Simple_ZOT zot;
-	  op.addTerm(&zot);
-	  massMatrix.assembleOperator(op);
-
-	  if (!massMatrixSolver) {
-	    massMatrixSolver = new PetscSolverGlobalMatrix("");
-	    massMatrixSolver->setKspPrefix("mass_");
-	    massMatrixSolver->setMeshDistributor(meshDistributor, meshLevel);
-	    massMatrixSolver->setDofMapping(massMapping);
-	  }
-	   
-	  massMatrixSolver->fillPetscMatrix(&massMatrix);
-
-	  int r, c;
-	  MatGetSize(massMatrixSolver->getMatInterior(), &r, &c);
-	  MatInfo info;
-	  MatGetInfo(massMatrixSolver->getMatInterior(), MAT_GLOBAL_SUM, &info);
-	  MSG("MASS MAT INFO:  size = %d x %d   nnz = %d\n",
-	      r, c, static_cast<int>(info.nz_used));
-	  
-	  fetiInterfaceLumpedPreconData.ksp_mass = massMatrixSolver->getSolver();
+    PCSetType(pc, PCSHELL);
+    PCShellSetContext(pc, static_cast<void*>(&fetiDirichletPreconData));
+    PCShellSetApply(pc, petscApplyFetiDirichletPrecon);
 
+    // For the case, that we want to print the timings, we force the LU 
+    // factorization of the local problems to be done here explicitly.
+    if (printTimings) {
+      double wtime = MPI::Wtime();
+      KSPSetUp(ksp_interior);
+      KSPSetUpOnBlocks(ksp_interior);
+      MPI::COMM_WORLD.Barrier();
+      MSG("FETI-DP timing 08: %.5f seconds (factorization of Dirichlet preconditoner matrices)\n",
+	  MPI::Wtime() - wtime);
+    }
+  }
 
-	  // === Create tmp vectors ===
- 
-	  localDofMap.createVec(fetiInterfaceLumpedPreconData.tmp_vec_b1);
-	  primalDofMap.createVec(fetiInterfaceLumpedPreconData.tmp_primal);
-	  fetiInterfaceLumpedPreconData.subSolver = subdomain;	
-	}
-	
-	KSPGetPC(ksp_feti, &precon_feti);
-	PCSetType(precon_feti, PCSHELL);
 
-	if (stokesMode) {
-	  PCShellSetContext(precon_feti, static_cast<void*>(&fetiInterfaceLumpedPreconData));
-	  PCShellSetApply(precon_feti, petscApplyFetiInterfaceLumpedPrecon);
-	} else {
-	  PCShellSetContext(precon_feti, static_cast<void*>(&fetiLumpedPreconData));
-	  PCShellSetApply(precon_feti, petscApplyFetiLumpedPrecon);
-	}
-      }
+  void PetscSolverFeti::destroyFetiKsp()
+  {
+    FUNCNAME("PetscSolverFeti::destroyFetiKsp()");
 
+    switch (fetiSolverType) {
+    case EXACT:
+      destroyFetiExactKsp();
       break;
-    default:
+    case INEXACT:
+      destroyFetiInexactKsp();
+      break;
+    case INEXACT_REDUCED:
+      destroyFetiInexactReducedKsp();
       break;
+    default:
+      ERROR_EXIT("Should not happen!\n");
     }
   }
-  
 
-  void PetscSolverFeti::destroyFetiKsp()
+
+  void PetscSolverFeti::destroyFetiExactKsp()
   {
-    FUNCNAME("PetscSolverFeti::destroyFetiKsp()");
+    FUNCNAME("PetscSolverFeti::destroyFetiExactKsp()");
 
     // === Destroy FETI-DP solver object. ===
 
@@ -1531,6 +1689,26 @@ namespace AMDiS {
   }
 
 
+  void PetscSolverFeti::destroyFetiInexactKsp()
+  {
+    FUNCNAME("PetscSolverFeti::destroyFetiInexactKsp()");
+
+    VecDestroy(&(fetiInexactData.tmp_vec_b0));
+    VecDestroy(&(fetiInexactData.tmp_vec_b1));
+    MatDestroy(&mat_feti);
+    KSPDestroy(&ksp_feti);
+
+    VecDestroy(&(fetiInexactPreconData.tmp_vec_b0));
+    KSPDestroy(&(fetiInexactPreconData.ksp_pc_feti));
+  }
+
+
+  void PetscSolverFeti::destroyFetiInexactReducedKsp()
+  {
+    FUNCNAME("PetscSolverFeti::destroyFetiInexactReducedKsp()");
+  }
+
+
   void PetscSolverFeti::createNullSpace()
   {
     FUNCNAME("PetscSolverFeti::createNullSpace()");
@@ -1601,6 +1779,11 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverFeti::dbgMatrix()");
 
+    if (levelMode == 2 && meshLevel == 0) {
+      MSG("WARNING: MAKE MORE GENERAL!\n");
+      return;
+    }
+
 #if (DEBUG != 0)
     PetscInt nRow, nCol;
     MatGetLocalSize(subdomain->getMatInterior(), &nRow, &nCol);
@@ -1807,7 +1990,7 @@ namespace AMDiS {
 	  int petscIndex = localDofMap.getLocalMatIndex(component, it->first);
 	  dofVec[it->first] = localSolB[petscIndex];
 	} else {
-	  if (dofMapSd[componentSpaces[component]].isRankDof(it->first)) {
+	  if (dofMapSubDomain[componentSpaces[component]].isRankDof(it->first)) {
 	    int petscIndex = localDofMap.getLocalMatIndex(component, it->first);
 	    TEST_EXIT(petscIndex < bsize)("Should not happen!\n");
 	    dofVec[it->first] = localSolB[petscIndex];	 
@@ -1901,8 +2084,6 @@ namespace AMDiS {
     createDirichletData(*mat);
 
     createFetiData();
-
-    TEST_EXIT(coarseSpaceMap.size() == 0)("This is not yet supported!\n");
  
     // === Create matrices for the FETI-DP method. ===
 
@@ -1912,6 +2093,27 @@ namespace AMDiS {
   
     subdomain->fillPetscMatrix(mat);
 
+
+    // === SUPER TRICK ===
+
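+    // For meshLevel == 1 an additional global-matrix solver is created for the
+    // same system; its PETSc matrix and RHS objects are then reused by this
+    // solver via the getMat()/getVecRhs() accessors of ParallelCoarseSpaceSolver
+    // (the RHS part is handled in fillPetscRhs()).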
+    if (meshLevel == 1) {
+      MSG("START MAT TRICK!\n");
+      mlSubdomain = new PetscSolverGlobalMatrix("");
+      mlSubdomain->initParameters();
+      mlSubdomain->setSymmetric(isSymmetric);
+      mlSubdomain->setHandleDirichletRows(dirichletMode == 0);
+      mlSubdomain->setMeshDistributor(meshDistributor, meshLevel);
+      mlSubdomain->init(componentSpaces, feSpaces);
+      mlSubdomain->setDofMapping(interiorMap);
+      mlSubdomain->setCoarseSpaceDofMapping(coarseSpaceMap[0]);
+      mlSubdomain->fillPetscMatrix(mat);
+      this->mat = mlSubdomain->getMat();
+      MSG("END MAT TRICK!\n");
+    }
+
+
+
+
     if (printTimings) {
       MPI::COMM_WORLD.Barrier();
       MSG("FETI-DP timing 02: %.5f seconds (creation of interior matrices)\n",
@@ -1951,9 +2153,14 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverFeti::fillPetscRhs()");
 
-    TEST_EXIT(coarseSpaceMap.size() == 0)("This is not yet supported!\n");
-
     subdomain->fillPetscRhs(vec);
+
+    if (meshLevel == 1) {
+      MSG("START VEC TRICK!\n");
+      mlSubdomain->fillPetscRhs(vec);
+      this->vecRhs = mlSubdomain->getVecRhs();
+      MSG("END VEC TRICK!\n"); 
+    }
   }
 
 
@@ -1961,7 +2168,7 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverFeti::createPreconditionerMatrix()");
 
-    if (fetiPreconditioner == FETI_NONE)
+    if (fetiPreconditioner == FETI_NONE && fetiSolverType == EXACT)
       return;
 
     double wtime = MPI::Wtime();
@@ -2048,8 +2255,7 @@ namespace AMDiS {
 	  if (isPrimal(rowComponent, *cursor))
 	    continue;
 
-	  switch (fetiPreconditioner) {
-	  case FETI_DIRICHLET:
+	  if (fetiPreconditioner == FETI_DIRICHLET) {
 	    colsLocal.clear();
 	    colsLocalOther.clear();
 	    valuesLocal.clear();
@@ -2110,11 +2316,9 @@ namespace AMDiS {
 		MatSetValues(mat_duals_interior, 1, &rowIndex, colsLocalOther.size(),
 			     &(colsLocalOther[0]), &(valuesLocalOther[0]), INSERT_VALUES);
 	    }
+	  } 
 
-	    break;
-
-
-	  case FETI_LUMPED:
+	  if (fetiPreconditioner == FETI_LUMPED || fetiSolverType == INEXACT) {
 	    if (!isDual(rowComponent, *cursor))
 	      continue;
 
@@ -2136,24 +2340,17 @@ namespace AMDiS {
 	      valuesLocal.push_back(value(*icursor));
 	    }
 
-	    {
-	      int rowIndex = dualDofMap.getLocalMatIndex(rowComponent, *cursor);		
-	      MatSetValues(mat_duals_duals, 1, &rowIndex, colsLocal.size(),
-			   &(colsLocal[0]), &(valuesLocal[0]), INSERT_VALUES);
-	    }
-	    break;
-
-
-	  default:
-	    break;
-	  }	   
+	    int rowIndex = dualDofMap.getLocalMatIndex(rowComponent, *cursor);		
+	    MatSetValues(mat_duals_duals, 1, &rowIndex, colsLocal.size(),
+			 &(colsLocal[0]), &(valuesLocal[0]), INSERT_VALUES);
+	  }
 	} 
       }
     }
     
     // === Start global assembly procedure for preconditioner matrices. ===
     
-    if (fetiPreconditioner == FETI_LUMPED) {
+    if (fetiPreconditioner == FETI_LUMPED || fetiSolverType == INEXACT) {
       MatAssemblyBegin(mat_duals_duals, MAT_FINAL_ASSEMBLY);
       MatAssemblyEnd(mat_duals_duals, MAT_FINAL_ASSEMBLY); 
     }
@@ -2180,73 +2377,54 @@ namespace AMDiS {
   }
   
 
-  void PetscSolverFeti::solveFetiMatrix(SystemVector &vec)
+  void PetscSolverFeti::solveFeti(Vec &rhsInterior, Vec &rhsCoarse,
+				  Vec &solInterior, Vec &solCoarse)
   {
-    FUNCNAME("PetscSolverFeti::solveFetiMatrix()");
+    FUNCNAME("PetscSolverFeti::solveFeti()");
 
-    // The function was removed when FETI-DP was reformulated for mixed finite
-    // elements. To reconstruct this function, check for revision number 2024.
-    ERROR_EXIT("This function was removed!\n");
+    switch (fetiSolverType) {
+    case EXACT:
+      solveFetiExact(rhsInterior, rhsCoarse, solInterior, solCoarse);
+      break;
+    case INEXACT:
+      solveFetiInexact(rhsInterior, rhsCoarse, solInterior, solCoarse);
+      break;
+    case INEXACT_REDUCED:
+      solveFetiInexactReduced(rhsInterior, rhsCoarse, solInterior, solCoarse);
+      break;
+    default:
+      ERROR_EXIT("Should not happen!\n");
+    }
   }
 
 
-  void PetscSolverFeti::solveReducedFetiMatrix(SystemVector &vec)
+  void PetscSolverFeti::solveFetiExact(Vec &rhsInterior, Vec &rhsCoarse,
+				       Vec &solInterior, Vec &solCoarse)
   {
-    FUNCNAME("PetscSolverFeti::solveReducedFetiMatrix()");
-
-#if (DEBUG != 0)
-    if (stokesMode)
-      PetscSolverFetiDebug::debugNullSpace(*this, vec);
-#endif
+    FUNCNAME("PetscSolverFeti::solveFetiExact()");
 
     // === Some temporary vectors. ===
 
-    Vec tmp_b0, tmp_b1;
-    VecCreateMPI(meshDistributor->getMeshLevelData().getMpiComm(meshLevel),
-		 localDofMap.getRankDofs(),
-		 nGlobalOverallInterior, &tmp_b0);
+    Vec tmp_b1;
     VecCreateMPI(meshDistributor->getMeshLevelData().getMpiComm(meshLevel),
 		 localDofMap.getRankDofs(),
 		 nGlobalOverallInterior, &tmp_b1);
 
-    Vec tmp_primal0, tmp_primal1;
-    primalDofMap.createVec(tmp_primal0);
+    Vec tmp_primal1;
     primalDofMap.createVec(tmp_primal1);
 
-    Vec tmp_lagrange, tmp_interface;
+    Vec tmp_lagrange;
     MatGetVecs(mat_lagrange, PETSC_NULL, &tmp_lagrange);
 
     // === Create RHS and solution vectors. ===
 
     Vec vecRhs, vecSol;
-    Vec vecRhsLagrange, vecSolLagrange, vecRhsInterface, vecSolInterface;
-    if (!stokesMode) {
-      MatGetVecs(mat_lagrange, PETSC_NULL, &vecRhsLagrange);
-      MatGetVecs(mat_lagrange, PETSC_NULL, &vecSolLagrange);
-
-      vecRhs = vecRhsLagrange;
-      vecSol = vecSolLagrange;
-    } else {
-      MatGetVecs(mat_lagrange, PETSC_NULL, &vecRhsLagrange);
-      MatGetVecs(mat_lagrange, PETSC_NULL, &vecSolLagrange);
-
-
-      interfaceDofMap.createVec(tmp_interface);
-      interfaceDofMap.createVec(vecRhsInterface);
-      interfaceDofMap.createVec(vecSolInterface);
-
-      Vec vecRhsArray[2] = {vecRhsInterface, vecRhsLagrange}; 
-      VecCreateNest(domainComm, 2, PETSC_NULL, vecRhsArray, &vecRhs);
-
-      Vec vecSolArray[2] = {vecSolInterface, vecSolLagrange}; 
-      VecCreateNest(domainComm, 2, PETSC_NULL, vecSolArray, &vecSol);
-
-      VecAssemblyBegin(vecRhs);
-      VecAssemblyEnd(vecRhs);
-
-      VecAssemblyBegin(vecSol);
-      VecAssemblyEnd(vecSol);
-    }
+    Vec vecRhsLagrange, vecSolLagrange;
+    MatGetVecs(mat_lagrange, PETSC_NULL, &vecRhsLagrange);
+    MatGetVecs(mat_lagrange, PETSC_NULL, &vecSolLagrange);
+    
+    vecRhs = vecRhsLagrange;
+    vecSol = vecSolLagrange;
 
     VecDuplicate(vecSol, &fetiKspData.draft);
 
@@ -2254,89 +2432,30 @@ namespace AMDiS {
 
     double wtime = MPI::Wtime();
 
-    if (!stokesMode) {
-      //    d = L inv(K_BB) f_B - L inv(K_BB) K_BPi inv(S_PiPi) [f_Pi - K_PiB inv(K_BB) f_B]
-      // vecRhs = L * inv(K_BB) * f_B
-      subdomain->solveGlobal(subdomain->getVecRhsInterior(), tmp_b0);
-      MatMult(mat_lagrange, tmp_b0, vecRhsLagrange);
-	
-      // tmp_primal0 = M_PiB * inv(K_BB) * f_B
-      MatMult(subdomain->getMatCoarseInterior(), tmp_b0, tmp_primal0);
-      
-      // tmp_primal0 = f_Pi - M_PiB * inv(K_BB) * f_B
-      VecAXPBY(tmp_primal0, 1.0, -1.0, subdomain->getVecRhsCoarse());
-      
-      if (!augmentedLagrange) {
-	double wtime2 = MPI::Wtime();
-	// tmp_primal0 = inv(S_PiPi) (f_Pi - M_PiB * inv(K_BB) * f_B)
-	KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0);
-	if (printTimings) {
-	  MSG("FETI-DP timing 09a: %.5f seconds (create rhs vector)\n", 
-	      MPI::Wtime() - wtime2);
-	}
-	
-	MatMult(subdomain->getMatInteriorCoarse(), tmp_primal0, tmp_b0);
-	subdomain->solveGlobal(tmp_b0, tmp_b0);
-	MatMult(mat_lagrange, tmp_b0, tmp_lagrange);
-      } else {
-	Vec tmp_mu;
-	MatGetVecs(mat_augmented_lagrange, PETSC_NULL, &tmp_mu);
-	
-	MatMult(mat_augmented_lagrange, vecRhsLagrange, tmp_mu);
-	VecScale(tmp_mu, -1.0);
-
-	Vec vec_array[2] = {tmp_primal0, tmp_mu};
-	Vec vec_nest;
-	VecCreateNest(PETSC_COMM_WORLD, 2, PETSC_NULL, vec_array, &vec_nest);	
+    //    d = L inv(K_BB) f_B - L inv(K_BB) K_BPi inv(S_PiPi) [f_Pi - K_PiB inv(K_BB) f_B]
+    // vecRhs = L * inv(K_BB) * f_B
+    subdomain->solveGlobal(rhsInterior, solInterior);
 
-	KSPSolve(ksp_schur_primal, vec_nest, vec_nest);
-
-	// 1 Step
-	MatMult(subdomain->getMatInteriorCoarse(), tmp_primal0, tmp_b0);
-	subdomain->solveGlobal(tmp_b0, tmp_b0);
-	MatMult(mat_lagrange, tmp_b0, tmp_lagrange);
-
-	// 2 Step
-	Vec tmp_lagrange1;
-	VecDuplicate(tmp_lagrange, &tmp_lagrange1);
-	MatMultTranspose(mat_augmented_lagrange, tmp_mu, tmp_lagrange1);
-	MatMultTranspose(mat_lagrange, tmp_lagrange1, tmp_b0);
-	subdomain->solveGlobal(tmp_b0, tmp_b0);
-	MatMult(mat_lagrange, tmp_b0, tmp_lagrange1);
-	VecAXPY(tmp_lagrange, 1.0, tmp_lagrange1);
-	VecDestroy(&tmp_lagrange1);
-
-
-	VecDestroy(&tmp_mu);
-	VecDestroy(&vec_nest);
-      }
-
-      VecAXPY(vecRhsLagrange, -1.0, tmp_lagrange);
-    } else {
-      TEST_EXIT(!augmentedLagrange)("Not yet implemented!\n");
-
-      subdomain->solveGlobal(subdomain->getVecRhsInterior(), tmp_b0);  
-
-      MatMult(subdomain->getMatCoarseInterior(), tmp_b0, tmp_primal0);
-      VecAXPBY(tmp_primal0, 1.0, -1.0, subdomain->getVecRhsCoarse());
-      double wtime2 = MPI::Wtime();
-      // tmp_primal0 = inv(S_PiPi) (f_Pi - M_PiB * inv(K_BB) * f_B)
-      KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0);
-      if (printTimings) {
-	MSG("FETI-DP timing 09a: %.5f seconds (create rhs vector)\n", 
-	    MPI::Wtime() - wtime2);
-      }
-      
-      MatMult(subdomain->getMatInteriorCoarse(), tmp_primal0, tmp_b1);
-      subdomain->solveGlobal(tmp_b1, tmp_b1);  
-      VecAXPY(tmp_b0, -1.0, tmp_b1);
-      
-      MatMult(mat_lagrange, tmp_b0, vecRhsLagrange);
-      
-      MatMult(subdomain->getMatCoarseInterior(1), tmp_b0, vecRhsInterface);
-      MatMult(subdomain->getMatCoarse(1, 0), tmp_primal0, tmp_interface);
-      VecAXPY(vecRhsInterface, 1.0, tmp_interface);
+    MatMult(mat_lagrange, solInterior, vecRhsLagrange); 
+    
+    // solCoarse = M_PiB * inv(K_BB) * f_B
+    MatMult(subdomain->getMatCoarseInterior(), solInterior, solCoarse);
+    
+    // solCoarse = f_Pi - M_PiB * inv(K_BB) * f_B
+    VecAXPBY(solCoarse, 1.0, -1.0, rhsCoarse);
+    
+    double wtime2 = MPI::Wtime();
+    // solCoarse = inv(S_PiPi) (f_Pi - M_PiB * inv(K_BB) * f_B)
+    KSPSolve(ksp_schur_primal, solCoarse, solCoarse);
+    if (printTimings) {
+      MSG("FETI-DP timing 09a: %.5f seconds (create rhs vector)\n", 
+	  MPI::Wtime() - wtime2);
     }
+    
+    MatMult(subdomain->getMatInteriorCoarse(), solCoarse, solInterior);
+    subdomain->solveGlobal(solInterior, solInterior);
+    MatMult(mat_lagrange, solInterior, tmp_lagrange);
+    VecAXPY(vecRhsLagrange, -1.0, tmp_lagrange);
 
     if (printTimings) {
       MPI::COMM_WORLD.Barrier();
@@ -2367,61 +2486,21 @@ namespace AMDiS {
 
     // === Solve for u_primals. ===
 
-    if (!stokesMode) {
-      MatMultTranspose(mat_lagrange, vecSol, tmp_b0);
-      VecAYPX(tmp_b0, -1.0, subdomain->getVecRhsInterior());  
-    } else {
-      MatMultTranspose(mat_lagrange, vecSolLagrange, tmp_b0);
-      VecAYPX(tmp_b0, -1.0, subdomain->getVecRhsInterior());  
+    MatMultTranspose(mat_lagrange, vecSol, solInterior);
+    VecAYPX(solInterior, -1.0, rhsInterior);  
 
-      MatMult(subdomain->getMatInteriorCoarse(1), vecSolInterface, tmp_b1);
-      VecAXPY(tmp_b0, -1.0, tmp_b1);
-    }
+    subdomain->solveGlobal(solInterior, tmp_b1);
+    MatMult(subdomain->getMatCoarseInterior(), tmp_b1, solCoarse);
+    VecAYPX(solCoarse, -1.0, rhsCoarse);
 
-    subdomain->solveGlobal(tmp_b0, tmp_b1);
-    MatMult(subdomain->getMatCoarseInterior(), tmp_b1, tmp_primal0);
-    VecAYPX(tmp_primal0, -1.0, subdomain->getVecRhsCoarse());
-
-    if (stokesMode) {
-      MatMult(subdomain->getMatCoarse(0, 1), vecSolInterface, tmp_primal1);
-      VecAXPY(tmp_primal0, -1.0, tmp_primal1);
-    }
-
-    if (augmentedLagrange == false) {
-      KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0);
-    } else {
-      Vec tmp_mu;
-      MatGetVecs(mat_augmented_lagrange, PETSC_NULL, &tmp_mu);
-      
-      MatMult(mat_lagrange, tmp_b1, tmp_lagrange);
-      MatMult(mat_augmented_lagrange, tmp_lagrange, tmp_mu);
-      VecScale(tmp_mu, -1.0);
-      
-      Vec vec_array[2] = {tmp_primal0, tmp_mu};
-      Vec vec_nest;
-      VecCreateNest(PETSC_COMM_WORLD, 2, PETSC_NULL, vec_array, &vec_nest);	
-
-      KSPSolve(ksp_schur_primal, vec_nest, vec_nest);
-
-      MatMultTranspose(mat_augmented_lagrange, tmp_mu, tmp_lagrange);
-      MatMultTranspose(mat_lagrange, tmp_lagrange, tmp_b1);
-      VecAXPY(tmp_b0, -1.0, tmp_b1);
-
-      VecDestroy(&tmp_mu);
-      VecDestroy(&vec_nest);
-    }
+    KSPSolve(ksp_schur_primal, solCoarse, solCoarse);
 
     // === Solve for u_b. ===
 
-    MatMult(subdomain->getMatInteriorCoarse(), tmp_primal0, tmp_b1);
-    VecAXPY(tmp_b0, -1.0, tmp_b1);
-    subdomain->solveGlobal(tmp_b0, tmp_b0);
-
-    // === And recover AMDiS solution vectors. ===
+    MatMult(subdomain->getMatInteriorCoarse(), solCoarse, tmp_b1);
+    VecAXPY(solInterior, -1.0, tmp_b1);
+    subdomain->solveGlobal(solInterior, solInterior);
 
-    recoverSolution(tmp_b0, tmp_primal0, vec);
-    if (stokesMode)
-      recoverInterfaceSolution(vecSolInterface, vec);
     
     // === Print timings and delete data. ===
 
@@ -2433,17 +2512,52 @@ namespace AMDiS {
 
     VecDestroy(&vecRhs);
     VecDestroy(&vecSol);
-    VecDestroy(&tmp_b0);
     VecDestroy(&tmp_b1);
     VecDestroy(&tmp_lagrange);
-    VecDestroy(&tmp_primal0);
     VecDestroy(&tmp_primal1);
+  }
 
-    if (stokesMode) {
-      VecDestroy(&tmp_interface);
-      VecDestroy(&vecRhsInterface);
-      VecDestroy(&vecSolInterface);      
-    }
+
+  void PetscSolverFeti::solveFetiInexact(Vec &rhsInterior, Vec &rhsCoarse,
+					 Vec &solInterior, Vec &solCoarse)
+  {
+    FUNCNAME("PetscSolverFeti::solveFetiInexact()");    
+
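+    // The inexact FETI-DP system is solved monolithically: interior, coarse
+    // (primal) and Lagrange unknowns are combined into nested vectors that
+    // are passed to the ksp_feti solver.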
+    Vec tmpLagrange0, tmpLagrange1;
+    lagrangeMap.createVec(tmpLagrange0);
+    lagrangeMap.createVec(tmpLagrange1);
+    VecSet(tmpLagrange0, 0.0);
+    VecSet(tmpLagrange1, 0.0);
+
+    Vec nestVecRhs[3];
+    nestVecRhs[0] = rhsInterior;
+    nestVecRhs[1] = rhsCoarse;
+    nestVecRhs[2] = tmpLagrange0;
+
+    Vec nestVecSol[3];
+    nestVecSol[0] = solInterior;
+    nestVecSol[1] = solCoarse;
+    nestVecSol[2] = tmpLagrange1;
+
+    Vec vecRhs, vecSol;
+    VecCreateNest(domainComm, 3, PETSC_NULL, nestVecRhs, &vecRhs);
+    VecCreateNest(domainComm, 3, PETSC_NULL, nestVecSol, &vecSol);
+
+    KSPSolve(ksp_feti, vecRhs, vecSol);
+
+    VecDestroy(&vecRhs);
+    VecDestroy(&vecSol);
+    VecDestroy(&tmpLagrange0);
+    VecDestroy(&tmpLagrange1);
+  }
+
+
+  void PetscSolverFeti::solveFetiInexactReduced(Vec &rhsInterior, Vec &rhsCoarse,
+						Vec &solInterior, Vec &solCoarse)
+  {
+    FUNCNAME("PetscSolverFeti::solveFetiInexactReduced()");
+
+    ERROR_EXIT("Not yet implemented!\n");
   }
 
 
@@ -2487,15 +2601,101 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverFeti::solvePetscMatrix()");
 
-    int debug = 0;
-    Parameters::get("parallel->debug feti", debug);
+    Vec solInterior;
+    VecCreateMPI(meshDistributor->getMeshLevelData().getMpiComm(meshLevel),
+		 localDofMap.getRankDofs(),
+		 nGlobalOverallInterior, 
+		 &solInterior);
+
+    Vec solCoarse;
+    primalDofMap.createVec(solCoarse);
+
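+    // Solve the FETI-DP system for the assembled subdomain rhs.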
+    solveFeti(subdomain->getVecRhsInterior(), 
+	      subdomain->getVecRhsCoarse(),
+	      solInterior,
+	      solCoarse);
+
+    // === And recover AMDiS solution vectors. ===
+
+    recoverSolution(solInterior, solCoarse, vec);  
 
-    if (debug)
-      solveFetiMatrix(vec);
-    else
-      solveReducedFetiMatrix(vec);
+    VecDestroy(&solInterior);
+    VecDestroy(&solCoarse);
 
     MeshDistributor::globalMeshDistributor->synchVector(vec);
   }
 
+
+  void PetscSolverFeti::solveGlobal(Vec &rhs, Vec &sol)
+  {
+    FUNCNAME("PetscSolverFeti::solveGlobal()");
+    
+    Vec rhsInterior, rhsCoarse, solInterior, solCoarse;
+    VecCreateMPI(domainComm,
+		 localDofMap.getRankDofs(),
+		 nGlobalOverallInterior, 
+		 &rhsInterior);
+    primalDofMap.createVec(rhsCoarse);
+    VecDuplicate(rhsInterior, &solInterior);
+    VecDuplicate(rhsCoarse, &solCoarse);
+
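+    // Compute the offset of this subdomain's interior DOFs within the global
+    // DOF numbering of the coarser mesh level: rank 0 of the subdomain
+    // determines it and the allreduce distributes it to all subdomain ranks.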
+    int offset = 0;
+    {
+      int domainLocal = 0, nSuperLocal = 0;
+      if (domainComm.Get_rank() == 0)
+	domainLocal = interiorMap->getOverallDofs();
+      
+      mpi::getDofNumbering(meshDistributor->getMpiComm(meshLevel - 1),
+			   domainLocal, offset, nSuperLocal);
+
+      int tmp = 0;
+      if (domainComm.Get_rank() == 0)
+	tmp = offset;
+      
+      domainComm.Allreduce(&tmp, &offset, 1, MPI_INT, MPI_SUM);
+    }
+
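+    // Set up index mappings between the monolithic interior vectors (rhs/sol)
+    // and the FETI-DP splitting into local (interior) and primal (coarse)
+    // unknowns.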
+    vector<int> localFromRhs, coarseFromRhs;
+    vector<int> rhsToLocal, rhsToCoarse;
+
+    int nComponents = componentSpaces.size();
+    for (int i = 0; i < nComponents; i++) {
+      DofMap &dMap = localDofMap[i].getMap();
+
+      for (DofMap::iterator it = dMap.begin(); it != dMap.end(); ++it) {
+	int matL = localDofMap.getMatIndex(i, it->first) + rStartInterior;
+	int matI = interiorMap->getMatIndex(i, it->first) + offset;
+
+	localFromRhs.push_back(matL);
+	rhsToLocal.push_back(matI);
+      }
+    }
+
+    for (int i = 0; i < nComponents; i++) {
+      DofMap &dMap = primalDofMap[i].getMap();
+
+      for (DofMap::iterator it = dMap.begin(); it != dMap.end(); ++it) {
+	int matL = primalDofMap.getMatIndex(i, it->first);
+	int matI = interiorMap->getMatIndex(i, it->first) + offset;
+
+	coarseFromRhs.push_back(matL);
+	rhsToCoarse.push_back(matI);
+      }
+    }
+
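+    // Scatter the rhs into its interior and coarse parts, solve the FETI-DP
+    // system, and gather the solution back into the monolithic layout.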
+    copyVec(rhs, rhsInterior, rhsToLocal, localFromRhs);
+    copyVec(rhs, rhsCoarse, rhsToCoarse, coarseFromRhs);
+  
+    solveFeti(rhsInterior, rhsCoarse, solInterior, solCoarse);
+
+    copyVec(solInterior, sol, localFromRhs, rhsToLocal);
+    copyVec(solCoarse, sol, coarseFromRhs, rhsToCoarse);
+
+    MPI::COMM_WORLD.Barrier();
+
+    VecDestroy(&rhsInterior);
+    VecDestroy(&rhsCoarse);
+    VecDestroy(&solInterior);
+    VecDestroy(&solCoarse);
+  }
 }
diff --git a/AMDiS/src/parallel/PetscSolverFeti.h b/AMDiS/src/parallel/PetscSolverFeti.h
index a20a771bf404173da987db63978fd603b099e409..3ade93c80870f02d5b8e9aa565ebe06c0cba79eb 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.h
+++ b/AMDiS/src/parallel/PetscSolverFeti.h
@@ -72,6 +72,9 @@ namespace AMDiS {
     /// Solve the system using FETI-DP method.
     void solvePetscMatrix(SystemVector &vec, AdaptInfo *adaptInfo);
 
+    /// Solves the FETI-DP system for a given global rhs and solution
+    /// vector (needed for the multilevel setup).
+    void solveGlobal(Vec &rhs, Vec &sol);
+
     /// Destroys all matrix data structures.
     void destroyMatrixData();
 
@@ -150,7 +153,8 @@ namespace AMDiS {
 
     void createMatAugmentedLagrange();
 
-    bool testWirebasketEdge(BoundaryObject &edge, const FiniteElemSpace *feSpace);
+    bool testWirebasketEdge(BoundaryObject &edge, 
+			    const FiniteElemSpace *feSpace);
 
     ///
     void createPreconditionerMatrix(Matrix<DOFMatrix*> *mat);
@@ -171,9 +175,33 @@ namespace AMDiS {
     /// Creates PETSc KSP solver object for the FETI-DP operator, \ref ksp_feti
     void createFetiKsp();
 
+    /// Creates the PETSc KSP solver object for the exact FETI-DP method.
+    void createFetiExactKsp();
+
+    /// Creates the PETSc KSP solver object for the inexact FETI-DP method.
+    void createFetiInexactKsp();
+
+    /// Creates the PETSc KSP solver object for the inexact reduced FETI-DP method.
+    void createFetiInexactReducedKsp();
+
+    /// Creates the lumped FETI-DP preconditioner on the given PC object.
+    void createFetiPreconLumped(PC pc);
+
+    /// Creates the Dirichlet FETI-DP preconditioner on the given PC object.
+    void createFetiPreconDirichlet(PC pc);
+
     /// Destroys FETI-DP operator, \ref ksp_feti
     void destroyFetiKsp();
 
+    /// Destroys the KSP solver object of the exact FETI-DP method.
+    void destroyFetiExactKsp();
+
+    /// Destroys the KSP solver object of the inexact FETI-DP method.
+    void destroyFetiInexactKsp();
+
+    /// Destroys the KSP solver object of the inexact reduced FETI-DP method.
+    void destroyFetiInexactReducedKsp();
+
     /// Create the null space of the FETI-DP operator (if there is one) and
     * attaches it to the corresponding matrices and KSP objects.
     void createNullSpace();
@@ -204,25 +232,26 @@ namespace AMDiS {
     void recoverInterfaceSolution(Vec& vecInterface, 
 				  SystemVector &vec);
 
-    /** \brief
-     * Solves the FETI-DP system globally, thus without reducing it to the 
-     * Lagrange multipliers. This should be used for debugging only to test
-     * if the FETI-DP system is setup correctly.
-     *
-     * \param[out]  vec    Solution DOF vectors.
-     */
-    void solveFetiMatrix(SystemVector &vec);
+    /// Solves the FETI-DP system for the given interior and coarse space
+    /// rhs and solution vectors.
+    void solveFeti(Vec &rhsInterior, Vec &rhsCoarse,
+		   Vec &solInterior, Vec &solCoarse);
 
-    /** \brief
-     * Solves the FETI-DP system with reducing it first to the Lagrange
-     * multipliers. This is what one expects when using the FETI-DP methid :)
-     *
-     * \param[out]  vec    Solution DOF vectors.
-     */
-    void solveReducedFetiMatrix(SystemVector &vec);
+    /// Solves the FETI-DP system with the standard (exact) FETI-DP method.
+    void solveFetiExact(Vec &rhsInterior, Vec &rhsCoarse,
+			Vec &solInterior, Vec &solCoarse);
 
+    /// Solves the FETI-DP system with the inexact FETI-DP method.
+    void solveFetiInexact(Vec &rhsInterior, Vec &rhsCoarse,
+			  Vec &solInterior, Vec &solCoarse);
+
+    /// Solves the FETI-DP system with the inexact reduced FETI-DP method
+    /// (not yet implemented).
+    void solveFetiInexactReduced(Vec &rhsInterior, Vec &rhsCoarse,
+				 Vec &solInterior, Vec &solCoarse);
+
+    /// Resets the internal solver statistics.
     void resetStatistics();
 
+    /// Prints the collected solver statistics.
     void printStatistics();
 
     /// Checks whether a given DOF is a primal DOF in a given component.
@@ -247,6 +276,9 @@ namespace AMDiS {
     }
 
   protected:
+    /// Type of the FETI-DP solver, i.e., exact or one of the inexact variants
+    FetiSolverType fetiSolverType;
+
     /// Mapping from primal DOF indices to a global index of primals.
     ParallelDofMapping primalDofMap;
 
@@ -309,6 +341,12 @@ namespace AMDiS {
     /// Data for MatMult operation in matrix \ref mat_feti
     FetiData fetiData;
 
+    /// Data for the MatMult operation of the inexact FETI-DP operator
+    FetiInexactData fetiInexactData;
+
+    /// Data for the shell preconditioner of the inexact FETI-DP method
+    FetiInexactPreconData fetiInexactPreconData;
+
     /// Defines which preconditioner should be used to solve the reduced
     /// FETI-DP system.
     FetiPreconditioner fetiPreconditioner;
@@ -338,11 +376,10 @@ namespace AMDiS {
 
     PetscSolver *subdomain;
 
-    PetscSolver *massMatrixSolver;
-
-    int rStartInterior;
+    // Just a workaround for multilevel methods; should be removed and generalized!
+    PetscSolver *mlSubdomain;
 
-    int nGlobalOverallInterior;
+    PetscSolver *massMatrixSolver;
 
     bool printTimings;
 
diff --git a/AMDiS/src/parallel/PetscSolverFetiOperators.cc b/AMDiS/src/parallel/PetscSolverFetiOperators.cc
index e1abec273184b56fbcd3343f46783d478cb36f68..e25f3e940647f365a4e7d03aa24c61a0d4277090 100644
--- a/AMDiS/src/parallel/PetscSolverFetiOperators.cc
+++ b/AMDiS/src/parallel/PetscSolverFetiOperators.cc
@@ -15,6 +15,26 @@
 
 namespace AMDiS {
 
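+  // Copies the locally owned values of globalB into localB; both vectors
+  // must have the same local size.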
+  void copyGlobalLocal(Vec globalB, Vec localB)
+  {
+    double *valueGlobal, *valueLocal;
+    VecGetArray(globalB, &valueGlobal);
+    VecGetArray(localB, &valueLocal);
+    
+    int nGlobal, nLocal;
+    VecGetLocalSize(globalB, &nGlobal);
+    VecGetLocalSize(localB, &nLocal);
+    
+    TEST_EXIT(nGlobal == nLocal)("Should not happen!\n");
+    
+    for (int i = 0; i < nGlobal; i++)
+      valueLocal[i] = valueGlobal[i];
+
+    VecRestoreArray(globalB, &valueGlobal);
+    VecRestoreArray(localB, &valueLocal);   
+  }
+
+
   int petscMultMatSchurPrimal(Mat mat, Vec x, Vec y)
   {
     // S_PiPi = K_PiPi - K_PiB inv(K_BB) K_BPi
@@ -194,6 +214,173 @@ namespace AMDiS {
   }
 
 
+  // y = mat * x
+  int petscMultMatFetiInexact(Mat mat, Vec x, Vec y)
+  {
+    FUNCNAME("petscMultMatFetiInexact()");
+
+    void *ctx;
+    MatShellGetContext(mat, &ctx);
+    FetiInexactData* data = static_cast<FetiInexactData*>(ctx);
+
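+    // Applies the block operator of the inexact FETI-DP system:
+    //
+    //   [ K_BB    K_BPi   B^T ]   [ x_B  ]   [ y_B  ]
+    //   [ K_PiB   K_PiPi   0  ] * [ x_Pi ] = [ y_Pi ]
+    //   [ B        0       0  ]   [ x_L  ]   [ y_L  ]
+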
+    Vec xInterior, xPrimal, xLagrange;
+    Vec yInterior, yPrimal, yLagrange;
+
+    VecNestGetSubVec(x, 0, &xInterior);
+    VecNestGetSubVec(x, 1, &xPrimal);
+    VecNestGetSubVec(x, 2, &xLagrange);
+
+    VecNestGetSubVec(y, 0, &yInterior);
+    VecNestGetSubVec(y, 1, &yPrimal);
+    VecNestGetSubVec(y, 2, &yLagrange);
+
+    Vec tmpInterior;
+    VecDuplicate(xInterior, &tmpInterior);
+    MatMult(*(data->matBPi), xPrimal, tmpInterior);
+    MatMultTranspose(*(data->mat_lagrange), xLagrange, yInterior);
+    VecAXPY(yInterior, 1.0, tmpInterior);
+
+    {
+      // yInterior += K_BB * xInterior, using the local work vectors
+      // tmp_vec_b0 and tmp_vec_b1 for the matrix-vector product
+      copyGlobalLocal(xInterior, data->tmp_vec_b0);
+      MatMult(*(data->matBB), data->tmp_vec_b0, data->tmp_vec_b1);
+      copyGlobalLocal(data->tmp_vec_b1, tmpInterior);
+      VecAXPY(yInterior, 1.0, tmpInterior);
+    }
+     
+    {
+      Vec tmpPrimal;
+      VecDuplicate(xPrimal, &tmpPrimal);
+      
+      MatMult(*(data->matPiB), xInterior, tmpPrimal);
+      MatMult(*(data->matPiPi), xPrimal, yPrimal);
+      VecAXPY(yPrimal, 1.0, tmpPrimal);
+      
+      VecDestroy(&tmpPrimal);
+    }
+
+    MatMult(*(data->mat_lagrange), xInterior, yLagrange);
+
+    VecDestroy(&tmpInterior);
+
+    return 0;
+  }
+
+
+  PetscErrorCode pcInexactFetiShell(PC pc, Vec x, Vec y)
+  {
+    void *ctx;
+    PCShellGetContext(pc, &ctx);
+    FetiInexactPreconData* data = static_cast<FetiInexactPreconData*>(ctx);
+
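+    // Block preconditioner for the inexact FETI-DP operator: approximate
+    // interior solves (ksp_interior), a coarse space solve (ksp_schur) and
+    // the standard FETI-DP preconditioner (pc_feti) for the Lagrange block.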
+    Vec xInterior, xPrimal, xLagrange;
+    Vec yInterior, yPrimal, yLagrange;
+
+    VecNestGetSubVec(x, 0, &xInterior);
+    VecNestGetSubVec(x, 1, &xPrimal);
+    VecNestGetSubVec(x, 2, &xLagrange);
+
+    VecNestGetSubVec(y, 0, &yInterior);
+    VecNestGetSubVec(y, 1, &yPrimal);
+    VecNestGetSubVec(y, 2, &yLagrange);
+
+
+    Vec tmpPrimal;
+    VecDuplicate(xPrimal, &tmpPrimal);
+    
+    Vec tmpInterior0, tmpInterior1;
+    VecDuplicate(xInterior, &tmpInterior0);
+    VecDuplicate(xInterior, &tmpInterior1);
+
+    {
+      // === First Block ===
+
+      copyGlobalLocal(xInterior, data->tmp_vec_b0);
+      KSPSolve(data->ksp_interior, data->tmp_vec_b0, data->tmp_vec_b0);
+      copyGlobalLocal(data->tmp_vec_b0, tmpInterior0);
+      MatMult(*(data->matPiB), tmpInterior0, tmpPrimal);
+      VecAYPX(tmpPrimal, -1.0, xPrimal);
+
+
+      // === Second Block ===
+
+      // tmpInterior0 already calculated
+      
+      KSPSolve(data->ksp_schur, tmpPrimal, tmpPrimal);
+
+
+      // === Third Block ===
+
+      // tmpPrimal already calculated
+
+      MatMult(*(data->matBPi), tmpPrimal, tmpInterior1);
+      copyGlobalLocal(tmpInterior1, data->tmp_vec_b0);
+      KSPSolve(data->ksp_interior, data->tmp_vec_b0, data->tmp_vec_b0);
+      copyGlobalLocal(data->tmp_vec_b0, tmpInterior1);
+      
+      VecAXPY(tmpInterior0, -1.0, tmpInterior1);             
+
+      VecCopy(tmpInterior0, yInterior);
+      VecCopy(tmpPrimal, yPrimal);
+    }
+
+
+    {
+      Vec tmpLagrange;
+      VecDuplicate(xLagrange, &tmpLagrange);
+
+      MatMult(*(data->mat_lagrange), yInterior, tmpLagrange);
+      PCApply(data->pc_feti, tmpLagrange, yLagrange);
+
+      PCApply(data->pc_feti, xLagrange, tmpLagrange);
+      VecAXPY(yLagrange, -1.0, tmpLagrange);
+      
+      VecDestroy(&tmpLagrange);
+    }
+
+    VecDestroy(&tmpPrimal);
+    VecDestroy(&tmpInterior0);
+    VecDestroy(&tmpInterior1);
+
+    return 0;
+  }
+
+
   // y = mat * x
   int petscMultMatFetiAugmented(Mat mat, Vec x, Vec y)
   {
@@ -429,6 +616,7 @@ namespace AMDiS {
 
     // === K_DD ===
 
+
     MatMult(*(data->mat_duals_duals), data->tmp_vec_duals0, data->tmp_vec_duals1);
 
 
diff --git a/AMDiS/src/parallel/PetscSolverFetiOperators.h b/AMDiS/src/parallel/PetscSolverFetiOperators.h
index ce9542093182abf95a1d3e411fbf2c2be919a2ee..d09328323fa0e6c95b33d55e8d44a7b4c8567f7e 100644
--- a/AMDiS/src/parallel/PetscSolverFetiOperators.h
+++ b/AMDiS/src/parallel/PetscSolverFetiOperators.h
@@ -28,6 +28,8 @@
 
 namespace AMDiS {
 
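+  /// Copies the locally owned values of a distributed vector into a local
+  /// vector of the same local size.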
+  void copyGlobalLocal(Vec globalB, Vec localB);
+
   /// 
   int petscMultMatSchurPrimal(Mat mat, Vec x, Vec y);
 
@@ -37,6 +39,12 @@ namespace AMDiS {
   /// FETI-DP operator
   int petscMultMatFeti(Mat mat, Vec x, Vec y);
 
+  /// Inexact FETI-DP operator
+  int petscMultMatFetiInexact(Mat mat, Vec x, Vec y);
+  
+  /// Inexact FETI-DP preconditioner
+  PetscErrorCode pcInexactFetiShell(PC pc, Vec x, Vec y);
+
   /// FETI-DP operator with augmented Lagrange constraints
   int petscMultMatFetiAugmented(Mat mat, Vec x, Vec y);
 
diff --git a/AMDiS/src/parallel/PetscSolverFetiStructs.h b/AMDiS/src/parallel/PetscSolverFetiStructs.h
index 2ed5cedb39d4a2d50d40969c1d314bcd3b5f0909..811547f15333cd833c03e3e2c9f2074362c8ba07 100644
--- a/AMDiS/src/parallel/PetscSolverFetiStructs.h
+++ b/AMDiS/src/parallel/PetscSolverFetiStructs.h
@@ -32,6 +32,16 @@ namespace AMDiS {
 
   class PetscSolverFeti;
 
+
+  enum FetiSolverType {
+    // Standard exact FETI-DP system
+    EXACT,
+    // Inexact FETI-DP
+    INEXACT,
+    // Inexact reduced FETI-DP
+    INEXACT_REDUCED
+  };
+
   /** \brief
    * This structure is used when defining the MatShell operation for solving 
    * primal schur complement. \ref petscMultMatSchurPrimal
@@ -98,6 +108,32 @@ namespace AMDiS {
   };
 
 
+  struct FetiInexactData {
+    /// Blocks K_BB, K_BPi, K_PiB and K_PiPi of the FETI-DP system matrix.
+    Mat *matBB, *matBPi, *matPiB, *matPiPi;
+
+    /// Matrix of Lagrange constraints.
+    Mat *mat_lagrange;
+
+    /// Temporary vectors on the local interior unknowns.
+    Vec tmp_vec_b0, tmp_vec_b1;
+  };
+
+
+  struct FetiInexactPreconData {
+    /// Solver for the coarse space (primal) block.
+    KSP ksp_schur;
+
+    /// Solver for the local interior block K_BB.
+    KSP ksp_interior;
+
+    KSP ksp_pc_feti;
+
+    /// Standard FETI-DP preconditioner applied to the Lagrange block.
+    PC pc_feti;
+
+    /// Coupling blocks K_PiB and K_BPi.
+    Mat *matPiB, *matBPi;
+
+    /// Matrix of Lagrange constraints.
+    Mat *mat_lagrange;
+
+    /// Temporary vector on the local interior unknowns.
+    Vec tmp_vec_b0;
+  };
+
+
   struct FetiDirichletPreconData {
     /// Matrix of scaled Lagrange variables.
     Mat *mat_lagrange_scaled;
diff --git a/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc b/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc
index bda315c2cce18ed2c15823b2cc1bd472a0790aaa..2b1ffc8481a2d46c3092c661ec62ba52feb5e30c 100644
--- a/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc
+++ b/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc
@@ -413,9 +413,6 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverGlobalMatrix::solveGlobal()");
 
-    double wtime = MPI::Wtime();
-    double t0 = 0.0, t1 = 0.0;
-
     Vec tmp;
     if (domainComm.Get_size() == 1)
       interiorMap->createLocalVec(tmp);
@@ -432,13 +429,7 @@ namespace AMDiS {
     VecRestoreArray(rhs, &rhsValues);
     VecRestoreArray(tmp, &tmpValues);
 
-    t0 = MPI::Wtime() - wtime;
-
-    wtime = MPI::Wtime();
     KSPSolve(kspInterior, tmp, tmp);
-    t1 = MPI::Wtime() - wtime;
-
-    wtime = MPI::Wtime();
 
     VecGetArray(tmp, &tmpValues);
     VecGetArray(sol, &rhsValues);
@@ -450,9 +441,6 @@ namespace AMDiS {
     VecRestoreArray(tmp, &tmpValues);
 
     VecDestroy(&tmp);
-    t0 += MPI::Wtime() - wtime;
-
-    //    MSG("TIMEING: %.5f %.5f\n", t0, t1);
   }
 
 
@@ -540,7 +528,7 @@ namespace AMDiS {
   }  
 
 
-  void PetscSolverGlobalMatrix::exitSolver(KSP ksp)
+  void PetscSolverGlobalMatrix::exitSolver(KSP &ksp)
   {
     FUNCNAME("PetscSolverGlobalMatrix::exitSolver()");
 
@@ -871,6 +859,8 @@ namespace AMDiS {
     MatNullSpaceCreate(domainComm, PETSC_FALSE, 1, &nullSpaceBasis, &matNullSpace);
     KSPSetNullSpace(ksp, matNullSpace);
     MatNullSpaceDestroy(&matNullSpace);
+
+    VecDestroy(&nullSpaceBasis);
   }
 
 
diff --git a/AMDiS/src/parallel/PetscSolverGlobalMatrix.h b/AMDiS/src/parallel/PetscSolverGlobalMatrix.h
index 040151de5b95b24ea31a996ec9dac912ecb4cf09..90a36b22d4b04da23e2b663b0cbeeb38ffab3804 100644
--- a/AMDiS/src/parallel/PetscSolverGlobalMatrix.h
+++ b/AMDiS/src/parallel/PetscSolverGlobalMatrix.h
@@ -100,7 +100,7 @@ namespace AMDiS {
 
     virtual void initSolver(KSP &ksp);
 
-    virtual void exitSolver(KSP ksp);
+    virtual void exitSolver(KSP &ksp);
 
     virtual void initPreconditioner(PC pc);
 
diff --git a/AMDiS/src/parallel/PetscSolverNavierStokes.cc b/AMDiS/src/parallel/PetscSolverNavierStokes.cc
index e3a9aa11a11fdd0cd5d1c20cc75c82d6e5a2a770..428dad486b81d5659ee135ecd273f1494906d362 100644
--- a/AMDiS/src/parallel/PetscSolverNavierStokes.cc
+++ b/AMDiS/src/parallel/PetscSolverNavierStokes.cc
@@ -104,6 +104,8 @@ namespace AMDiS {
     FUNCNAME("PetscSolverNavierStokes::initSolver()");
 
     // Create FGMRES based outer solver
+    
+    MSG("CREATE POS 1: %p\n", &ksp);
     KSPCreate(domainComm, &ksp);
     KSPSetOperators(ksp, getMatInterior(), getMatInterior(), SAME_NONZERO_PATTERN);
     KSPMonitorSet(ksp, KSPMonitorTrueResidualNorm, PETSC_NULL, PETSC_NULL);
@@ -136,6 +138,7 @@ namespace AMDiS {
     PCFieldSplitSetSchurFactType(pc, PC_FIELDSPLIT_SCHUR_FACT_FULL);
     createFieldSplit(pc, "velocity", velocityComponents);
     createFieldSplit(pc, "pressure", pressureComponent);
+    PCSetFromOptions(pc);
 
     KSPSetUp(kspInterior);
 
@@ -174,7 +177,6 @@ namespace AMDiS {
     if (pressureNullSpace)
       setConstantNullSpace(kspSchur);
 
-
     // === Mass matrix solver ===
 
     const FiniteElemSpace *pressureFeSpace = componentSpaces[pressureComponent];
@@ -324,8 +326,11 @@ namespace AMDiS {
     FUNCNAME("PetscSolverNavierStokes::exitPreconditioner()");
 
     massMatrixSolver->destroyMatrixData();
+    massMatrixSolver->destroyVectorData();
     laplaceMatrixSolver->destroyMatrixData();
+    laplaceMatrixSolver->destroyVectorData();
     conDifMatrixSolver->destroyMatrixData();
+    conDifMatrixSolver->destroyVectorData();
 
     delete massMatrixSolver;
     massMatrixSolver = NULL;
diff --git a/AMDiS/src/parallel/StdMpi.cc b/AMDiS/src/parallel/StdMpi.cc
index 2bec40b83df87fef2aee8622e58d1af5ce1527b9..97d13fb436996d4743e761c75ae5e884ec60b759 100644
--- a/AMDiS/src/parallel/StdMpi.cc
+++ b/AMDiS/src/parallel/StdMpi.cc
@@ -183,7 +183,8 @@ namespace AMDiS {
     return size;
   }
 
-  void StdMpiHelper<vector<vector<double> > >::createBuffer(vector<vector<double> > &data, double *buf)
+  void StdMpiHelper<vector<vector<double> > >::createBuffer(vector<vector<double> > &data, 
+							    double *buf)
   {
     buf[0] = data.size();
     int counter = 1;
@@ -195,7 +196,9 @@ namespace AMDiS {
     }
   }
 
-  void StdMpiHelper<vector<vector<double> > >::makeFromBuffer(vector<vector<double> > &data, double *buf, int bufSize)
+  void StdMpiHelper<vector<vector<double> > >::makeFromBuffer(vector<vector<double> > &data, 
+							      double *buf, 
+							      int bufSize)
   {
     data.resize(static_cast<unsigned int>(buf[0]));
     int counter = 1;
diff --git a/AMDiS/src/parallel/StdMpi.h b/AMDiS/src/parallel/StdMpi.h
index 0800cacac6e2a034c81d629f25084ceb136be84a..22a1d0d1ef71cb164e6ba92dda8472729a1843cd 100644
--- a/AMDiS/src/parallel/StdMpi.h
+++ b/AMDiS/src/parallel/StdMpi.h
@@ -427,6 +427,11 @@ namespace AMDiS {
       }
  
       MPI::Request::Waitall(requestCounter, request);
+
+#if (DEBUG != 0)
+      bool testall = MPI::Request::Testall(requestCounter, request);
+      TEST_EXIT(testall)("Should not happen!\n");
+#endif
     }