From c21d2182fbb730239da93c6f1439b1bc9ad5fb50 Mon Sep 17 00:00:00 2001
From: Thomas Witkowski <thomas.witkowski@gmx.de>
Date: Mon, 8 Oct 2012 13:46:40 +0000
Subject: [PATCH] Added first version of FETI-DP with augmented coarse grid.
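
The augmented coarse space can be enabled with the parameter
"parallel->feti->augmented lagrange". If set, one constraint per owned
interior boundary edge and FE space is assembled into the new matrix
mat_augmented_lagrange. The Schur complement on the primal variables and
the FETI-DP operator are then applied through the new shell operators
petscMultMatSchurPrimalAugmented and petscMultMatFetiAugmented. In
addition, Recovery::fill_struct_vec now treats DOFs on parallel interior
boundaries like boundary DOFs, and the recovered solution is synchronized
between ranks.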

---
 AMDiS/src/Recovery.cc                         |  76 ++++--
 AMDiS/src/parallel/PetscSolverFeti.cc         | 232 +++++++++++++++---
 AMDiS/src/parallel/PetscSolverFeti.h          |  16 ++
 .../src/parallel/PetscSolverFetiOperators.cc  | 100 +++++++-
 AMDiS/src/parallel/PetscSolverFetiOperators.h |   6 +
 AMDiS/src/parallel/PetscSolverFetiStructs.h   |  22 ++
 6 files changed, 403 insertions(+), 49 deletions(-)

diff --git a/AMDiS/src/Recovery.cc b/AMDiS/src/Recovery.cc
index 51603fe6..79d03eef 100644
--- a/AMDiS/src/Recovery.cc
+++ b/AMDiS/src/Recovery.cc
@@ -11,6 +11,9 @@
 
 
 #include "Recovery.h"
+#ifdef HAVE_PARALLEL_DOMAIN_AMDIS
+#include "parallel/MeshDistributor.h"
+#endif
 
 RecoveryStructure& RecoveryStructure::operator=(const RecoveryStructure& rhs) 
 { 
@@ -386,7 +389,7 @@ void Recovery::compute_node_sums(DOFVector<double> *uh, ElInfo *elInfo,
 
   TEST_EXIT_DBG(!gradient)
     ("SPR of flux or gradient need computing interior sums\n");
-  TEST_EXIT_DBG(feSpace->getMesh()->getDim()==1)
+  TEST_EXIT_DBG(feSpace->getMesh()->getDim() == 1)
     ("At the moment only for linear finite elements.\n");
 
   WorldVector<double> node;  // For world coordinates at nodes.
@@ -514,6 +517,13 @@ void Recovery::fill_struct_vec(DOFVector<double> *uh,
   if (degree == 2 && dim > 1)
     fill_flag |= Mesh::FILL_NEIGH | Mesh::FILL_OPP_COORDS;
 
+#ifdef HAVE_PARALLEL_DOMAIN_AMDIS
+  DofContainer boundaryDofs;
+  DofContainerSet boundaryDofSet;
+  MeshDistributor::globalMeshDistributor->getAllBoundaryDofs(feSpace, 0, boundaryDofs);
+  boundaryDofSet.insert(boundaryDofs.begin(), boundaryDofs.end());
+#endif
+
   TraverseStack stack;
   ElInfo *el_info = stack.traverseFirst(mesh, -1, fill_flag);
 
@@ -523,7 +533,13 @@ void Recovery::fill_struct_vec(DOFVector<double> *uh,
     int n_neighbors = 0;     // counting interior vertices of element
     for (int i = 0; i < n_vertices; i++) {
       DegreeOfFreedom k = dof[i][preDofs[VERTEX]];
-      if (el_info->getBoundary(VERTEX, i) == INTERIOR)
+#ifdef HAVE_PARALLEL_DOMAIN_AMDIS
+      bool isInterior = (el_info->getBoundary(VERTEX, i) == INTERIOR) && (boundaryDofSet.count(dof[i]) == 0);
+#else
+      bool isInterior = el_info->getBoundary(VERTEX, i) == INTERIOR;
+#endif
+
+      if (isInterior)
 	interior_vertices[n_neighbors++] = k;
     }
 
@@ -531,6 +547,12 @@ void Recovery::fill_struct_vec(DOFVector<double> *uh,
       ("Each element should have a least one interior vertex!\n");
 
     for (int i = 0; i < n_vertices; i++) {     // Handling nodes on vertices.
+#ifdef HAVE_PARALLEL_DOMAIN_AMDIS
+      bool isInterior = (el_info->getBoundary(VERTEX, i) == INTERIOR) && (boundaryDofSet.count(dof[i]) == 0);
+#else
+      bool isInterior = el_info->getBoundary(VERTEX, i) == INTERIOR;
+#endif
+
       DegreeOfFreedom k = dof[i][preDofs[VERTEX]];
       
       // Setting world coordinates of node.
@@ -539,7 +561,7 @@ void Recovery::fill_struct_vec(DOFVector<double> *uh,
 	*(*struct_vec)[k].coords = el_info->getCoord(i);
       }
 
-      if (el_info->getBoundary(VERTEX, i) == INTERIOR) {	
+      if (isInterior) {	
 	// Allocating memory for matrix and right hand side.
 	if (!(*struct_vec)[k].A) {
 	  (*struct_vec)[k].A = new Matrix<double>(n_monomials, n_monomials);
@@ -584,10 +606,12 @@ void Recovery::fill_struct_vec(DOFVector<double> *uh,
 
     int n_count = n_vertices;
 
-    if (dim > 1)     // Handling nodes on edges.
-      for (int i = 0; i < n_edges; i++)
+    if (dim > 1) {     // Handling nodes on edges.
+      for (int i = 0; i < n_edges; i++) {
+	bool isEdgeInterior = el_info->getBoundary(EDGE, i) == INTERIOR;
+
 	for (int j = 0; j < (*nDOFs)[EDGE]; j++) {
-	  DegreeOfFreedom k = dof[n_vertices+i][preDofs[EDGE] + j];
+	  DegreeOfFreedom k = dof[n_vertices + i][preDofs[EDGE] + j];
 
 	  if (!(*struct_vec)[k].coords) {
 	    // Setting world coordinates of node.
@@ -599,23 +623,27 @@ void Recovery::fill_struct_vec(DOFVector<double> *uh,
 	    // Setting list of adjacent interior vertices.
 	    (*struct_vec)[k].neighbors = new std::set<DegreeOfFreedom>;
 	    
-	    if (el_info->getBoundary(EDGE, i) == INTERIOR)
+	    if (isEdgeInterior) {
 	      for (int m = 0; m < 2; m++) {
 		int l = Global::getReferenceElement(dim)->getVertexOfEdge(i, m);
 		if (el_info->getBoundary(VERTEX, l) == INTERIOR)
 		  (*struct_vec)[k].neighbors->insert(dof[l][preDofs[VERTEX]]);
-	      } else
+	      } 
+	    } else {
 	      for (int m = 0; m < n_neighbors; m++)
 		(*struct_vec)[k].neighbors->insert(interior_vertices[m]);
+	    }
 	  }
 	  
 	  n_count++;
 	}
+      }
+    }
 
     if (dim == 3)     // Handling nodes on faces.
       for (int i = 0; i < n_faces; i++)
 	for (int j = 0; j < (*nDOFs)[FACE]; j++) {
-	  DegreeOfFreedom k = dof[n_vertices+n_edges+i][preDofs[FACE] + j];
+	  DegreeOfFreedom k = dof[n_vertices + n_edges + i][preDofs[FACE] + j];
 	  
 	  if (!(*struct_vec)[k].coords) {
 	    // Setting world coordinates of node.
@@ -695,6 +723,11 @@ void Recovery::recoveryUh(DOFVector<double> *uh, DOFVector<double> &rec_vec)
   DOFVector<double>::Iterator result_it(result, USED_DOFS);
   std::set<DegreeOfFreedom>::const_iterator setIterator;
 
+// #ifdef HAVE_PARALLEL_DOMAIN_AMDIS
+//   DOFVector<int> neighbourSize(feSpace, "neighbourSize");
+//   neighbourSize.set(0);
+// #endif
+
   for (SV_it.reset(), result_it.reset(); !result_it.end(); ++SV_it, ++result_it) {
     if ((*SV_it).rec_uh) {
       *result_it = (*(*SV_it).rec_uh)[0];
@@ -704,15 +737,30 @@ void Recovery::recoveryUh(DOFVector<double> *uh, DOFVector<double> &rec_vec)
 	     setIterator != (*SV_it).neighbors->end();
 	     ++setIterator) {
 	  for (int i = 0; i < n_monomials; i++)
-	    *result_it = *result_it + (*(*struct_vec)[*setIterator].rec_uh)[i] *
-	      (*(*matrix_fcts)[0][i])(*(*SV_it).coords,
-				      *(*struct_vec)[*setIterator].coords);
+	    *result_it += (*(*struct_vec)[*setIterator].rec_uh)[i] *
+	      (*(*matrix_fcts)[0][i])(*(*SV_it).coords, *(*struct_vec)[*setIterator].coords);
 	}
+// #ifdef HAVE_PARALLEL_DOMAIN_AMDIS
+// 	neighbourSize[result_it.getDOFIndex()] = (*SV_it).neighbors->size();
+// #else
 	*result_it /= (*SV_it).neighbors->size();
-      } else
-	*result_it=0.0;
+	// #endif
+      } else {
+	*result_it = 0.0;
+      }
     }
   }
+
+#ifdef HAVE_PARALLEL_DOMAIN_AMDIS
+//   MeshDistributor::globalMeshDistributor->synchAddVector(rec_vec);
+//   MeshDistributor::globalMeshDistributor->synchAddVector(neighbourSize);
+
+//   for (result_it.reset(); !result_it.end(); ++result_it)
+//     if (neighbourSize[result_it.getDOFIndex()] > 0) 
+//       *result_it /= neighbourSize[result_it.getDOFIndex()];
+
+  MeshDistributor::globalMeshDistributor->synchVector(rec_vec);
+#endif
 }
 
 
diff --git a/AMDiS/src/parallel/PetscSolverFeti.cc b/AMDiS/src/parallel/PetscSolverFeti.cc
index e341663a..d2ecad85 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.cc
+++ b/AMDiS/src/parallel/PetscSolverFeti.cc
@@ -38,6 +38,7 @@ namespace AMDiS {
       nGlobalOverallInterior(0),
       printTimings(false),
       stokesMode(false),
+      augmentedLagrange(false),
       pressureComponent(-1)
   {
     FUNCNAME("PetscSolverFeti::PetscSolverFeti()");
@@ -68,6 +69,8 @@ namespace AMDiS {
       meshLevel = 1;
 
     Parameters::get("parallel->print timings", printTimings);
+
+    Parameters::get("parallel->feti->augmented lagrange", augmentedLagrange);
   }
 
 
@@ -213,6 +216,7 @@ namespace AMDiS {
     for (unsigned int i = 0; i < meshDistributor->getFeSpaces().size(); i++) {
       const FiniteElemSpace *feSpace = meshDistributor->getFeSpace(i);
       createLagrange(feSpace);
+      createAugmentedLagrange(feSpace);
     }
 
     lagrangeMap.update();
@@ -524,6 +528,15 @@ namespace AMDiS {
   }
 
 
+  void PetscSolverFeti::createAugmentedLagrange(const FiniteElemSpace *feSpace)
+  {
+    FUNCNAME("PetscSolverFeti::createAugmentedLagrange()");
+
+    if (!augmentedLagrange)
+      return;
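+
+    // Nothing more to do here yet: the augmented constraints are assembled
+    // later in createMatAugmentedLagrange().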
+  }
+
+
   void PetscSolverFeti::createIndexB(const FiniteElemSpace *feSpace)
   {
     FUNCNAME("PetscSolverFeti::createIndexB()");
@@ -644,6 +657,73 @@ namespace AMDiS {
   }
 
 
+  void PetscSolverFeti::createMatAugmentedLagrange(vector<const FiniteElemSpace*> &feSpaces)
+  {
+    FUNCNAME("PetscSolverFeti::createMatAugmentedLagrange()");
+
+    if (!augmentedLagrange)
+      return;
+
+    double wtime = MPI::Wtime();
+
+    nRankEdges = 0;
+    nOverallEdges = 0;
+    InteriorBoundary &intBound = meshDistributor->getIntBoundary();
+    for (InteriorBoundary::iterator it(intBound.getOwn()); !it.end(); ++it)
+      if (it->rankObj.subObj == EDGE)
+	nRankEdges++;
+    
+    int rStartEdges = 0;
+    mpi::getDofNumbering(mpiCommGlobal, nRankEdges, rStartEdges, nOverallEdges);
+
+    MSG("nRankEdges = %d, nOverallEdges = %d\n", nRankEdges, nOverallEdges);
+
+    nRankEdges *= feSpaces.size();
+    rStartEdges *= feSpaces.size();
+    nOverallEdges *= feSpaces.size();
+
+    MatCreateAIJ(mpiCommGlobal,
+		 lagrangeMap.getRankDofs(), nRankEdges,
+		 lagrangeMap.getOverallDofs(), nOverallEdges,
+		 1, PETSC_NULL, 1, PETSC_NULL, 
+		 &mat_augmented_lagrange);
+    MatSetOption(mat_augmented_lagrange, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
+
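+    // Each owned interior boundary edge contributes one constraint (one
+    // column) per FE space: a unit entry for every non-primal DOF of the
+    // edge, addressed in the Lagrange multiplier numbering.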
+    int colCounter = rStartEdges;
+    for (InteriorBoundary::iterator it(intBound.getOwn()); !it.end(); ++it) {
+      if (it->rankObj.subObj == EDGE) {
+	for (int i = 0; i < feSpaces.size(); i++) {
+	  DofContainer edgeDofs;
+	  it->rankObj.el->getAllDofs(feSpaces[i], it->rankObj, edgeDofs);
+
+	  MSG("SIZE = %d\n", static_cast<int>(edgeDofs.size()));
+
+	  for (DofContainer::iterator dofIt = edgeDofs.begin();
+	       dofIt != edgeDofs.end(); ++dofIt) {
+	    TEST_EXIT_DBG(isPrimal(feSpaces[i], **dofIt) == false)
+	      ("Should not be primal!\n");
+
+	    int row = lagrangeMap.getMatIndex(i, **dofIt);
+	    double value = 1.0;
+	    MatSetValue(mat_augmented_lagrange, row, colCounter, value, INSERT_VALUES);
+	  }
+
+	  colCounter++;
+	}
+      }
+    }
+
+    MatAssemblyBegin(mat_augmented_lagrange, MAT_FINAL_ASSEMBLY);
+    MatAssemblyEnd(mat_augmented_lagrange, MAT_FINAL_ASSEMBLY);
+
+    if (printTimings) {
+      MPI::COMM_WORLD.Barrier();
+      MSG("FETI-DP timing 05a: %.5f seconds (creation of augmented lagrange constraint matrix)\n", 
+	  MPI::Wtime() - wtime);
+    }
+  }
+
+
   void PetscSolverFeti::createSchurPrimalKsp(vector<const FiniteElemSpace*> &feSpaces)
   {
     FUNCNAME("PetscSolverFeti::createSchurPrimalKsp()");
@@ -651,21 +731,43 @@ namespace AMDiS {
     if (schurPrimalSolver == 0) {
       MSG("Create iterative schur primal solver!\n");
 
-      schurPrimalData.subSolver = subdomain;
-      
-      localDofMap.createVec(schurPrimalData.tmp_vec_b, nGlobalOverallInterior);
-      primalDofMap.createVec(schurPrimalData.tmp_vec_primal);
+      if (augmentedLagrange == false) {
+	schurPrimalData.subSolver = subdomain;
+	
+	localDofMap.createVec(schurPrimalData.tmp_vec_b, nGlobalOverallInterior);
+	primalDofMap.createVec(schurPrimalData.tmp_vec_primal);
+	
+	MatCreateShell(mpiCommGlobal,
+		       primalDofMap.getRankDofs(), 
+		       primalDofMap.getRankDofs(), 
+		       primalDofMap.getOverallDofs(), 
+		       primalDofMap.getOverallDofs(),
+		       &schurPrimalData, 
+		       &mat_schur_primal);
+	MatShellSetOperation(mat_schur_primal, MATOP_MULT, 
+			     (void(*)(void))petscMultMatSchurPrimal);	
+      } else {
+	schurPrimalAugmentedData.subSolver = subdomain;
+
+	localDofMap.createVec(schurPrimalAugmentedData.tmp_vec_b0, nGlobalOverallInterior);
+	localDofMap.createVec(schurPrimalAugmentedData.tmp_vec_b1, nGlobalOverallInterior);
+	primalDofMap.createVec(schurPrimalAugmentedData.tmp_vec_primal);
+	lagrangeMap.createVec(schurPrimalAugmentedData.tmp_vec_lagrange);
+
+	schurPrimalAugmentedData.mat_lagrange = &mat_lagrange;
+	schurPrimalAugmentedData.mat_augmented_lagrange = &mat_augmented_lagrange;
+
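+	// The augmented Schur primal operator acts on nested vectors
+	// [u_Pi, mu], where mu holds one multiplier per augmented edge
+	// constraint; see petscMultMatSchurPrimalAugmented().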
+	MatCreateShell(mpiCommGlobal,
+		       primalDofMap.getRankDofs() + nRankEdges, 
+		       primalDofMap.getRankDofs() + nRankEdges, 
+		       primalDofMap.getOverallDofs() + nOverallEdges, 
+		       primalDofMap.getOverallDofs() + nOverallEdges,
+		       &schurPrimalAugmentedData, 
+		       &mat_schur_primal);
+	MatShellSetOperation(mat_schur_primal, MATOP_MULT, 
+			     (void(*)(void))petscMultMatSchurPrimalAugmented);
+      }
 
-      MatCreateShell(mpiCommGlobal,
-		     primalDofMap.getRankDofs(), 
-		     primalDofMap.getRankDofs(), 
-		     primalDofMap.getOverallDofs(), 
-		     primalDofMap.getOverallDofs(),
-		     &schurPrimalData, 
-		     &mat_schur_primal);
-      MatShellSetOperation(mat_schur_primal, MATOP_MULT, 
-			   (void(*)(void))petscMultMatSchurPrimal);
-      
       KSPCreate(mpiCommGlobal, &ksp_schur_primal);
       KSPSetOperators(ksp_schur_primal, mat_schur_primal, mat_schur_primal, SAME_NONZERO_PATTERN);
       KSPSetOptionsPrefix(ksp_schur_primal, "schur_primal_");
@@ -674,6 +776,8 @@ namespace AMDiS {
     } else {
       MSG("Create direct schur primal solver!\n");
 
+      TEST_EXIT(!augmentedLagrange)("Not yet supported!\n");
+
       double wtime = MPI::Wtime();
 
       TEST_EXIT_DBG(meshLevel == 0)
@@ -789,10 +893,21 @@ namespace AMDiS {
     FUNCNAME("PetscSolverFeti::destroySchurPrimal()");
 
     if (schurPrimalSolver == 0) {
-      schurPrimalData.subSolver = NULL;
-
-      VecDestroy(&schurPrimalData.tmp_vec_b);
-      VecDestroy(&schurPrimalData.tmp_vec_primal);
+      if (augmentedLagrange == false) {
+	schurPrimalData.subSolver = NULL;
+	
+	VecDestroy(&schurPrimalData.tmp_vec_b);
+	VecDestroy(&schurPrimalData.tmp_vec_primal);
+      } else {
+	schurPrimalAugmentedData.subSolver = NULL;
+	schurPrimalAugmentedData.mat_lagrange = NULL;
+	schurPrimalAugmentedData.mat_augmented_lagrange = NULL;
+
+	VecDestroy(&schurPrimalAugmentedData.tmp_vec_b0);
+	VecDestroy(&schurPrimalAugmentedData.tmp_vec_b1);
+	VecDestroy(&schurPrimalAugmentedData.tmp_vec_primal);
+	VecDestroy(&schurPrimalAugmentedData.tmp_vec_lagrange);
+      }
     }
     
     MatDestroy(&mat_schur_primal);
@@ -821,9 +936,18 @@ namespace AMDiS {
 		     lagrangeMap.getOverallDofs(), 
 		     lagrangeMap.getOverallDofs(),
 		     &fetiData, &mat_feti);
-      MatShellSetOperation(mat_feti, MATOP_MULT, 
-			   (void(*)(void))petscMultMatFeti);
+      if (augmentedLagrange == false) {
+	MatShellSetOperation(mat_feti, MATOP_MULT, 
+			     (void(*)(void))petscMultMatFeti);
+      } else {
+	fetiData.mat_augmented_lagrange = &mat_augmented_lagrange;
+	primalDofMap.createVec(fetiData.tmp_vec_primal1);
+	MatShellSetOperation(mat_feti, MATOP_MULT, 
+			     (void(*)(void))petscMultMatFetiAugmented);	
+      }
     }  else {
+      TEST_EXIT_DBG(!augmentedLagrange)("Not yet implemented!\n");
+
       localDofMap.createVec(fetiData.tmp_vec_b1, nGlobalOverallInterior);
       primalDofMap.createVec(fetiData.tmp_vec_primal1);
       interfaceDofMap.createVec(fetiData.tmp_vec_interface);
@@ -1047,6 +1171,11 @@ namespace AMDiS {
     VecDestroy(&fetiData.tmp_vec_lagrange);
     VecDestroy(&fetiData.tmp_vec_primal0);
 
+    if (augmentedLagrange) {
+      fetiData.mat_augmented_lagrange = PETSC_NULL;
+      VecDestroy(&fetiData.tmp_vec_b1);
+    }
+
     if (stokesMode) {
       VecDestroy(&fetiData.tmp_vec_b1);
       VecDestroy(&fetiData.tmp_vec_primal1);
@@ -1470,6 +1599,9 @@ namespace AMDiS {
     // === Create and fill PETSc matrix for Lagrange constraints. ===
     createMatLagrange(feSpaces);
 
+    // === Create and fill PETSc matrix for augmented Lagrange constraints. ===
+    createMatAugmentedLagrange(feSpaces);
+
     // === Create PETSc solver for the Schur complement on primal variables. ===   
     createSchurPrimalKsp(feSpaces);
 
@@ -1775,27 +1907,58 @@ namespace AMDiS {
       // vecRhs = L * inv(K_BB) * f_B
       subdomain->solveGlobal(subdomain->getVecRhsInterior(), tmp_b0);
       MatMult(mat_lagrange, tmp_b0, vecRhsLagrange);
-
+
       // tmp_primal0 = M_PiB * inv(K_BB) * f_B
       MatMult(subdomain->getMatCoarseInterior(), tmp_b0, tmp_primal0);
       
       // tmp_primal0 = f_Pi - M_PiB * inv(K_BB) * f_B
       VecAXPBY(tmp_primal0, 1.0, -1.0, subdomain->getVecRhsCoarse());
-
-      double wtime2 = MPI::Wtime();
-      // tmp_primal0 = inv(S_PiPi) (f_Pi - M_PiB * inv(K_BB) * f_B)
-      KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0);
-      if (printTimings) {
-	MSG("FETI-DP timing 09a: %.5f seconds (create rhs vector)\n", 
-	    MPI::Wtime() - wtime2);
-      }
-      
-      MatMult(subdomain->getMatInteriorCoarse(), tmp_primal0, tmp_b0);
-      subdomain->solveGlobal(tmp_b0, tmp_b0);
-      MatMult(mat_lagrange, tmp_b0, tmp_lagrange);
       
+      if (!augmentedLagrange) {
+	double wtime2 = MPI::Wtime();
+	// tmp_primal0 = inv(S_PiPi) (f_Pi - M_PiB * inv(K_BB) * f_B)
+	KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0);
+	if (printTimings) {
+	  MSG("FETI-DP timing 09a: %.5f seconds (create rhs vector)\n", 
+	      MPI::Wtime() - wtime2);
+	}
+	
+	MatMult(subdomain->getMatInteriorCoarse(), tmp_primal0, tmp_b0);
+	subdomain->solveGlobal(tmp_b0, tmp_b0);
+	MatMult(mat_lagrange, tmp_b0, tmp_lagrange);
+      } else {
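+	// Set up the nested coarse vector [tmp_primal0, tmp_mu] and solve the
+	// augmented Schur primal system for the primal unknowns and the edge
+	// constraint multipliers.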
+	Vec tmp_mu;
+	// tmp_mu lives in the constraint (column) space of mat_augmented_lagrange.
+	MatGetVecs(mat_augmented_lagrange, &tmp_mu, PETSC_NULL);
+	Vec vec_array[2] = {tmp_primal0, tmp_mu};
+	Vec vec_nest;
+	VecCreateNest(PETSC_COMM_WORLD, 2, PETSC_NULL, vec_array, &vec_nest);
+
+	KSPSolve(ksp_schur_primal, vec_nest, vec_nest);
+
+	// Step 1: tmp_lagrange = J inv(K_BB) K_BPi u_Pi
+	MatMult(subdomain->getMatInteriorCoarse(), tmp_primal0, tmp_b0);
+	subdomain->solveGlobal(tmp_b0, tmp_b0);
+	MatMult(mat_lagrange, tmp_b0, tmp_lagrange);
+
+	// Step 2: tmp_lagrange += J inv(K_BB) trans(J) Q mu
+	Vec tmp_lagrange1;
+	VecDuplicate(tmp_lagrange, &tmp_lagrange1);
+	MatMult(mat_augmented_lagrange, tmp_mu, tmp_lagrange1);
+	MatMultTranspose(mat_lagrange, tmp_lagrange1, tmp_b0);
+	subdomain->solveGlobal(tmp_b0, tmp_b0);
+	MatMult(mat_lagrange, tmp_b0, tmp_lagrange1);
+	VecAXPY(tmp_lagrange, 1.0, tmp_lagrange1);
+	VecDestroy(&tmp_lagrange1);
+
+
+	VecDestroy(&tmp_mu);
+	VecDestroy(&vec_nest);
+      }
+
       VecAXPBY(vecRhsLagrange, -1.0, 1.0, tmp_lagrange);
     } else {
+      TEST_EXIT(!augmentedLagrange)("Not yet implemented!\n");
+
       subdomain->solveGlobal(subdomain->getVecRhsInterior(), tmp_b0);  
 
       MatMult(subdomain->getMatCoarseInterior(), tmp_b0, tmp_primal0);
@@ -1915,6 +2078,9 @@ namespace AMDiS {
 
     MatDestroy(&mat_lagrange);
 
+    if (augmentedLagrange)
+      MatDestroy(&mat_augmented_lagrange);
+
     // === Destroy preconditioner data structures. ===
 
     if (fetiPreconditioner != FETI_NONE)
diff --git a/AMDiS/src/parallel/PetscSolverFeti.h b/AMDiS/src/parallel/PetscSolverFeti.h
index a90a1a61..d30f5d5d 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.h
+++ b/AMDiS/src/parallel/PetscSolverFeti.h
@@ -122,6 +122,8 @@ namespace AMDiS {
     /// variables.
     void createLagrange(const FiniteElemSpace *feSpace);
 
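+    /// Counterpart of \ref createLagrange for the augmented constraints;
+    /// currently a placeholder that is only active if \ref augmentedLagrange
+    /// is set.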
+    void createAugmentedLagrange(const FiniteElemSpace *feSpace);
+
     /// Creates a global index of the B variables.
     void createIndexB(const FiniteElemSpace *feSpace);
 
@@ -129,6 +131,8 @@ namespace AMDiS {
     /// to \ref mat_lagrange.
     void createMatLagrange(vector<const FiniteElemSpace*> &feSpaces);
 
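+    /// Creates the constraint matrix for the augmented coarse space and
+    /// stores it in \ref mat_augmented_lagrange.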
+    void createMatAugmentedLagrange(vector<const FiniteElemSpace*> &feSpaces);
+
     ///
     void createPreconditionerMatrix(Matrix<DOFMatrix*> *mat);
 
@@ -255,6 +259,9 @@ namespace AMDiS {
     /// Global PETSc matrix of Lagrange variables.
     Mat mat_lagrange;
 
+    /// Matrix of the augmented Lagrange constraints: one column per owned
+    /// interior boundary edge and FE space.
+    Mat mat_augmented_lagrange;
+
     /// 0: Solve the Schur complement on primal variables with iterative solver.
     /// 1: Create the Schur complement matrix explicitly and solve it with a
     ///    direct solver.
@@ -271,6 +278,9 @@ namespace AMDiS {
     /// Data for MatMult operation in matrix \ref mat_schur_primal
     SchurPrimalData schurPrimalData;
 
+    /// Data for the MatMult operation in the augmented Schur primal matrix
+    /// \ref mat_schur_primal.
+    SchurPrimalAugmentedData schurPrimalAugmentedData;
+
     /// PETSc solver object to solve a system with FETI-DP.    
     KSP ksp_feti;
 
@@ -317,6 +327,12 @@ namespace AMDiS {
 
     bool printTimings;
 
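+    /// If true, the FETI-DP coarse space is augmented by constraints on the
+    /// interior boundary edges (parameter "parallel->feti->augmented lagrange").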
+    bool augmentedLagrange;
+
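+    /// Number of augmented edge constraints owned by this rank (counted per
+    /// FE space).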
+    int nRankEdges;
+
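+    /// Global number of augmented edge constraints.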
+    int nOverallEdges;
+
     bool stokesMode;
 
     const FiniteElemSpace* pressureFeSpace;
diff --git a/AMDiS/src/parallel/PetscSolverFetiOperators.cc b/AMDiS/src/parallel/PetscSolverFetiOperators.cc
index 3e9ed715..fb6c2ad3 100644
--- a/AMDiS/src/parallel/PetscSolverFetiOperators.cc
+++ b/AMDiS/src/parallel/PetscSolverFetiOperators.cc
@@ -34,13 +34,57 @@ namespace AMDiS {
   }
 
 
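+  // y = mat * x, where mat is the Schur primal operator augmented by the edge
+  // constraints Q = mat_augmented_lagrange; x and y are nested vectors of the
+  // form [x_Pi, x_mu] and [y_Pi, y_mu].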
+  int petscMultMatSchurPrimalAugmented(Mat mat, Vec x, Vec y)
+  {
+    void *ctx;
+    MatShellGetContext(mat, &ctx);
+    SchurPrimalAugmentedData* data = 
+      static_cast<SchurPrimalAugmentedData*>(ctx);
+
+    Vec x_primal, x_mu, y_primal, y_mu;    
+    VecNestGetSubVec(x, 0, &x_primal);
+    VecNestGetSubVec(x, 1, &x_mu);
+    VecNestGetSubVec(y, 0, &y_primal);
+    VecNestGetSubVec(y, 1, &y_mu);
+
+    // inv(K_BB) K_BPi x_Pi
+    MatMult(data->subSolver->getMatInteriorCoarse(), x_primal, data->tmp_vec_b0);
+    data->subSolver->solveGlobal(data->tmp_vec_b0, data->tmp_vec_b0);
+
+    // inv(K_BB) J^T Q x_mu
+    MatMult(*(data->mat_augmented_lagrange), x_mu, data->tmp_vec_lagrange);
+    MatMultTranspose(*(data->mat_lagrange), data->tmp_vec_lagrange, data->tmp_vec_b1);
+    data->subSolver->solveGlobal(data->tmp_vec_b1, data->tmp_vec_b1);
+
+    // y_Pi = (K_PiPi - K_PiB inv(K_BB) K_BPi) x_pi
+    MatMult(data->subSolver->getMatCoarseInterior(), data->tmp_vec_b0,
+	    data->tmp_vec_primal);
+    MatMult(data->subSolver->getMatCoarse(), x_primal, y_primal);
+    VecAXPY(y_primal, -1.0, data->tmp_vec_primal);
+
+    // y_Pi += (-K_PiB inv(K_BB) J^T Q) x_mu
+    MatMult(data->subSolver->getMatCoarseInterior(), data->tmp_vec_b1,
+	    data->tmp_vec_primal);
+    VecAXPY(y_primal, -1.0, data->tmp_vec_primal);
+
+    // y_mu = (-Q^T J inv(K_BB) K_BPi) x_Pi + (-Q^T J inv(K_BB) J^T Q) x_mu
+    //      = -Q^T J (inv(K_BB) K_BPi x_Pi + inv(K_BB) J^T Q x_mu)
+    VecAXPY(data->tmp_vec_b0, 1.0, data->tmp_vec_b1);
+    MatMult(*(data->mat_lagrange), data->tmp_vec_b0, data->tmp_vec_lagrange);
+    MatMult(*(data->mat_augmented_lagrange), data->tmp_vec_lagrange, y_mu);
+    VecScale(y_mu, -1.0);
+
+    return 0;
+  }
+
+
   // y = mat * x
   int petscMultMatFeti(Mat mat, Vec x, Vec y)
   {
     FUNCNAME("petscMultMatFeti()");
 
-    //    F = L inv(K_BB) trans(L) + L inv(K_BB) K_BPi inv(S_PiPi) K_PiB inv(K_BB) trans(L)
-    // => F = L [I + inv(K_BB) K_BPi inv(S_PiPi) K_PiB] inv(K_BB) trans(L)
+    //    F = J inv(K_BB) trans(J) + J inv(K_BB) K_BPi inv(S_PiPi) K_PiB inv(K_BB) trans(J)
+    // => F = J [I + inv(K_BB) K_BPi inv(S_PiPi) K_PiB] inv(K_BB) trans(J)
 
     double wtime = MPI::Wtime();
 
@@ -81,6 +125,58 @@ namespace AMDiS {
   }
 
 
+  // y = mat * x
+  int petscMultMatFetiAugmented(Mat mat, Vec x, Vec y)
+  {
+    FUNCNAME("petscMultMatFetiAugmented()");
+
+    void *ctx;
+    MatShellGetContext(mat, &ctx);
+    FetiData* data = static_cast<FetiData*>(ctx);
+
+    Vec vec_mu0, vec_mu1;
+    MatGetVecs(*(data->mat_augmented_lagrange), &vec_mu0, PETSC_NULL);
+    VecDuplicate(vec_mu0, &vec_mu1);
+
+    MatMultTranspose(*(data->mat_lagrange), x, data->tmp_vec_b0);
+    data->subSolver->solveGlobal(data->tmp_vec_b0, data->tmp_vec_b0);
+
+    MatMult(data->subSolver->getMatCoarseInterior(), data->tmp_vec_b0, data->tmp_vec_primal0);
+    MatMult(*(data->mat_lagrange), data->tmp_vec_b0, data->tmp_vec_lagrange);
+    MatMultTranspose(*(data->mat_augmented_lagrange), data->tmp_vec_lagrange, vec_mu0);
+
+    Vec vec_array0[2] = {data->tmp_vec_primal0, vec_mu0};
+    Vec vec_array1[2] = {data->tmp_vec_primal1, vec_mu1};
+    Vec vec_nest0, vec_nest1;
+    VecCreateNest(PETSC_COMM_WORLD, 2, PETSC_NULL, vec_array0, &vec_nest0);
+    VecCreateNest(PETSC_COMM_WORLD, 2, PETSC_NULL, vec_array1, &vec_nest1);
+
+    KSPSolve(*(data->ksp_schur_primal), vec_nest0, vec_nest1);
+
+    // Step 1: y = J inv(K_BB) trans(J) x
+    MatMult(*(data->mat_lagrange), data->tmp_vec_b0, y);
+
+    // Step 2: y += J inv(K_BB) K_BPi u_Pi
+    MatMult(data->subSolver->getMatInteriorCoarse(), data->tmp_vec_primal1, data->tmp_vec_b0);
+    data->subSolver->solveGlobal(data->tmp_vec_b0, data->tmp_vec_b0);
+    MatMult(*(data->mat_lagrange), data->tmp_vec_b0, data->tmp_vec_lagrange);
+    VecAXPY(y, 1.0, data->tmp_vec_lagrange);
+
+    // Step 3: y += J inv(K_BB) trans(J) Q mu
+    MatMult(*(data->mat_augmented_lagrange), vec_mu1, data->tmp_vec_lagrange);
+    MatMultTranspose(*(data->mat_lagrange), data->tmp_vec_lagrange, data->tmp_vec_b0);
+    data->subSolver->solveGlobal(data->tmp_vec_b0, data->tmp_vec_b0);
+    MatMult(*(data->mat_lagrange), data->tmp_vec_b0, data->tmp_vec_lagrange);
+    VecAXPY(y, 1.0, data->tmp_vec_lagrange);
+
+    VecDestroy(&vec_mu0);
+    VecDestroy(&vec_mu1);
+    VecDestroy(&vec_nest0);
+    VecDestroy(&vec_nest1);
+    return 0;
+  }
+
+
   int petscMultMatFetiInterface(Mat mat, Vec x, Vec y)
   {
     FUNCNAME("petscMultMatFetiInterface()");
diff --git a/AMDiS/src/parallel/PetscSolverFetiOperators.h b/AMDiS/src/parallel/PetscSolverFetiOperators.h
index e305b25c..ce954209 100644
--- a/AMDiS/src/parallel/PetscSolverFetiOperators.h
+++ b/AMDiS/src/parallel/PetscSolverFetiOperators.h
@@ -31,9 +31,15 @@ namespace AMDiS {
   /// 
   int petscMultMatSchurPrimal(Mat mat, Vec x, Vec y);
 
+  /// Schur primal operator with augmented Lagrange constraints
+  int petscMultMatSchurPrimalAugmented(Mat mat, Vec x, Vec y);
+
   /// FETI-DP operator
   int petscMultMatFeti(Mat mat, Vec x, Vec y);
 
+  /// FETI-DP operator with augmented Lagrange constraints
+  int petscMultMatFetiAugmented(Mat mat, Vec x, Vec y);
+
   /// FETI-DP operator used for Stokes like problems.
   int petscMultMatFetiInterface(Mat mat, Vec x, Vec y);
 
diff --git a/AMDiS/src/parallel/PetscSolverFetiStructs.h b/AMDiS/src/parallel/PetscSolverFetiStructs.h
index 72ee3e15..11eafe4e 100644
--- a/AMDiS/src/parallel/PetscSolverFetiStructs.h
+++ b/AMDiS/src/parallel/PetscSolverFetiStructs.h
@@ -47,6 +47,25 @@ namespace AMDiS {
   };
 
 
+  /** \brief
+   * This structure is used when defining the augmented Schur primal operator,
+   * i.e., the Schur complement on the primal variables extended by the
+   * augmented Lagrange (edge) constraints.
+   */
+  struct SchurPrimalAugmentedData {
+    /// Temporary vectors on the B variables.
+    Vec tmp_vec_b0, tmp_vec_b1;
+
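+    /// Temporary vector on the primal variables.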
+    Vec tmp_vec_primal;
+
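+    /// Temporary vector on the Lagrange multipliers.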
+    Vec tmp_vec_lagrange;
+
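+    /// Pointer to the matrix of Lagrange constraints.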
+    Mat *mat_lagrange;
+
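+    /// Pointer to the matrix of augmented Lagrange (edge) constraints.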
+    Mat *mat_augmented_lagrange;
+
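+    /// Solver object for the local subdomain problems.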
+    PetscSolver* subSolver;
+  };
+
+
   /** \brief
    * This structure is used when defining the FETI-DP operator for solving 
    * the system matrix reduced to the Lagrange multipliers.
@@ -56,6 +75,9 @@ namespace AMDiS {
     /// Matrix of Lagrange variables.
     Mat *mat_lagrange;
 
+    /// Matrix of augmented Lagrange (edge) constraints.
+    Mat *mat_augmented_lagrange;
+
     /// Temporal vectors on the B variables.
     Vec tmp_vec_b0, tmp_vec_b1;
 
-- 
GitLab