From c21d2182fbb730239da93c6f1439b1bc9ad5fb50 Mon Sep 17 00:00:00 2001 From: Thomas Witkowski <thomas.witkowski@gmx.de> Date: Mon, 8 Oct 2012 13:46:40 +0000 Subject: [PATCH] Added first version of FETI-DP with augmented coarse grid. --- AMDiS/src/Recovery.cc | 76 ++++-- AMDiS/src/parallel/PetscSolverFeti.cc | 232 +++++++++++++++--- AMDiS/src/parallel/PetscSolverFeti.h | 16 ++ .../src/parallel/PetscSolverFetiOperators.cc | 100 +++++++- AMDiS/src/parallel/PetscSolverFetiOperators.h | 6 + AMDiS/src/parallel/PetscSolverFetiStructs.h | 22 ++ 6 files changed, 403 insertions(+), 49 deletions(-) diff --git a/AMDiS/src/Recovery.cc b/AMDiS/src/Recovery.cc index 51603fe6..79d03eef 100644 --- a/AMDiS/src/Recovery.cc +++ b/AMDiS/src/Recovery.cc @@ -11,6 +11,9 @@ #include "Recovery.h" +#ifdef HAVE_PARALLEL_DOMAIN_AMDIS +#include "parallel/MeshDistributor.h" +#endif RecoveryStructure& RecoveryStructure::operator=(const RecoveryStructure& rhs) { @@ -386,7 +389,7 @@ void Recovery::compute_node_sums(DOFVector<double> *uh, ElInfo *elInfo, TEST_EXIT_DBG(!gradient) ("SPR of flux or gradient need computing interior sums\n"); - TEST_EXIT_DBG(feSpace->getMesh()->getDim()==1) + TEST_EXIT_DBG(feSpace->getMesh()->getDim() == 1) ("At the moment only for linear finite elements.\n"); WorldVector<double> node; // For world coordinates at nodes. @@ -514,6 +517,13 @@ void Recovery::fill_struct_vec(DOFVector<double> *uh, if (degree == 2 && dim > 1) fill_flag |= Mesh::FILL_NEIGH | Mesh::FILL_OPP_COORDS; +#ifdef HAVE_PARALLEL_DOMAIN_AMDIS + DofContainer boundaryDofs; + DofContainerSet boundaryDofSet; + MeshDistributor::globalMeshDistributor->getAllBoundaryDofs(feSpace, 0, boundaryDofs); + boundaryDofSet.insert(boundaryDofs.begin(), boundaryDofs.end()); +#endif + TraverseStack stack; ElInfo *el_info = stack.traverseFirst(mesh, -1, fill_flag); @@ -523,7 +533,13 @@ void Recovery::fill_struct_vec(DOFVector<double> *uh, int n_neighbors = 0; // counting interior vertices of element for (int i = 0; i < n_vertices; i++) { DegreeOfFreedom k = dof[i][preDofs[VERTEX]]; - if (el_info->getBoundary(VERTEX, i) == INTERIOR) +#ifdef HAVE_PARALLEL_DOMAIN_AMDIS + bool isInterior = (el_info->getBoundary(VERTEX, i) == INTERIOR) && (boundaryDofSet.count(dof[i]) == 0); +#else + bool isInterior = el_info->getBoundary(VERTEX, i) == INTERIOR; +#endif + + if (isInterior) interior_vertices[n_neighbors++] = k; } @@ -531,6 +547,12 @@ void Recovery::fill_struct_vec(DOFVector<double> *uh, ("Each element should have a least one interior vertex!\n"); for (int i = 0; i < n_vertices; i++) { // Handling nodes on vertices. +#ifdef HAVE_PARALLEL_DOMAIN_AMDIS + bool isInterior = (el_info->getBoundary(VERTEX, i) == INTERIOR) && (boundaryDofSet.count(dof[i]) == 0); +#else + bool isInterior = el_info->getBoundary(VERTEX, i) == INTERIOR; +#endif + DegreeOfFreedom k = dof[i][preDofs[VERTEX]]; // Setting world coordinates of node. @@ -539,7 +561,7 @@ void Recovery::fill_struct_vec(DOFVector<double> *uh, *(*struct_vec)[k].coords = el_info->getCoord(i); } - if (el_info->getBoundary(VERTEX, i) == INTERIOR) { + if (isInterior) { // Allocating memory for matrix and right hand side. if (!(*struct_vec)[k].A) { (*struct_vec)[k].A = new Matrix<double>(n_monomials, n_monomials); @@ -584,10 +606,12 @@ void Recovery::fill_struct_vec(DOFVector<double> *uh, int n_count = n_vertices; - if (dim > 1) // Handling nodes on edges. - for (int i = 0; i < n_edges; i++) + if (dim > 1) { // Handling nodes on edges. 
+ for (int i = 0; i < n_edges; i++) { + bool isEdgeInterior = el_info->getBoundary(EDGE, i) == INTERIOR; + for (int j = 0; j < (*nDOFs)[EDGE]; j++) { - DegreeOfFreedom k = dof[n_vertices+i][preDofs[EDGE] + j]; + DegreeOfFreedom k = dof[n_vertices + i][preDofs[EDGE] + j]; if (!(*struct_vec)[k].coords) { // Setting world coordinates of node. @@ -599,23 +623,27 @@ void Recovery::fill_struct_vec(DOFVector<double> *uh, // Setting list of adjacent interior vertices. (*struct_vec)[k].neighbors = new std::set<DegreeOfFreedom>; - if (el_info->getBoundary(EDGE, i) == INTERIOR) + if (isEdgeInterior) { for (int m = 0; m < 2; m++) { int l = Global::getReferenceElement(dim)->getVertexOfEdge(i, m); if (el_info->getBoundary(VERTEX, l) == INTERIOR) (*struct_vec)[k].neighbors->insert(dof[l][preDofs[VERTEX]]); - } else + } + } else { for (int m = 0; m < n_neighbors; m++) (*struct_vec)[k].neighbors->insert(interior_vertices[m]); + } } n_count++; } + } + } if (dim == 3) // Handling nodes on faces. for (int i = 0; i < n_faces; i++) for (int j = 0; j < (*nDOFs)[FACE]; j++) { - DegreeOfFreedom k = dof[n_vertices+n_edges+i][preDofs[FACE] + j]; + DegreeOfFreedom k = dof[n_vertices+n_edges + i][preDofs[FACE] + j]; if (!(*struct_vec)[k].coords) { // Setting world coordinates of node. @@ -695,6 +723,11 @@ void Recovery::recoveryUh(DOFVector<double> *uh, DOFVector<double> &rec_vec) DOFVector<double>::Iterator result_it(result, USED_DOFS); std::set<DegreeOfFreedom>::const_iterator setIterator; +// #ifdef HAVE_PARALLEL_DOMAIN_AMDIS +// DOFVector<int> neighbourSize(feSpace, "neighbourSize"); +// neighbourSize.set(0); +// #endif + for (SV_it.reset(), result_it.reset(); !result_it.end(); ++SV_it, ++result_it) { if ((*SV_it).rec_uh) { *result_it = (*(*SV_it).rec_uh)[0]; @@ -704,15 +737,30 @@ void Recovery::recoveryUh(DOFVector<double> *uh, DOFVector<double> &rec_vec) setIterator != (*SV_it).neighbors->end(); ++setIterator) { for (int i = 0; i < n_monomials; i++) - *result_it = *result_it + (*(*struct_vec)[*setIterator].rec_uh)[i] * - (*(*matrix_fcts)[0][i])(*(*SV_it).coords, - *(*struct_vec)[*setIterator].coords); + *result_it += (*(*struct_vec)[*setIterator].rec_uh)[i] * + (*(*matrix_fcts)[0][i])(*(*SV_it).coords, *(*struct_vec)[*setIterator].coords); } +// #ifdef HAVE_PARALLEL_DOMAIN_AMDIS +// neighbourSize[result_it.getDOFIndex()] = (*SV_it).neighbors->size(); +// #else *result_it /= (*SV_it).neighbors->size(); - } else - *result_it=0.0; + //#endif + } else { + *result_it = 0.0; + } } } + +#ifdef HAVE_PARALLEL_DOMAIN_AMDIS +// MeshDistributor::globalMeshDistributor->synchAddVector(rec_vec); +// MeshDistributor::globalMeshDistributor->synchAddVector(neighbourSize); + +// for (result_it.reset(); !result_it.end(); ++result_it) +// if (neighbourSize[result_it.getDOFIndex()] > 0) +// *result_it /= neighbourSize[result_it.getDOFIndex()]; + + MeshDistributor::globalMeshDistributor->synchVector(rec_vec); +#endif } diff --git a/AMDiS/src/parallel/PetscSolverFeti.cc b/AMDiS/src/parallel/PetscSolverFeti.cc index e341663a..d2ecad85 100644 --- a/AMDiS/src/parallel/PetscSolverFeti.cc +++ b/AMDiS/src/parallel/PetscSolverFeti.cc @@ -38,6 +38,7 @@ namespace AMDiS { nGlobalOverallInterior(0), printTimings(false), stokesMode(false), + augmentedLagrange(false), pressureComponent(-1) { FUNCNAME("PetscSolverFeti::PetscSolverFeti()"); @@ -68,6 +69,8 @@ namespace AMDiS { meshLevel = 1; Parameters::get("parallel->print timings", printTimings); + + Parameters::get("parallel->feti->augmented lagrange", augmentedLagrange); } @@ -213,6 +216,7 @@ 
namespace AMDiS { for (unsigned int i = 0; i < meshDistributor->getFeSpaces().size(); i++) { const FiniteElemSpace *feSpace = meshDistributor->getFeSpace(i); createLagrange(feSpace); + createAugmentedLagrange(feSpace); } lagrangeMap.update(); @@ -524,6 +528,15 @@ namespace AMDiS { } + void PetscSolverFeti::createAugmentedLagrange(const FiniteElemSpace *feSpace) + { + FUNCNAME("PetscSolverFeti::createAugmentedLagrange()"); + + if (!augmentedLagrange) + return; + } + + void PetscSolverFeti::createIndexB(const FiniteElemSpace *feSpace) { FUNCNAME("PetscSolverFeti::createIndexB()"); @@ -644,6 +657,73 @@ namespace AMDiS { } + void PetscSolverFeti::createMatAugmentedLagrange(vector<const FiniteElemSpace*> &feSpaces) + { + FUNCNAME("PetscSolverFeti::createMatAugmentedLagrange()"); + + if (!augmentedLagrange) + return; + + double wtime = MPI::Wtime(); + + nRankEdges = 0; + nOverallEdges = 0; + InteriorBoundary &intBound = meshDistributor->getIntBoundary(); + for (InteriorBoundary::iterator it(intBound.getOwn()); !it.end(); ++it) + if (it->rankObj.subObj == EDGE) + nRankEdges++; + + int rStartEdges = 0; + mpi::getDofNumbering(mpiCommGlobal, nRankEdges, rStartEdges, nOverallEdges); + + MSG("nRankEdges = %d, nOverallEdges = %d\n", nRankEdges, nOverallEdges); + + nRankEdges *= feSpaces.size(); + rStartEdges *= feSpaces.size(); + nOverallEdges *= feSpaces.size(); + + MatCreateAIJ(mpiCommGlobal, + lagrangeMap.getRankDofs(), nRankEdges, + lagrangeMap.getOverallDofs(), nOverallEdges, + 1, PETSC_NULL, 1, PETSC_NULL, + &mat_augmented_lagrange); + MatSetOption(mat_augmented_lagrange, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); + + int colCounter = rStartEdges; + for (InteriorBoundary::iterator it(intBound.getOwn()); !it.end(); ++it) { + if (it->rankObj.subObj == EDGE) { + for (int i = 0; i < feSpaces.size(); i++) { + DofContainer edgeDofs; + it->rankObj.el->getAllDofs(feSpaces[i], it->rankObj, edgeDofs); + + MSG("SIZE = %d\n", edgeDofs.size()); + + for (DofContainer::iterator it = edgeDofs.begin(); + it != edgeDofs.end(); it++) { + TEST_EXIT_DBG(isPrimal(feSpaces[i], **it) == false) + ("Should not be primal!\n"); + + int row = lagrangeMap.getMatIndex(i, **it); + double value = 1.0; + MatSetValue(mat_augmented_lagrange, row, colCounter, value, INSERT_VALUES); + } + + colCounter++; + } + } + } + + MatAssemblyBegin(mat_augmented_lagrange, MAT_FINAL_ASSEMBLY); + MatAssemblyEnd(mat_augmented_lagrange, MAT_FINAL_ASSEMBLY); + + if (printTimings) { + MPI::COMM_WORLD.Barrier(); + MSG("FETI-DP timing 05a: %.5f seconds (creation of augmented lagrange constraint matrix)\n", + MPI::Wtime() - wtime); + } + } + + void PetscSolverFeti::createSchurPrimalKsp(vector<const FiniteElemSpace*> &feSpaces) { FUNCNAME("PetscSolverFeti::createSchurPrimalKsp()"); @@ -651,21 +731,43 @@ namespace AMDiS { if (schurPrimalSolver == 0) { MSG("Create iterative schur primal solver!\n"); - schurPrimalData.subSolver = subdomain; - - localDofMap.createVec(schurPrimalData.tmp_vec_b, nGlobalOverallInterior); - primalDofMap.createVec(schurPrimalData.tmp_vec_primal); + if (augmentedLagrange == false) { + schurPrimalData.subSolver = subdomain; + + localDofMap.createVec(schurPrimalData.tmp_vec_b, nGlobalOverallInterior); + primalDofMap.createVec(schurPrimalData.tmp_vec_primal); + + MatCreateShell(mpiCommGlobal, + primalDofMap.getRankDofs(), + primalDofMap.getRankDofs(), + primalDofMap.getOverallDofs(), + primalDofMap.getOverallDofs(), + &schurPrimalData, + &mat_schur_primal); + MatShellSetOperation(mat_schur_primal, MATOP_MULT, + 
(void(*)(void))petscMultMatSchurPrimal); + } else { + schurPrimalAugmentedData.subSolver = subdomain; + + localDofMap.createVec(schurPrimalAugmentedData.tmp_vec_b0, nGlobalOverallInterior); + localDofMap.createVec(schurPrimalAugmentedData.tmp_vec_b1, nGlobalOverallInterior); + primalDofMap.createVec(schurPrimalAugmentedData.tmp_vec_primal); + lagrangeMap.createVec(schurPrimalAugmentedData.tmp_vec_lagrange); + + schurPrimalAugmentedData.mat_lagrange = &mat_lagrange; + schurPrimalAugmentedData.mat_augmented_lagrange = &mat_augmented_lagrange; + + MatCreateShell(mpiCommGlobal, + primalDofMap.getRankDofs() + nRankEdges, + primalDofMap.getRankDofs() + nRankEdges, + primalDofMap.getOverallDofs() + nOverallEdges, + primalDofMap.getOverallDofs() + nOverallEdges, + &schurPrimalAugmentedData, + &mat_schur_primal); + MatShellSetOperation(mat_schur_primal, MATOP_MULT, + (void(*)(void))petscMultMatSchurPrimalAugmented); + } - MatCreateShell(mpiCommGlobal, - primalDofMap.getRankDofs(), - primalDofMap.getRankDofs(), - primalDofMap.getOverallDofs(), - primalDofMap.getOverallDofs(), - &schurPrimalData, - &mat_schur_primal); - MatShellSetOperation(mat_schur_primal, MATOP_MULT, - (void(*)(void))petscMultMatSchurPrimal); - KSPCreate(mpiCommGlobal, &ksp_schur_primal); KSPSetOperators(ksp_schur_primal, mat_schur_primal, mat_schur_primal, SAME_NONZERO_PATTERN); KSPSetOptionsPrefix(ksp_schur_primal, "schur_primal_"); @@ -674,6 +776,8 @@ namespace AMDiS { } else { MSG("Create direct schur primal solver!\n"); + TEST_EXIT(!augmentedLagrange)("Not yet supported!\n"); + double wtime = MPI::Wtime(); TEST_EXIT_DBG(meshLevel == 0) @@ -789,10 +893,21 @@ namespace AMDiS { FUNCNAME("PetscSolverFeti::destroySchurPrimal()"); if (schurPrimalSolver == 0) { - schurPrimalData.subSolver = NULL; - - VecDestroy(&schurPrimalData.tmp_vec_b); - VecDestroy(&schurPrimalData.tmp_vec_primal); + if (augmentedLagrange == false) { + schurPrimalData.subSolver = NULL; + + VecDestroy(&schurPrimalData.tmp_vec_b); + VecDestroy(&schurPrimalData.tmp_vec_primal); + } else { + schurPrimalAugmentedData.subSolver = NULL; + schurPrimalAugmentedData.mat_lagrange = NULL; + schurPrimalAugmentedData.mat_augmented_lagrange = NULL; + + VecDestroy(&schurPrimalAugmentedData.tmp_vec_b0); + VecDestroy(&schurPrimalAugmentedData.tmp_vec_b1); + VecDestroy(&schurPrimalAugmentedData.tmp_vec_primal); + VecDestroy(&schurPrimalAugmentedData.tmp_vec_lagrange); + } } MatDestroy(&mat_schur_primal); @@ -821,9 +936,18 @@ namespace AMDiS { lagrangeMap.getOverallDofs(), lagrangeMap.getOverallDofs(), &fetiData, &mat_feti); - MatShellSetOperation(mat_feti, MATOP_MULT, - (void(*)(void))petscMultMatFeti); + if (augmentedLagrange == false) { + MatShellSetOperation(mat_feti, MATOP_MULT, + (void(*)(void))petscMultMatFeti); + } else { + fetiData.mat_augmented_lagrange = &mat_augmented_lagrange; + primalDofMap.createVec(fetiData.tmp_vec_primal1); + MatShellSetOperation(mat_feti, MATOP_MULT, + (void(*)(void))petscMultMatFetiAugmented); + } } else { + TEST_EXIT_DBG(!augmentedLagrange)("Not yet implemented!\n"); + localDofMap.createVec(fetiData.tmp_vec_b1, nGlobalOverallInterior); primalDofMap.createVec(fetiData.tmp_vec_primal1); interfaceDofMap.createVec(fetiData.tmp_vec_interface); @@ -1047,6 +1171,11 @@ namespace AMDiS { VecDestroy(&fetiData.tmp_vec_lagrange); VecDestroy(&fetiData.tmp_vec_primal0); + if (augmentedLagrange) { + fetiData.mat_augmented_lagrange = PETSC_NULL; + VecDestroy(&fetiData.tmp_vec_b1); + } + if (stokesMode) { VecDestroy(&fetiData.tmp_vec_b1); 
VecDestroy(&fetiData.tmp_vec_primal1); @@ -1470,6 +1599,9 @@ namespace AMDiS { // === Create and fill PETSc matrix for Lagrange constraints. === createMatLagrange(feSpaces); + // === === + createMatAugmentedLagrange(feSpaces); + // === Create PETSc solver for the Schur complement on primal variables. === createSchurPrimalKsp(feSpaces); @@ -1775,27 +1907,58 @@ namespace AMDiS { // vecRhs = L * inv(K_BB) * f_B subdomain->solveGlobal(subdomain->getVecRhsInterior(), tmp_b0); MatMult(mat_lagrange, tmp_b0, vecRhsLagrange); - + // tmp_primal0 = M_PiB * inv(K_BB) * f_B MatMult(subdomain->getMatCoarseInterior(), tmp_b0, tmp_primal0); // tmp_primal0 = f_Pi - M_PiB * inv(K_BB) * f_B VecAXPBY(tmp_primal0, 1.0, -1.0, subdomain->getVecRhsCoarse()); - - double wtime2 = MPI::Wtime(); - // tmp_primal0 = inv(S_PiPi) (f_Pi - M_PiB * inv(K_BB) * f_B) - KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0); - if (printTimings) { - MSG("FETI-DP timing 09a: %.5f seconds (create rhs vector)\n", - MPI::Wtime() - wtime2); - } - - MatMult(subdomain->getMatInteriorCoarse(), tmp_primal0, tmp_b0); - subdomain->solveGlobal(tmp_b0, tmp_b0); - MatMult(mat_lagrange, tmp_b0, tmp_lagrange); + if (!augmentedLagrange) { + double wtime2 = MPI::Wtime(); + // tmp_primal0 = inv(S_PiPi) (f_Pi - M_PiB * inv(K_BB) * f_B) + KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0); + if (printTimings) { + MSG("FETI-DP timing 09a: %.5f seconds (create rhs vector)\n", + MPI::Wtime() - wtime2); + } + + MatMult(subdomain->getMatInteriorCoarse(), tmp_primal0, tmp_b0); + subdomain->solveGlobal(tmp_b0, tmp_b0); + MatMult(mat_lagrange, tmp_b0, tmp_lagrange); + } else { + Vec tmp_mu; + MatGetVecs(mat_augmented_lagrange, PETSC_NULL, &tmp_mu); + Vec vec_array[2] = {tmp_primal0, tmp_mu}; + Vec vec_nest; + VecCreateNest(PETSC_COMM_WORLD, 2, PETSC_NULL, vec_array, &vec_nest); + + KSPSolve(ksp_schur_primal, vec_nest, vec_nest); + + // 1 Step + MatMult(subdomain->getMatInteriorCoarse(), tmp_primal0, tmp_b0); + subdomain->solveGlobal(tmp_b0, tmp_b0); + MatMult(mat_lagrange, tmp_b0, tmp_lagrange); + + // 2 Step + Vec tmp_lagrange1; + VecDuplicate(tmp_lagrange, &tmp_lagrange1); + MatMult(mat_augmented_lagrange, tmp_mu, tmp_lagrange1); + MatMultTranspose(mat_lagrange, tmp_lagrange1, tmp_b0); + subdomain->solveGlobal(tmp_b0, tmp_b0); + MatMult(mat_lagrange, tmp_b0, tmp_lagrange1); + VecAXPY(tmp_lagrange, 1.0, tmp_lagrange1); + VecDestroy(&tmp_lagrange1); + + + VecDestroy(&tmp_mu); + VecDestroy(&vec_nest); + } + VecAXPBY(vecRhsLagrange, -1.0, 1.0, tmp_lagrange); } else { + TEST_EXIT(augmentedLagrange)("Not yet implemented!\n"); + subdomain->solveGlobal(subdomain->getVecRhsInterior(), tmp_b0); MatMult(subdomain->getMatCoarseInterior(), tmp_b0, tmp_primal0); @@ -1915,6 +2078,9 @@ namespace AMDiS { MatDestroy(&mat_lagrange); + if (augmentedLagrange) + MatDestroy(&mat_augmented_lagrange); + // === Destroy preconditioner data structures. === if (fetiPreconditioner != FETI_NONE) diff --git a/AMDiS/src/parallel/PetscSolverFeti.h b/AMDiS/src/parallel/PetscSolverFeti.h index a90a1a61..d30f5d5d 100644 --- a/AMDiS/src/parallel/PetscSolverFeti.h +++ b/AMDiS/src/parallel/PetscSolverFeti.h @@ -122,6 +122,8 @@ namespace AMDiS { /// variables. void createLagrange(const FiniteElemSpace *feSpace); + void createAugmentedLagrange(const FiniteElemSpace *feSpace); + /// Creates a global index of the B variables. void createIndexB(const FiniteElemSpace *feSpace); @@ -129,6 +131,8 @@ namespace AMDiS { /// to \ref mat_lagrange. 
void createMatLagrange(vector<const FiniteElemSpace*> &feSpaces); + void createMatAugmentedLagrange(vector<const FiniteElemSpace*> &feSpaces); + /// void createPreconditionerMatrix(Matrix<DOFMatrix*> *mat); @@ -255,6 +259,9 @@ namespace AMDiS { /// Global PETSc matrix of Lagrange variables. Mat mat_lagrange; + /// + Mat mat_augmented_lagrange; + /// 0: Solve the Schur complement on primal variables with iterative solver. /// 1: Create the Schur complement matrix explicitly and solve it with a /// direct solver. @@ -271,6 +278,9 @@ namespace AMDiS { /// Data for MatMult operation in matrix \ref mat_schur_primal SchurPrimalData schurPrimalData; + /// + SchurPrimalAugmentedData schurPrimalAugmentedData; + /// PETSc solver object to solve a system with FETI-DP. KSP ksp_feti; @@ -317,6 +327,12 @@ namespace AMDiS { bool printTimings; + bool augmentedLagrange; + + int nRankEdges; + + int nOverallEdges; + bool stokesMode; const FiniteElemSpace* pressureFeSpace; diff --git a/AMDiS/src/parallel/PetscSolverFetiOperators.cc b/AMDiS/src/parallel/PetscSolverFetiOperators.cc index 3e9ed715..fb6c2ad3 100644 --- a/AMDiS/src/parallel/PetscSolverFetiOperators.cc +++ b/AMDiS/src/parallel/PetscSolverFetiOperators.cc @@ -34,13 +34,57 @@ namespace AMDiS { } + int petscMultMatSchurPrimalAugmented(Mat mat, Vec x, Vec y) + { + void *ctx; + MatShellGetContext(mat, &ctx); + SchurPrimalAugmentedData* data = + static_cast<SchurPrimalAugmentedData*>(ctx); + + Vec x_primal, x_mu, y_primal, y_mu; + VecNestGetSubVec(x, 0, &x_primal); + VecNestGetSubVec(x, 1, &x_mu); + VecNestGetSubVec(y, 0, &y_primal); + VecNestGetSubVec(y, 1, &y_mu); + + // inv(K_BB) K_BPi x_Pi + MatMult(data->subSolver->getMatInteriorCoarse(), x_primal, data->tmp_vec_b0); + data->subSolver->solveGlobal(data->tmp_vec_b0, data->tmp_vec_b0); + + // inv(K_BB) J^T Q x_mu + MatMult(*(data->mat_augmented_lagrange), x_mu, data->tmp_vec_lagrange); + MatMultTranspose(*(data->mat_lagrange), data->tmp_vec_lagrange, data->tmp_vec_b1); + data->subSolver->solveGlobal(data->tmp_vec_b1, data->tmp_vec_b1); + + // y_Pi = (K_PiPi - K_PiB inv(K_BB) K_BPi) x_pi + MatMult(data->subSolver->getMatCoarseInterior(), data->tmp_vec_b0, + data->tmp_vec_primal); + MatMult(data->subSolver->getMatCoarse(), x_primal, y_primal); + VecAXPY(y_primal, -1.0, data->tmp_vec_primal); + + // y_Pi += (-K_PiB inv(K_BB) J^T Q) x_mu + MatMult(data->subSolver->getMatCoarseInterior(), data->tmp_vec_b1, + data->tmp_vec_primal); + VecAXPY(y_primal, -1.0, data->tmp_vec_primal); + + // y_mu = (-Q^T J inv(K_BB) K_BPi) x_Pi + (-Q^T J inv(K_BB) J^T Q) x_mu + // = -Q^T J (inv(K_BB) K_BPi x_Pi + inv(K_BB) J^T Q x_mu) + VecAXPY(data->tmp_vec_b0, 1.0, data->tmp_vec_b1); + MatMult(*(data->mat_lagrange), data->tmp_vec_b0, data->tmp_vec_lagrange); + MatMult(*(data->mat_augmented_lagrange), data->tmp_vec_lagrange, y_mu); + VecScale(y_mu, -1.0); + + return 0; + } + + // y = mat * x int petscMultMatFeti(Mat mat, Vec x, Vec y) { FUNCNAME("petscMultMatFeti()"); - // F = L inv(K_BB) trans(L) + L inv(K_BB) K_BPi inv(S_PiPi) K_PiB inv(K_BB) trans(L) - // => F = L [I + inv(K_BB) K_BPi inv(S_PiPi) K_PiB] inv(K_BB) trans(L) + // F = J inv(K_BB) trans(J) + J inv(K_BB) K_BPi inv(S_PiPi) K_PiB inv(K_BB) trans(J) + // => F = J [I + inv(K_BB) K_BPi inv(S_PiPi) K_PiB] inv(K_BB) trans(J) double wtime = MPI::Wtime(); @@ -81,6 +125,58 @@ namespace AMDiS { } + // y = mat * x + int petscMultMatFetiAugmented(Mat mat, Vec x, Vec y) + { + FUNCNAME("petscMultMatFetiAugmented()"); + + void *ctx; + MatShellGetContext(mat, &ctx); + FetiData* 
data = static_cast<FetiData*>(ctx); + + Vec vec_mu0, vec_mu1; + MatGetVecs(*(data->mat_augmented_lagrange), PETSC_NULL, &vec_mu0); + VecDuplicate(vec_mu0, &vec_mu1); + + MatMultTranspose(*(data->mat_lagrange), x, data->tmp_vec_b0); + data->subSolver->solveGlobal(data->tmp_vec_b0, data->tmp_vec_b0); + + MatMult(data->subSolver->getMatCoarseInterior(), data->tmp_vec_b0, data->tmp_vec_primal0); + MatMult(*(data->mat_lagrange), data->tmp_vec_b0, data->tmp_vec_lagrange); + MatMultTranspose(*(data->mat_augmented_lagrange), data->tmp_vec_lagrange, vec_mu0); + + Vec vec_array0[2] = {data->tmp_vec_primal0, vec_mu0}; + Vec vec_array1[2] = {data->tmp_vec_primal1, vec_mu1}; + Vec vec_nest0, vec_nest1; + VecCreateNest(PETSC_COMM_WORLD, 2, PETSC_NULL, vec_array0, &vec_nest0); + VecCreateNest(PETSC_COMM_WORLD, 2, PETSC_NULL, vec_array1, &vec_nest1); + + KSPSolve(*(data->ksp_schur_primal), vec_nest0, vec_nest1); + + // Step 1 + MatMult(*(data->mat_lagrange), data->tmp_vec_b0, y); + + // Step 2 + MatMult(data->subSolver->getMatInteriorCoarse(), data->tmp_vec_primal1, data->tmp_vec_b0); + data->subSolver->solveGlobal(data->tmp_vec_b0, data->tmp_vec_b0); + MatMult(*(data->mat_lagrange), data->tmp_vec_b0, data->tmp_vec_lagrange); + VecAXPY(y, 1.0, data->tmp_vec_lagrange); + + // Step 3 + MatMult(*(data->mat_augmented_lagrange), vec_mu1, data->tmp_vec_lagrange); + MatMultTranspose(*(data->mat_lagrange), data->tmp_vec_lagrange, data->tmp_vec_b0); + data->subSolver->solveGlobal(data->tmp_vec_b0, data->tmp_vec_b0); + MatMult(*(data->mat_lagrange), data->tmp_vec_b0, data->tmp_vec_lagrange); + VecAXPY(y, 1.0, data->tmp_vec_lagrange); + + VecDestroy(&vec_mu0); + VecDestroy(&vec_mu1); + VecDestroy(&vec_nest0); + VecDestroy(&vec_nest1); + return 0; + } + + int petscMultMatFetiInterface(Mat mat, Vec x, Vec y) { FUNCNAME("petscMultMatFetiInterface()"); diff --git a/AMDiS/src/parallel/PetscSolverFetiOperators.h b/AMDiS/src/parallel/PetscSolverFetiOperators.h index e305b25c..ce954209 100644 --- a/AMDiS/src/parallel/PetscSolverFetiOperators.h +++ b/AMDiS/src/parallel/PetscSolverFetiOperators.h @@ -31,9 +31,15 @@ namespace AMDiS { /// int petscMultMatSchurPrimal(Mat mat, Vec x, Vec y); + /// + int petscMultMatSchurPrimalAugmented(Mat mat, Vec x, Vec y); + /// FETI-DP operator int petscMultMatFeti(Mat mat, Vec x, Vec y); + /// FETI-DP operator with augmented Lagrange constraints + int petscMultMatFetiAugmented(Mat mat, Vec x, Vec y); + /// FETI-DP operator used for Stokes like problems. int petscMultMatFetiInterface(Mat mat, Vec x, Vec y); diff --git a/AMDiS/src/parallel/PetscSolverFetiStructs.h b/AMDiS/src/parallel/PetscSolverFetiStructs.h index 72ee3e15..11eafe4e 100644 --- a/AMDiS/src/parallel/PetscSolverFetiStructs.h +++ b/AMDiS/src/parallel/PetscSolverFetiStructs.h @@ -47,6 +47,25 @@ namespace AMDiS { }; + /** \brief + * + */ + struct SchurPrimalAugmentedData { + /// Temporal vectors on the B variables. + Vec tmp_vec_b0, tmp_vec_b1; + + Vec tmp_vec_primal; + + Vec tmp_vec_lagrange; + + Mat *mat_lagrange; + + Mat *mat_augmented_lagrange; + + PetscSolver* subSolver; + }; + + /** \brief * This structure is used when defining the FETI-DP operator for solving * the system matrix reduced to the Lagrange multipliers. @@ -56,6 +75,9 @@ namespace AMDiS { /// Matrix of Lagrange variables. Mat *mat_lagrange; + /// + Mat *mat_augmented_lagrange; + /// Temporal vectors on the B variables. Vec tmp_vec_b0, tmp_vec_b1; -- GitLab
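
Notes on the augmented operators (a sketch inferred from the MatMult/KSPSolve sequences in this patch, not part of the commit itself; notation follows the existing code comments: J = mat_lagrange, Q = mat_augmented_lagrange, and K_BB, K_BPi, K_PiB, K_PiPi the interior/primal coupling blocks of the subdomain solver):

createMatAugmentedLagrange() assembles Q with one column per owned interior-boundary edge and per FE space; each column carries the value 1.0 in the Lagrange rows belonging to the dual DOFs on that edge, so trans(Q) J sums the jump over each coarse edge.

The shell matrix built in createSchurPrimalKsp() for the augmented case and applied by petscMultMatSchurPrimalAugmented() acts on the nested vector (x_Pi, x_mu) as

\[
\begin{pmatrix} y_\Pi \\ y_\mu \end{pmatrix}
=
\begin{pmatrix}
K_{\Pi\Pi} - K_{\Pi B} K_{BB}^{-1} K_{B\Pi} & -\,K_{\Pi B} K_{BB}^{-1} J^{T} Q \\
-\,Q^{T} J K_{BB}^{-1} K_{B\Pi} & -\,Q^{T} J K_{BB}^{-1} J^{T} Q
\end{pmatrix}
\begin{pmatrix} x_\Pi \\ x_\mu \end{pmatrix},
\]

i.e. the primal Schur complement S_PiPi augmented by the edge constraints; call this coarse matrix S_aug (a name introduced here, not in the code). With that sign convention, the call sequence in petscMultMatFetiAugmented() realizes

\[
F_{\mathrm{aug}} = J K_{BB}^{-1} J^{T}
 + \begin{pmatrix} J K_{BB}^{-1} K_{B\Pi} & J K_{BB}^{-1} J^{T} Q \end{pmatrix}
   S_{\mathrm{aug}}^{-1}
   \begin{pmatrix} K_{\Pi B} K_{BB}^{-1} J^{T} \\ Q^{T} J K_{BB}^{-1} J^{T} \end{pmatrix},
\]

which, when Q has no columns, reduces to the standard FETI-DP operator F = J [I + K_BB^{-1} K_BPi S_PiPi^{-1} K_PiB] K_BB^{-1} trans(J) already documented in petscMultMatFeti().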