diff --git a/AMDiS/src/DOFVector.cc b/AMDiS/src/DOFVector.cc index abd1b4a7111041a93c642c0b81a218026079df64..50825ea6e0993ebd7d06ef5f45a82fe3b5ca407a 100644 --- a/AMDiS/src/DOFVector.cc +++ b/AMDiS/src/DOFVector.cc @@ -101,8 +101,14 @@ namespace AMDiS { for (int i = 0; i < nBasFcts; i++) uh[i] = operator[](localIndices[i]); value = basFcts->evalUh(lambda, uh); - } else + } else { +#ifdef HAVE_PARALLEL_DOMAIN_AMDIS + return 0.0; +#else throw(std::runtime_error("Can not eval DOFVector at point p, because point is outside geometry.")); +#endif + } + if (oldElInfo == NULL) delete elInfo; diff --git a/AMDiS/src/parallel/CheckerPartitioner.cc b/AMDiS/src/parallel/CheckerPartitioner.cc index 3e94a91136f0650ede4b43a5adfe0d46837e9ad4..4d8c747c6e31b426608223984d21ec97b15ee6ae 100644 --- a/AMDiS/src/parallel/CheckerPartitioner.cc +++ b/AMDiS/src/parallel/CheckerPartitioner.cc @@ -15,7 +15,35 @@ namespace AMDiS { + + CheckerPartitioner::CheckerPartitioner(MPI::Intracomm *comm) + : MeshPartitioner(comm), + mpiRank(mpiComm->Get_rank()), + mpiSize(mpiComm->Get_size()), + mode(0), + multilevel(false) + { + string modestr = ""; + Parameters::get("parallel->partitioner->mode", modestr); + + if (modestr == "x-stripes") + mode = 1; + else if (modestr == "y-stripes") + mode = 2; + else if (modestr == "z-stripes") + mode = 3; + else if (modestr == "tetrahedron-stripes") + mode = 4; + else if (modestr == "multilevel") + multilevel = true; + else { + if (modestr != "") { + ERROR_EXIT("No partitioner mode \"%s\"!\n", modestr.c_str()); + } + } + } + bool CheckerPartitioner::createInitialPartitioning() { FUNCNAME("CheckerPartitioner::createInitialPartitioning()"); @@ -47,6 +75,13 @@ namespace AMDiS { TEST_EXIT(MPI::COMM_WORLD.Get_size() == 16) ("Multilevel partitioning is implemented for 16 nodes only!\n"); } + + + if (mode == 4) { + TEST_EXIT(mesh->getDim() == 3)("Works only in 3D!\n"); + createTetrahedronStripes(); + } + int dim = mesh->getDim(); TraverseStack stack; @@ -114,6 +149,31 @@ namespace AMDiS { break; + case 4: + // tetrahedron-stripes + + { + int inStripe = -1; + int stripePos = -1; + for (int stripe = 0; stripe < elInStripe.size(); stripe++) { + for (int pos = 0; pos < elInStripe[stripe].size(); pos++) { + if (elInStripe[stripe][pos] == elIndex) { + inStripe = stripe; + stripePos = pos; + break; + } + } + if (inStripe >= 0) + break; + } + + TEST_EXIT(inStripe >= 0)("Should not happen!\n"); + + elInRank = inStripe; + } + + break; + default: ERROR_EXIT("Mode %d does not exists for checker based mesh partitioning!\n", mode); @@ -131,4 +191,115 @@ namespace AMDiS { return true; } + + void CheckerPartitioner::createTetrahedronStripes() + { + FUNCNAME("CheckerPartitioner::createTetrahedronStripes()"); + + vector<vector<MacroElement*> > stripes; + + int nElements = 0; + TraverseStack stack; + ElInfo *elInfo = + stack.traverseFirst(mesh, 0, Mesh::CALL_EL_LEVEL | Mesh::FILL_COORDS); + while (elInfo) { + TEST_EXIT(elInfo->getLevel() == 0)("Should not happen!\n"); + + Element *el = elInfo->getElement(); + int elIndex = el->getIndex(); + + int zeroCoordCounter = 0; + for (int i = 0; i < mesh->getGeo(VERTEX); i++) + if (fabs(elInfo->getCoord(i)[2]) < 1e-10) + zeroCoordCounter++; + + if (zeroCoordCounter == 3) { + vector<MacroElement*> tmp; + tmp.push_back(elInfo->getMacroElement()); + stripes.push_back(tmp); + + vector<int> tmpIndex; + tmpIndex.push_back(elInfo->getMacroElement()->getIndex()); + elInStripe.push_back(tmpIndex); + } + + nElements++; + elInfo = stack.traverseNext(elInfo); + } + + TEST_EXIT(mpiSize % 
stripes.size() == 0) + ("Should not happen! mpiSize = %d but %d bottom elements found!\n", + mpiSize, stripes.size()); + + int testElementCounter = 0; + for (int stripe = 0; stripe < stripes.size(); stripe++) { + MacroElement *mel = stripes[stripe][0]; + + set<int> localDofs; + for (int i = 0; i < mesh->getGeo(VERTEX); i++) + if (fabs(mel->getCoord(i)[2]) < 1e-10) + localDofs.insert(i); + TEST_EXIT(localDofs.size() == 3)("Should not happen!\n"); + + + while (mel != NULL) { + int replaceDof = -1; + for (int i = 0; i < mesh->getGeo(VERTEX); i++) + if (localDofs.count(i) == 0) + replaceDof = i; + + bool found = false; + for (std::set<int>::iterator dit = localDofs.begin(); + dit != localDofs.end(); ++dit) { + WorldVector<double> c0 = mel->getCoord(*dit); + WorldVector<double> c1 = mel->getCoord(replaceDof); + + if (fabs(c0[0] - c1[0]) < 1e-10 && + fabs(c0[1] - c1[1]) < 1e-10 && + fabs(c0[2] - c1[2]) > 1e-10) { + found = true; + localDofs.erase(dit); + localDofs.insert(replaceDof); + break; + } + } + TEST_EXIT(found)("Should not happen!\n"); + + + set<DegreeOfFreedom> faceDofs; + for (std::set<int>::iterator dit = localDofs.begin(); + dit != localDofs.end(); ++dit) + faceDofs.insert(mel->getElement()->getDof(*dit, 0)); + TEST_EXIT(faceDofs.size() == 3)("Should not happen!\n"); + + + int localFace = -1; + for (int i = 0; i < mesh->getGeo(FACE); i++) { + DofFace face = mel->getElement()->getFace(i); + bool allVertexInFace = + faceDofs.count(face.get<0>()) && + faceDofs.count(face.get<1>()) && + faceDofs.count(face.get<2>()); + if (allVertexInFace) { + localFace = i; + break; + } + } + TEST_EXIT(localFace >= 0)("Should not happen!\n"); + + MacroElement *mel_neigh = mel->getNeighbour(localFace); + if (mel_neigh) { + stripes[stripe].push_back(mel_neigh); + elInStripe[stripe].push_back(mel_neigh->getIndex()); + } + + mel = mel_neigh; + } + + testElementCounter += stripes[stripe].size(); + } + + TEST_EXIT(testElementCounter == nElements)("Should not happen!\n"); + } + } diff --git a/AMDiS/src/parallel/CheckerPartitioner.h b/AMDiS/src/parallel/CheckerPartitioner.h index 02ad14ca061f7f158e5b516b08cc825913274d42..f1c4809a04cbaa9d4e61f2f222abd371672ef2e0 100644 --- a/AMDiS/src/parallel/CheckerPartitioner.h +++ b/AMDiS/src/parallel/CheckerPartitioner.h @@ -35,35 +35,14 @@ namespace AMDiS { class CheckerPartitioner : public MeshPartitioner { public: - CheckerPartitioner(MPI::Intracomm *comm) - : MeshPartitioner(comm), - mpiRank(mpiComm->Get_rank()), - mpiSize(mpiComm->Get_size()), - mode(0), - multilevel(false) - { - string modestr = ""; - Parameters::get("parallel->partitioner->mode", modestr); - - if (modestr == "x-stripes") - mode = 1; - else if (modestr == "y-stripes") - mode = 2; - else if (modestr == "z-stripes") - mode = 3; - else if (modestr == "multilevel") - multilevel = true; - else { - if (modestr != "") { - ERROR_EXIT("No partitioner mode \"%s\"!\n", modestr.c_str()); - } - } - } + CheckerPartitioner(MPI::Intracomm *comm); ~CheckerPartitioner() {} bool createInitialPartitioning(); + void createTetrahedronStripes(); + /// \ref MeshPartitioner::partition bool partition(map<int, double> &elemWeights, PartitionMode mode = INITIAL) { @@ -81,8 +60,13 @@ namespace AMDiS { /// 0: standard mode: each node gets one box /// 1: x-stripes: each node gets one x-stripe of boxes /// 2: y-stripes: each node gets one y-stripe of boxes + /// 3: z-stripes: each node gets one y-stripe of boxes + /// 4: tetrahedron-stripes: alias Hieram mode :) int mode; + /// Only used in mode 4. 
+ vector<vector<int> > elInStripe; + bool multilevel; }; } diff --git a/AMDiS/src/parallel/MeshDistributor.cc b/AMDiS/src/parallel/MeshDistributor.cc index 4f7a2a87ad7304d2adfc3edfb485bfdd331ddc77..0c65e0ac3bdee220bda8c394473e648c895de294 100644 --- a/AMDiS/src/parallel/MeshDistributor.cc +++ b/AMDiS/src/parallel/MeshDistributor.cc @@ -579,10 +579,8 @@ namespace AMDiS { vector<ParallelDofMapping*>::iterator it = find(dofMaps.begin(), dofMaps.end(), &dofMap); - TEST_EXIT(it != dofMaps.end()) - ("Cannot find Parallel DOF mapping object which should be removed!\n"); - - dofMaps.erase(it); + if (it != dofMaps.end()) + dofMaps.erase(it); } @@ -908,7 +906,7 @@ namespace AMDiS { { FUNCNAME("MeshDistributor::createMeshLevelStructure()"); - int levelMode = -1; + int levelMode = 0; Parameters::get("parallel->level mode", levelMode); TEST_EXIT(levelMode >= 0)("Wrong level mode %d!\n", levelMode); @@ -1305,7 +1303,8 @@ namespace AMDiS { { int mapSize = data.size(); SerUtil::serialize(out, mapSize); - for (map<int, map<const FiniteElemSpace*, DofContainer> >::iterator it = data.begin(); it != data.end(); ++it) { + for (map<int, map<const FiniteElemSpace*, DofContainer> >::iterator it = data.begin(); + it != data.end(); ++it) { int rank = it->first; SerUtil::serialize(out, rank); diff --git a/AMDiS/src/parallel/ParallelCoarseSpaceSolver.cc b/AMDiS/src/parallel/ParallelCoarseSpaceSolver.cc index 486465d369b498c2cd942a4741dfe1186599eec4..a5e56b24377516959c7c526752b0cd117d09f6ff 100644 --- a/AMDiS/src/parallel/ParallelCoarseSpaceSolver.cc +++ b/AMDiS/src/parallel/ParallelCoarseSpaceSolver.cc @@ -69,16 +69,7 @@ namespace AMDiS { meshLevel = level; meshDistributor = md; -#if 0 - mpiCommGlobal = meshDistributor->getMpiComm(meshLevel); - if (meshLevel + 1 < - meshDistributor->getMeshLevelData().getNumberOfLevels()) - mpiCommLocal = meshDistributor->getMpiComm(meshLevel + 1); - else - mpiCommLocal = MPI::COMM_SELF; -#endif domainComm = meshDistributor->getMpiComm(meshLevel); - if (meshLevel >= 1) coarseSpaceComm = meshDistributor->getMpiComm(meshLevel - 1); } @@ -316,6 +307,7 @@ namespace AMDiS { FUNCNAME("ParallelCoarseSpaceSolver::vecDestroy()"); int nVec = vecSol.size(); + for (int i = 0; i < nVec; i++) { VecDestroy(&vecSol[i]); VecDestroy(&vecRhs[i]); @@ -391,7 +383,7 @@ namespace AMDiS { int groupRowsInterior = 0; if (domainComm.Get_rank() == 0) groupRowsInterior = interiorMap->getOverallDofs(); - + mpi::getDofNumbering(coarseSpaceComm, groupRowsInterior, rStartInterior, nGlobalOverallInterior); @@ -400,7 +392,7 @@ namespace AMDiS { tmp = rStartInterior; domainComm.Allreduce(&tmp, &rStartInterior, 1, MPI_INT, MPI_SUM); - } + } } } diff --git a/AMDiS/src/parallel/ParallelCoarseSpaceSolver.h b/AMDiS/src/parallel/ParallelCoarseSpaceSolver.h index 9b1b2ed70c48e4e975f0160ac1ef42005065c5f5..1df792a45deb42cfb0d5f758012d4519abb59915 100644 --- a/AMDiS/src/parallel/ParallelCoarseSpaceSolver.h +++ b/AMDiS/src/parallel/ParallelCoarseSpaceSolver.h @@ -99,6 +99,20 @@ namespace AMDiS { /// Destroys PETSc vector objects. void vecDestroy(); + + /// Just for super trick + vector<vector<Mat> >& getMat() + { + return mat; + } + + /// Just for super trick + vector<Vec>& getVecRhs() + { + return vecRhs; + } + + /// Get interior matrix. inline Mat& getMatInterior() { @@ -243,7 +257,7 @@ namespace AMDiS { /// zero stricture. bool checkMeshChange(); - private: + protected: /// Matrix of PETSc matrices. mat[0][0] is the interior discretization /// matrix, mat[1][1] corresponds to the first coarse space and so on. 
/// mat[i][j], with i not equal to j, are the coupling between the interior @@ -291,7 +305,6 @@ namespace AMDiS { /// some phase fields. bool alwaysCreateNnzStructure; - protected: /// Prefix string for parameters in init file. string initFileStr; diff --git a/AMDiS/src/parallel/ParallelDebug.cc b/AMDiS/src/parallel/ParallelDebug.cc index 5152477fee8160882137eb8df912b94e1ea7c97d..a0440abbad7ac3fefcc59d60d103f0e20f73af12 100644 --- a/AMDiS/src/parallel/ParallelDebug.cc +++ b/AMDiS/src/parallel/ParallelDebug.cc @@ -766,10 +766,10 @@ namespace AMDiS { { FUNCNAME("ParallelDebug::printBoundaryInfo()"); - // int tmp = 0; - // Parameters::get("parallel->debug->print boundary info", tmp); - // if (tmp <= 0 && force == false) - // return; + int tmp = 0; + Parameters::get("parallel->debug->print boundary info", tmp); + if (tmp <= 0 && force == false) + return; MSG("Interior boundary info:\n"); @@ -1018,6 +1018,8 @@ namespace AMDiS { { FUNCNAME("ParallelDebug::writeCsvElementMap()"); + return; + MSG("writing local Element map to CSV File \n"); Mesh *mesh = feSpace->getMesh(); diff --git a/AMDiS/src/parallel/ParallelDofMapping.cc b/AMDiS/src/parallel/ParallelDofMapping.cc index e69ac7194cc72fe2f8f43c24e43714d209c211d2..587684e544f427ee1d09f5630b76b710a02e78b5 100644 --- a/AMDiS/src/parallel/ParallelDofMapping.cc +++ b/AMDiS/src/parallel/ParallelDofMapping.cc @@ -74,7 +74,7 @@ namespace AMDiS { nOverallDofs = 0; rStartDofs = 0; mpi::getDofNumbering(mpiComm, nRankDofs, rStartDofs, nOverallDofs); - + // === If required, compute also the global indices. === if (globalMapping) { diff --git a/AMDiS/src/parallel/PetscHelper.cc b/AMDiS/src/parallel/PetscHelper.cc index 3035cc2d89e39c36d3e51c2e491ba72305f8fe71..8970f73f3d823925bbb8ebde94a0f6de68b1eb4a 100644 --- a/AMDiS/src/parallel/PetscHelper.cc +++ b/AMDiS/src/parallel/PetscHelper.cc @@ -74,7 +74,7 @@ namespace AMDiS { } - void blockMatMatSolve(KSP ksp, Mat mat0, Mat &mat1) + void blockMatMatSolve(MPI::Intracomm mpiComm, KSP ksp, Mat mat0, Mat &mat1) { // === We have to calculate mat1 = ksp mat0: === // === - get all local column vectors from mat0 === @@ -86,7 +86,7 @@ namespace AMDiS { MatGetLocalSize(mat0, &localRows, &localCols); MatGetSize(mat0, &globalRows, &globalCols); - MatCreateAIJ(PETSC_COMM_WORLD, + MatCreateAIJ(mpiComm, localRows, localCols, globalRows, globalCols, 150, PETSC_NULL, 150, PETSC_NULL, &mat1); MatSetOption(mat1, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); @@ -253,7 +253,7 @@ namespace AMDiS { const char* kspPrefix, KSPType kspType, PCType pcType, - MatSolverPackage matSolverPackage, + const MatSolverPackage matSolverPackage, PetscReal rtol, PetscReal atol, PetscInt maxIt) diff --git a/AMDiS/src/parallel/PetscHelper.h b/AMDiS/src/parallel/PetscHelper.h index d86d01446676ead35c6360f85ab8ee88d3715901..db1ae080008f0f68cb93ca84848f3d025de19be6 100644 --- a/AMDiS/src/parallel/PetscHelper.h +++ b/AMDiS/src/parallel/PetscHelper.h @@ -77,11 +77,15 @@ namespace AMDiS { * task. The overall number of rows of local matrices A must be the * number of distriubted rows in B. * - * \param[in] ksp inv(A) matrix given by a PETSc solver object. - * \param[in] mat0 matrix B - * \param[out] mat1 resulting matrix C, is created inside the function + * \param[in] mpiComm MPI Communicator object (must fit with ksp) + * \param[in] ksp inv(A) matrix given by a PETSc solver object. 
+ * \param[in] mat0 matrix B + * \param[out] mat1 resulting matrix C, is created inside the function */ - void blockMatMatSolve(KSP ksp, Mat mat0, Mat &mat1); + void blockMatMatSolve(MPI::Intracomm mpiComm, + KSP ksp, + Mat mat0, + Mat &mat1); /** \brief * Converts a 2x2 nested matrix to a MATAIJ matrix (thus not nested). @@ -92,10 +96,10 @@ namespace AMDiS { void matNestConvert(Mat matNest, Mat &mat); void setSolverWithLu(KSP ksp, - const char* kspPrefix, + const char* kspPrefix, KSPType kspType, PCType pcType, - MatSolverPackage matSolverPackage, + const MatSolverPackage matSolverPackage, PetscReal rtol = PETSC_DEFAULT, PetscReal atol = PETSC_DEFAULT, PetscInt maxIt = PETSC_DEFAULT); diff --git a/AMDiS/src/parallel/PetscSolver.cc b/AMDiS/src/parallel/PetscSolver.cc index b53ffdc2f981e0ba370f838fe1298f40443b7e40..247ff76baf036bfcb583de3ec0dfd55450bd57b7 100644 --- a/AMDiS/src/parallel/PetscSolver.cc +++ b/AMDiS/src/parallel/PetscSolver.cc @@ -23,7 +23,7 @@ namespace AMDiS { PetscSolver::PetscSolver(string name) : ParallelCoarseSpaceSolver(name), dofMap(FESPACE_WISE, true), - dofMapSd(FESPACE_WISE, true), + dofMapSubDomain(FESPACE_WISE, true), parallelDofMappingsRegistered(false), kspPrefix(""), removeRhsNullspace(false), @@ -73,19 +73,21 @@ namespace AMDiS { parallelDofMappingsRegistered = true; dofMap.init(componentSpaces, feSpaces); - dofMap.setMpiComm(levelData.getMpiComm(0)); - dofMap.setDofComm(meshDistributor->getDofComm(0)); + dofMap.setMpiComm(levelData.getMpiComm(meshLevel)); + dofMap.setDofComm(meshDistributor->getDofComm(meshLevel)); dofMap.clear(); meshDistributor->registerDofMap(dofMap); - if (nLevels > 1 && levelData.getMpiComm(1) != MPI::COMM_SELF) { - MSG("WARNING: MAKE GENERAL!\n"); - dofMapSd.init(componentSpaces, feSpaces); - dofMapSd.setMpiComm(levelData.getMpiComm(1)); - dofMapSd.setDofComm(meshDistributor->getDofComm(1)); - dofMapSd.clear(); - meshDistributor->registerDofMap(dofMapSd); + if (meshLevel + 1 < nLevels && + levelData.getMpiComm(meshLevel + 1) != MPI::COMM_SELF) { + dofMapSubDomain.init(componentSpaces, feSpaces); + dofMapSubDomain.setMpiComm(levelData.getMpiComm(meshLevel + 1)); + dofMapSubDomain.setDofComm(meshDistributor->getDofComm(meshLevel + 1)); + dofMapSubDomain.clear(); + meshDistributor->registerDofMap(dofMapSubDomain); } + + meshDistributor->updateParallelDofMappings(); } } @@ -114,6 +116,12 @@ namespace AMDiS { { FUNCNAME("PetscSolver::solveGlobal()"); + int s, ls; + VecGetSize(rhs, &s); + VecGetLocalSize(rhs, &ls); + + MSG("Solve global %d %d\n", ls, s); + ERROR_EXIT("Not implemented!\n"); } diff --git a/AMDiS/src/parallel/PetscSolver.h b/AMDiS/src/parallel/PetscSolver.h index fb0b6cc8109d36f0e028f210aa64bcaff7b64c01..ea149c3d548de213e9b620df825e8837e040f4e6 100644 --- a/AMDiS/src/parallel/PetscSolver.h +++ b/AMDiS/src/parallel/PetscSolver.h @@ -181,7 +181,10 @@ namespace AMDiS { vector<const FiniteElemSpace*> feSpaces; /// - ParallelDofMapping dofMap, dofMapSd; + ParallelDofMapping dofMap; + + /// + ParallelDofMapping dofMapSubDomain; /// If the parallel DOF mappaings of this solver are registered to the /// mesh distributor object, this variable is set to true to remove them diff --git a/AMDiS/src/parallel/PetscSolverFeti.cc b/AMDiS/src/parallel/PetscSolverFeti.cc index 97b359ff8b052df3aa609ac72a33f12573a10239..67cc8cda23da6df1ccaa4317073b520131541c72 100644 --- a/AMDiS/src/parallel/PetscSolverFeti.cc +++ b/AMDiS/src/parallel/PetscSolverFeti.cc @@ -29,6 +29,7 @@ namespace AMDiS { PetscSolverFeti::PetscSolverFeti(string name) : 
PetscSolver(name), + fetiSolverType(EXACT), primalDofMap(COMPONENT_WISE), dualDofMap(COMPONENT_WISE), interfaceDofMap(COMPONENT_WISE), @@ -39,8 +40,6 @@ namespace AMDiS { subDomainIsLocal(true), subdomain(NULL), massMatrixSolver(NULL), - rStartInterior(0), - nGlobalOverallInterior(0), printTimings(false), dirichletMode(0), stokesMode(false), @@ -92,10 +91,17 @@ namespace AMDiS { MSG("WARNING: CHECK THIS HERE BEFORE GOING INTO RUNNING MULTILEVEL FETI-DP!\n"); Parameters::get("parallel->level mode", levelMode); } - + + { + int tmp = 0; + Parameters::get(initFileStr + "->feti->inexact", tmp); + if (tmp == 1) + fetiSolverType = INEXACT; + if (tmp == 2) + fetiSolverType = INEXACT_REDUCED; + } Parameters::get("parallel->print timings", printTimings); - } @@ -128,6 +134,7 @@ namespace AMDiS { subdomain->setSymmetric(isSymmetric); subdomain->setHandleDirichletRows(dirichletMode == 0); subdomain->setMeshDistributor(meshDistributor, meshLevel + 1); + subdomain->init(componentSpaces, feSpaces); delete solverCreator; } @@ -274,10 +281,14 @@ namespace AMDiS { MSG(" nRankDuals = %d nOverallDuals = %d\n", dualDofMap[i].nRankDofs, dualDofMap[i].nOverallDofs); - + MSG(" nRankLagrange = %d nOverallLagrange = %d\n", lagrangeMap[i].nRankDofs, lagrangeMap[i].nOverallDofs); + + MSG(" nRankLocal = %d nOverallLocal = %d\n", + localDofMap[i].nRankDofs, + localDofMap[i].nOverallDofs); } } @@ -324,6 +335,9 @@ namespace AMDiS { if (dirichletRows[component].count(**it)) continue; + if (meshLevel == 1 && not (*interiorMap)[component].isSet(**it)) + continue; + if (subDomainIsLocal) { primals.insert(**it); } else { @@ -377,8 +391,11 @@ namespace AMDiS { if (isPrimal(component, **it)) continue; + + if (meshLevel == 1 && not (*interiorMap)[component].isSet(**it)) + continue; - if (subDomainIsLocal || dofMapSd[feSpace].isRankDof(**it)) + if (subDomainIsLocal || dofMapSubDomain[feSpace].isRankDof(**it)) dualDofMap[component].insertRankDof(**it); } } @@ -433,7 +450,7 @@ namespace AMDiS { dofs.reserve(it.getDofs().size()); for (; !it.endDofIter(); it.nextDof()) { - if (dofMapSd[feSpace].isRankDof(it.getDofIndex())) + if (dofMapSubDomain[feSpace].isRankDof(it.getDofIndex())) dofs.push_back(1); else dofs.push_back(0); @@ -500,7 +517,7 @@ namespace AMDiS { bool recvFromRank = false; for (; !it.endDofIter(); it.nextDof()) { if (!isPrimal(component, it.getDofIndex())) { - if (subDomainIsLocal || dofMapSd[feSpace].isRankDof(it.getDofIndex())) { + if (subDomainIsLocal || dofMapSubDomain[feSpace].isRankDof(it.getDofIndex())) { recvFromRank = true; break; } @@ -518,7 +535,7 @@ namespace AMDiS { int i = 0; for (; !it.endDofIter(); it.nextDof()) if (!isPrimal(component, it.getDofIndex())) - if (subDomainIsLocal || dofMapSd[feSpace].isRankDof(it.getDofIndex())) + if (subDomainIsLocal || dofMapSubDomain[feSpace].isRankDof(it.getDofIndex())) boundaryDofRanks[feSpace][it.getDofIndex()] = stdMpi.getRecvData(it.getRank())[i++]; else { @@ -574,6 +591,9 @@ namespace AMDiS { dirichletRows[component].count(i)) continue; + if (meshLevel == 1 && not (*interiorMap)[component].isSet(i)) + continue; + if (subDomainIsLocal) { localDofMap[component].insertRankDof(i, nLocalInterior); @@ -582,7 +602,7 @@ namespace AMDiS { nLocalInterior++; } else { - if (dofMapSd[feSpace].isRankDof(i)) + if (dofMapSubDomain[feSpace].isRankDof(i)) localDofMap[component].insertRankDof(i); else localDofMap[component].insertNonRankDof(i); @@ -599,7 +619,7 @@ namespace AMDiS { if (subDomainIsLocal) { localDofMap[component].insertRankDof(it->first); } else { - if 
(dofMapSd[feSpace].isRankDof(it->first)) + if (dofMapSubDomain[feSpace].isRankDof(it->first)) localDofMap[component].insertRankDof(it->first); else localDofMap[component].insertNonRankDof(it->first); @@ -613,7 +633,7 @@ namespace AMDiS { FUNCNAME("PetscSolverFeti::createMatLagrange()"); double wtime = MPI::Wtime(); - int mpiRank = meshDistributor->getMpiRank(); + int mpiRank = domainComm.Get_rank(); // === Create distributed matrix for Lagrange constraints. === @@ -687,12 +707,17 @@ namespace AMDiS { MatAssemblyEnd(mat_lagrange, MAT_FINAL_ASSEMBLY); +#if (DEBUG != 0) { int nZeroRows = PetscSolverFetiDebug::testZeroRows(mat_lagrange); int m,n; MatGetSize(mat_lagrange, &m ,&n); + mpi::globalAdd(domainComm, nZeroRows); MSG("Lagrange matrix has %d zero rows and global size of %d %d!\n", nZeroRows, m, n); + + TEST_EXIT(nZeroRows == 0)("Lagrange matrix has zero rows!\n"); } +#endif // === If required, create \ref mat_lagrange_scaled === @@ -700,7 +725,9 @@ namespace AMDiS { VecAssemblyBegin(vec_scale_lagrange); VecAssemblyEnd(vec_scale_lagrange); - if (fetiPreconditioner != FETI_NONE || stokesMode) { + if (fetiPreconditioner != FETI_NONE || + fetiSolverType == INEXACT || + stokesMode) { MatDuplicate(mat_lagrange, MAT_COPY_VALUES, &mat_lagrange_scaled); MatDiagonalScale(mat_lagrange_scaled, vec_scale_lagrange, PETSC_NULL); } @@ -935,7 +962,7 @@ namespace AMDiS { FUNCNAME("PetscSolverFeti::createSchurPrimalKsp()"); if (schurPrimalSolver == 0) { - MSG("Create iterative schur primal solver!\n"); + MSG("Create iterative schur primal solver on level %d!\n", meshLevel); if (augmentedLagrange == false) { schurPrimalData.subSolver = subdomain; @@ -989,10 +1016,6 @@ namespace AMDiS { double wtime = MPI::Wtime(); - TEST_EXIT_DBG(subDomainIsLocal) - ("Does not support for multilevel, check usage of localDofMap.\n"); - - // === Create explicit matrix representation of the Schur primal system. 
=== if (!augmentedLagrange) @@ -1042,14 +1065,14 @@ namespace AMDiS { FUNCNAME("PetscSolverFeti::createMatExplicitSchurPrimal()"); int creationMode = 0; - Parameters::get("parallel->feti->schur primal creation mode", creationMode); + Parameters::get(initFileStr + "->feti->schur primal creation mode", creationMode); if (creationMode == 0) { // matK = inv(A_BB) A_BPi Mat matK; - petsc_helper::blockMatMatSolve(subdomain->getSolver(), + petsc_helper::blockMatMatSolve(domainComm, + subdomain->getSolver(), subdomain->getMatInteriorCoarse(), matK); - // mat_schur_primal = A_PiPi - A_PiB inv(A_BB) A_BPi // = A_PiPi - A_PiB matK @@ -1058,14 +1081,12 @@ namespace AMDiS { MatAYPX(mat_schur_primal, -1.0, subdomain->getMatCoarse(), DIFFERENT_NONZERO_PATTERN); MatDestroy(&matK); - } else { - Mat tmp; - + } else { schurPrimalData.subSolver = subdomain; localDofMap.createVec(schurPrimalData.tmp_vec_b, nGlobalOverallInterior); primalDofMap.createVec(schurPrimalData.tmp_vec_primal); - + Mat tmp; MatCreateShell(domainComm, primalDofMap.getRankDofs(), primalDofMap.getRankDofs(), @@ -1101,7 +1122,8 @@ namespace AMDiS { // matTmp = inv(A_BB) A_BPi Mat matTmp; - petsc_helper::blockMatMatSolve(subdomain->getSolver(), + petsc_helper::blockMatMatSolve(domainComm, + subdomain->getSolver(), subdomain->getMatInteriorCoarse(), matTmp); @@ -1125,7 +1147,10 @@ namespace AMDiS { MatTranspose(mat_augmented_lagrange, MAT_INITIAL_MATRIX, &qT); MatTransposeMatMult(mat_lagrange, qT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &jTqT); - petsc_helper::blockMatMatSolve(subdomain->getSolver(), jTqT, matTmp); + petsc_helper::blockMatMatSolve(domainComm, + subdomain->getSolver(), + jTqT, + matTmp); MatDestroy(&qT); MatDestroy(&jTqT); @@ -1222,6 +1247,26 @@ namespace AMDiS { { FUNCNAME("PetscSolverFeti::createFetiKsp()"); + switch (fetiSolverType) { + case EXACT: + createFetiExactKsp(); + break; + case INEXACT: + createFetiInexactKsp(); + break; + case INEXACT_REDUCED: + createFetiInexactReducedKsp(); + break; + default: + ERROR_EXIT("Should not happen!\n"); + } + } + + + void PetscSolverFeti::createFetiExactKsp() + { + FUNCNAME("PetscSolverFeti::createFetiExactKsp()"); + // === Create FETI-DP solver object. 
=== fetiData.mat_lagrange = &mat_lagrange; @@ -1295,177 +1340,290 @@ namespace AMDiS { switch (fetiPreconditioner) { case FETI_DIRICHLET: - TEST_EXIT(subDomainIsLocal) - ("Check for localDofMap.getLocalMatIndex, which should not work for multilevel FETI-DP!\n"); + KSPGetPC(ksp_feti, &precon_feti); + createFetiPreconDirichlet(precon_feti); + break; - TEST_EXIT(!stokesMode) - ("Stokes mode does not yet support the Dirichlet precondition!\n"); + case FETI_LUMPED: + KSPGetPC(ksp_feti, &precon_feti); + createFetiPreconLumped(precon_feti); + break; - KSPCreate(PETSC_COMM_SELF, &ksp_interior); - KSPSetOperators(ksp_interior, mat_interior_interior, mat_interior_interior, - SAME_NONZERO_PATTERN); - KSPSetOptionsPrefix(ksp_interior, "precon_interior_"); - KSPSetType(ksp_interior, KSPPREONLY); - PC pc_interior; - KSPGetPC(ksp_interior, &pc_interior); - if (isSymmetric) { - PCSetType(pc_interior, PCCHOLESKY); - PCFactorSetMatSolverPackage(pc_interior, MATSOLVERMUMPS); - } else { - PCSetType(pc_interior, PCLU); - PCFactorSetMatSolverPackage(pc_interior, MATSOLVERUMFPACK); + default: + break; + } + } + + + void PetscSolverFeti::createFetiInexactKsp() + { + FUNCNAME("PetscSolverFeti::createFetiInexactKsp()"); + + // === Init solver === + + int localSize = + localDofMap.getRankDofs() + + primalDofMap.getRankDofs() + + lagrangeMap.getRankDofs(); + + int globalSize = + nGlobalOverallInterior + + primalDofMap.getOverallDofs() + + lagrangeMap.getOverallDofs(); + + fetiInexactData.matBB = &(subdomain->getMatInterior()); + fetiInexactData.matBPi = &(subdomain->getMatInteriorCoarse()); + fetiInexactData.matPiB = &(subdomain->getMatCoarseInterior()); + fetiInexactData.matPiPi = &(subdomain->getMatCoarse()); + fetiInexactData.mat_lagrange = &mat_lagrange; + + localDofMap.createVec(fetiInexactData.tmp_vec_b0); + localDofMap.createVec(fetiInexactData.tmp_vec_b1); + + MatCreateShell(domainComm, + localSize, localSize, globalSize, globalSize, + &fetiInexactData, &mat_feti); + + MatShellSetOperation(mat_feti, MATOP_MULT, + (void(*)(void))petscMultMatFetiInexact); + + KSPCreate(domainComm, &ksp_feti); + KSPSetOperators(ksp_feti, mat_feti, mat_feti, SAME_NONZERO_PATTERN); + KSPSetOptionsPrefix(ksp_feti, "feti_"); + KSPSetType(ksp_feti, KSPGMRES); + KSPSetTolerances(ksp_feti, 0, 1e-8, 1e+3, 1000); + KSPSetFromOptions(ksp_feti); + + + // === Init preconditioner === + + fetiInexactPreconData.ksp_schur = ksp_schur_primal; + fetiInexactPreconData.ksp_interior = subdomain->getSolver(); + fetiInexactPreconData.matPiB = &(subdomain->getMatCoarseInterior()); + fetiInexactPreconData.matBPi = &(subdomain->getMatInteriorCoarse()); + fetiInexactPreconData.mat_lagrange = &mat_lagrange; + localDofMap.createVec(fetiInexactPreconData.tmp_vec_b0); + + KSPCreate(domainComm, &(fetiInexactPreconData.ksp_pc_feti)); + KSPSetOperators(fetiInexactPreconData.ksp_pc_feti, + mat_lagrange, mat_lagrange, + SAME_NONZERO_PATTERN); + KSPGetPC(fetiInexactPreconData.ksp_pc_feti, + &(fetiInexactPreconData.pc_feti)); + createFetiPreconLumped(fetiInexactPreconData.pc_feti); + PCSetUp(fetiInexactPreconData.pc_feti); + + // === Setup pc object === + + PC pc; + KSPGetPC(ksp_feti, &pc); + PCSetType(pc, PCSHELL); + PCShellSetApply(pc, pcInexactFetiShell); + PCShellSetContext(pc, &fetiInexactPreconData); + } + + + void PetscSolverFeti::createFetiInexactReducedKsp() + { + FUNCNAME("PetscSolverFeti::createFetiInexactReducedKsp()"); + + ERROR_EXIT("Not yet implemented!\n"); + } + + + void PetscSolverFeti::createFetiPreconLumped(PC pc) + { + 
FUNCNAME("PetscSolverFeti::createFetiPreconLumped()"); + + FetiLumpedPreconData *lumpedData = + (stokesMode ? &fetiInterfaceLumpedPreconData : &fetiLumpedPreconData); + + lumpedData->mat_lagrange_scaled = &mat_lagrange_scaled; + lumpedData->mat_duals_duals = &mat_duals_duals; + + VecCreateMPI(meshDistributor->getMeshLevelData().getMpiComm(meshLevel), + localDofMap.getRankDofs(), + nGlobalOverallInterior, &(lumpedData->tmp_vec_b0)); + MatGetVecs(mat_duals_duals, PETSC_NULL, + &(lumpedData->tmp_vec_duals0)); + MatGetVecs(mat_duals_duals, PETSC_NULL, + &(lumpedData->tmp_vec_duals1)); + + for (unsigned int component = 0; component < componentSpaces.size(); + component++) { + if (stokesMode && component == pressureComponent) + continue; + + DofMap &dualMap = dualDofMap[component].getMap(); + for (DofMap::iterator it = dualMap.begin(); it != dualMap.end(); ++it) { + DegreeOfFreedom d = it->first; + int matIndexLocal = localDofMap.getLocalMatIndex(component, d); + int matIndexDual = dualDofMap.getLocalMatIndex(component, d); + lumpedData->localToDualMap[matIndexLocal] = matIndexDual; } - KSPSetFromOptions(ksp_interior); - - fetiDirichletPreconData.mat_lagrange_scaled = &mat_lagrange_scaled; - fetiDirichletPreconData.mat_interior_interior = &mat_interior_interior; - fetiDirichletPreconData.mat_duals_duals = &mat_duals_duals; - fetiDirichletPreconData.mat_interior_duals = &mat_interior_duals; - fetiDirichletPreconData.mat_duals_interior = &mat_duals_interior; - fetiDirichletPreconData.ksp_interior = &ksp_interior; + } + + if (stokesMode) { + // === Create mass matrix solver === - localDofMap.createVec(fetiDirichletPreconData.tmp_vec_b, - nGlobalOverallInterior); - MatGetVecs(mat_duals_duals, PETSC_NULL, - &(fetiDirichletPreconData.tmp_vec_duals0)); - MatGetVecs(mat_duals_duals, PETSC_NULL, - &(fetiDirichletPreconData.tmp_vec_duals1)); - MatGetVecs(mat_interior_interior, PETSC_NULL, - &(fetiDirichletPreconData.tmp_vec_interior)); - - TEST_EXIT_DBG(subDomainIsLocal) - ("Should not happen, check usage of localDofMap!\n"); - - for (unsigned int component = 0; component < componentSpaces.size(); component++) { - DofMap &dualMap = dualDofMap[component].getMap(); - for (DofMap::iterator it = dualMap.begin(); it != dualMap.end(); ++it) { - DegreeOfFreedom d = it->first; - int matIndexLocal = localDofMap.getLocalMatIndex(component, d); - int matIndexDual = dualDofMap.getLocalMatIndex(component, d); - fetiDirichletPreconData.localToDualMap[matIndexLocal] = matIndexDual; - } + const FiniteElemSpace *pressureFeSpace = + componentSpaces[pressureComponent]; + + // Create parallel DOF mapping in pressure space. 
+ ParallelDofMapping *massMapping = NULL; + if (massMatrixSolver) { + massMapping = massMatrixSolver->getDofMapping(); + } else { + massMapping = + new ParallelDofMapping(COMPONENT_WISE, true); + massMapping->init(pressureFeSpace, pressureFeSpace); + massMapping->setDofComm(meshDistributor->getDofComm(meshLevel)); + massMapping->setMpiComm(meshDistributor->getMeshLevelData().getMpiComm(0)); + } + (*massMapping)[0] = interfaceDofMap[pressureComponent]; + massMapping->update(); + + DOFMatrix massMatrix(pressureFeSpace, pressureFeSpace); + Operator op(pressureFeSpace, pressureFeSpace); + Simple_ZOT zot; + op.addTerm(&zot); + massMatrix.assembleOperator(op); + + if (!massMatrixSolver) { + massMatrixSolver = new PetscSolverGlobalMatrix(""); + massMatrixSolver->setKspPrefix("mass_"); + massMatrixSolver->setMeshDistributor(meshDistributor, meshLevel); + massMatrixSolver->setDofMapping(massMapping); } + + massMatrixSolver->fillPetscMatrix(&massMatrix); + + int r, c; + MatGetSize(massMatrixSolver->getMatInterior(), &r, &c); + MatInfo info; + MatGetInfo(massMatrixSolver->getMatInterior(), MAT_GLOBAL_SUM, &info); + MSG("MASS MAT INFO: size = %d x %d nnz = %d\n", + r, c, static_cast<int>(info.nz_used)); + + fetiInterfaceLumpedPreconData.ksp_mass = massMatrixSolver->getSolver(); + + + // === Create tmp vectors === + + localDofMap.createVec(fetiInterfaceLumpedPreconData.tmp_vec_b1); + primalDofMap.createVec(fetiInterfaceLumpedPreconData.tmp_primal); + fetiInterfaceLumpedPreconData.subSolver = subdomain; + } - KSPGetPC(ksp_feti, &precon_feti); - PCSetType(precon_feti, PCSHELL); - PCShellSetContext(precon_feti, static_cast<void*>(&fetiDirichletPreconData)); - PCShellSetApply(precon_feti, petscApplyFetiDirichletPrecon); + // === Set PC object === + PCSetType(pc, PCSHELL); + if (stokesMode) { + PCShellSetContext(pc, static_cast<void*>(&fetiInterfaceLumpedPreconData)); + PCShellSetApply(pc, petscApplyFetiInterfaceLumpedPrecon); + } else { + PCShellSetContext(pc, static_cast<void*>(&fetiLumpedPreconData)); + PCShellSetApply(pc, petscApplyFetiLumpedPrecon); + } + } - // For the case, that we want to print the timings, we force the LU - // factorization of the local problems to be done here explicitly. - if (printTimings) { - double wtime = MPI::Wtime(); - KSPSetUp(ksp_interior); - KSPSetUpOnBlocks(ksp_interior); - MPI::COMM_WORLD.Barrier(); - MSG("FETI-DP timing 08: %.5f seconds (factorization of Dirichlet preconditoner matrices)\n", - MPI::Wtime() - wtime); - } - - break; - case FETI_LUMPED: - { - FetiLumpedPreconData *lumpedData = - (stokesMode ? 
&fetiInterfaceLumpedPreconData : &fetiLumpedPreconData); + void PetscSolverFeti::createFetiPreconDirichlet(PC pc) + { + FUNCNAME("PetscSolverFeti::createFetiPreconDirichlet()"); - lumpedData->mat_lagrange_scaled = &mat_lagrange_scaled; - lumpedData->mat_duals_duals = &mat_duals_duals; - - localDofMap.createVec(lumpedData->tmp_vec_b0); - MatGetVecs(mat_duals_duals, PETSC_NULL, - &(lumpedData->tmp_vec_duals0)); - MatGetVecs(mat_duals_duals, PETSC_NULL, - &(lumpedData->tmp_vec_duals1)); - - for (unsigned int component = 0; component < componentSpaces.size(); component++) { - if (stokesMode && component == pressureComponent) - continue; - - DofMap &dualMap = dualDofMap[component].getMap(); - for (DofMap::iterator it = dualMap.begin(); it != dualMap.end(); ++it) { - DegreeOfFreedom d = it->first; - int matIndexLocal = localDofMap.getLocalMatIndex(component, d); - int matIndexDual = dualDofMap.getLocalMatIndex(component, d); - lumpedData->localToDualMap[matIndexLocal] = matIndexDual; - } - } + TEST_EXIT(subDomainIsLocal) + ("Check for localDofMap.getLocalMatIndex, which should not work for multilevel FETI-DP!\n"); + + TEST_EXIT(!stokesMode) + ("Stokes mode does not yet support the Dirichlet precondition!\n"); + + KSPCreate(PETSC_COMM_SELF, &ksp_interior); + KSPSetOperators(ksp_interior, mat_interior_interior, mat_interior_interior, + SAME_NONZERO_PATTERN); + KSPSetOptionsPrefix(ksp_interior, "precon_interior_"); + KSPSetType(ksp_interior, KSPPREONLY); + PC pc_interior; + KSPGetPC(ksp_interior, &pc_interior); + if (isSymmetric) { + PCSetType(pc_interior, PCCHOLESKY); + PCFactorSetMatSolverPackage(pc_interior, MATSOLVERMUMPS); + } else { + PCSetType(pc_interior, PCLU); + PCFactorSetMatSolverPackage(pc_interior, MATSOLVERUMFPACK); + } + KSPSetFromOptions(ksp_interior); + + fetiDirichletPreconData.mat_lagrange_scaled = &mat_lagrange_scaled; + fetiDirichletPreconData.mat_interior_interior = &mat_interior_interior; + fetiDirichletPreconData.mat_duals_duals = &mat_duals_duals; + fetiDirichletPreconData.mat_interior_duals = &mat_interior_duals; + fetiDirichletPreconData.mat_duals_interior = &mat_duals_interior; + fetiDirichletPreconData.ksp_interior = &ksp_interior; + + VecCreateMPI(meshDistributor->getMeshLevelData().getMpiComm(meshLevel), + localDofMap.getRankDofs(), + nGlobalOverallInterior, &(fetiDirichletPreconData.tmp_vec_b)); + MatGetVecs(mat_duals_duals, PETSC_NULL, + &(fetiDirichletPreconData.tmp_vec_duals0)); + MatGetVecs(mat_duals_duals, PETSC_NULL, + &(fetiDirichletPreconData.tmp_vec_duals1)); + MatGetVecs(mat_interior_interior, PETSC_NULL, + &(fetiDirichletPreconData.tmp_vec_interior)); + + TEST_EXIT_DBG(subDomainIsLocal) + ("Should not happen, check usage of localDofMap!\n"); + + for (unsigned int component = 0; component < componentSpaces.size(); component++) { + DofMap &dualMap = dualDofMap[component].getMap(); + for (DofMap::iterator it = dualMap.begin(); it != dualMap.end(); ++it) { + DegreeOfFreedom d = it->first; + int matIndexLocal = localDofMap.getLocalMatIndex(component, d); + int matIndexDual = dualDofMap.getLocalMatIndex(component, d); + fetiDirichletPreconData.localToDualMap[matIndexLocal] = matIndexDual; + } + } - if (stokesMode) { - // === Create mass matrix solver === - - const FiniteElemSpace *pressureFeSpace = - componentSpaces[pressureComponent]; - - // Create parallel DOF mapping in pressure space. 
- ParallelDofMapping *massMapping = NULL; - if (massMatrixSolver) { - massMapping = massMatrixSolver->getDofMapping(); - } else { - massMapping = - new ParallelDofMapping(COMPONENT_WISE, true); - massMapping->init(pressureFeSpace, pressureFeSpace); - massMapping->setDofComm(meshDistributor->getDofComm(meshLevel)); - massMapping->setMpiComm(meshDistributor->getMeshLevelData().getMpiComm(0)); - } - (*massMapping)[0] = interfaceDofMap[pressureComponent]; - massMapping->update(); - - DOFMatrix massMatrix(pressureFeSpace, pressureFeSpace); - Operator op(pressureFeSpace, pressureFeSpace); - Simple_ZOT zot; - op.addTerm(&zot); - massMatrix.assembleOperator(op); - - if (!massMatrixSolver) { - massMatrixSolver = new PetscSolverGlobalMatrix(""); - massMatrixSolver->setKspPrefix("mass_"); - massMatrixSolver->setMeshDistributor(meshDistributor, meshLevel); - massMatrixSolver->setDofMapping(massMapping); - } - - massMatrixSolver->fillPetscMatrix(&massMatrix); - - int r, c; - MatGetSize(massMatrixSolver->getMatInterior(), &r, &c); - MatInfo info; - MatGetInfo(massMatrixSolver->getMatInterior(), MAT_GLOBAL_SUM, &info); - MSG("MASS MAT INFO: size = %d x %d nnz = %d\n", - r, c, static_cast<int>(info.nz_used)); - - fetiInterfaceLumpedPreconData.ksp_mass = massMatrixSolver->getSolver(); + PCSetType(pc, PCSHELL); + PCShellSetContext(pc, static_cast<void*>(&fetiDirichletPreconData)); + PCShellSetApply(pc, petscApplyFetiDirichletPrecon); + // For the case, that we want to print the timings, we force the LU + // factorization of the local problems to be done here explicitly. + if (printTimings) { + double wtime = MPI::Wtime(); + KSPSetUp(ksp_interior); + KSPSetUpOnBlocks(ksp_interior); + MPI::COMM_WORLD.Barrier(); + MSG("FETI-DP timing 08: %.5f seconds (factorization of Dirichlet preconditoner matrices)\n", + MPI::Wtime() - wtime); + } + } - // === Create tmp vectors === - - localDofMap.createVec(fetiInterfaceLumpedPreconData.tmp_vec_b1); - primalDofMap.createVec(fetiInterfaceLumpedPreconData.tmp_primal); - fetiInterfaceLumpedPreconData.subSolver = subdomain; - } - - KSPGetPC(ksp_feti, &precon_feti); - PCSetType(precon_feti, PCSHELL); - if (stokesMode) { - PCShellSetContext(precon_feti, static_cast<void*>(&fetiInterfaceLumpedPreconData)); - PCShellSetApply(precon_feti, petscApplyFetiInterfaceLumpedPrecon); - } else { - PCShellSetContext(precon_feti, static_cast<void*>(&fetiLumpedPreconData)); - PCShellSetApply(precon_feti, petscApplyFetiLumpedPrecon); - } - } + void PetscSolverFeti::destroyFetiKsp() + { + FUNCNAME("PetscSolverFeti::destroyFetiKsp()"); + switch (fetiSolverType) { + case EXACT: + destroyFetiExactKsp(); break; - default: + case INEXACT: + destroyFetiInexactKsp(); + break; + case INEXACT_REDUCED: + destroyFetiInexactReducedKsp(); break; + default: + ERROR_EXIT("Should not happen!\n"); } } - - void PetscSolverFeti::destroyFetiKsp() + + void PetscSolverFeti::destroyFetiExactKsp() { - FUNCNAME("PetscSolverFeti::destroyFetiKsp()"); + FUNCNAME("PetscSolverFeti::destroyFetiExactKsp()"); // === Destroy FETI-DP solver object. 
=== @@ -1531,6 +1689,26 @@ namespace AMDiS { } + void PetscSolverFeti::destroyFetiInexactKsp() + { + FUNCNAME("PetscSolverFeti::destroyFetiInexactKsp()"); + + VecDestroy(&(fetiInexactData.tmp_vec_b0)); + VecDestroy(&(fetiInexactData.tmp_vec_b1)); + MatDestroy(&mat_feti); + KSPDestroy(&ksp_feti); + + VecDestroy(&(fetiInexactPreconData.tmp_vec_b0)); + KSPDestroy(&(fetiInexactPreconData.ksp_pc_feti)); + } + + + void PetscSolverFeti::destroyFetiInexactReducedKsp() + { + FUNCNAME("PetscSolverFeti::destroyFetiInexactReducedKsp()"); + } + + void PetscSolverFeti::createNullSpace() { FUNCNAME("PetscSolverFeti::createNullSpace()"); @@ -1601,6 +1779,11 @@ namespace AMDiS { { FUNCNAME("PetscSolverFeti::dbgMatrix()"); + if (levelMode == 2 && meshLevel == 0) { + MSG("WARNING: MAKE MORE GENERAL!\n"); + return; + } + #if (DEBUG != 0) PetscInt nRow, nCol; MatGetLocalSize(subdomain->getMatInterior(), &nRow, &nCol); @@ -1807,7 +1990,7 @@ namespace AMDiS { int petscIndex = localDofMap.getLocalMatIndex(component, it->first); dofVec[it->first] = localSolB[petscIndex]; } else { - if (dofMapSd[componentSpaces[component]].isRankDof(it->first)) { + if (dofMapSubDomain[componentSpaces[component]].isRankDof(it->first)) { int petscIndex = localDofMap.getLocalMatIndex(component, it->first); TEST_EXIT(petscIndex < bsize)("Should not happen!\n"); dofVec[it->first] = localSolB[petscIndex]; @@ -1901,8 +2084,6 @@ namespace AMDiS { createDirichletData(*mat); createFetiData(); - - TEST_EXIT(coarseSpaceMap.size() == 0)("This is not yet supported!\n"); // === Create matrices for the FETI-DP method. === @@ -1912,6 +2093,27 @@ namespace AMDiS { subdomain->fillPetscMatrix(mat); + + // === SUPER TRICK === + + if (meshLevel == 1) { + MSG("START MAT TRICK!\n"); + mlSubdomain = new PetscSolverGlobalMatrix(""); + mlSubdomain->initParameters(); + mlSubdomain->setSymmetric(isSymmetric); + mlSubdomain->setHandleDirichletRows(dirichletMode == 0); + mlSubdomain->setMeshDistributor(meshDistributor, meshLevel); + mlSubdomain->init(componentSpaces, feSpaces); + mlSubdomain->setDofMapping(interiorMap); + mlSubdomain->setCoarseSpaceDofMapping(coarseSpaceMap[0]); + mlSubdomain->fillPetscMatrix(mat); + this->mat = mlSubdomain->getMat(); + MSG("END MAT TRICK!\n"); + } + + + + if (printTimings) { MPI::COMM_WORLD.Barrier(); MSG("FETI-DP timing 02: %.5f seconds (creation of interior matrices)\n", @@ -1951,9 +2153,14 @@ namespace AMDiS { { FUNCNAME("PetscSolverFeti::fillPetscRhs()"); - TEST_EXIT(coarseSpaceMap.size() == 0)("This is not yet supported!\n"); - subdomain->fillPetscRhs(vec); + + if (meshLevel == 1) { + MSG("START VEC TRICK!\n"); + mlSubdomain->fillPetscRhs(vec); + this->vecRhs = mlSubdomain->getVecRhs(); + MSG("END VEC TRICK!\n"); + } } @@ -1961,7 +2168,7 @@ namespace AMDiS { { FUNCNAME("PetscSolverFeti::createPreconditionerMatrix()"); - if (fetiPreconditioner == FETI_NONE) + if (fetiPreconditioner == FETI_NONE && fetiSolverType == EXACT) return; double wtime = MPI::Wtime(); @@ -2048,8 +2255,7 @@ namespace AMDiS { if (isPrimal(rowComponent, *cursor)) continue; - switch (fetiPreconditioner) { - case FETI_DIRICHLET: + if (fetiPreconditioner == FETI_DIRICHLET) { colsLocal.clear(); colsLocalOther.clear(); valuesLocal.clear(); @@ -2110,11 +2316,9 @@ namespace AMDiS { MatSetValues(mat_duals_interior, 1, &rowIndex, colsLocalOther.size(), &(colsLocalOther[0]), &(valuesLocalOther[0]), INSERT_VALUES); } + } - break; - - - case FETI_LUMPED: + if (fetiPreconditioner == FETI_LUMPED || fetiSolverType == INEXACT) { if (!isDual(rowComponent, *cursor)) 
continue; @@ -2136,24 +2340,17 @@ namespace AMDiS { valuesLocal.push_back(value(*icursor)); } - { - int rowIndex = dualDofMap.getLocalMatIndex(rowComponent, *cursor); - MatSetValues(mat_duals_duals, 1, &rowIndex, colsLocal.size(), - &(colsLocal[0]), &(valuesLocal[0]), INSERT_VALUES); - } - break; - - - default: - break; - } + int rowIndex = dualDofMap.getLocalMatIndex(rowComponent, *cursor); + MatSetValues(mat_duals_duals, 1, &rowIndex, colsLocal.size(), + &(colsLocal[0]), &(valuesLocal[0]), INSERT_VALUES); + } } } } // === Start global assembly procedure for preconditioner matrices. === - if (fetiPreconditioner == FETI_LUMPED) { + if (fetiPreconditioner == FETI_LUMPED || fetiSolverType == INEXACT) { MatAssemblyBegin(mat_duals_duals, MAT_FINAL_ASSEMBLY); MatAssemblyEnd(mat_duals_duals, MAT_FINAL_ASSEMBLY); } @@ -2180,73 +2377,54 @@ namespace AMDiS { } - void PetscSolverFeti::solveFetiMatrix(SystemVector &vec) + void PetscSolverFeti::solveFeti(Vec &rhsInterior, Vec &rhsCoarse, + Vec &solInterior, Vec &solCoarse) { - FUNCNAME("PetscSolverFeti::solveFetiMatrix()"); + FUNCNAME("PetscSolverFeti::solveFeti()"); - // The function was removed when FETI-DP was reformulated for mixed finite - // elements. To reconstruct this function, check for revision number 2024. - ERROR_EXIT("This function was removed!\n"); + switch (fetiSolverType) { + case EXACT: + solveFetiExact(rhsInterior, rhsCoarse, solInterior, solCoarse); + break; + case INEXACT: + solveFetiInexact(rhsInterior, rhsCoarse, solInterior, solCoarse); + break; + case INEXACT_REDUCED: + solveFetiInexactReduced(rhsInterior, rhsCoarse, solInterior, solCoarse); + break; + default: + ERROR_EXIT("Should not happen!\n"); + } } - void PetscSolverFeti::solveReducedFetiMatrix(SystemVector &vec) + void PetscSolverFeti::solveFetiExact(Vec &rhsInterior, Vec &rhsCoarse, + Vec &solInterior, Vec &solCoarse) { - FUNCNAME("PetscSolverFeti::solveReducedFetiMatrix()"); - -#if (DEBUG != 0) - if (stokesMode) - PetscSolverFetiDebug::debugNullSpace(*this, vec); -#endif + FUNCNAME("PetscSolverFeti::solveFetiExact()"); // === Some temporary vectors. === - Vec tmp_b0, tmp_b1; - VecCreateMPI(meshDistributor->getMeshLevelData().getMpiComm(meshLevel), - localDofMap.getRankDofs(), - nGlobalOverallInterior, &tmp_b0); + Vec tmp_b1; VecCreateMPI(meshDistributor->getMeshLevelData().getMpiComm(meshLevel), localDofMap.getRankDofs(), nGlobalOverallInterior, &tmp_b1); - Vec tmp_primal0, tmp_primal1; - primalDofMap.createVec(tmp_primal0); + Vec tmp_primal1; primalDofMap.createVec(tmp_primal1); - Vec tmp_lagrange, tmp_interface; + Vec tmp_lagrange; MatGetVecs(mat_lagrange, PETSC_NULL, &tmp_lagrange); // === Create RHS and solution vectors. 
=== Vec vecRhs, vecSol; - Vec vecRhsLagrange, vecSolLagrange, vecRhsInterface, vecSolInterface; - if (!stokesMode) { - MatGetVecs(mat_lagrange, PETSC_NULL, &vecRhsLagrange); - MatGetVecs(mat_lagrange, PETSC_NULL, &vecSolLagrange); - - vecRhs = vecRhsLagrange; - vecSol = vecSolLagrange; - } else { - MatGetVecs(mat_lagrange, PETSC_NULL, &vecRhsLagrange); - MatGetVecs(mat_lagrange, PETSC_NULL, &vecSolLagrange); - - - interfaceDofMap.createVec(tmp_interface); - interfaceDofMap.createVec(vecRhsInterface); - interfaceDofMap.createVec(vecSolInterface); - - Vec vecRhsArray[2] = {vecRhsInterface, vecRhsLagrange}; - VecCreateNest(domainComm, 2, PETSC_NULL, vecRhsArray, &vecRhs); - - Vec vecSolArray[2] = {vecSolInterface, vecSolLagrange}; - VecCreateNest(domainComm, 2, PETSC_NULL, vecSolArray, &vecSol); - - VecAssemblyBegin(vecRhs); - VecAssemblyEnd(vecRhs); - - VecAssemblyBegin(vecSol); - VecAssemblyEnd(vecSol); - } + Vec vecRhsLagrange, vecSolLagrange; + MatGetVecs(mat_lagrange, PETSC_NULL, &vecRhsLagrange); + MatGetVecs(mat_lagrange, PETSC_NULL, &vecSolLagrange); + + vecRhs = vecRhsLagrange; + vecSol = vecSolLagrange; VecDuplicate(vecSol, &fetiKspData.draft); @@ -2254,89 +2432,30 @@ namespace AMDiS { double wtime = MPI::Wtime(); - if (!stokesMode) { - // d = L inv(K_BB) f_B - L inv(K_BB) K_BPi inv(S_PiPi) [f_Pi - K_PiB inv(K_BB) f_B] - // vecRhs = L * inv(K_BB) * f_B - subdomain->solveGlobal(subdomain->getVecRhsInterior(), tmp_b0); - MatMult(mat_lagrange, tmp_b0, vecRhsLagrange); - - // tmp_primal0 = M_PiB * inv(K_BB) * f_B - MatMult(subdomain->getMatCoarseInterior(), tmp_b0, tmp_primal0); - - // tmp_primal0 = f_Pi - M_PiB * inv(K_BB) * f_B - VecAXPBY(tmp_primal0, 1.0, -1.0, subdomain->getVecRhsCoarse()); - - if (!augmentedLagrange) { - double wtime2 = MPI::Wtime(); - // tmp_primal0 = inv(S_PiPi) (f_Pi - M_PiB * inv(K_BB) * f_B) - KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0); - if (printTimings) { - MSG("FETI-DP timing 09a: %.5f seconds (create rhs vector)\n", - MPI::Wtime() - wtime2); - } - - MatMult(subdomain->getMatInteriorCoarse(), tmp_primal0, tmp_b0); - subdomain->solveGlobal(tmp_b0, tmp_b0); - MatMult(mat_lagrange, tmp_b0, tmp_lagrange); - } else { - Vec tmp_mu; - MatGetVecs(mat_augmented_lagrange, PETSC_NULL, &tmp_mu); - - MatMult(mat_augmented_lagrange, vecRhsLagrange, tmp_mu); - VecScale(tmp_mu, -1.0); - - Vec vec_array[2] = {tmp_primal0, tmp_mu}; - Vec vec_nest; - VecCreateNest(PETSC_COMM_WORLD, 2, PETSC_NULL, vec_array, &vec_nest); + // d = L inv(K_BB) f_B - L inv(K_BB) K_BPi inv(S_PiPi) [f_Pi - K_PiB inv(K_BB) f_B] + // vecRhs = L * inv(K_BB) * f_B + subdomain->solveGlobal(rhsInterior, solInterior); - KSPSolve(ksp_schur_primal, vec_nest, vec_nest); - - // 1 Step - MatMult(subdomain->getMatInteriorCoarse(), tmp_primal0, tmp_b0); - subdomain->solveGlobal(tmp_b0, tmp_b0); - MatMult(mat_lagrange, tmp_b0, tmp_lagrange); - - // 2 Step - Vec tmp_lagrange1; - VecDuplicate(tmp_lagrange, &tmp_lagrange1); - MatMultTranspose(mat_augmented_lagrange, tmp_mu, tmp_lagrange1); - MatMultTranspose(mat_lagrange, tmp_lagrange1, tmp_b0); - subdomain->solveGlobal(tmp_b0, tmp_b0); - MatMult(mat_lagrange, tmp_b0, tmp_lagrange1); - VecAXPY(tmp_lagrange, 1.0, tmp_lagrange1); - VecDestroy(&tmp_lagrange1); - - - VecDestroy(&tmp_mu); - VecDestroy(&vec_nest); - } - - VecAXPY(vecRhsLagrange, -1.0, tmp_lagrange); - } else { - TEST_EXIT(!augmentedLagrange)("Not yet implemented!\n"); - - subdomain->solveGlobal(subdomain->getVecRhsInterior(), tmp_b0); - - MatMult(subdomain->getMatCoarseInterior(), tmp_b0, 
tmp_primal0); - VecAXPBY(tmp_primal0, 1.0, -1.0, subdomain->getVecRhsCoarse()); - double wtime2 = MPI::Wtime(); - // tmp_primal0 = inv(S_PiPi) (f_Pi - M_PiB * inv(K_BB) * f_B) - KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0); - if (printTimings) { - MSG("FETI-DP timing 09a: %.5f seconds (create rhs vector)\n", - MPI::Wtime() - wtime2); - } - - MatMult(subdomain->getMatInteriorCoarse(), tmp_primal0, tmp_b1); - subdomain->solveGlobal(tmp_b1, tmp_b1); - VecAXPY(tmp_b0, -1.0, tmp_b1); - - MatMult(mat_lagrange, tmp_b0, vecRhsLagrange); - - MatMult(subdomain->getMatCoarseInterior(1), tmp_b0, vecRhsInterface); - MatMult(subdomain->getMatCoarse(1, 0), tmp_primal0, tmp_interface); - VecAXPY(vecRhsInterface, 1.0, tmp_interface); + MatMult(mat_lagrange, solInterior, vecRhsLagrange); + + // solCoarse = M_PiB * inv(K_BB) * f_B + MatMult(subdomain->getMatCoarseInterior(), solInterior, solCoarse); + + // solCoarse = f_Pi - M_PiB * inv(K_BB) * f_B + VecAXPBY(solCoarse, 1.0, -1.0, rhsCoarse); + + double wtime2 = MPI::Wtime(); + // solCoarse = inv(S_PiPi) (f_Pi - M_PiB * inv(K_BB) * f_B) + KSPSolve(ksp_schur_primal, solCoarse, solCoarse); + if (printTimings) { + MSG("FETI-DP timing 09a: %.5f seconds (create rhs vector)\n", + MPI::Wtime() - wtime2); } + + MatMult(subdomain->getMatInteriorCoarse(), solCoarse, solInterior); + subdomain->solveGlobal(solInterior, solInterior); + MatMult(mat_lagrange, solInterior, tmp_lagrange); + VecAXPY(vecRhsLagrange, -1.0, tmp_lagrange); if (printTimings) { MPI::COMM_WORLD.Barrier(); @@ -2367,61 +2486,21 @@ namespace AMDiS { // === Solve for u_primals. === - if (!stokesMode) { - MatMultTranspose(mat_lagrange, vecSol, tmp_b0); - VecAYPX(tmp_b0, -1.0, subdomain->getVecRhsInterior()); - } else { - MatMultTranspose(mat_lagrange, vecSolLagrange, tmp_b0); - VecAYPX(tmp_b0, -1.0, subdomain->getVecRhsInterior()); + MatMultTranspose(mat_lagrange, vecSol, solInterior); + VecAYPX(solInterior, -1.0, rhsInterior); - MatMult(subdomain->getMatInteriorCoarse(1), vecSolInterface, tmp_b1); - VecAXPY(tmp_b0, -1.0, tmp_b1); - } + subdomain->solveGlobal(solInterior, tmp_b1); + MatMult(subdomain->getMatCoarseInterior(), tmp_b1, solCoarse); + VecAYPX(solCoarse, -1.0, rhsCoarse); - subdomain->solveGlobal(tmp_b0, tmp_b1); - MatMult(subdomain->getMatCoarseInterior(), tmp_b1, tmp_primal0); - VecAYPX(tmp_primal0, -1.0, subdomain->getVecRhsCoarse()); - - if (stokesMode) { - MatMult(subdomain->getMatCoarse(0, 1), vecSolInterface, tmp_primal1); - VecAXPY(tmp_primal0, -1.0, tmp_primal1); - } - - if (augmentedLagrange == false) { - KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0); - } else { - Vec tmp_mu; - MatGetVecs(mat_augmented_lagrange, PETSC_NULL, &tmp_mu); - - MatMult(mat_lagrange, tmp_b1, tmp_lagrange); - MatMult(mat_augmented_lagrange, tmp_lagrange, tmp_mu); - VecScale(tmp_mu, -1.0); - - Vec vec_array[2] = {tmp_primal0, tmp_mu}; - Vec vec_nest; - VecCreateNest(PETSC_COMM_WORLD, 2, PETSC_NULL, vec_array, &vec_nest); - - KSPSolve(ksp_schur_primal, vec_nest, vec_nest); - - MatMultTranspose(mat_augmented_lagrange, tmp_mu, tmp_lagrange); - MatMultTranspose(mat_lagrange, tmp_lagrange, tmp_b1); - VecAXPY(tmp_b0, -1.0, tmp_b1); - - VecDestroy(&tmp_mu); - VecDestroy(&vec_nest); - } + KSPSolve(ksp_schur_primal, solCoarse, solCoarse); // === Solve for u_b. === - MatMult(subdomain->getMatInteriorCoarse(), tmp_primal0, tmp_b1); - VecAXPY(tmp_b0, -1.0, tmp_b1); - subdomain->solveGlobal(tmp_b0, tmp_b0); - - // === And recover AMDiS solution vectors. 
=== + MatMult(subdomain->getMatInteriorCoarse(), solCoarse, tmp_b1); + VecAXPY(solInterior, -1.0, tmp_b1); + subdomain->solveGlobal(solInterior, solInterior); - recoverSolution(tmp_b0, tmp_primal0, vec); - if (stokesMode) - recoverInterfaceSolution(vecSolInterface, vec); // === Print timings and delete data. === @@ -2433,17 +2512,52 @@ namespace AMDiS { VecDestroy(&vecRhs); VecDestroy(&vecSol); - VecDestroy(&tmp_b0); VecDestroy(&tmp_b1); VecDestroy(&tmp_lagrange); - VecDestroy(&tmp_primal0); VecDestroy(&tmp_primal1); + } - if (stokesMode) { - VecDestroy(&tmp_interface); - VecDestroy(&vecRhsInterface); - VecDestroy(&vecSolInterface); - } + + void PetscSolverFeti::solveFetiInexact(Vec &rhsInterior, Vec &rhsCoarse, + Vec &solInterior, Vec &solCoarse) + { + FUNCNAME("PetscSolverFeti::solveFetiInexact()"); + + Vec tmpLagrange0, tmpLagrange1; + lagrangeMap.createVec(tmpLagrange0); + lagrangeMap.createVec(tmpLagrange1); + VecSet(tmpLagrange0, 0.0); + VecSet(tmpLagrange1, 0.0); + + Vec nestVecRhs[3]; + nestVecRhs[0] = rhsInterior; + nestVecRhs[1] = rhsCoarse; + nestVecRhs[2] = tmpLagrange0; + + Vec nestVecSol[3]; + nestVecSol[0] = solInterior; + nestVecSol[1] = solCoarse; + nestVecSol[2] = tmpLagrange1; + + Vec vecRhs, vecSol; + VecCreateNest(domainComm, 3, PETSC_NULL, nestVecRhs, &vecRhs); + VecCreateNest(domainComm, 3, PETSC_NULL, nestVecSol, &vecSol); + + KSPSolve(ksp_feti, vecRhs, vecSol); + + VecDestroy(&vecRhs); + VecDestroy(&vecSol); + VecDestroy(&tmpLagrange0); + VecDestroy(&tmpLagrange1); + } + + + void PetscSolverFeti::solveFetiInexactReduced(Vec &rhsInterior, Vec &rhsCoarse, + Vec &solInterior, Vec &solCoarse) + { + FUNCNAME("PetscSolverFeti::solveFetiInexactReduced()"); + + ERROR_EXIT("Not yet implemented!\n"); } @@ -2487,15 +2601,101 @@ namespace AMDiS { { FUNCNAME("PetscSolverFeti::solvePetscMatrix()"); - int debug = 0; - Parameters::get("parallel->debug feti", debug); + Vec solInterior; + VecCreateMPI(meshDistributor->getMeshLevelData().getMpiComm(meshLevel), + localDofMap.getRankDofs(), + nGlobalOverallInterior, + &solInterior); + + Vec solCoarse; + primalDofMap.createVec(solCoarse); + + solveFeti(subdomain->getVecRhsInterior(), + subdomain->getVecRhsCoarse(), + solInterior, + solCoarse); + + // === And recover AMDiS solution vectors. 
=== + + recoverSolution(solInterior, solCoarse, vec); - if (debug) - solveFetiMatrix(vec); - else - solveReducedFetiMatrix(vec); + VecDestroy(&solInterior); + VecDestroy(&solCoarse); MeshDistributor::globalMeshDistributor->synchVector(vec); } + + void PetscSolverFeti::solveGlobal(Vec &rhs, Vec &sol) + { + FUNCNAME("PetscSolverFeti::solveGlobal()"); + + Vec rhsInterior, rhsCoarse, solInterior, solCoarse; + VecCreateMPI(domainComm, + localDofMap.getRankDofs(), + nGlobalOverallInterior, + &rhsInterior); + primalDofMap.createVec(rhsCoarse); + VecDuplicate(rhsInterior, &solInterior); + VecDuplicate(rhsCoarse, &solCoarse); + + int offset = 0; + { + int domainLocal = 0, nSuperLocal = 0; + if (domainComm.Get_rank() == 0) + domainLocal = interiorMap->getOverallDofs(); + + mpi::getDofNumbering(meshDistributor->getMpiComm(meshLevel - 1), + domainLocal, offset, nSuperLocal); + + int tmp = 0; + if (domainComm.Get_rank() == 0) + tmp = offset; + + domainComm.Allreduce(&tmp, &offset, 1, MPI_INT, MPI_SUM); + } + + vector<int> localFromRhs, coarseFromRhs; + vector<int> rhsToLocal, rhsToCoarse; + + int nComponents = componentSpaces.size(); + for (int i = 0; i < nComponents; i++) { + DofMap &dMap = localDofMap[i].getMap(); + + for (DofMap::iterator it = dMap.begin(); it != dMap.end(); ++it) { + int matL = localDofMap.getMatIndex(i, it->first) + rStartInterior; + int matI = interiorMap->getMatIndex(i, it->first) + offset; + + localFromRhs.push_back(matL); + rhsToLocal.push_back(matI); + } + } + + for (int i = 0; i < nComponents; i++) { + DofMap &dMap = primalDofMap[i].getMap(); + + for (DofMap::iterator it = dMap.begin(); it != dMap.end(); ++it) { + int matL = primalDofMap.getMatIndex(i, it->first); + int matI = interiorMap->getMatIndex(i, it->first) + offset; + + coarseFromRhs.push_back(matL); + rhsToCoarse.push_back(matI); + } + } + + copyVec(rhs, rhsInterior, rhsToLocal, localFromRhs); + copyVec(rhs, rhsCoarse, rhsToCoarse, coarseFromRhs); + + solveFeti(rhsInterior, rhsCoarse, solInterior, solCoarse); + + copyVec(solInterior, sol, localFromRhs, rhsToLocal); + copyVec(solCoarse, sol, coarseFromRhs, rhsToCoarse); + + MPI::COMM_WORLD.Barrier(); + + VecDestroy(&rhsInterior); + VecDestroy(&rhsCoarse); + VecDestroy(&solInterior); + VecDestroy(&solCoarse); + } } diff --git a/AMDiS/src/parallel/PetscSolverFeti.h b/AMDiS/src/parallel/PetscSolverFeti.h index a20a771bf404173da987db63978fd603b099e409..3ade93c80870f02d5b8e9aa565ebe06c0cba79eb 100644 --- a/AMDiS/src/parallel/PetscSolverFeti.h +++ b/AMDiS/src/parallel/PetscSolverFeti.h @@ -72,6 +72,9 @@ namespace AMDiS { /// Solve the system using FETI-DP method. void solvePetscMatrix(SystemVector &vec, AdaptInfo *adaptInfo); + /// Just for the super trick + void solveGlobal(Vec &rhs, Vec &sol); + /// Destroys all matrix data structures. 
diff --git a/AMDiS/src/parallel/PetscSolverFeti.h b/AMDiS/src/parallel/PetscSolverFeti.h
index a20a771bf404173da987db63978fd603b099e409..3ade93c80870f02d5b8e9aa565ebe06c0cba79eb 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.h
+++ b/AMDiS/src/parallel/PetscSolverFeti.h
@@ -72,6 +72,9 @@ namespace AMDiS {
     /// Solve the system using FETI-DP method.
     void solvePetscMatrix(SystemVector &vec, AdaptInfo *adaptInfo);
 
+    /// Solves the FETI-DP system for a given global (super domain) rhs vector; used for the multilevel trick.
+    void solveGlobal(Vec &rhs, Vec &sol);
+
     /// Destroys all matrix data structures.
     void destroyMatrixData();
 
@@ -150,7 +153,8 @@ namespace AMDiS {
     void createMatAugmentedLagrange();
 
-    bool testWirebasketEdge(BoundaryObject &edge, const FiniteElemSpace *feSpace);
+    bool testWirebasketEdge(BoundaryObject &edge,
+                            const FiniteElemSpace *feSpace);
 
     ///
     void createPreconditionerMatrix(Matrix<DOFMatrix*> *mat);
 
@@ -171,9 +175,33 @@ namespace AMDiS {
     /// Creates PETSc KSP solver object for the FETI-DP operator, \ref ksp_feti
     void createFetiKsp();
 
+    ///
+    void createFetiExactKsp();
+
+    ///
+    void createFetiInexactKsp();
+
+    ///
+    void createFetiInexactReducedKsp();
+
+    ///
+    void createFetiPreconLumped(PC pc);
+
+    ///
+    void createFetiPreconDirichlet(PC pc);
+
     /// Destroys FETI-DP operator, \ref ksp_feti
     void destroyFetiKsp();
 
+    ///
+    void destroyFetiExactKsp();
+
+    ///
+    void destroyFetiInexactKsp();
+
+    ///
+    void destroyFetiInexactReducedKsp();
+
     /// Create the null space of the FETI-DP operator (if there is one) and
     /// attaches it to the corresponding matrices and KSP objects.
     void createNullSpace();
 
@@ -204,25 +232,26 @@ namespace AMDiS {
     void recoverInterfaceSolution(Vec& vecInterface, SystemVector &vec);
 
-    /** \brief
-     * Solves the FETI-DP system globally, thus without reducing it to the
-     * Lagrange multipliers. This should be used for debugging only to test
-     * if the FETI-DP system is setup correctly.
-     *
-     * \param[out] vec Solution DOF vectors.
-     */
-    void solveFetiMatrix(SystemVector &vec);
+    ///
+    void solveFeti(Vec &rhsInterior, Vec &rhsCoarse,
+                   Vec &solInterior, Vec &solCoarse);
 
-    /** \brief
-     * Solves the FETI-DP system with reducing it first to the Lagrange
-     * multipliers. This is what one expects when using the FETI-DP methid :)
-     *
-     * \param[out] vec Solution DOF vectors.
-     */
-    void solveReducedFetiMatrix(SystemVector &vec);
+    ///
+    void solveFetiExact(Vec &rhsInterior, Vec &rhsCoarse,
+                        Vec &solInterior, Vec &solCoarse);
 
+    ///
+    void solveFetiInexact(Vec &rhsInterior, Vec &rhsCoarse,
+                          Vec &solInterior, Vec &solCoarse);
+
+    ///
+    void solveFetiInexactReduced(Vec &rhsInterior, Vec &rhsCoarse,
+                                 Vec &solInterior, Vec &solCoarse);
+
+    ///
     void resetStatistics();
 
+    ///
     void printStatistics();
 
     /// Checks whether a given DOF is a primal DOF in a given component.
@@ -247,6 +276,9 @@ namespace AMDiS {
     }
 
   protected:
+    /// Type of FETI-DP solver, i.e., exact or some inexact version
+    FetiSolverType fetiSolverType;
+
     /// Mapping from primal DOF indices to a global index of primals.
     ParallelDofMapping primalDofMap;
 
@@ -309,6 +341,12 @@ namespace AMDiS {
     /// Data for MatMult operation in matrix \ref mat_feti
     FetiData fetiData;
 
+    ///
+    FetiInexactData fetiInexactData;
+
+    ///
+    FetiInexactPreconData fetiInexactPreconData;
+
     /// Defines which preconditioner should be used to solve the reduced
     /// FETI-DP system.
     FetiPreconditioner fetiPreconditioner;
 
@@ -338,11 +376,10 @@ namespace AMDiS {
 
     PetscSolver *subdomain;
 
-    PetscSolver *massMatrixSolver;
-
-    int rStartInterior;
+    // Just a trick for the multilevel case; should be removed and generalized!
+    PetscSolver *mlSubdomain;
 
-    int nGlobalOverallInterior;
+    PetscSolver *massMatrixSolver;
 
     bool printTimings;
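The header above declares the new dispatch routine solveFeti() next to the three variant-specific solvers and the fetiSolverType member. The dispatch body itself is not part of this diff; presumably it reduces to something like the following sketch (only the names come from the declarations above):

void PetscSolverFeti::solveFeti(Vec &rhsInterior, Vec &rhsCoarse,
                                Vec &solInterior, Vec &solCoarse)
{
  // Select the solver variant configured in fetiSolverType.
  switch (fetiSolverType) {
  case EXACT:
    solveFetiExact(rhsInterior, rhsCoarse, solInterior, solCoarse);
    break;
  case INEXACT:
    solveFetiInexact(rhsInterior, rhsCoarse, solInterior, solCoarse);
    break;
  case INEXACT_REDUCED:
    solveFetiInexactReduced(rhsInterior, rhsCoarse, solInterior, solCoarse);
    break;
  }
}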
diff --git a/AMDiS/src/parallel/PetscSolverFetiOperators.cc b/AMDiS/src/parallel/PetscSolverFetiOperators.cc
index e1abec273184b56fbcd3343f46783d478cb36f68..e25f3e940647f365a4e7d03aa24c61a0d4277090 100644
--- a/AMDiS/src/parallel/PetscSolverFetiOperators.cc
+++ b/AMDiS/src/parallel/PetscSolverFetiOperators.cc
@@ -15,6 +15,26 @@ namespace AMDiS {
 
+  void copyGlobalLocal(Vec globalB, Vec localB)
+  {
+    double *valueGlobal, *valueLocal;
+    VecGetArray(globalB, &valueGlobal);
+    VecGetArray(localB, &valueLocal);
+
+    int nGlobal, nLocal;
+    VecGetLocalSize(globalB, &nGlobal);
+    VecGetLocalSize(localB, &nLocal);
+
+    TEST_EXIT(nGlobal == nLocal)("Should not happen!\n");
+
+    for (int i = 0; i < nGlobal; i++)
+      valueLocal[i] = valueGlobal[i];
+
+    VecRestoreArray(globalB, &valueGlobal);
+    VecRestoreArray(localB, &valueLocal);
+  }
+
+
   int petscMultMatSchurPrimal(Mat mat, Vec x, Vec y)
   {
     // S_PiPi = K_PiPi - K_PiB inv(K_BB) K_BPi
@@ -194,6 +214,173 @@ namespace AMDiS {
   }
 
 
+  // y = mat * x
+  int petscMultMatFetiInexact(Mat mat, Vec x, Vec y)
+  {
+    FUNCNAME("petscMultMatFetiInexact()");
+
+    void *ctx;
+    MatShellGetContext(mat, &ctx);
+    FetiInexactData* data = static_cast<FetiInexactData*>(ctx);
+
+    Vec xInterior, xPrimal, xLagrange;
+    Vec yInterior, yPrimal, yLagrange;
+
+    VecNestGetSubVec(x, 0, &xInterior);
+    VecNestGetSubVec(x, 1, &xPrimal);
+    VecNestGetSubVec(x, 2, &xLagrange);
+
+    VecNestGetSubVec(y, 0, &yInterior);
+    VecNestGetSubVec(y, 1, &yPrimal);
+    VecNestGetSubVec(y, 2, &yLagrange);
+
+    Vec tmpInterior;
+    VecDuplicate(xInterior, &tmpInterior);
+    MatMult(*(data->matBPi), xPrimal, tmpInterior);
+    MatMultTranspose(*(data->mat_lagrange), xLagrange, yInterior);
+    VecAXPY(yInterior, 1.0, tmpInterior);
+
+    {
+      {
+        double *valueRhs, *valueTmp;
+        VecGetArray(xInterior, &valueRhs);
+        VecGetArray(data->tmp_vec_b0, &valueTmp);
+
+        int nRhsLocal, nTmpLocal;
+        VecGetLocalSize(xInterior, &nRhsLocal);
+        VecGetLocalSize(data->tmp_vec_b0, &nTmpLocal);
+
+        TEST_EXIT(nRhsLocal == nTmpLocal)("Should not happen!\n");
+
+        for (int i = 0; i < nRhsLocal; i++)
+          valueTmp[i] = valueRhs[i];
+
+        VecRestoreArray(xInterior, &valueRhs);
+        VecRestoreArray(data->tmp_vec_b0, &valueTmp);
+      }
+
+      MatMult(*(data->matBB), data->tmp_vec_b0, data->tmp_vec_b1);
+
+
+      {
+        double *valueRhs, *valueTmp;
+        VecGetArray(tmpInterior, &valueRhs);
+        VecGetArray(data->tmp_vec_b1, &valueTmp);
+
+        int nRhsLocal, nTmpLocal;
+        VecGetLocalSize(yInterior, &nRhsLocal);
+        VecGetLocalSize(data->tmp_vec_b1, &nTmpLocal);
+
+        TEST_EXIT(nRhsLocal == nTmpLocal)("Should not happen!\n");
+
+        for (int i = 0; i < nRhsLocal; i++)
+          valueRhs[i] = valueTmp[i];
+
+        VecRestoreArray(tmpInterior, &valueRhs);
+        VecRestoreArray(data->tmp_vec_b1, &valueTmp);
+      }
+
+      VecAXPY(yInterior, 1.0, tmpInterior);
+    }
+
+    {
+      Vec tmpPrimal;
+      VecDuplicate(xPrimal, &tmpPrimal);
+
+      MatMult(*(data->matPiB), xInterior, tmpPrimal);
+      MatMult(*(data->matPiPi), xPrimal, yPrimal);
+      VecAXPY(yPrimal, 1.0, tmpPrimal);
+
+      VecDestroy(&tmpPrimal);
+    }
+
+    MatMult(*(data->mat_lagrange), xInterior, yLagrange);
+
+
+    VecDestroy(&tmpInterior);
+
+
+    return 0;
+  }
+
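Read as a block system, the shell routine above applies the full, unreduced FETI-DP saddle-point operator: with B the Lagrange jump operator (mat_lagrange) and the subscripts B and \Pi denoting the interior/dual and primal blocks, the three nested output blocks computed above are

\begin{pmatrix} y_B \\ y_\Pi \\ y_\lambda \end{pmatrix}
=
\begin{pmatrix}
  K_{BB}    & K_{B\Pi}   & B^{T} \\
  K_{\Pi B} & K_{\Pi\Pi} & 0     \\
  B         & 0          & 0
\end{pmatrix}
\begin{pmatrix} x_B \\ x_\Pi \\ x_\lambda \end{pmatrix}.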
+
+  PetscErrorCode pcInexactFetiShell(PC pc, Vec x, Vec y)
+  {
+    void *ctx;
+    PCShellGetContext(pc, &ctx);
+    FetiInexactPreconData* data = static_cast<FetiInexactPreconData*>(ctx);
+
+    Vec xInterior, xPrimal, xLagrange;
+    Vec yInterior, yPrimal, yLagrange;
+
+    VecNestGetSubVec(x, 0, &xInterior);
+    VecNestGetSubVec(x, 1, &xPrimal);
+    VecNestGetSubVec(x, 2, &xLagrange);
+
+    VecNestGetSubVec(y, 0, &yInterior);
+    VecNestGetSubVec(y, 1, &yPrimal);
+    VecNestGetSubVec(y, 2, &yLagrange);
+
+
+    Vec tmpPrimal;
+    VecDuplicate(xPrimal, &tmpPrimal);
+
+    Vec tmpInterior0, tmpInterior1;
+    VecDuplicate(xInterior, &tmpInterior0);
+    VecDuplicate(xInterior, &tmpInterior1);
+
+    {
+      // === First Block ===
+
+      copyGlobalLocal(xInterior, data->tmp_vec_b0);
+      KSPSolve(data->ksp_interior, data->tmp_vec_b0, data->tmp_vec_b0);
+      copyGlobalLocal(data->tmp_vec_b0, tmpInterior0);
+      MatMult(*(data->matPiB), tmpInterior0, tmpPrimal);
+      VecAYPX(tmpPrimal, -1.0, xPrimal);
+
+
+      // === Second Block ===
+
+      // tmpInterior0 already calculated
+
+      KSPSolve(data->ksp_schur, tmpPrimal, tmpPrimal);
+
+
+      // === Third Block ===
+
+      // tmpPrimal already calculated
+
+      MatMult(*(data->matBPi), tmpPrimal, tmpInterior1);
+      copyGlobalLocal(tmpInterior1, data->tmp_vec_b0);
+      KSPSolve(data->ksp_interior, data->tmp_vec_b0, data->tmp_vec_b0);
+      copyGlobalLocal(data->tmp_vec_b0, tmpInterior1);
+
+      VecAXPY(tmpInterior0, -1.0, tmpInterior1);
+
+      VecCopy(tmpInterior0, yInterior);
+      VecCopy(tmpPrimal, yPrimal);
+    }
+
+
+    {
+      Vec tmpLagrange;
+      VecDuplicate(xLagrange, &tmpLagrange);
+
+      MatMult(*(data->mat_lagrange), yInterior, tmpLagrange);
+      PCApply(data->pc_feti, tmpLagrange, yLagrange);
+
+      PCApply(data->pc_feti, xLagrange, tmpLagrange);
+      VecAXPY(yLagrange, -1.0, tmpLagrange);
+
+      VecDestroy(&tmpLagrange);
+    }
+
+    VecDestroy(&tmpPrimal);
+    VecDestroy(&tmpInterior0);
+    VecDestroy(&tmpInterior1);
+  }
+
+
   // y = mat * x
   int petscMultMatFetiAugmented(Mat mat, Vec x, Vec y)
   {
@@ -429,6 +616,7 @@ namespace AMDiS {
 
 
     // === K_DD ===
+
     MatMult(*(data->mat_duals_duals), data->tmp_vec_duals0,
             data->tmp_vec_duals1);
 
diff --git a/AMDiS/src/parallel/PetscSolverFetiOperators.h b/AMDiS/src/parallel/PetscSolverFetiOperators.h
index ce9542093182abf95a1d3e411fbf2c2be919a2ee..d09328323fa0e6c95b33d55e8d44a7b4c8567f7e 100644
--- a/AMDiS/src/parallel/PetscSolverFetiOperators.h
+++ b/AMDiS/src/parallel/PetscSolverFetiOperators.h
@@ -28,6 +28,8 @@ namespace AMDiS {
 
+  void copyGlobalLocal(Vec globalB, Vec localB);
+
   ///
   int petscMultMatSchurPrimal(Mat mat, Vec x, Vec y);
 
@@ -37,6 +39,12 @@ namespace AMDiS {
   /// FETI-DP operator
   int petscMultMatFeti(Mat mat, Vec x, Vec y);
 
+  /// Inexact FETI-DP operator
+  int petscMultMatFetiInexact(Mat mat, Vec x, Vec y);
+
+  /// Inexact FETI-DP preconditioner
+  PetscErrorCode pcInexactFetiShell(PC pc, Vec x, Vec y);
+
   /// FETI-DP operator with augmented Lagrange constraints
   int petscMultMatFetiAugmented(Mat mat, Vec x, Vec y);
 
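The declarations above only provide the building blocks of the inexact solver; the actual setup (presumably in createFetiInexactKsp(), declared in PetscSolverFeti.h but not shown in these hunks) is not part of this diff. As a rough sketch, a MatShell/PCShell pair using the two new routines would typically be wired up as follows; the sizes nRankRows/nOverallRows and the use of fetiInexactData/fetiInexactPreconData as contexts are assumptions here:

// Sketch only: register the inexact FETI-DP operator and preconditioner.
Mat matFetiInexact;
MatCreateShell(domainComm, nRankRows, nRankRows, nOverallRows, nOverallRows,
               &fetiInexactData, &matFetiInexact);
MatShellSetOperation(matFetiInexact, MATOP_MULT,
                     (void(*)(void)) petscMultMatFetiInexact);

KSPCreate(domainComm, &ksp_feti);
KSPSetOperators(ksp_feti, matFetiInexact, matFetiInexact,
                SAME_NONZERO_PATTERN);

PC pc;
KSPGetPC(ksp_feti, &pc);
PCSetType(pc, PCSHELL);
PCShellSetContext(pc, &fetiInexactPreconData);
PCShellSetApply(pc, pcInexactFetiShell);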
diff --git a/AMDiS/src/parallel/PetscSolverFetiStructs.h b/AMDiS/src/parallel/PetscSolverFetiStructs.h
index 2ed5cedb39d4a2d50d40969c1d314bcd3b5f0909..811547f15333cd833c03e3e2c9f2074362c8ba07 100644
--- a/AMDiS/src/parallel/PetscSolverFetiStructs.h
+++ b/AMDiS/src/parallel/PetscSolverFetiStructs.h
@@ -32,6 +32,16 @@ namespace AMDiS {
 
   class PetscSolverFeti;
 
+
+  enum FetiSolverType {
+    // Standard exact FETI-DP system
+    EXACT,
+    // Inexact FETI-DP
+    INEXACT,
+    // Inexact reduced FETI-DP
+    INEXACT_REDUCED
+  };
+
   /** \brief
    * This structure is used when defining the MatShell operation for solving
    * primal schur complement. \ref petscMultMatSchurPrimal
@@ -98,6 +108,32 @@ namespace AMDiS {
   };
 
 
+  struct FetiInexactData {
+    Mat *matBB, *matBPi, *matPiB, *matPiPi;
+
+    Mat *mat_lagrange;
+
+    Vec tmp_vec_b0, tmp_vec_b1;
+  };
+
+
+  struct FetiInexactPreconData {
+    KSP ksp_schur;
+
+    KSP ksp_interior;
+
+    KSP ksp_pc_feti;
+
+    PC pc_feti;
+
+    Mat *matPiB, *matBPi;
+
+    Mat *mat_lagrange;
+
+    Vec tmp_vec_b0;
+  };
+
+
   struct FetiDirichletPreconData {
     /// Matrix of scaled Lagrange variables.
     Mat *mat_lagrange_scaled;
diff --git a/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc b/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc
index bda315c2cce18ed2c15823b2cc1bd472a0790aaa..2b1ffc8481a2d46c3092c661ec62ba52feb5e30c 100644
--- a/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc
+++ b/AMDiS/src/parallel/PetscSolverGlobalMatrix.cc
@@ -413,9 +413,6 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverGlobalMatrix::solveGlobal()");
 
-    double wtime = MPI::Wtime();
-    double t0 = 0.0, t1 = 0.0;
-
     Vec tmp;
     if (domainComm.Get_size() == 1)
       interiorMap->createLocalVec(tmp);
@@ -432,13 +429,7 @@ namespace AMDiS {
     VecRestoreArray(rhs, &rhsValues);
     VecRestoreArray(tmp, &tmpValues);
 
-    t0 = MPI::Wtime() - wtime;
-
-    wtime = MPI::Wtime();
     KSPSolve(kspInterior, tmp, tmp);
-    t1 = MPI::Wtime() - wtime;
-
-    wtime = MPI::Wtime();
 
     VecGetArray(tmp, &tmpValues);
     VecGetArray(sol, &rhsValues);
@@ -450,9 +441,6 @@ namespace AMDiS {
     VecRestoreArray(tmp, &tmpValues);
 
     VecDestroy(&tmp);
-
-    t0 += MPI::Wtime() - wtime;
-
-    // MSG("TIMEING: %.5f %.5f\n", t0, t1);
   }
 
@@ -540,7 +528,7 @@ namespace AMDiS {
   }
 
 
-  void PetscSolverGlobalMatrix::exitSolver(KSP ksp)
+  void PetscSolverGlobalMatrix::exitSolver(KSP &ksp)
   {
     FUNCNAME("PetscSolverGlobalMatrix::exitSolver()");
 
@@ -871,6 +859,8 @@ namespace AMDiS {
     MatNullSpaceCreate(domainComm, PETSC_FALSE, 1, &nullSpaceBasis, &matNullSpace);
     KSPSetNullSpace(ksp, matNullSpace);
     MatNullSpaceDestroy(&matNullSpace);
+
+    VecDestroy(&nullSpaceBasis);
   }
 
diff --git a/AMDiS/src/parallel/PetscSolverGlobalMatrix.h b/AMDiS/src/parallel/PetscSolverGlobalMatrix.h
index 040151de5b95b24ea31a996ec9dac912ecb4cf09..90a36b22d4b04da23e2b663b0cbeeb38ffab3804 100644
--- a/AMDiS/src/parallel/PetscSolverGlobalMatrix.h
+++ b/AMDiS/src/parallel/PetscSolverGlobalMatrix.h
@@ -100,7 +100,7 @@ namespace AMDiS {
 
     virtual void initSolver(KSP &ksp);
 
-    virtual void exitSolver(KSP ksp);
+    virtual void exitSolver(KSP &ksp);
 
     virtual void initPreconditioner(PC pc);
 
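The change from exitSolver(KSP ksp) to exitSolver(KSP &ksp) in the two PetscSolverGlobalMatrix hunks above is not cosmetic: KSPDestroy() resets the handle it is given, so with a by-value parameter only the local copy is nulled and the caller keeps a stale handle. A minimal sketch of the intended effect (the body shown here is assumed; it is not part of this diff):

void PetscSolverGlobalMatrix::exitSolver(KSP &ksp)
{
  // Destroys the solver object and resets the caller's handle; with a
  // by-value KSP parameter only the copy would be reset.
  KSPDestroy(&ksp);
}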
diff --git a/AMDiS/src/parallel/PetscSolverNavierStokes.cc b/AMDiS/src/parallel/PetscSolverNavierStokes.cc
index e3a9aa11a11fdd0cd5d1c20cc75c82d6e5a2a770..428dad486b81d5659ee135ecd273f1494906d362 100644
--- a/AMDiS/src/parallel/PetscSolverNavierStokes.cc
+++ b/AMDiS/src/parallel/PetscSolverNavierStokes.cc
@@ -104,6 +104,8 @@ namespace AMDiS {
     FUNCNAME("PetscSolverNavierStokes::initSolver()");
 
     // Create FGMRES based outer solver
+
+    MSG("CREATE POS 1: %p\n", &ksp);
     KSPCreate(domainComm, &ksp);
     KSPSetOperators(ksp, getMatInterior(), getMatInterior(), SAME_NONZERO_PATTERN);
     KSPMonitorSet(ksp, KSPMonitorTrueResidualNorm, PETSC_NULL, PETSC_NULL);
@@ -136,6 +138,7 @@ namespace AMDiS {
     PCFieldSplitSetSchurFactType(pc, PC_FIELDSPLIT_SCHUR_FACT_FULL);
     createFieldSplit(pc, "velocity", velocityComponents);
     createFieldSplit(pc, "pressure", pressureComponent);
+    PCSetFromOptions(pc);
 
     KSPSetUp(kspInterior);
 
@@ -174,7 +177,6 @@ namespace AMDiS {
     if (pressureNullSpace)
       setConstantNullSpace(kspSchur);
 
-
     // === Mass matrix solver ===
 
     const FiniteElemSpace *pressureFeSpace = componentSpaces[pressureComponent];
@@ -324,8 +326,11 @@ namespace AMDiS {
     FUNCNAME("PetscSolverNavierStokes::exitPreconditioner()");
 
     massMatrixSolver->destroyMatrixData();
+    massMatrixSolver->destroyVectorData();
     laplaceMatrixSolver->destroyMatrixData();
+    laplaceMatrixSolver->destroyVectorData();
     conDifMatrixSolver->destroyMatrixData();
+    conDifMatrixSolver->destroyVectorData();
 
     delete massMatrixSolver;
     massMatrixSolver = NULL;
diff --git a/AMDiS/src/parallel/StdMpi.cc b/AMDiS/src/parallel/StdMpi.cc
index 2bec40b83df87fef2aee8622e58d1af5ce1527b9..97d13fb436996d4743e761c75ae5e884ec60b759 100644
--- a/AMDiS/src/parallel/StdMpi.cc
+++ b/AMDiS/src/parallel/StdMpi.cc
@@ -183,7 +183,8 @@ namespace AMDiS {
     return size;
   }
 
-  void StdMpiHelper<vector<vector<double> > >::createBuffer(vector<vector<double> > &data, double *buf)
+  void StdMpiHelper<vector<vector<double> > >::createBuffer(vector<vector<double> > &data,
+                                                            double *buf)
   {
     buf[0] = data.size();
     int counter = 1;
@@ -195,7 +196,9 @@ namespace AMDiS {
     }
   }
 
-  void StdMpiHelper<vector<vector<double> > >::makeFromBuffer(vector<vector<double> > &data, double *buf, int bufSize)
+  void StdMpiHelper<vector<vector<double> > >::makeFromBuffer(vector<vector<double> > &data,
+                                                              double *buf,
+                                                              int bufSize)
   {
     data.resize(static_cast<unsigned int>(buf[0]));
     int counter = 1;
diff --git a/AMDiS/src/parallel/StdMpi.h b/AMDiS/src/parallel/StdMpi.h
index 0800cacac6e2a034c81d629f25084ceb136be84a..22a1d0d1ef71cb164e6ba92dda8472729a1843cd 100644
--- a/AMDiS/src/parallel/StdMpi.h
+++ b/AMDiS/src/parallel/StdMpi.h
@@ -427,6 +427,11 @@ namespace AMDiS {
     }
 
     MPI::Request::Waitall(requestCounter, request);
+
+#if (DEBUG != 0)
+    bool testall = MPI::Request::Testall(requestCounter, request);
+    TEST_EXIT(testall)("Should not happen!\n");
+#endif
   }