diff --git a/AMDiS/src/parallel/PetscSolverFeti.cc b/AMDiS/src/parallel/PetscSolverFeti.cc index 217ec1bb4481af3b9ad3b85d3ee6e9a2c2147890..67d9226a0961a30c8836b694f4d152db7fc5dc39 100644 --- a/AMDiS/src/parallel/PetscSolverFeti.cc +++ b/AMDiS/src/parallel/PetscSolverFeti.cc @@ -20,6 +20,56 @@ namespace AMDiS { #ifdef HAVE_PETSC_DEV + // y = mat * x + int petscMultMatSchurPrimal(Mat mat, Vec x, Vec y) + { + // S_PiPi = K_PiPi - K_PiB inv(K_BB) K_BPi + + void *ctx; + MatShellGetContext(mat, &ctx); + PetscSchurPrimalData* data = static_cast<PetscSchurPrimalData*>(ctx); + + MatMult(*(data->mat_b_primal), x, data->tmp_vec_b); + KSPSolve(*(data->ksp_b), data->tmp_vec_b, data->tmp_vec_b); + + MatMult(*(data->mat_primal_b), data->tmp_vec_b, data->tmp_vec_primal); + MatMult(*(data->mat_primal_primal), x, y); + VecAXPBY(y, -1.0, 1.0, data->tmp_vec_primal); + + return 0; + } + + + // y = mat * x + int petscMultMatFeti(Mat mat, Vec x, Vec y) + { + // F = L inv(K_BB) trans(L) + L inv(K_BB) K_BPi inv(S_PiPi) K_PiB inv(K_BB) trans(L) + + void *ctx; + MatShellGetContext(mat, &ctx); + PetscFetiData* data = static_cast<PetscFetiData*>(ctx); + + // y = L inv(K_BB) trans(L) x + MatMultTranspose(*(data->mat_lagrange), x, data->tmp_vec_b); + KSPSolve(*(data->ksp_b), data->tmp_vec_b, data->tmp_vec_b); + MatMult(*(data->mat_lagrange), data->tmp_vec_b, y); + + // tmp_vec_primal = inv(S_PiPi) K_PiB inv(K_BB) trans(L) + MatMult(*(data->mat_primal_b), data->tmp_vec_b, data->tmp_vec_primal); + KSPSolve(*(data->ksp_schur_primal), data->tmp_vec_primal, data->tmp_vec_primal); + + // tmp_vec_lagrange = L inv(K_BB) K_BPi tmp_vec_primal + // = L inv(K_BB) K_BPi inv(S_PiPi) K_PiB inv(K_BB) trans(L) + MatMult(*(data->mat_b_primal), data->tmp_vec_primal, data->tmp_vec_b); + KSPSolve(*(data->ksp_b), data->tmp_vec_b, data->tmp_vec_b); + MatMult(*(data->mat_lagrange), data->tmp_vec_b, data->tmp_vec_lagrange); + + VecAXPBY(y, 1.0, 1.0, data->tmp_vec_lagrange); + + return 0; + } + + void PetscSolverFeti::updateDofData() { FUNCNAME("PetscSolverFeti::updateDofData()"); @@ -62,7 +112,7 @@ namespace AMDiS { nOverallPrimals = 0; - int rStartPrimals = 0; + rStartPrimals = 0; mpi::getDofNumbering(meshDistributor->getMpiComm(), nRankPrimals, rStartPrimals, nOverallPrimals); @@ -226,7 +276,7 @@ namespace AMDiS { } nOverallLagrange = 0; - int rStartLagrange = 0; + rStartLagrange = 0; mpi::getDofNumbering(meshDistributor->getMpiComm(), nRankLagrange, rStartLagrange, nOverallLagrange); @@ -353,6 +403,197 @@ namespace AMDiS { } + void PetscSolverFeti::createSchurPrimalKsp(int nComponents) + { + FUNCNAME("PetscSolverFeti::createSchurPrimal()"); + + petscSchurPrimalData.mat_primal_primal = &mat_primal_primal; + petscSchurPrimalData.mat_primal_b = &mat_primal_b; + petscSchurPrimalData.mat_b_primal = &mat_b_primal; + petscSchurPrimalData.ksp_b = &ksp_b; + + MatGetVecs(mat_b_b, + PETSC_NULL, &(petscSchurPrimalData.tmp_vec_b)); + MatGetVecs(mat_primal_primal, + PETSC_NULL, &(petscSchurPrimalData.tmp_vec_primal)); + + MatCreateShell(PETSC_COMM_WORLD, + nRankPrimals * nComponents, nRankPrimals * nComponents, + nOverallPrimals * nComponents, nOverallPrimals * nComponents, + &petscSchurPrimalData, + &mat_schur_primal); + MatShellSetOperation(mat_schur_primal, MATOP_MULT, + (void(*)(void))petscMultMatSchurPrimal); + + KSPCreate(PETSC_COMM_WORLD, &ksp_schur_primal); + KSPSetOperators(ksp_schur_primal, mat_schur_primal, mat_schur_primal, SAME_NONZERO_PATTERN); + KSPSetOptionsPrefix(ksp_schur_primal, "solver_sp_"); + KSPSetFromOptions(ksp_schur_primal); + } + + + void PetscSolverFeti::destroySchurPrimalKsp() + { + FUNCNAME("PetscSolverFeti::destroySchurPrimal()"); + + petscSchurPrimalData.mat_primal_primal = PETSC_NULL; + petscSchurPrimalData.mat_primal_b = PETSC_NULL; + petscSchurPrimalData.mat_b_primal = PETSC_NULL; + petscSchurPrimalData.ksp_b = PETSC_NULL; + + VecDestroy(petscSchurPrimalData.tmp_vec_b); + VecDestroy(petscSchurPrimalData.tmp_vec_primal); + + MatDestroy(mat_schur_primal); + KSPDestroy(ksp_schur_primal); + } + + + void PetscSolverFeti::createFetiKsp() + { + FUNCNAME("PetscSolverFeti::createFetiKsp()"); + + petscFetiData.mat_primal_primal = &mat_primal_primal; + petscFetiData.mat_primal_b = &mat_primal_b; + petscFetiData.mat_b_primal = &mat_b_primal; + petscFetiData.mat_lagrange = &mat_lagrange; + petscFetiData.ksp_b = &ksp_b; + petscFetiData.ksp_schur_primal = &ksp_schur_primal; + + + MatGetVecs(mat_b_b, PETSC_NULL, &(petscFetiData.tmp_vec_b)); + MatGetVecs(mat_primal_primal, PETSC_NULL, &(petscFetiData.tmp_vec_primal)); + MatGetVecs(mat_lagrange, PETSC_NULL, &(petscFetiData.tmp_vec_lagrange)); + + + MatCreateShell(PETSC_COMM_WORLD, + nRankLagrange, nRankLagrange, + nOverallLagrange, nOverallLagrange, + &petscFetiData, &mat_feti); + MatShellSetOperation(mat_feti, MATOP_MULT, (void(*)(void))petscMultMatFeti); + + + KSPCreate(PETSC_COMM_WORLD, &ksp_feti); + KSPSetOperators(ksp_feti, mat_feti, mat_feti, SAME_NONZERO_PATTERN); + KSPSetOptionsPrefix(ksp_feti, "solver_feti_"); + KSPSetFromOptions(ksp_feti); + } + + + void PetscSolverFeti::destroyFetiKsp() + { + FUNCNAME("PetscSolverFeti::destroyFetiKsp()"); + + petscFetiData.mat_primal_primal = PETSC_NULL; + petscFetiData.mat_primal_b = PETSC_NULL; + petscFetiData.mat_b_primal = PETSC_NULL; + petscFetiData.mat_lagrange = PETSC_NULL; + petscFetiData.ksp_b = PETSC_NULL; + petscFetiData.ksp_schur_primal = PETSC_NULL; + + + VecDestroy(petscFetiData.tmp_vec_b); + VecDestroy(petscFetiData.tmp_vec_primal); + VecDestroy(petscFetiData.tmp_vec_lagrange); + + + MatDestroy(mat_feti); + KSPDestroy(ksp_feti); + } + + + void PetscSolverFeti::recoverSolution(Vec &vec_sol_b, + Vec &vec_sol_primal, + SystemVector &vec) + { + FUNCNAME("PetscSolverFeti::recoverSolution()"); + + int nComponents = vec.getSize(); + + // === === + + PetscScalar *localSolB; + VecGetArray(vec_sol_b, &localSolB); + + + // === === + + vector<PetscInt> globalIsIndex, localIsIndex; + globalIsIndex.reserve(globalPrimalIndex.size() * nComponents); + localIsIndex.reserve(globalPrimalIndex.size() * nComponents); + + { + int counter = 0; + for (DofMapping::iterator it = globalPrimalIndex.begin(); + it != globalPrimalIndex.end(); ++it) { + for (int i = 0; i < nComponents; i++) { + globalIsIndex.push_back(it->second * nComponents + i); + localIsIndex.push_back(counter++); + } + } + } + + IS globalIs, localIs; + ISCreateGeneral(PETSC_COMM_SELF, + globalIsIndex.size(), + &(globalIsIndex[0]), + PETSC_USE_POINTER, + &globalIs); + + ISCreateGeneral(PETSC_COMM_SELF, + localIsIndex.size(), + &(localIsIndex[0]), + PETSC_USE_POINTER, + &localIs); + + Vec local_sol_primal; + VecCreateSeq(PETSC_COMM_SELF, localIsIndex.size(), &local_sol_primal); + + + VecScatter primalScatter; + VecScatterCreate(vec_sol_primal, globalIs, local_sol_primal, localIs, &primalScatter); + VecScatterBegin(primalScatter, vec_sol_primal, local_sol_primal, + INSERT_VALUES, SCATTER_FORWARD); + VecScatterEnd(primalScatter, vec_sol_primal, local_sol_primal, + INSERT_VALUES, SCATTER_FORWARD); + + ISDestroy(globalIs); + ISDestroy(localIs); + VecScatterDestroy(primalScatter); + + + PetscScalar *localSolPrimal; + VecGetArray(local_sol_primal, &localSolPrimal); + + + + + for (int i = 0; i < nComponents; i++) { + DOFVector<double>& dofVec = *(vec.getDOFVector(i)); + + for (DofMapping::iterator it = globalIndexB.begin(); + it != globalIndexB.end(); ++it) { + int petscIndex = (it->second - rStartB) * nComponents + i; + dofVec[it->first] = localSolB[petscIndex]; + } + + int counter = 0; + for (DofMapping::iterator it = globalPrimalIndex.begin(); + it != globalPrimalIndex.end(); ++it) { + dofVec[it->first] = localSolPrimal[counter * nComponents + i]; + counter++; + } + } + + + + VecRestoreArray(vec_sol_b, &localSolB); + + VecRestoreArray(local_sol_primal, &localSolPrimal); + VecDestroy(local_sol_primal); + } + + void PetscSolverFeti::fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec) { @@ -489,14 +730,14 @@ namespace AMDiS { // === === - VecCreate(PETSC_COMM_WORLD, &vec_b); - VecSetSizes(vec_b, nRankB * nComponents, nOverallB * nComponents); - VecSetType(vec_b, VECMPI); + VecCreate(PETSC_COMM_WORLD, &f_b); + VecSetSizes(f_b, nRankB * nComponents, nOverallB * nComponents); + VecSetType(f_b, VECMPI); - VecCreate(PETSC_COMM_WORLD, &vec_primal); - VecSetSizes(vec_primal, nRankPrimals * nComponents, + VecCreate(PETSC_COMM_WORLD, &f_primal); + VecSetSizes(f_primal, nRankPrimals * nComponents, nOverallPrimals * nComponents); - VecSetType(vec_primal, VECMPI); + VecSetType(f_primal, VECMPI); for (int i = 0; i < nComponents; i++) { DOFVector<double>::Iterator dofIt(vec->getDOFVector(i), USED_DOFS); @@ -508,29 +749,32 @@ namespace AMDiS { index = globalPrimalIndex[index] * nComponents + i; double value = *dofIt; - VecSetValues(vec_primal, 1, &index, &value, ADD_VALUES); + VecSetValues(f_primal, 1, &index, &value, ADD_VALUES); } else { TEST_EXIT_DBG(globalIndexB.count(index)) ("Should not happen!\n"); index = globalIndexB[index] * nComponents + i; double value = *dofIt; - VecSetValues(vec_b, 1, &index, &value, ADD_VALUES); + VecSetValues(f_b, 1, &index, &value, ADD_VALUES); } } } - VecAssemblyBegin(vec_b); - VecAssemblyEnd(vec_b); + VecAssemblyBegin(f_b); + VecAssemblyEnd(f_b); - VecAssemblyBegin(vec_primal); - VecAssemblyEnd(vec_primal); + VecAssemblyBegin(f_primal); + VecAssemblyEnd(f_primal); createMatLagrange(nComponents); - MSG("FINISHED!\n"); - exit(0); + + + createSchurPrimalKsp(nComponents); + + createFetiKsp(); } @@ -538,14 +782,248 @@ namespace AMDiS { { FUNCNAME("PetscSolverFeti::solvePetscMatrix()"); - MatDestroy(mat_b_b); - MatDestroy(mat_primal_primal); - MatDestroy(mat_b_primal); - MatDestroy(mat_primal_b); - MatDestroy(mat_lagrange); + int debug = 0; + Parameters::get("parallel->debug feti", debug); + + if (debug) { + int nComponents = vec.getSize(); + + Mat mat_lagrange_transpose; + MatTranspose(mat_lagrange, MAT_INITIAL_MATRIX, &mat_lagrange_transpose); + + Mat A; + Mat nestedA[3][3]; + nestedA[0][0] = mat_b_b; + nestedA[0][1] = mat_b_primal; + nestedA[0][2] = mat_lagrange_transpose; + nestedA[1][0] = mat_primal_b; + nestedA[1][1] = mat_primal_primal; + nestedA[1][2] = PETSC_NULL; + nestedA[2][0] = mat_lagrange; + nestedA[2][1] = PETSC_NULL; + nestedA[2][2] = PETSC_NULL; + + MatCreateNest(PETSC_COMM_WORLD, 3, PETSC_NULL, 3, PETSC_NULL, &(nestedA[0][0]), &A); + + MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY); + MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY); + + + + int nRankNest = (nRankB + nRankPrimals) * nComponents + nRankLagrange; + int nOverallNest = (nOverallB + nOverallPrimals) * nComponents + nOverallLagrange; + int rStartNest = (rStartB + rStartPrimals) * nComponents + rStartLagrange; + + { + int matRow, matCol; + MatGetLocalSize(A, &matRow, &matCol); + TEST_EXIT_DBG(matRow == nRankNest)("Should not happen!\n"); + mpi::globalAdd(matRow); + TEST_EXIT_DBG(matRow == nOverallNest)("Should not happen!\n"); + + MatGetOwnershipRange(A, &matRow, &matCol); + TEST_EXIT_DBG(matRow == rStartNest)("Should not happen!\n"); + } + + Vec f; + VecCreate(PETSC_COMM_WORLD, &f); + VecSetSizes(f, nRankNest, nOverallNest); + VecSetType(f, VECMPI); + + Vec b; + VecDuplicate(f, &b); + + PetscScalar *local_f_b; + VecGetArray(f_b, &local_f_b); + + { + int size; + VecGetLocalSize(f_b, &size); + TEST_EXIT_DBG(size == nRankB * nComponents)("Should not happen!\n"); + } + + PetscScalar *local_f_primal; + VecGetArray(f_primal, &local_f_primal); + + { + int size; + VecGetLocalSize(f_primal, &size); + TEST_EXIT_DBG(size == nRankPrimals * nComponents)("Should not happen!\n"); + } + + PetscScalar *local_f; + VecGetArray(f, &local_f); + + int index = 0; + for (int i = 0; i < nRankB * nComponents; i++) + local_f[index++] = local_f_b[i]; + for (int i = 0; i < nRankPrimals * nComponents; i++) + local_f[index++] = local_f_primal[i]; + + VecRestoreArray(f, &local_f); + VecRestoreArray(f_b, &local_f_b); + VecRestoreArray(f_primal, &local_f_primal); + + + KSP ksp; + KSPCreate(PETSC_COMM_WORLD, &ksp); + KSPSetOperators(ksp, A, A, SAME_NONZERO_PATTERN); + KSPSetFromOptions(ksp); + + + + KSPSolve(ksp, f, b); + + + + + Vec u_b, u_primal; + VecDuplicate(f_b, &u_b); + VecDuplicate(f_primal, &u_primal); + + + PetscScalar *local_b; + VecGetArray(b, &local_b); + + PetscScalar *local_u_b; + VecGetArray(u_b, &local_u_b); + + PetscScalar *local_u_primal; + VecGetArray(u_primal, &local_u_primal); + + index = 0; + for (int i = 0; i < nRankB * nComponents; i++) + local_u_b[i] = local_b[index++]; + for (int i = 0; i < nRankPrimals * nComponents; i++) + local_u_primal[i] = local_b[index++]; + for (int i = 0; i < nRankLagrange; i++) { + MSG("MY %d-ith Lagrange: %f\n", i, local_b[index++]); + } + + VecRestoreArray(f, &local_b); + VecRestoreArray(u_b, &local_u_b); + VecRestoreArray(u_primal, &local_u_primal); + + + recoverSolution(u_b, u_primal, vec); + + VecDestroy(u_b); + VecDestroy(u_primal); + VecDestroy(b); + VecDestroy(f); + + KSPDestroy(ksp); + + } else { + + KSPCreate(PETSC_COMM_WORLD, &ksp_b); + KSPSetOperators(ksp_b, mat_b_b, mat_b_b, SAME_NONZERO_PATTERN); + KSPSetOptionsPrefix(ksp_b, "solver_b_"); + KSPSetFromOptions(ksp_b); + + + Vec vec_rhs; + + Vec tmp_b0, tmp_b1, tmp_lagrange0, tmp_primal0, tmp_primal1; + MatGetVecs(mat_lagrange, PETSC_NULL, &tmp_lagrange0); + MatGetVecs(mat_lagrange, PETSC_NULL, &vec_rhs); + MatGetVecs(mat_b_b, PETSC_NULL, &tmp_b0); + MatGetVecs(mat_b_b, PETSC_NULL, &tmp_b1); + MatGetVecs(mat_primal_primal, PETSC_NULL, &tmp_primal0); + MatGetVecs(mat_primal_primal, PETSC_NULL, &tmp_primal1); + + + // === Create new rhs === + + // vec_rhs = L * inv(K_BB) * f_b + KSPSolve(ksp_b, f_b, tmp_b0); + MatMult(mat_lagrange, tmp_b0, vec_rhs); + + // tmp_primal0 = M_PiB * inv(K_BB) * f_b + MatMult(mat_primal_b, tmp_b0, tmp_primal0); + + // tmp_primal0 = f_Pi - M_PiB * inv(K_BB) * f_b + // VecAXPBY(tmp_primal0, 1.0, -1.0, f_primal); + VecAXPBY(tmp_primal0, -1.0, 1.0, f_primal); + + // tmp_primal0 = inv(S_PiPi) (f_Pi - M_PiB * inv(K_BB) * f_b) + KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0); + + // + MatMult(mat_b_primal, tmp_primal0, tmp_b0); + KSPSolve(ksp_b, tmp_b0, tmp_b0); + MatMult(mat_lagrange, tmp_b0, tmp_lagrange0); + + // + VecAXPBY(vec_rhs, 1.0, 1.0, tmp_lagrange0); + + + // === Solve with FETI-DP operator. === + + KSPSolve(ksp_feti, vec_rhs, vec_rhs); + + + // === Solve for u_primals. === + + VecCopy(f_primal, tmp_primal0); + + KSPSolve(ksp_b, f_b, tmp_b0); + MatMult(mat_primal_b, tmp_b0, tmp_primal1); + + VecAXPBY(tmp_primal0, -1.0, 1.0, tmp_primal1); + + MatMultTranspose(mat_lagrange, vec_rhs, tmp_b0); + KSPSolve(ksp_b, tmp_b0, tmp_b0); + MatMult(mat_primal_b, tmp_b0, tmp_primal1); + + VecAXPBY(tmp_primal0, 1.0, 1.0, tmp_primal1); + KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0); + + + // === Solve for u_b. === + + VecCopy(f_b, tmp_b0); + MatMultTranspose(mat_lagrange, vec_rhs, tmp_b1); + VecAXPBY(tmp_b0, -1.0, 1.0, tmp_b1); + + MatMult(mat_b_primal, tmp_primal0, tmp_b1); + VecAXPBY(tmp_b0, -1.0, 1.0, tmp_b1); + + KSPSolve(ksp_b, tmp_b0, tmp_b0); + + + // === And recover AMDiS solution vectors. === + + recoverSolution(tmp_b0, tmp_primal0, vec); + + + // === Destroy all data structures. === + + + VecDestroy(vec_rhs); + VecDestroy(tmp_b0); + VecDestroy(tmp_b1); + VecDestroy(tmp_lagrange0); + VecDestroy(tmp_primal0); + VecDestroy(tmp_primal1); + + + KSPDestroy(ksp_b); + + MatDestroy(mat_b_b); + MatDestroy(mat_primal_primal); + MatDestroy(mat_b_primal); + MatDestroy(mat_primal_b); + MatDestroy(mat_lagrange); + + VecDestroy(f_b); + VecDestroy(f_primal); + + destroySchurPrimalKsp(); + + destroyFetiKsp(); + } - VecDestroy(vec_b); - VecDestroy(vec_primal); } #endif diff --git a/AMDiS/src/parallel/PetscSolverFeti.h b/AMDiS/src/parallel/PetscSolverFeti.h index c22bf2d4b516212e83343b273b134ff0a3dfc50b..a033913b26f0ba2add7c2ebeb7abf9d434308ccf 100644 --- a/AMDiS/src/parallel/PetscSolverFeti.h +++ b/AMDiS/src/parallel/PetscSolverFeti.h @@ -32,6 +32,37 @@ namespace AMDiS { #ifdef HAVE_PETSC_DEV + + struct PetscSchurPrimalData { + Mat *mat_primal_b, *mat_b_primal, *mat_primal_primal; + + Vec tmp_vec_b; + + Vec tmp_vec_primal; + + KSP *ksp_b; + }; + + + + struct PetscFetiData { + Mat *mat_primal_b, *mat_b_primal, *mat_primal_primal; + + Mat *mat_lagrange; + + Vec tmp_vec_b; + + Vec tmp_vec_primal; + + Vec tmp_vec_lagrange; + + KSP *ksp_b; + + KSP *ksp_schur_primal; + }; + + + class PetscSolverFeti : public PetscSolver { public: @@ -63,6 +94,22 @@ namespace AMDiS { void createMatLagrange(int nComponents); + + + void createSchurPrimalKsp(int nComponents); + + void destroySchurPrimalKsp(); + + + void createFetiKsp(); + + void destroyFetiKsp(); + + + void recoverSolution(Vec &vec_sol_b, + Vec &vec_sol_primal, + SystemVector &vec); + protected: DofIndexSet primals; @@ -72,6 +119,8 @@ namespace AMDiS { int nOverallPrimals; + int rStartPrimals; + DofIndexSet duals; DofMapping globalDualIndex; @@ -83,6 +132,8 @@ namespace AMDiS { int nRankLagrange; int nOverallLagrange; + + int rStartLagrange; DofMapping globalIndexB; @@ -100,9 +151,26 @@ namespace AMDiS { Mat mat_lagrange; - Vec vec_b; + Vec f_b, f_primal; + + KSP ksp_b; + + + + + KSP ksp_schur_primal; + + Mat mat_schur_primal; + + PetscSchurPrimalData petscSchurPrimalData; + + + + KSP ksp_feti; + + Mat mat_feti; - Vec vec_primal; + PetscFetiData petscFetiData; }; #endif