diff --git a/AMDiS/src/parallel/PetscHelper.cc b/AMDiS/src/parallel/PetscHelper.cc
index ad3c35c00a62130111536b2b6c92a67a3e18c301..eaf175b8f2c8a347164a5a4389db92828821acec 100644
--- a/AMDiS/src/parallel/PetscHelper.cc
+++ b/AMDiS/src/parallel/PetscHelper.cc
@@ -11,6 +11,7 @@
 
 
 #include "parallel/PetscHelper.h"
+#include "Global.h"
 
 namespace AMDiS {
 
@@ -70,6 +71,104 @@ namespace AMDiS {
       VecAssemblyBegin(vec);
       VecAssemblyEnd(vec);
     }
+
+
+    void blockMatMatSolve(KSP ksp, Mat mat0, Mat &mat1)
+    {
+      // === We have to calculate mat1 = inv(A) mat0, with inv(A) given by ksp: ===
+      // ===  - get all local column vectors from mat0                          ===
+      // ===  - solve with ksp for each column vector as the rhs vector         ===
+      // ===  - set the result to the corresponding column vector of            ===
+      // ===    matrix mat1                                                     ===
+
+      PetscInt localRows, localCols, globalRows, globalCols;
+      MatGetLocalSize(mat0, &localRows, &localCols);
+      MatGetSize(mat0, &globalRows, &globalCols);
+
+      MatCreateAIJ(PETSC_COMM_WORLD,
+                   localRows, localCols, globalRows, globalCols,
+                   150, PETSC_NULL, 150, PETSC_NULL, &mat1);
+      MatSetOption(mat1, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
+
+      // Transform matrix mat0 into a sparse column format.
+      PetscMatCol mat0_cols;
+      getMatLocalColumn(mat0, mat0_cols);
+
+      Vec tmpVec;
+      VecCreateSeq(PETSC_COMM_SELF, localRows, &tmpVec);
+
+      // Solve with ksp for all local column vectors of mat0 and write the
+      // results into the corresponding columns of mat1.
+      for (PetscMatCol::iterator it = mat0_cols.begin();
+           it != mat0_cols.end(); ++it) {
+        getColumnVec(it->second, tmpVec);
+        KSPSolve(ksp, tmpVec, tmpVec);
+        setMatLocalColumn(mat1, it->first, tmpVec);
+      }
+
+      VecDestroy(&tmpVec);
+
+      MatAssemblyBegin(mat1, MAT_FINAL_ASSEMBLY);
+      MatAssemblyEnd(mat1, MAT_FINAL_ASSEMBLY);
+    }
+
+
+    void matNestConvert(Mat matNest, Mat &mat)
+    {
+      FUNCNAME("matNestConvert()");
+
+      PetscInt nestRows, nestCols;
+      MatNestGetSize(matNest, &nestRows, &nestCols);
+
+      TEST_EXIT(nestRows == 2 && nestCols == 2)
+        ("This function is only implemented for 2x2 nested matrices!\n");
+
+      Mat mat00, mat01, mat10, mat11;
+      MatNestGetSubMat(matNest, 0, 0, &mat00);
+      MatNestGetSubMat(matNest, 0, 1, &mat01);
+      MatNestGetSubMat(matNest, 1, 0, &mat10);
+      MatNestGetSubMat(matNest, 1, 1, &mat11);
+
+      int nRankRows0, nOverallRows0;
+      MatGetLocalSize(mat00, &nRankRows0, PETSC_NULL);
+      MatGetSize(mat00, &nOverallRows0, PETSC_NULL);
+
+      int nRankRows1, nOverallRows1;
+      MatGetLocalSize(mat11, &nRankRows1, PETSC_NULL);
+      MatGetSize(mat11, &nOverallRows1, PETSC_NULL);
+
+      int firstRow0;
+      MatGetOwnershipRange(mat00, &firstRow0, PETSC_NULL);
+
+      int firstRow1;
+      MatGetOwnershipRange(mat11, &firstRow1, PETSC_NULL);
+
+      int nRankRows = nRankRows0 + nRankRows1;
+      int nOverallRows = nOverallRows0 + nOverallRows1;
+      int firstRow = firstRow0 + firstRow1;
+
+      MatCreateAIJ(PETSC_COMM_WORLD,
+                   nRankRows, nRankRows,
+                   nOverallRows, nOverallRows,
+                   10, PETSC_NULL, 10, PETSC_NULL, &mat);
+
+      // Copy the locally owned rows of the (0,0) block into the monolithic matrix.
+      for (int i = 0; i < nRankRows0; i++) {
+        PetscInt nCols;
+        const PetscInt *cols;
+        const PetscScalar *vals;
+
+        MatGetRow(mat00, i + firstRow0, &nCols, &cols, &vals);
+
+        for (int j = 0; j < nCols; j++) {
+          MatSetValue(mat, firstRow + i, cols[j], vals[j], INSERT_VALUES);
+        }
+
+        MatRestoreRow(mat00, i + firstRow0, &nCols, &cols, &vals);
+      }
+
+      MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY);
+      MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY);
+    }
+
   }
-
+
 }
diff --git a/AMDiS/src/parallel/PetscHelper.h b/AMDiS/src/parallel/PetscHelper.h
index 01edbc1daa046603a98ee2bc626f9b62c09821b4..9362aad873e912491ac7dce4ee25e7e6aaa3c0c0 100644
--- a/AMDiS/src/parallel/PetscHelper.h
+++ b/AMDiS/src/parallel/PetscHelper.h
@@ -69,6 +69,26 @@ namespace AMDiS {
      * \param[out]  vec     Vector representing one column of the matrix.
      */
     void getColumnVec(const SparseCol &matCol, Vec vec);
+
+    /** \brief
+     * Computes the matrix-matrix product C = inv(A) B. The matrices B and C
+     * are distributed matrices, whereas A is a local matrix on each MPI
+     * task. The overall number of rows of the local matrices A must match
+     * the number of distributed rows of B.
+     *
+     * \param[in]   ksp   KSP solver object that applies inv(A).
+     * \param[in]   mat0  matrix B
+     * \param[out]  mat1  resulting matrix C, created inside the function
+     */
+    void blockMatMatSolve(KSP ksp, Mat mat0, Mat &mat1);
+
+    /** \brief
+     * Converts a 2x2 nested matrix to a MATAIJ matrix (thus not nested).
+     *
+     * \param[in]   matNest  nested input matrix
+     * \param[out]  mat      matrix of type MATAIJ, created inside this function
+     */
+    void matNestConvert(Mat matNest, Mat &mat);
 
   }
 }
diff --git a/AMDiS/src/parallel/PetscSolverFeti.cc b/AMDiS/src/parallel/PetscSolverFeti.cc
index 95bb9eb8e6f81d881a80841d802ef73be837c45e..37c8fed40aa7a8ac666cd593bdb75743bd184859 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.cc
+++ b/AMDiS/src/parallel/PetscSolverFeti.cc
@@ -794,6 +794,7 @@ namespace AMDiS {
                            (void(*)(void))petscMultMatSchurPrimal);
     } else {
       schurPrimalAugmentedData.subSolver = subdomain;
+      schurPrimalAugmentedData.nestedVec = true;
 
       localDofMap.createVec(schurPrimalAugmentedData.tmp_vec_b0, nGlobalOverallInterior);
       localDofMap.createVec(schurPrimalAugmentedData.tmp_vec_b1, nGlobalOverallInterior);
@@ -824,63 +825,16 @@
     double wtime = MPI::Wtime();
 
-    TEST_EXIT(!augmentedLagrange)("Not yet supported!\n");
 
     TEST_EXIT_DBG(meshLevel == 0)
       ("Does not support for multilevel, check usage of localDofMap.\n");
 
-    // === First, calculate matK = inv(A_BB) A_BPi:                     ===
-    // ===  - get all local column vectors from A_BPi                   ===
-    // ===  - solve with A_BB for this column vector as the rhs vector  ===
-    // ===  - set the result to the corresponding column vector of      ===
-    // ===    matrix matK                                               ===
+    // === Create explicit matrix representation of the Schur primal system. ===
 
-    int nRowsRankPrimal = primalDofMap.getRankDofs();
-    int nRowsOverallPrimal = primalDofMap.getOverallDofs();
-    int nRowsRankB = localDofMap.getRankDofs();
-
-    // Transform matrix A_BPi into a sparse column format.
-    petsc_helper::PetscMatCol mat_b_primal_cols;
-    petsc_helper::getMatLocalColumn(subdomain->getMatInteriorCoarse(),
-                                    mat_b_primal_cols);
-
-    TEST_EXIT(static_cast<int>(mat_b_primal_cols.size()) ==
-              primalDofMap.getLocalDofs())
-      ("Should not happen!\n");
-
-    Vec tmpVec;
-    VecCreateSeq(PETSC_COMM_SELF, nRowsRankB, &tmpVec);
-
-    Mat matK;
-    MatCreateAIJ(mpiCommGlobal,
-                 nRowsRankB, nRowsRankPrimal,
-                 nGlobalOverallInterior, nRowsOverallPrimal,
-                 150, PETSC_NULL, 150, PETSC_NULL, &matK);
-    MatSetOption(matK, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
-
-    // Solve for all column vectors of mat A_BPi and create matrix matK
-    for (petsc_helper::PetscMatCol::iterator it = mat_b_primal_cols.begin();
-         it != mat_b_primal_cols.end(); ++it) {
-      petsc_helper::getColumnVec(it->second, tmpVec);
-      subdomain->solve(tmpVec, tmpVec);
-      petsc_helper::setMatLocalColumn(matK, it->first, tmpVec);
-    }
-
-    VecDestroy(&tmpVec);
-
-    MatAssemblyBegin(matK, MAT_FINAL_ASSEMBLY);
-    MatAssemblyEnd(matK, MAT_FINAL_ASSEMBLY);
-
-    // Calculate: mat_schur_primal = A_PiPi - A_PiB inv(A_BB) ABPi
-    //                             = A_PiPi - A_PiB matK
-    MatDuplicate(subdomain->getMatCoarse(), MAT_COPY_VALUES,
-                 &mat_schur_primal);
-    Mat matPrimal;
-    MatMatMult(subdomain->getMatCoarseInterior(), matK, MAT_INITIAL_MATRIX,
-               PETSC_DEFAULT, &matPrimal);
-    MatAXPY(mat_schur_primal, -1.0, matPrimal, DIFFERENT_NONZERO_PATTERN);
-    MatDestroy(&matPrimal);
-    MatDestroy(&matK);
+    if (!augmentedLagrange)
+      createMatExplicitSchurPrimal();
+    else
+      createMatExplicitAugmentedSchurPrimal();
 
 
     // === Create KSP solver object and set appropriate solver options. ====
@@ -919,6 +873,160 @@
   }
 
 
+  void PetscSolverFeti::createMatExplicitSchurPrimal()
+  {
+    FUNCNAME("PetscSolverFeti::createMatExplicitSchurPrimal()");
+
+    int creationMode = 0;
+    Parameters::get("parallel->feti->schur primal creation mode", creationMode);
+    if (creationMode == 0) {
+      // matK = inv(A_BB) A_BPi
+      Mat matK;
+      petsc_helper::blockMatMatSolve(subdomain->getSolver(),
+                                     subdomain->getMatInteriorCoarse(),
+                                     matK);
+
+      // mat_schur_primal = A_PiPi - A_PiB inv(A_BB) A_BPi
+      //                  = A_PiPi - A_PiB matK
+      MatMatMult(subdomain->getMatCoarseInterior(), matK, MAT_INITIAL_MATRIX,
+                 PETSC_DEFAULT, &mat_schur_primal);
+      MatAYPX(mat_schur_primal, -1.0, subdomain->getMatCoarse(),
+              DIFFERENT_NONZERO_PATTERN);
+
+      MatDestroy(&matK);
+    } else {
+      Mat tmp;
+
+      schurPrimalData.subSolver = subdomain;
+
+      localDofMap.createVec(schurPrimalData.tmp_vec_b, nGlobalOverallInterior);
+      primalDofMap.createVec(schurPrimalData.tmp_vec_primal);
+
+      MatCreateShell(mpiCommGlobal,
+                     primalDofMap.getRankDofs(),
+                     primalDofMap.getRankDofs(),
+                     primalDofMap.getOverallDofs(),
+                     primalDofMap.getOverallDofs(),
+                     &schurPrimalData,
+                     &tmp);
+      MatShellSetOperation(tmp, MATOP_MULT,
+                           (void(*)(void))petscMultMatSchurPrimal);
+      MatComputeExplicitOperator(tmp, &mat_schur_primal);
+
+      MatDestroy(&tmp);
+      schurPrimalData.subSolver = NULL;
+      VecDestroy(&schurPrimalData.tmp_vec_b);
+      VecDestroy(&schurPrimalData.tmp_vec_primal);
+    }
+  }
+
+
+  void PetscSolverFeti::createMatExplicitAugmentedSchurPrimal()
+  {
+    FUNCNAME("PetscSolverFeti::createMatExplicitAugmentedSchurPrimal()");
+
+    int creationMode = 1;
+    Parameters::get("parallel->feti->schur primal creation mode", creationMode);
+    if (creationMode == 0) {
+      // qj = -Q J
+      Mat qj;
+      MatMatMult(mat_augmented_lagrange, mat_lagrange, MAT_INITIAL_MATRIX,
+                 PETSC_DEFAULT, &qj);
+      MatScale(qj, -1.0);
+
+      // matTmp = inv(A_BB) A_BPi
+      Mat matTmp;
+      petsc_helper::blockMatMatSolve(subdomain->getSolver(),
+                                     subdomain->getMatInteriorCoarse(),
+                                     matTmp);
+
+      // mat00 = A_PiPi - A_PiB inv(A_BB) A_BPi
+      //       = A_PiPi - A_PiB matTmp
+      Mat mat00;
+      MatMatMult(subdomain->getMatCoarseInterior(), matTmp, MAT_INITIAL_MATRIX,
+                 PETSC_DEFAULT, &mat00);
+      MatAYPX(mat00, -1.0, subdomain->getMatCoarse(), DIFFERENT_NONZERO_PATTERN);
+
+      // mat10 = -Q J inv(A_BB) A_BPi
+      //       = qj matTmp
+      Mat mat10;
+      MatMatMult(qj, matTmp, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &mat10);
+
+      // matTmp = inv(A_BB) trans(J) trans(Q)
+      Mat qT, jTqT;
+      MatTranspose(mat_augmented_lagrange, MAT_INITIAL_MATRIX, &qT);
+      MatTransposeMatMult(mat_lagrange, qT, MAT_INITIAL_MATRIX, PETSC_DEFAULT,
+                          &jTqT);
+      petsc_helper::blockMatMatSolve(subdomain->getSolver(), jTqT, matTmp);
+      MatDestroy(&qT);
+      MatDestroy(&jTqT);
+
+      // mat01 = -A_PiB inv(A_BB) trans(J) trans(Q)
+      //       = -A_PiB matTmp
+      Mat mat01;
+      MatMatMult(subdomain->getMatCoarseInterior(), matTmp, MAT_INITIAL_MATRIX,
+                 PETSC_DEFAULT, &mat01);
+      MatScale(mat01, -1.0);
+
+      // mat11 = -Q J inv(A_BB) trans(J) trans(Q)
+      //       = qj matTmp
+      Mat mat11;
+      MatMatMult(qj, matTmp, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &mat11);
+
+      MatDestroy(&matTmp);
+      MatDestroy(&qj);
+
+      Mat nestMat[4] = {mat00, mat01, mat10, mat11};
+      MatCreateNest(PETSC_COMM_WORLD, 2, PETSC_NULL, 2, PETSC_NULL, nestMat, &matTmp);
+      petsc_helper::matNestConvert(matTmp, mat_schur_primal);
+
+      MatDestroy(&mat00);
+      MatDestroy(&mat01);
+      MatDestroy(&mat10);
+      MatDestroy(&mat11);
+      MatDestroy(&matTmp);
+    } else {
+      Mat tmp;
+
+      schurPrimalAugmentedData.subSolver = subdomain;
+      schurPrimalAugmentedData.nestedVec = false;
+
+      localDofMap.createVec(schurPrimalAugmentedData.tmp_vec_b0, nGlobalOverallInterior);
+      localDofMap.createVec(schurPrimalAugmentedData.tmp_vec_b1, nGlobalOverallInterior);
+      primalDofMap.createVec(schurPrimalAugmentedData.tmp_vec_primal);
+      lagrangeMap.createVec(schurPrimalAugmentedData.tmp_vec_lagrange);
+
+      schurPrimalAugmentedData.mat_lagrange = &mat_lagrange;
+      schurPrimalAugmentedData.mat_augmented_lagrange = &mat_augmented_lagrange;
+
+      MatCreateShell(mpiCommGlobal,
+                     primalDofMap.getRankDofs() + nRankEdges,
+                     primalDofMap.getRankDofs() + nRankEdges,
+                     primalDofMap.getOverallDofs() + nOverallEdges,
+                     primalDofMap.getOverallDofs() + nOverallEdges,
+                     &schurPrimalAugmentedData,
+                     &tmp);
+      MatShellSetOperation(tmp, MATOP_MULT,
+                           (void(*)(void))petscMultMatSchurPrimalAugmented);
+
+      MatComputeExplicitOperator(tmp, &mat_schur_primal);
+
+      MatDestroy(&tmp);
+      schurPrimalAugmentedData.subSolver = NULL;
+      schurPrimalAugmentedData.mat_lagrange = NULL;
+      schurPrimalAugmentedData.mat_augmented_lagrange = NULL;
+      VecDestroy(&schurPrimalAugmentedData.tmp_vec_b0);
+      VecDestroy(&schurPrimalAugmentedData.tmp_vec_b1);
+      VecDestroy(&schurPrimalAugmentedData.tmp_vec_primal);
+      VecDestroy(&schurPrimalAugmentedData.tmp_vec_lagrange);
+    }
+  }
+
+
   void PetscSolverFeti::destroySchurPrimalKsp()
   {
     FUNCNAME("PetscSolverFeti::destroySchurPrimal()");
diff --git a/AMDiS/src/parallel/PetscSolverFeti.h b/AMDiS/src/parallel/PetscSolverFeti.h
index d30f5d5d41445950985a1b952f51c139d0535f28..0d73abb4d0ee10c10f3830d53140229db2765b72 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.h
+++ b/AMDiS/src/parallel/PetscSolverFeti.h
@@ -140,6 +140,12 @@ namespace AMDiS {
     /// system on the primal variables, \ref ksp_schur_primal
     void createSchurPrimalKsp(vector<const FiniteElemSpace*> &feSpaces);
 
+    /// Creates an explicit matrix representation of the Schur primal system.
+    void createMatExplicitSchurPrimal();
+
+    /// Creates an explicit matrix representation of the augmented Schur primal system.
+    void createMatExplicitAugmentedSchurPrimal();
+
     /// Destroys PETSc KSP solver object \ref ksp_schur_primal
     void destroySchurPrimalKsp();
 
diff --git a/AMDiS/src/parallel/PetscSolverFetiOperators.cc b/AMDiS/src/parallel/PetscSolverFetiOperators.cc
index 88275a08577c5cd3e1327730b9e4e5242087c162..cd9c94c38a204d6490149ad095f30b4a3ae6af33 100644
--- a/AMDiS/src/parallel/PetscSolverFetiOperators.cc
+++ b/AMDiS/src/parallel/PetscSolverFetiOperators.cc
@@ -41,11 +41,50 @@ namespace AMDiS {
 
     SchurPrimalAugmentedData* data =
       static_cast<SchurPrimalAugmentedData*>(ctx);
 
-    Vec x_primal, x_mu, y_primal, y_mu;
-    VecNestGetSubVec(x, 0, &x_primal);
-    VecNestGetSubVec(x, 1, &x_mu);
-    VecNestGetSubVec(y, 0, &y_primal);
-    VecNestGetSubVec(y, 1, &y_mu);
+    Vec x_primal, x_mu, y_primal, y_mu;
+
+    if (data->nestedVec) {
+      VecNestGetSubVec(x, 0, &x_primal);
+      VecNestGetSubVec(x, 1, &x_mu);
+      VecNestGetSubVec(y, 0, &y_primal);
+      VecNestGetSubVec(y, 1, &y_mu);
+    } else {
+      VecDuplicate(data->tmp_vec_primal, &x_primal);
+      VecDuplicate(data->tmp_vec_primal, &y_primal);
+
+      PetscInt allLocalSize, allSize;
+      VecGetLocalSize(x, &allLocalSize);
+      VecGetSize(x, &allSize);
+
+      PetscInt primalLocalSize, primalSize;
+      VecGetLocalSize(x_primal, &primalLocalSize);
+      VecGetSize(x_primal, &primalSize);
+
+      TEST_EXIT_DBG(allSize > primalSize)("Should not happen!\n");
+      TEST_EXIT_DBG(allLocalSize >= primalLocalSize)("Should not happen!\n");
+
+      PetscInt muLocalSize = allLocalSize - primalLocalSize;
+      PetscInt muSize = allSize - primalSize;
+
+      VecCreateMPI(PETSC_COMM_WORLD, muLocalSize, muSize, &x_mu);
+      VecCreateMPI(PETSC_COMM_WORLD, muLocalSize, muSize, &y_mu);
+
+      // Split the monolithic input vector into its primal and mu parts.
+      PetscScalar *allValue;
+      PetscScalar *primalValue;
+      PetscScalar *muValue;
+      VecGetArray(x, &allValue);
+      VecGetArray(x_primal, &primalValue);
+      VecGetArray(x_mu, &muValue);
+
+      for (int i = 0; i < primalLocalSize; i++)
+        primalValue[i] = allValue[i];
+      for (int i = 0; i < muLocalSize; i++)
+        muValue[i] = allValue[primalLocalSize + i];
+
+      VecRestoreArray(x, &allValue);
+      VecRestoreArray(x_primal, &primalValue);
+      VecRestoreArray(x_mu, &muValue);
+    }
 
     // inv(K_BB) K_BPi x_Pi
     MatMult(data->subSolver->getMatInteriorCoarse(), x_primal, data->tmp_vec_b0);
@@ -74,6 +113,36 @@
     MatMult(*(data->mat_augmented_lagrange), data->tmp_vec_lagrange, y_mu);
     VecScale(y_mu, -1.0);
 
+    if (!data->nestedVec) {
+      // Copy the primal and mu parts back into the monolithic output vector.
+      PetscInt allLocalSize;
+      VecGetLocalSize(x, &allLocalSize);
+      PetscInt primalLocalSize;
+      VecGetLocalSize(x_primal, &primalLocalSize);
+      PetscInt muLocalSize = allLocalSize - primalLocalSize;
+
+      PetscScalar *allValue;
+      PetscScalar *primalValue;
+      PetscScalar *muValue;
+      VecGetArray(y, &allValue);
+      VecGetArray(y_primal, &primalValue);
+      VecGetArray(y_mu, &muValue);
+
+      for (int i = 0; i < primalLocalSize; i++)
+        allValue[i] = primalValue[i];
+
+      for (int i = 0; i < muLocalSize; i++)
+        allValue[primalLocalSize + i] = muValue[i];
+
+      VecRestoreArray(y, &allValue);
+      VecRestoreArray(y_primal, &primalValue);
+      VecRestoreArray(y_mu, &muValue);
+
+      VecDestroy(&x_primal);
+      VecDestroy(&y_primal);
+      VecDestroy(&x_mu);
+      VecDestroy(&y_mu);
+    }
+
     return 0;
   }
diff --git a/AMDiS/src/parallel/PetscSolverFetiStructs.h b/AMDiS/src/parallel/PetscSolverFetiStructs.h
index 11eafe4e5a219c2deabb6650b1760538385ca938..8fa97340ebbf310d156b691c365cbba3e1659183 100644
--- a/AMDiS/src/parallel/PetscSolverFetiStructs.h
+++ b/AMDiS/src/parallel/PetscSolverFetiStructs.h
@@ -63,6 +63,8 @@ namespace AMDiS {
     Mat *mat_augmented_lagrange;
 
     PetscSolver* subSolver;
+
+    bool nestedVec; ///< Whether the shell matrix is applied to nested (VecNest) vectors.
   };
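
Usage sketch (illustrative only, not part of the patch): the two helpers added to
AMDiS::petsc_helper can be combined as shown below. The surrounding function and the
matrix/solver names are assumptions made for this example.

    #include <petsc.h>
    #include "parallel/PetscHelper.h"

    // ksp applies inv(A_BB); matInteriorCoarse is the distributed matrix A_BPi;
    // matNest2x2 is a 2x2 MATNEST matrix.
    void exampleUsage(KSP ksp, Mat matInteriorCoarse, Mat matNest2x2)
    {
      using namespace AMDiS::petsc_helper;

      // matK = inv(A_BB) A_BPi, computed column by column via the KSP object.
      Mat matK;
      blockMatMatSolve(ksp, matInteriorCoarse, matK);

      // Flatten the nested matrix into a single MATAIJ matrix.
      Mat matAij;
      matNestConvert(matNest2x2, matAij);

      MatDestroy(&matK);
      MatDestroy(&matAij);
    }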