From c60ae5850ff66b1b24d43dde70275b5f658a67f4 Mon Sep 17 00:00:00 2001
From: Thomas Witkowski <thomas.witkowski@gmx.de>
Date: Mon, 30 May 2011 12:05:49 +0000
Subject: [PATCH] Some performance issues in FETI-DP code.

---
 AMDiS/src/parallel/PetscSolverFeti.cc | 78 +++++++++++++--------------
 AMDiS/src/parallel/PetscSolverFeti.h  | 11 ++--
 2 files changed, 41 insertions(+), 48 deletions(-)

diff --git a/AMDiS/src/parallel/PetscSolverFeti.cc b/AMDiS/src/parallel/PetscSolverFeti.cc
index 9b076f92..18f5ed31 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.cc
+++ b/AMDiS/src/parallel/PetscSolverFeti.cc
@@ -27,7 +27,7 @@ namespace AMDiS {
 
     void *ctx;
     MatShellGetContext(mat, &ctx);
-    PetscSchurPrimalData* data = static_cast<PetscSchurPrimalData*>(ctx);
+    SchurPrimalData* data = static_cast<SchurPrimalData*>(ctx);
 
     MatMult(*(data->mat_b_primal), x, data->tmp_vec_b);
     KSPSolve(*(data->ksp_b), data->tmp_vec_b, data->tmp_vec_b);
@@ -43,28 +43,25 @@ namespace AMDiS {
   // y = mat * x
   int petscMultMatFeti(Mat mat, Vec x, Vec y)
   {
-    // F = L inv(K_BB) trans(L) + L inv(K_BB) K_BPi inv(S_PiPi) K_PiB inv(K_BB) trans(L)
+    // F = L inv(K_BB) trans(L) + L inv(K_BB) K_BPi inv(S_PiPi) K_PiB inv(K_BB) trans(L)
+    // => F = L [I + inv(K_BB) K_BPi inv(S_PiPi) K_PiB] inv(K_BB) trans(L)
 
     void *ctx;
     MatShellGetContext(mat, &ctx);
     FetiData* data = static_cast<FetiData*>(ctx);
 
-    // y = L inv(K_BB) trans(L) x
-    MatMultTranspose(*(data->mat_lagrange), x, data->tmp_vec_b);
-    KSPSolve(*(data->ksp_b), data->tmp_vec_b, data->tmp_vec_b);
-    MatMult(*(data->mat_lagrange), data->tmp_vec_b, y);
+    // tmp_vec_b0 = inv(K_BB) trans(L) x
+    MatMultTranspose(*(data->mat_lagrange), x, data->tmp_vec_b0);
+    KSPSolve(*(data->ksp_b), data->tmp_vec_b0, data->tmp_vec_b0);
 
-    // tmp_vec_primal = inv(S_PiPi) K_PiB inv(K_BB) trans(L)
-    MatMult(*(data->mat_primal_b), data->tmp_vec_b, data->tmp_vec_primal);
+    // tmp_vec_b1 = inv(K_BB) K_BPi inv(S_PiPi) K_PiB tmp_vec_b0
+    MatMult(*(data->mat_primal_b), data->tmp_vec_b0, data->tmp_vec_primal);
     KSPSolve(*(data->ksp_schur_primal), data->tmp_vec_primal, data->tmp_vec_primal);
+    MatMult(*(data->mat_b_primal), data->tmp_vec_primal, data->tmp_vec_b1);
 
-    // tmp_vec_lagrange = L inv(K_BB) K_BPi tmp_vec_primal
-    //                  = L inv(K_BB) K_BPi inv(S_PiPi) K_PiB inv(K_BB) trans(L)
-    MatMult(*(data->mat_b_primal), data->tmp_vec_primal, data->tmp_vec_b);
-    KSPSolve(*(data->ksp_b), data->tmp_vec_b, data->tmp_vec_b);
-    MatMult(*(data->mat_lagrange), data->tmp_vec_b, data->tmp_vec_lagrange);
-
-    VecAXPBY(y, 1.0, 1.0, data->tmp_vec_lagrange);
+    // y = L (tmp_vec_b0 + tmp_vec_b1)
+    VecAXPBY(data->tmp_vec_b0, 1.0, 1.0, data->tmp_vec_b1);
+    MatMult(*(data->mat_lagrange), data->tmp_vec_b0, y);
 
     return 0;
   }
@@ -593,18 +590,18 @@
   {
     FUNCNAME("PetscSolverFeti::createSchurPrimal()");
 
-    petscSchurPrimalData.mat_primal_primal = &mat_primal_primal;
-    petscSchurPrimalData.mat_primal_b = &mat_primal_b;
-    petscSchurPrimalData.mat_b_primal = &mat_b_primal;
-    petscSchurPrimalData.ksp_b = &ksp_b;
+    schurPrimalData.mat_primal_primal = &mat_primal_primal;
+    schurPrimalData.mat_primal_b = &mat_primal_b;
+    schurPrimalData.mat_b_primal = &mat_b_primal;
+    schurPrimalData.ksp_b = &ksp_b;
 
-    VecDuplicate(f_b, &(petscSchurPrimalData.tmp_vec_b));
-    VecDuplicate(f_primal, &(petscSchurPrimalData.tmp_vec_primal));
+    VecDuplicate(f_b, &(schurPrimalData.tmp_vec_b));
+    VecDuplicate(f_primal, &(schurPrimalData.tmp_vec_primal));
 
     MatCreateShell(PETSC_COMM_WORLD,
                    nRankPrimals * nComponents, nRankPrimals * nComponents,
                    nOverallPrimals * nComponents, nOverallPrimals * nComponents,
-                   &petscSchurPrimalData,
+                   &schurPrimalData,
                    &mat_schur_primal);
     MatShellSetOperation(mat_schur_primal, MATOP_MULT,
                          (void(*)(void))petscMultMatSchurPrimal);
@@ -620,13 +617,13 @@
   {
     FUNCNAME("PetscSolverFeti::destroySchurPrimal()");
 
-    petscSchurPrimalData.mat_primal_primal = PETSC_NULL;
-    petscSchurPrimalData.mat_primal_b = PETSC_NULL;
-    petscSchurPrimalData.mat_b_primal = PETSC_NULL;
-    petscSchurPrimalData.ksp_b = PETSC_NULL;
+    schurPrimalData.mat_primal_primal = PETSC_NULL;
+    schurPrimalData.mat_primal_b = PETSC_NULL;
+    schurPrimalData.mat_b_primal = PETSC_NULL;
+    schurPrimalData.ksp_b = PETSC_NULL;
 
-    VecDestroy(&petscSchurPrimalData.tmp_vec_b);
-    VecDestroy(&petscSchurPrimalData.tmp_vec_primal);
+    VecDestroy(&schurPrimalData.tmp_vec_b);
+    VecDestroy(&schurPrimalData.tmp_vec_primal);
 
     MatDestroy(&mat_schur_primal);
     KSPDestroy(&ksp_schur_primal);
@@ -646,11 +643,9 @@
 
     fetiData.ksp_b = &ksp_b;
     fetiData.ksp_schur_primal = &ksp_schur_primal;
-
-    VecDuplicate(f_b, &(fetiData.tmp_vec_b));
+    VecDuplicate(f_b, &(fetiData.tmp_vec_b0));
+    VecDuplicate(f_b, &(fetiData.tmp_vec_b1));
     VecDuplicate(f_primal, &(fetiData.tmp_vec_primal));
-    MatGetVecs(mat_lagrange, PETSC_NULL, &(fetiData.tmp_vec_lagrange));
-
 
     MatCreateShell(PETSC_COMM_WORLD,
                    nRankLagrange * nComponents, nRankLagrange * nComponents,
@@ -729,10 +724,9 @@
 
     fetiData.ksp_b = PETSC_NULL;
     fetiData.ksp_schur_primal = PETSC_NULL;
 
-    VecDestroy(&fetiData.tmp_vec_b);
+    VecDestroy(&fetiData.tmp_vec_b0);
+    VecDestroy(&fetiData.tmp_vec_b1);
     VecDestroy(&fetiData.tmp_vec_primal);
-    VecDestroy(&fetiData.tmp_vec_lagrange);
-
     MatDestroy(&mat_feti);
     KSPDestroy(&ksp_feti);
@@ -1391,17 +1385,19 @@
 
     // === Create new rhs ===
 
-    // vec_rhs = L * inv(K_BB) * f_b
+    // d = L inv(K_BB) f_B - L inv(K_BB) K_BPi inv(S_PiPi) [f_Pi - K_PiB inv(K_BB) f_B]
+
+    // vec_rhs = L * inv(K_BB) * f_B
     KSPSolve(ksp_b, f_b, tmp_b0);
     MatMult(mat_lagrange, tmp_b0, vec_rhs);
 
-    // tmp_primal0 = M_PiB * inv(K_BB) * f_b
+    // tmp_primal0 = M_PiB * inv(K_BB) * f_B
     MatMult(mat_primal_b, tmp_b0, tmp_primal0);
 
-    // tmp_primal0 = f_Pi - M_PiB * inv(K_BB) * f_b
-    VecAXPBY(tmp_primal0, -1.0, 1.0, f_primal);
+    // tmp_primal0 = f_Pi - M_PiB * inv(K_BB) * f_B
+    VecAXPBY(tmp_primal0, 1.0, -1.0, f_primal);
 
-    // tmp_primal0 = inv(S_PiPi) (f_Pi - M_PiB * inv(K_BB) * f_b)
+    // tmp_primal0 = inv(S_PiPi) (f_Pi - M_PiB * inv(K_BB) * f_B)
     KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0);
 
     //
@@ -1410,7 +1406,7 @@
     MatMult(mat_lagrange, tmp_b0, tmp_lagrange0);
 
     //
-    VecAXPBY(vec_rhs, 1.0, 1.0, tmp_lagrange0);
+    VecAXPBY(vec_rhs, -1.0, 1.0, tmp_lagrange0);
 
 
     // === Solve with FETI-DP operator. ===
diff --git a/AMDiS/src/parallel/PetscSolverFeti.h b/AMDiS/src/parallel/PetscSolverFeti.h
index 6a03db6c..4dd72bca 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.h
+++ b/AMDiS/src/parallel/PetscSolverFeti.h
@@ -37,7 +37,7 @@ namespace AMDiS {
    * This structure is used when defining the MatShell operation for solving
    * primal schur complement. \ref petscMultMatSchurPrimal
    */
-  struct PetscSchurPrimalData {
+  struct SchurPrimalData {
     /// Pointers to the matrix containing the primal variables.
     Mat *mat_primal_primal;
 
@@ -70,15 +70,12 @@ namespace AMDiS {
     /// Matrix of Lagrange variables.
     Mat *mat_lagrange;
 
-    /// Temporal vector on the B variables.
-    Vec tmp_vec_b;
+    /// Temporal vectors on the B variables.
+    Vec tmp_vec_b0, tmp_vec_b1;
 
     /// Temporal vector on the primal variables.
     Vec tmp_vec_primal;
 
-    /// Temporal vector on the Lagrange variables.
-    Vec tmp_vec_lagrange;
-
     /// Pointer to the solver for \ref PetscSolverFeti::mat_bb.
     KSP *ksp_b;
 
@@ -293,7 +290,7 @@
     Mat mat_schur_primal;
 
     /// Data for MatMult operation in matrix \ref mat_schur_primal
-    PetscSchurPrimalData petscSchurPrimalData;
+    SchurPrimalData schurPrimalData;
 
     /// PETSc solver object to solve a system with FETI-DP.
     KSP ksp_feti;
-- 
GitLab
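
Note on the algebra in the comments above — a sketch, not part of the committed
sources, using the notation of the patch comments (K_BB, K_BPi, K_PiB, the
primal Schur complement S_PiPi, and the jump operator L stored in
mat_lagrange). In LaTeX, the FETI-DP interface operator applied by
petscMultMatFeti and the right-hand side assembled before the KSPSolve on
ksp_feti read:

    F = L K_{BB}^{-1} L^{T}
        + L K_{BB}^{-1} K_{B\Pi} S_{\Pi\Pi}^{-1} K_{\Pi B} K_{BB}^{-1} L^{T}
      = L \left[ I + K_{BB}^{-1} K_{B\Pi} S_{\Pi\Pi}^{-1} K_{\Pi B} \right] K_{BB}^{-1} L^{T}

    d = L K_{BB}^{-1} f_{B}
        - L K_{BB}^{-1} K_{B\Pi} S_{\Pi\Pi}^{-1} \left( f_{\Pi} - K_{\Pi B} K_{BB}^{-1} f_{B} \right)

The factored form of F is what motivates the two work vectors: tmp_vec_b0
holds K_BB^{-1} trans(L) x and feeds both terms, and the jump operator L is
applied once at the end instead of twice. The VecAXPBY sign changes follow
from PETSc's convention VecAXPBY(y, a, b, x), which computes y = a*x + b*y:
VecAXPBY(tmp_primal0, 1.0, -1.0, f_primal) yields f_Pi - M_PiB inv(K_BB) f_B
as the comment states, and VecAXPBY(vec_rhs, -1.0, 1.0, tmp_lagrange0)
subtracts the coarse-space correction, matching the minus sign in d.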