From ef52255eebd62f941135f5ed9c6b5ac8a182c02f Mon Sep 17 00:00:00 2001 From: Thomas Witkowski <thomas.witkowski@gmx.de> Date: Wed, 1 Oct 2008 09:12:34 +0000 Subject: [PATCH] * Bugfixes for muli-threaded runs. Valgrind and Intel thread checker run now with 0 Errors! --- AMDiS/src/BasisFunction.cc | 6 +++--- AMDiS/src/BoundaryManager.cc | 2 +- AMDiS/src/DOFVector.hh | 8 ++++---- AMDiS/src/DirichletBC.cc | 4 ++-- AMDiS/src/OpenMP.h | 4 ++++ AMDiS/src/Operator.cc | 2 +- AMDiS/src/Operator.hh | 6 +++--- AMDiS/src/ProblemVec.cc | 8 ++++++++ AMDiS/src/ResidualParallelEstimator.cc | 4 ++-- AMDiS/src/SecondOrderAssembler.cc | 6 +++--- AMDiS/src/SubAssembler.cc | 2 +- AMDiS/src/TraverseParallel.cc | 2 +- AMDiS/src/ZeroOrderAssembler.cc | 6 +++--- 13 files changed, 36 insertions(+), 24 deletions(-) diff --git a/AMDiS/src/BasisFunction.cc b/AMDiS/src/BasisFunction.cc index f64a0478..b71b642c 100644 --- a/AMDiS/src/BasisFunction.cc +++ b/AMDiS/src/BasisFunction.cc @@ -25,10 +25,10 @@ namespace AMDiS { nDOF = NEW DimVec<int>(dim, DEFAULT_VALUE, -1); dow = Global::getGeo(WORLD); - grdTmpVec1.resize(omp_get_max_threads()); - grdTmpVec2.resize(omp_get_max_threads()); + grdTmpVec1.resize(omp_get_num_procs()); + grdTmpVec2.resize(omp_get_num_procs()); - for (int i = 0; i < omp_get_max_threads(); i++) { + for (int i = 0; i < omp_get_num_procs(); i++) { grdTmpVec1[i] = NEW DimVec<double>(dim, DEFAULT_VALUE, 0.0); grdTmpVec2[i] = NEW DimVec<double>(dim, DEFAULT_VALUE, 0.0); } diff --git a/AMDiS/src/BoundaryManager.cc b/AMDiS/src/BoundaryManager.cc index 8ae905e8..416c08de 100644 --- a/AMDiS/src/BoundaryManager.cc +++ b/AMDiS/src/BoundaryManager.cc @@ -11,7 +11,7 @@ namespace AMDiS { BoundaryManager::BoundaryManager(const FiniteElemSpace *feSpace) { - localBounds.resize(omp_get_max_threads()); + localBounds.resize(omp_get_num_procs()); allocatedMemoryLocalBounds = feSpace->getBasisFcts()->getNumber(); for (int i = 0; i < static_cast<int>(localBounds.size()); i++) { localBounds[i] = GET_MEMORY(BoundaryType, allocatedMemoryLocalBounds); diff --git a/AMDiS/src/DOFVector.hh b/AMDiS/src/DOFVector.hh index a8b4aa2c..f408e1e1 100644 --- a/AMDiS/src/DOFVector.hh +++ b/AMDiS/src/DOFVector.hh @@ -31,11 +31,11 @@ namespace AMDiS { nBasFcts = feSpace->getBasisFcts()->getNumber(); int dim = feSpace->getMesh()->getDim(); - localIndices.resize(omp_get_max_threads()); - grdPhis.resize(omp_get_max_threads()); - D2Phis.resize(omp_get_max_threads()); + localIndices.resize(omp_get_num_procs()); + grdPhis.resize(omp_get_num_procs()); + D2Phis.resize(omp_get_num_procs()); - for (int i = 0; i < omp_get_max_threads(); i++) { + for (int i = 0; i < omp_get_num_procs(); i++) { localIndices[i] = GET_MEMORY(DegreeOfFreedom, this->nBasFcts); grdPhis[i] = NEW DimVec<double>(dim, DEFAULT_VALUE, 0.0); D2Phis[i] = NEW DimMat<double>(dim, NO_INIT); diff --git a/AMDiS/src/DirichletBC.cc b/AMDiS/src/DirichletBC.cc index 0cbd6ff4..cd67b70d 100644 --- a/AMDiS/src/DirichletBC.cc +++ b/AMDiS/src/DirichletBC.cc @@ -14,7 +14,7 @@ namespace AMDiS { f(fct), dofVec(NULL) { - worldCoords.resize(omp_get_max_threads()); + worldCoords.resize(omp_get_num_procs()); }; DirichletBC::DirichletBC(BoundaryType type, @@ -23,7 +23,7 @@ namespace AMDiS { f(NULL), dofVec(vec) { - worldCoords.resize(omp_get_max_threads()); + worldCoords.resize(omp_get_num_procs()); } void DirichletBC::fillBoundaryCondition(DOFMatrix* matrix, diff --git a/AMDiS/src/OpenMP.h b/AMDiS/src/OpenMP.h index 63cd1473..37d351d2 100644 --- a/AMDiS/src/OpenMP.h +++ b/AMDiS/src/OpenMP.h @@ -15,6 +15,10 @@ inline int omp_get_max_threads() { return 1; } +inline int omp_get_num_procs() { + return 1; +} + inline int omp_get_num_threads() { return 1; } diff --git a/AMDiS/src/Operator.cc b/AMDiS/src/Operator.cc index 5141e7eb..cedec770 100644 --- a/AMDiS/src/Operator.cc +++ b/AMDiS/src/Operator.cc @@ -180,7 +180,7 @@ namespace AMDiS { uhOld(NULL), optimized(true) { - int maxThreads = omp_get_max_threads(); + int maxThreads = omp_get_num_procs(); assembler.resize(maxThreads); secondOrder.resize(maxThreads); diff --git a/AMDiS/src/Operator.hh b/AMDiS/src/Operator.hh index 7e9c5b11..d0508f8c 100644 --- a/AMDiS/src/Operator.hh +++ b/AMDiS/src/Operator.hh @@ -6,7 +6,7 @@ namespace AMDiS { secondOrder[0].push_back(term); term->operat = this; - for (int i = 1; i < omp_get_max_threads(); i++) { + for (int i = 1; i < omp_get_num_procs(); i++) { T *newTerm = NEW T(static_cast<const T>(*term)); secondOrder[i].push_back(newTerm); } @@ -23,7 +23,7 @@ namespace AMDiS { } term->operat = this; - for (int i = 1; i < omp_get_max_threads(); i++) { + for (int i = 1; i < omp_get_num_procs(); i++) { T *newTerm = NEW T(static_cast<const T>(*term)); if (type == GRD_PSI) { firstOrderGrdPsi[i].push_back(newTerm); @@ -39,7 +39,7 @@ namespace AMDiS { zeroOrder[0].push_back(term); term->operat = this; - for (int i = 1; i < omp_get_max_threads(); i++) { + for (int i = 1; i < omp_get_num_procs(); i++) { T *newTerm = NEW T(static_cast<const T>(*term)); zeroOrder[i].push_back(newTerm); } diff --git a/AMDiS/src/ProblemVec.cc b/AMDiS/src/ProblemVec.cc index 76d4b385..532c6590 100644 --- a/AMDiS/src/ProblemVec.cc +++ b/AMDiS/src/ProblemVec.cc @@ -943,6 +943,14 @@ namespace AMDiS { tmpVector->set(0.0); } + // After creating privat copies of the DOFMatrix and the DOFVector, all threads + // have to wait at this barrier. Especially for small problems this is required, + // because otherwise one thread may be finished with assembling, before another + // has made his private copy. +#ifdef _OPENMP +#pragma omp barrier +#endif + // Because we are using the parallel traverse stack, each thread will // traverse only a part of the mesh. ElInfo *elInfo = stack.traverseFirst(mesh, -1, assembleFlag); diff --git a/AMDiS/src/ResidualParallelEstimator.cc b/AMDiS/src/ResidualParallelEstimator.cc index a6b285d8..5176a789 100644 --- a/AMDiS/src/ResidualParallelEstimator.cc +++ b/AMDiS/src/ResidualParallelEstimator.cc @@ -14,9 +14,9 @@ namespace AMDiS { GET_PARAMETER(0, name + "->C3", "%f", &C3); C3 = C3 > 1.e-25 ? sqr(C3) : 0.0; - seqEstimators_.resize(omp_get_max_threads()); + seqEstimators_.resize(omp_get_num_procs()); - for (int i = 0; i < omp_get_max_threads(); i++) { + for (int i = 0; i < omp_get_num_procs(); i++) { seqEstimators_[i] = NEW ResidualEstimator(name, r); } } diff --git a/AMDiS/src/SecondOrderAssembler.cc b/AMDiS/src/SecondOrderAssembler.cc index bd1dc76e..181ef0ca 100644 --- a/AMDiS/src/SecondOrderAssembler.cc +++ b/AMDiS/src/SecondOrderAssembler.cc @@ -88,8 +88,8 @@ namespace AMDiS { q11 = Q11PsiPhi::provideQ11PsiPhi(owner->getRowFESpace()->getBasisFcts(), owner->getColFESpace()->getBasisFcts(), quadrature); - tmpLALt.resize(omp_get_max_threads()); - for (int i = 0; i < omp_get_max_threads(); i++) { + tmpLALt.resize(omp_get_num_procs()); + for (int i = 0; i < omp_get_num_procs(); i++) { tmpLALt[i] = NEW DimMat<double>*; *(tmpLALt[i]) = NEW DimMat<double>(dim, NO_INIT); } @@ -163,7 +163,7 @@ namespace AMDiS { Quad2::Quad2(Operator *op, Assembler *assembler, Quadrature *quad) : SecondOrderAssembler(op, assembler, quad, true) { - tmpLALt.resize(omp_get_max_threads()); + tmpLALt.resize(omp_get_num_procs()); } Quad2::~Quad2() diff --git a/AMDiS/src/SubAssembler.cc b/AMDiS/src/SubAssembler.cc index 0414f82c..3f8e7fd7 100644 --- a/AMDiS/src/SubAssembler.cc +++ b/AMDiS/src/SubAssembler.cc @@ -34,7 +34,7 @@ namespace AMDiS { nRow = psi->getNumber(); nCol = phi->getNumber(); - int maxThreads = omp_get_max_threads(); + int maxThreads = omp_get_num_procs(); terms.resize(maxThreads); switch (order) { diff --git a/AMDiS/src/TraverseParallel.cc b/AMDiS/src/TraverseParallel.cc index bd770b46..60a627da 100644 --- a/AMDiS/src/TraverseParallel.cc +++ b/AMDiS/src/TraverseParallel.cc @@ -9,7 +9,7 @@ namespace AMDiS { TraverseParallelStack::TraverseParallelStack(int nThreads) { if (nThreads == 0) { - nThreads_ = omp_get_max_threads(); + nThreads_ = omp_get_num_procs(); } else { nThreads_ = nThreads; } diff --git a/AMDiS/src/ZeroOrderAssembler.cc b/AMDiS/src/ZeroOrderAssembler.cc index d9df629e..5e2b1535 100644 --- a/AMDiS/src/ZeroOrderAssembler.cc +++ b/AMDiS/src/ZeroOrderAssembler.cc @@ -39,7 +39,7 @@ namespace AMDiS { optimized ? &optimizedSubAssemblers : &standardSubAssemblers; int myRank = omp_get_thread_num(); - std::vector<OperatorTerm*> opTerms = op->zeroOrder[myRank]; + std::vector<OperatorTerm*> opTerms = op->zeroOrder[myRank]; sort(opTerms.begin(), opTerms.end()); @@ -227,12 +227,12 @@ namespace AMDiS { FastQuadZOA::FastQuadZOA(Operator *op, Assembler *assembler, Quadrature *quad) : ZeroOrderAssembler(op, assembler, quad, true) { - cPtrs.resize(omp_get_max_threads()); + cPtrs.resize(omp_get_num_procs()); } FastQuadZOA::~FastQuadZOA() { - for (int i = 0; i < omp_get_max_threads(); i++) { + for (int i = 0; i < omp_get_num_procs(); i++) { FREE_MEMORY(cPtrs[i], double, quadrature->getNumPoints()); } } -- GitLab