From 9fc5305263fc6b0d7eb36fadd68eeda696ff2439 Mon Sep 17 00:00:00 2001
From: Thomas Witkowski <thomas.witkowski@gmx.de>
Date: Thu, 1 Nov 2012 12:27:38 +0000
Subject: [PATCH] Some performance issues, part I.

---
 AMDiS/src/parallel/ElementObjectDatabase.cc | 116 ++++++++++++--------
 AMDiS/src/parallel/ElementObjectDatabase.h  |  34 +++---
 AMDiS/src/parallel/MeshDistributor.cc       |  19 +++-
 3 files changed, 102 insertions(+), 67 deletions(-)

diff --git a/AMDiS/src/parallel/ElementObjectDatabase.cc b/AMDiS/src/parallel/ElementObjectDatabase.cc
index 38b147d2..843954ec 100644
--- a/AMDiS/src/parallel/ElementObjectDatabase.cc
+++ b/AMDiS/src/parallel/ElementObjectDatabase.cc
@@ -563,9 +563,10 @@ namespace AMDiS {
 
 	  bool reverseMode = 
 	    BoundaryObject::computeReverseMode(obj0, obj1, feSpace, INTERIOR);
-
-	  edgeReverseMode[make_pair(els[i], els[j])] = reverseMode;
-	  edgeReverseMode[make_pair(els[j], els[i])] = reverseMode;
+	  if (reverseMode) {
+	    edgeReverseMode.insert(make_pair(els[i], els[j]));
+	    edgeReverseMode.insert(make_pair(els[j], els[i]));
+	  }
 	}
       }
     }
@@ -587,9 +588,10 @@ namespace AMDiS {
 
 	  bool reverseMode = 
 	    BoundaryObject::computeReverseMode(obj0, obj1, feSpace, INTERIOR);
-
-	  faceReverseMode[make_pair(els[i], els[j])] = reverseMode;
-	  faceReverseMode[make_pair(els[j], els[i])] = reverseMode;
+	  if (reverseMode) {
+	    faceReverseMode.insert(make_pair(els[i], els[j]));
+	    faceReverseMode.insert(make_pair(els[j], els[i]));
+	  }
 	}
       }
     }
@@ -615,9 +617,10 @@ namespace AMDiS {
 	  bool reverseMode = 
 	    BoundaryObject::computeReverseMode(obj0, obj1, feSpace, 
 					       edgeIt->second);
-
-	  edgeReverseMode[make_pair(edges0[i], edges1[j])] = reverseMode;
-	  edgeReverseMode[make_pair(edges1[j], edges0[i])] = reverseMode;
+	  if (reverseMode) {
+	    edgeReverseMode.insert(make_pair(edges0[i], edges1[j]));
+	    edgeReverseMode.insert(make_pair(edges1[j], edges0[i]));
+	  }
 	}
       }
     }
@@ -638,9 +641,10 @@ namespace AMDiS {
       
       bool reverseMode = 
 	BoundaryObject::computeReverseMode(obj0, obj1, feSpace, faceIt->second);
-      
-      faceReverseMode[make_pair(faces0[0], faces1[0])] = reverseMode;
-      faceReverseMode[make_pair(faces1[0], faces0[0])] = reverseMode;
+      if (reverseMode) {
+	faceReverseMode.insert(make_pair(faces0[0], faces1[0]));
+	faceReverseMode.insert(make_pair(faces1[0], faces0[0]));
+      }
     }  
   }
 
@@ -863,19 +867,18 @@ namespace AMDiS {
 
     nSize = edgeReverseMode.size();
     SerUtil::serialize(out, nSize);
-    for (map<pair<ElementObjectData, ElementObjectData>, bool>::iterator it = edgeReverseMode.begin();
+    for (std::set<pair<ElementObjectData, ElementObjectData> >::iterator it = edgeReverseMode.begin();
 	 it != edgeReverseMode.end(); ++it) {
-      it->first.first.serialize(out);
-      it->first.second.serialize(out);
-      SerUtil::serialize(out, it->second);
+      it->first.serialize(out);
+      it->second.serialize(out);
     }
 
     nSize = faceReverseMode.size();
     SerUtil::serialize(out, nSize);
-    for (map<pair<ElementObjectData, ElementObjectData>, bool>::iterator it = faceReverseMode.begin();
+    for (std::set<pair<ElementObjectData, ElementObjectData> >::iterator it = faceReverseMode.begin();
 	 it != faceReverseMode.end(); ++it) {
-      it->first.first.serialize(out);
-      it->first.second.serialize(out);
-      SerUtil::serialize(out, it->second);
+      // Only the object pair is written; membership in the set means "reversed".
+      it->first.serialize(out);
+      it->second.serialize(out);
     }
   }
@@ -1012,24 +1015,20 @@ namespace AMDiS {
     edgeReverseMode.clear();
     for (int i = 0; i < nSize; i++) {
       ElementObjectData obj0, obj1;
-      bool reverseMode;
       obj0.deserialize(in);
       obj1.deserialize(in);
-      SerUtil::deserialize(in, reverseMode);
 
-      edgeReverseMode[make_pair(obj0, obj1)] = reverseMode;
+      edgeReverseMode.insert(make_pair(obj0, obj1));
     }
 
     SerUtil::deserialize(in, nSize);
     faceReverseMode.clear();
     for (int i = 0; i < nSize; i++) {
       ElementObjectData obj0, obj1;
-      bool reverseMode;
       obj0.deserialize(in);
       obj1.deserialize(in);
-      SerUtil::deserialize(in, reverseMode);
 
-      faceReverseMode[make_pair(obj0, obj1)] = reverseMode;
+      faceReverseMode.insert(make_pair(obj0, obj1));
     }
   }
 
@@ -1086,6 +1085,8 @@ namespace AMDiS {
 
   unsigned long ElementObjectDatabase::calculateMemoryUsage()
   {
+    FUNCNAME("ElementObjectDatabase::calculateMemoryUsage()");
+
     const unsigned int structElObjDataSize = sizeof(ElementObjectData);
     const unsigned int dofSize = sizeof(DegreeOfFreedom);
     const unsigned int edgeSize = sizeof(DofEdge);
@@ -1093,73 +1094,96 @@ namespace AMDiS {
     const unsigned int vectorOverhead = sizeof(vector<int>);
     const unsigned int mapOverhead = 48; //sizeof(_Rb_tree<int, int>);
     const unsigned int mapEntryOverhead = 40; // sizeof(_Rb_tree_node_base);
-    
+    const unsigned int setOverhead = 48; // assumed equal to mapOverhead
+    const unsigned int setEntryOverhead = 40; // assumed equal to mapEntryOverhead
+
 
     unsigned long value = 0;
+    unsigned long tmp = 0; // subtotal of the current group: logged, then added to value
 
     // vertexElements
-    value += mapOverhead + vertexElements.size() * mapEntryOverhead;
+    tmp = mapOverhead + vertexElements.size() * mapEntryOverhead;
     for (map<DegreeOfFreedom, vector<ElementObjectData> >::iterator mapIt =
 	   vertexElements.begin(); mapIt != vertexElements.end(); ++mapIt)
-      value += dofSize + vectorOverhead + mapIt->second.size() * structElObjDataSize;
+      tmp += dofSize + vectorOverhead + mapIt->second.size() * structElObjDataSize;
+    MSG("EL-OBJ-DB MEM 01: %lu\n", tmp);
+    value += tmp;
 
     // edgeElements
-    value += mapOverhead + edgeElements.size() * mapEntryOverhead;
+    tmp = mapOverhead + edgeElements.size() * mapEntryOverhead;
     for (map<DofEdge, vector<ElementObjectData> >::iterator mapIt =
 	   edgeElements.begin(); mapIt != edgeElements.end(); ++mapIt)
-      value += dofSize + vectorOverhead + mapIt->second.size() * structElObjDataSize;
+      tmp += edgeSize + vectorOverhead + mapIt->second.size() * structElObjDataSize;
+    MSG("EL-OBJ-DB MEM 02: %lu\n", tmp);
+    value += tmp;
 
     // faceElements
-    value += mapOverhead + faceElements.size() * mapEntryOverhead;
+    tmp = mapOverhead + faceElements.size() * mapEntryOverhead;
     for (map<DofFace, vector<ElementObjectData> >::iterator mapIt =
 	   faceElements.begin(); mapIt != faceElements.end(); ++mapIt)
-      value += dofSize + vectorOverhead + mapIt->second.size() * structElObjDataSize;
+      tmp += faceSize + vectorOverhead + mapIt->second.size() * structElObjDataSize;
+    MSG("EL-OBJ-DB MEM 03: %lu\n", tmp);
+    value += tmp;
+    tmp = 0;
 
     // vertexLocalMap
-    value += mapOverhead + vertexLocalMap.size() * (mapEntryOverhead + structElObjDataSize + dofSize);
+    tmp += mapOverhead + vertexLocalMap.size() * (mapEntryOverhead + structElObjDataSize + dofSize);
 
     // edgeLocalMap
-    value += mapOverhead + edgeLocalMap.size() * (mapEntryOverhead + structElObjDataSize + edgeSize);
+    tmp += mapOverhead + edgeLocalMap.size() * (mapEntryOverhead + structElObjDataSize + edgeSize);
 
     // faceLocalMap
-    value += mapOverhead + faceLocalMap.size() * (mapEntryOverhead + structElObjDataSize + faceSize);
+    tmp += mapOverhead + faceLocalMap.size() * (mapEntryOverhead + structElObjDataSize + faceSize);
+
+    MSG("EL-OBJ-DB MEM 04: %lu\n", tmp);
+    value += tmp;
 
     // vertexInRank
-    value += mapOverhead + vertexInRank.size() * mapEntryOverhead;
+    tmp = mapOverhead + vertexInRank.size() * mapEntryOverhead;
     for (map<DegreeOfFreedom, map<int, ElementObjectData> >::iterator mapIt = 
 	   vertexInRank.begin(); mapIt != vertexInRank.end(); ++mapIt)
-      value += dofSize + mapOverhead + 
+      tmp += dofSize + mapOverhead + 
 	mapIt->second.size() * (mapEntryOverhead + sizeof(int) + structElObjDataSize);
+    MSG("EL-OBJ-DB MEM 05: %lu\n", tmp);
+    value += tmp;
 
     // edgeInRank
-    value += mapOverhead + edgeInRank.size() * mapEntryOverhead;
+    tmp = mapOverhead + edgeInRank.size() * mapEntryOverhead;
     for (map<DofEdge, map<int, ElementObjectData> >::iterator mapIt = 
 	   edgeInRank.begin(); mapIt != edgeInRank.end(); ++mapIt)
-      value += edgeSize + mapOverhead + 
+      tmp += edgeSize + mapOverhead + 
 	mapIt->second.size() * (mapEntryOverhead + sizeof(int) + structElObjDataSize);
+    MSG("EL-OBJ-DB MEM 06: %lu\n", tmp);
+    value += tmp;
 
     // faceInRank
-    value += mapOverhead + faceInRank.size() * mapEntryOverhead;
+    tmp = mapOverhead + faceInRank.size() * mapEntryOverhead;
     for (map<DofFace, map<int, ElementObjectData> >::iterator mapIt = 
 	   faceInRank.begin(); mapIt != faceInRank.end(); ++mapIt)
-      value += faceSize + mapOverhead + 
+      tmp += faceSize + mapOverhead + 
 	mapIt->second.size() * (mapEntryOverhead + sizeof(int) + structElObjDataSize);
+    MSG("EL-OBJ-DB MEM 07: %lu\n", tmp);
+    value += tmp;
+    tmp = 0;
 
     if (bConnMap.size() || periodicVertices.size() || periodicDofAssoc.size()) {
       ERROR_EXIT("Not yet implemented for periodic meshes!\n");
     }
 
     // edgeReverseMode
-    value += mapOverhead + edgeReverseMode.size() * (mapEntryOverhead + 2 * structElObjDataSize + sizeof(bool));
+    tmp += setOverhead + edgeReverseMode.size() * (setEntryOverhead + 2 * structElObjDataSize);
 
     // faceReverseMode
-    value += mapOverhead + faceReverseMode.size() * (mapEntryOverhead + 2 * structElObjDataSize + sizeof(bool));
+    tmp += setOverhead + faceReverseMode.size() * (setEntryOverhead + 2 * structElObjDataSize);
 
     // macroElIndexMap
-    value += mapOverhead + macroElIndexMap.size() * (mapEntryOverhead + sizeof(int) + sizeof(int*));
+    tmp += mapOverhead + macroElIndexMap.size() * (mapEntryOverhead + sizeof(int) + sizeof(int*));
 
     // macroElIndexTypeMap
-    value += mapOverhead + macroElIndexTypeMap.size() * (mapEntryOverhead + sizeof(int) + sizeof(int*));
+    tmp += mapOverhead + macroElIndexTypeMap.size() * (mapEntryOverhead + sizeof(int) + sizeof(int*));
+
+    MSG("EL-OBJ-DB MEM 08: %lu\n", tmp);
+    value += tmp;
 
     return value;
   }
diff --git a/AMDiS/src/parallel/ElementObjectDatabase.h b/AMDiS/src/parallel/ElementObjectDatabase.h
index f781c429..3bec6945 100644
--- a/AMDiS/src/parallel/ElementObjectDatabase.h
+++ b/AMDiS/src/parallel/ElementObjectDatabase.h
@@ -385,19 +385,19 @@ namespace AMDiS {
       if (mesh->getDim() == 2)
 	return true;
 
-      TEST_EXIT_DBG(edgeReverseMode.count(make_pair(obj0, obj1)))
-	("Should not happen!\n");
+      if (edgeReverseMode.empty())
+	return false;
 
-      return edgeReverseMode[make_pair(obj0, obj1)];
+      return static_cast<bool>(edgeReverseMode.count(make_pair(obj0, obj1)));
     }
 
     inline bool getFaceReverseMode(ElementObjectData &obj0, 
 				   ElementObjectData &obj1)
     {
-      TEST_EXIT_DBG(faceReverseMode.count(make_pair(obj0, obj1)))
-	("Should not happen!\n");
+      if (faceReverseMode.empty())
+	return false;
 
-      return faceReverseMode[make_pair(obj0, obj1)];
+      return static_cast<bool>(faceReverseMode.count(make_pair(obj0, obj1)));
     }
 
     /// Returns true if there is periodic data.
@@ -459,15 +459,13 @@ namespace AMDiS {
     /// Adds the i-th face of an element to the object database.
     void addFace(Element *el, int ith);
 
-    /** \brief
-     * Creates final data of the periodic boundaries. Must be called after all
-     * elements of the mesh are added to the object database. Then this functions
-     * search for indirectly connected vertices in periodic boundaries. This is
-     * only the case, if there are more than one boundary conditions. Then, e.g., 
-     * in 2D, all edges of a square are iterectly connected. In 3D, if the macro 
-     * mesh is a box, all eight vertex nodes and always four of the 12 edges are 
-     * indirectly connected.
-     */
+    /// Creates final data of the periodic boundaries. Must be called after all
+    /// elements of the mesh are added to the object database. This function
+    /// then searches for indirectly connected vertices in periodic
+    /// boundaries. This is only the case if there is more than one boundary
+    /// condition. Then, e.g., in 2D, all edges of a square are indirectly
+    /// connected. In 3D, if the macro mesh is a box, all eight vertex nodes and
+    /// always four of the 12 edges are indirectly connected.
     void createPeriodicData();
     
     /// Creates on all boundaries the reverse mode flag.
@@ -566,9 +564,11 @@ namespace AMDiS {
     /// Stores to each edge all its periodic associations.
     map<DofEdge, std::set<DofEdge> > periodicEdgeAssoc;
 
-    map<pair<ElementObjectData, ElementObjectData>, bool> edgeReverseMode;
+    /// Stores all interior edge boundaries which have reverse mode enabled.
+    std::set<pair<ElementObjectData, ElementObjectData> > edgeReverseMode;
 
-    map<pair<ElementObjectData, ElementObjectData>, bool> faceReverseMode;
+    /// Stores all interior face boundaries which have reverse mode enabled.
+    std::set<pair<ElementObjectData, ElementObjectData> > faceReverseMode;
 
     map<int, int> *macroElementRankMap;
 
diff --git a/AMDiS/src/parallel/MeshDistributor.cc b/AMDiS/src/parallel/MeshDistributor.cc
index 1aabfaea..0b416add 100644
--- a/AMDiS/src/parallel/MeshDistributor.cc
+++ b/AMDiS/src/parallel/MeshDistributor.cc
@@ -959,6 +959,7 @@ namespace AMDiS {
 	// === Check the boundaries and adapt mesh if necessary. ===
 	MSG_DBG("Run checkAndAdaptBoundary ...\n");
 
+	double second = MPI::Wtime();
 	// Check for periodic boundaries within rank's subdomain.
 	for (InteriorBoundary::iterator it(intBoundary.getPeriodic()); 
 	     !it.end(); ++it) {
@@ -973,9 +974,13 @@ namespace AMDiS {
 	    }
 	  }
 	}
-	
+	MSG("  -> create mesh structure codes needed %.5f seconds\n", MPI::Wtime() - second);
+	second = MPI::Wtime();
+
 	meshChanged |= checkAndAdaptBoundary(allBound);
 	
+	MSG("  -> checkAndAdaptBoundary needed %.5f seconds\n", MPI::Wtime() - second);
+
 	// === Check on all ranks if at least one rank's mesh has changed. ===
 	
 	int sendValue = static_cast<int>(meshChanged);
@@ -1091,6 +1096,8 @@ namespace AMDiS {
       
     map<int, MeshCodeVec> sendCodes;
    
+    double first = MPI::Wtime();
+
     for (RankToBoundMap::iterator it = allBound.begin(); 
 	 it != allBound.end(); ++it) {
       for (vector<AtomicBoundary>::iterator boundIt = it->second.begin();
@@ -1109,6 +1116,8 @@ namespace AMDiS {
 
     stdMpi.startCommunication();
 
+    MSG("        -> communicate codes needed %.5f seconds\n", MPI::Wtime() - first);
+    first = MPI::Wtime();
 
     // === Compare received mesh structure codes. ===
     
@@ -1144,6 +1153,8 @@ namespace AMDiS {
       }
     }
 
+    MSG("        -> fitElementToMeshCode needed %.5f seconds\n", MPI::Wtime() - first);
+
     return meshChanged;
   }
 
@@ -1584,9 +1595,9 @@ namespace AMDiS {
       elObjDb.create(partitionMap, levelData);
     elObjDb.updateRankData();
 
-    //    unsigned long memsize = elObjDb.calculateMemoryUsage();
-    // MSG("Memory usage of element object database = %5.f KByte\n", 
-    // 	static_cast<double>(memsize / 1024));
+    unsigned long memsize = elObjDb.calculateMemoryUsage();
+    MSG("Memory usage of element object database = %5.f KByte\n", 
+    	static_cast<double>(memsize / 1024.0));
 
     intBoundary.create(levelData, 0, elObjDb);
     ParallelDebug::printBoundaryInfo(intBoundary);
-- 
GitLab