#include "Assembler.h"
#include "Operator.h"
#include "Element.h"
#include "QPsiPhi.h"
#include "ElementMatrix.h"
#include "ElementVector.h"
#include "DOFVector.h"
#include "OpenMP.h"
#include <vector>
#include <algorithm>

namespace AMDiS {

  ::std::vector<SubAssembler*> ZeroOrderAssembler::optimizedSubAssemblers;
  ::std::vector<SubAssembler*> FirstOrderAssembler::optimizedSubAssemblersGrdPhi;
  ::std::vector<SubAssembler*> FirstOrderAssembler::optimizedSubAssemblersGrdPsi;
  ::std::vector<SubAssembler*> SecondOrderAssembler::optimizedSubAssemblers;
  
  ::std::vector<SubAssembler*> ZeroOrderAssembler::standardSubAssemblers;
  ::std::vector<SubAssembler*> FirstOrderAssembler::standardSubAssemblersGrdPhi;
  ::std::vector<SubAssembler*> FirstOrderAssembler::standardSubAssemblersGrdPsi;
  ::std::vector<SubAssembler*> SecondOrderAssembler::standardSubAssemblers;

  SubAssembler::SubAssembler(Operator *op,
			     Assembler *assembler,
			     Quadrature *quadrat,
			     int order, 
			     bool optimized,
			     FirstOrderType type) 
    : nRow(0),
      nCol(0),
      coordsAtQPs(NULL),
      coordsNumAllocated(0),
      quadrature(quadrat),
      psiFast(NULL),
      phiFast(NULL),
      owner(assembler),
      symmetric(true),
      opt(optimized),
      firstCall(true)
  {
    const BasisFunction *psi = assembler->rowFESpace->getBasisFcts();
    const BasisFunction *phi = assembler->colFESpace->getBasisFcts();

    nRow = psi->getNumber();
    nCol = phi->getNumber();

    int maxThreads = omp_get_max_threads();
    terms.resize(maxThreads);

    switch (order) {
    case 0:
      terms = op->zeroOrder;
      break;
    case 1:
      if(type == GRD_PHI)
	terms = op->firstOrderGrdPhi;
      else 
	terms = op->firstOrderGrdPsi;
      break;
    case 2:
      terms = op->secondOrder;
      break;
    }

    // check if all terms are symmetric
    symmetric = true;
    for (int i = 0; i < static_cast<int>(terms[0].size()); i++) {
      if (!(terms[0][i])->isSymmetric()) {
	symmetric = false;
	break;
      }
    }  

    dim = assembler->rowFESpace->getMesh()->getDim();
  }

  FastQuadrature *SubAssembler::updateFastQuadrature(FastQuadrature *quadFast,
						     const BasisFunction *psi,
						     Flag updateFlag)
  {
    if (!quadFast) {
      quadFast = FastQuadrature::provideFastQuadrature(psi,
						       *quadrature,
						       updateFlag);
    } else {
      if (!quadFast->initialized(updateFlag))
	quadFast->init(updateFlag);
    }

    return quadFast;
  }

  void SubAssembler::initElement(const ElInfo* elInfo,
				 Quadrature *quad)
  {
    // set corrdsAtQPs invalid
    coordsValid = false;

    // set values at QPs invalid
    ::std::map<const DOFVectorBase<double>*, ValuesAtQPs*>::iterator it1;
    for (it1 = valuesAtQPs.begin(); it1 != valuesAtQPs.end(); ++it1) {
      ((*it1).second)->valid = false;
    }

    // set gradients at QPs invalid
    ::std::map<const DOFVectorBase<double>*, GradientsAtQPs*>::iterator it2;
    for (it2 = gradientsAtQPs.begin(); it2 != gradientsAtQPs.end(); ++it2) {
      ((*it2).second)->valid = false;
    }

    int myRank = omp_get_thread_num();
    // calls initElement of each term
    ::std::vector<OperatorTerm*>::iterator it;
    for (it = terms[myRank].begin(); it != terms[myRank].end(); ++it) {
      (*it)->initElement(elInfo, this, quad);
    }
  }

  WorldVector<double>* SubAssembler::getCoordsAtQPs(const ElInfo* elInfo,
						    Quadrature *quad)
  {
    Quadrature *localQuad = quad ? quad : quadrature;
  
    const int numPoints = localQuad->getNumPoints();

    // already calculated for this element ?
    if (coordsValid) {
      return coordsAtQPs;
    }
   
    if (coordsAtQPs)  {
      if (coordsNumAllocated != numPoints) {
	DELETE [] coordsAtQPs;
        coordsAtQPs = NEW WorldVector<double>[numPoints];
	coordsNumAllocated = numPoints;
      }
    } else {
      coordsAtQPs = NEW WorldVector<double>[numPoints];
      coordsNumAllocated = numPoints;
    }

    // set new values
    WorldVector<double>* k = &(coordsAtQPs[0]);
    for (int l = 0; k < &(coordsAtQPs[numPoints]); ++k, ++l) {
      elInfo->coordToWorld(localQuad->getLambda(l), k);
    }

    // mark values as valid
    coordsValid = true;

    return coordsAtQPs;
  }

  double* SubAssembler::getVectorAtQPs(DOFVectorBase<double>* dv, 
				       const ElInfo* elInfo,
				       Quadrature *quad)
  {
    FUNCNAME("SubAssembler::getVectorAtQPs()");

    const DOFVectorBase<double>* vec = dv ? dv : owner->operat->getUhOld();

    TEST_EXIT_DBG(vec)("no dof vector!\n");

    if (valuesAtQPs[vec] && valuesAtQPs[vec]->valid) 
      return valuesAtQPs[vec]->values.getValArray();

    Quadrature *localQuad = quad ? quad : quadrature;

    if (!valuesAtQPs[vec]) {
      valuesAtQPs[vec] = new ValuesAtQPs;
    }
    valuesAtQPs[vec]->values.resize(localQuad->getNumPoints());

    double *values = valuesAtQPs[vec]->values.getValArray();

    bool sameFESpaces = 
      (vec->getFESpace() == owner->rowFESpace) || 
      (vec->getFESpace() == owner->colFESpace);

    if (opt && !quad && sameFESpaces) {
      const BasisFunction *psi = owner->rowFESpace->getBasisFcts();
      const BasisFunction *phi = owner->colFESpace->getBasisFcts();
      if (vec->getFESpace()->getBasisFcts() == psi) {
	psiFast = updateFastQuadrature(psiFast, psi, INIT_PHI);
      } else if(vec->getFESpace()->getBasisFcts() == phi) {
	phiFast = updateFastQuadrature(phiFast, phi, INIT_PHI);
      }
    }

    // calculate new values
    const BasisFunction *basFcts = vec->getFESpace()->getBasisFcts();

    if (opt && !quad && sameFESpaces) {
      if (psiFast->getBasisFunctions() == basFcts) {
	vec->getVecAtQPs(elInfo, NULL, psiFast, values);
      } else if(phiFast->getBasisFunctions() == basFcts) {
	vec->getVecAtQPs(elInfo, NULL, phiFast, values);
      } else {
	vec->getVecAtQPs(elInfo, localQuad, NULL, values);
      }
    } else {
      vec->getVecAtQPs(elInfo, localQuad, NULL, values);
    }
  
    valuesAtQPs[vec]->valid = true;

    return values;
  }

  WorldVector<double>* SubAssembler::getGradientsAtQPs(DOFVectorBase<double>* dv, 
						       const ElInfo* elInfo,
						       Quadrature *quad)
  {
    FUNCNAME("SubAssembler::getGradientsAtQPs()");

    const DOFVectorBase<double>* vec = dv ? dv : owner->operat->getUhOld();

    TEST_EXIT_DBG(vec)("no dof vector!\n");

    if (gradientsAtQPs[vec] && gradientsAtQPs[vec]->valid) 
      return gradientsAtQPs[vec]->values.getValArray();

    Quadrature *localQuad = quad ? quad : quadrature;

    if(!gradientsAtQPs[vec]) {
      gradientsAtQPs[vec] = new GradientsAtQPs;
    } 
    gradientsAtQPs[vec]->values.resize(localQuad->getNumPoints());

    WorldVector<double> *values = gradientsAtQPs[vec]->values.getValArray();

    const BasisFunction *psi = owner->rowFESpace->getBasisFcts();
    const BasisFunction *phi = owner->colFESpace->getBasisFcts();

    bool sameFESpaces = 
      (vec->getFESpace() == owner->rowFESpace) || 
      (vec->getFESpace() == owner->colFESpace);

    if (opt && !quad && sameFESpaces) {
      if (vec->getFESpace()->getBasisFcts() == psi) {
	psiFast = updateFastQuadrature(psiFast, psi, INIT_GRD_PHI);
      } else if(vec->getFESpace()->getBasisFcts() == phi) {
	phiFast = updateFastQuadrature(phiFast, phi, INIT_GRD_PHI);
      }
    }
  
    // calculate new values
    const BasisFunction *basFcts = vec->getFESpace()->getBasisFcts();

    if (opt && !quad && sameFESpaces) {
      if(psiFast->getBasisFunctions() == basFcts) {
	vec->getGrdAtQPs(elInfo, NULL, psiFast, values);
      } else {
	vec->getGrdAtQPs(elInfo, NULL, phiFast, values);
      }
    } else {
      vec->getGrdAtQPs(elInfo, localQuad, NULL, values);
    }
   
    gradientsAtQPs[vec]->valid = true;

    return values;
  }

  ZeroOrderAssembler::ZeroOrderAssembler(Operator *op,
					 Assembler *assembler,
					 Quadrature *quad,
					 bool optimized)
    : SubAssembler(op, assembler, quad, 0, optimized)
  {}

  FirstOrderAssembler::FirstOrderAssembler(Operator *op,
					   Assembler *assembler,
					   Quadrature *quad,
					   bool optimized,
					   FirstOrderType type)
    : SubAssembler(op, assembler, quad, 1, optimized, type)
  {}

  SecondOrderAssembler::SecondOrderAssembler(Operator *op,
					     Assembler *assembler,
					     Quadrature *quad,
					     bool optimized)
    : SubAssembler(op, assembler, quad, 2, optimized)
  {}

  ZeroOrderAssembler* 
  ZeroOrderAssembler::getSubAssembler(Operator* op,
				      Assembler *assembler,
				      Quadrature *quad,
				      bool optimized)
  {
    // check if a assembler is needed at all
    if (op->zeroOrder.size() == 0) {
      return NULL;
    }

    ZeroOrderAssembler *newAssembler;

    ::std::vector<SubAssembler*> *subAssemblers =
	optimized ?
	&optimizedSubAssemblers :
    &standardSubAssemblers;

    int myRank = omp_get_thread_num();
    ::std::vector<OperatorTerm*> opTerms  = op->zeroOrder[myRank];

    sort(opTerms.begin(), opTerms.end());

    // check if a new assembler is needed
    if (quad) {
      for (int i = 0; i < static_cast<int>( subAssemblers->size()); i++) {
	::std::vector<OperatorTerm*> assTerms = *((*subAssemblers)[i]->getTerms());

	sort(assTerms.begin(), assTerms.end());

	if ((opTerms == assTerms) && 
	    ((*subAssemblers)[i]->getQuadrature() == quad)) {
	
	  return dynamic_cast<ZeroOrderAssembler*>((*subAssemblers)[i]);
	}
      }
    }
 
    // check if all terms are pw_const
    bool pwConst = true;
    for (int i = 0; i < static_cast<int>( op->zeroOrder[myRank].size()); i++) {
      if (!op->zeroOrder[myRank][i]->isPWConst()) {
	pwConst = false;
	break;
      }
    }  

    // create new assembler
    if (!optimized) {
      newAssembler = NEW Stand0(op, assembler, quad);
    } else {
      if(pwConst) {
	newAssembler = NEW Pre0(op, assembler, quad);
      } else {
	newAssembler = NEW Quad0(op, assembler, quad);
      }
    }

    subAssemblers->push_back(newAssembler);
    return newAssembler;
  }

  FirstOrderAssembler* 
  FirstOrderAssembler::getSubAssembler(Operator* op,
				       Assembler *assembler,
				       Quadrature *quad,
				       FirstOrderType type,
				       bool optimized)
  {
    ::std::vector<SubAssembler*> *subAssemblers =
	optimized ?
	(type == GRD_PSI ? 
	 &optimizedSubAssemblersGrdPsi : 
	 &optimizedSubAssemblersGrdPhi) :
    (type == GRD_PSI ? 
     &standardSubAssemblersGrdPsi :
     &standardSubAssemblersGrdPhi);

    int myRank = omp_get_thread_num();
    ::std::vector<OperatorTerm*> opTerms 
	= (type == GRD_PSI) ? 
	    op->firstOrderGrdPsi[myRank] : 
            op->firstOrderGrdPhi[myRank];

    // check if a assembler is needed at all
    if (opTerms.size() == 0) {
      return NULL;
    }

    sort(opTerms.begin(), opTerms.end());

    FirstOrderAssembler *newAssembler;

    // check if a new assembler is needed
    for (int i = 0; i < static_cast<int>( subAssemblers->size()); i++) {

      ::std::vector<OperatorTerm*> assTerms = *((*subAssemblers)[i]->getTerms());
    
      sort(assTerms.begin(), assTerms.end());   

      if ((opTerms == assTerms) && 
	  ((*subAssemblers)[i]->getQuadrature() == quad)) {

	return dynamic_cast<FirstOrderAssembler*>((*subAssemblers)[i]);
      }
    }

    // check if all terms are pw_const
    bool pwConst = true;
    for (int i = 0; i < static_cast<int>( opTerms.size()); i++) {
      if (!(opTerms[i])->isPWConst()) {
	pwConst = false;
	break;
      }
    }  

    // create new assembler
    if (!optimized) {
      newAssembler = 
	(type == GRD_PSI) ?
	dynamic_cast<FirstOrderAssembler*>(NEW Stand10(op, assembler, quad)) :
	dynamic_cast<FirstOrderAssembler*>(NEW Stand01(op, assembler, quad));    
    } else {
      if (pwConst) {
	newAssembler = 
	  (type == GRD_PSI) ?
	  dynamic_cast<FirstOrderAssembler*>(NEW Pre10(op, assembler, quad)) :
	  dynamic_cast<FirstOrderAssembler*>(NEW Pre01(op, assembler, quad));
      } else {
	newAssembler = 
	  (type == GRD_PSI) ?
	  dynamic_cast<FirstOrderAssembler*>( NEW Quad10(op, assembler, quad)) :
	  dynamic_cast<FirstOrderAssembler*>( NEW Quad01(op, assembler, quad));
      }
    }

    subAssemblers->push_back(newAssembler);
    return newAssembler;
  };

  SecondOrderAssembler* 
  SecondOrderAssembler::getSubAssembler(Operator* op,
					Assembler *assembler,
					Quadrature *quad,
					bool optimized)
  {
    int myRank = omp_get_thread_num();

    // check if a assembler is needed at all
    if (op->secondOrder[myRank].size() == 0) {
      return NULL;
    }

    SecondOrderAssembler *newAssembler;

    ::std::vector<SubAssembler*> *subAssemblers =
	optimized ?
	&optimizedSubAssemblers :
    &standardSubAssemblers;

    ::std::vector<OperatorTerm*> opTerms  = op->zeroOrder[myRank];

    sort(opTerms.begin(), opTerms.end());

    // check if a new assembler is needed
    for (int i = 0; i < static_cast<int>( subAssemblers->size()); i++) {
      ::std::vector<OperatorTerm*> assTerms = *((*subAssemblers)[i]->getTerms());
    
      sort(assTerms.begin(), assTerms.end());

      if ((opTerms == assTerms) && 
	  ((*subAssemblers)[i]->getQuadrature() == quad)) {
	
	return dynamic_cast<SecondOrderAssembler*>((*subAssemblers)[i]);
      }
    }

    // check if all terms are pw_const
    bool pwConst = true;
    for (int i = 0; i < static_cast<int>( op->secondOrder[myRank].size()); i++) {
      if (!op->secondOrder[myRank][i]->isPWConst()) {
	pwConst = false;
	break;
      }
    }  

    // create new assembler
    if (!optimized) {
      newAssembler = NEW Stand2(op, assembler, quad);
    } else {
      if (pwConst) {
	newAssembler = NEW Pre2(op, assembler, quad);
      } else {
	newAssembler = NEW Quad2(op, assembler, quad);
      }
    }

    subAssemblers->push_back(newAssembler);
    return newAssembler;
  }

  Stand0::Stand0(Operator *op, Assembler *assembler, Quadrature *quad)
    : ZeroOrderAssembler(op, assembler, quad, false)
  {
  }

  void Stand0::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
  {
    double val;

    const BasisFunction *psi = owner->getRowFESpace()->getBasisFcts();
    const BasisFunction *phi = owner->getColFESpace()->getBasisFcts();

    double  psival;
    double *phival = GET_MEMORY(double, nCol);
    int numPoints = quadrature->getNumPoints();
    double *c = GET_MEMORY(double, numPoints);

    for (int iq = 0; iq < numPoints; iq++) {
      c[iq] = 0.0;
    }

    int myRank = omp_get_thread_num();
    ::std::vector<OperatorTerm*>::iterator termIt;
    for (termIt = terms[myRank].begin(); termIt != terms[myRank].end(); ++termIt) {
      (static_cast<ZeroOrderTerm*>((*termIt)))->getC(elInfo, numPoints, c);
    }
      
    if (symmetric) {
      for (int iq = 0; iq < numPoints; iq++) {
	c[iq] *= elInfo->getDet();

	// calculate phi at QPs only once!
	for (int i = 0; i < nCol; i++) {
	  phival[i] = (*(phi->getPhi(i)))(quadrature->getLambda(iq));
	}

	for (int i = 0; i < nRow; i++) {
	  psival = (*(psi->getPhi(i)))(quadrature->getLambda(iq));
	  (*mat)[i][i] += quadrature->getWeight(iq) * c[iq] * psival * phival[i];
	  for (int j = i + 1; j < nCol; j++) {
	    val = quadrature->getWeight(iq) * c[iq] * psival * phival[j];
	    (*mat)[i][j] += val;
	    (*mat)[j][i] += val;
	  }
	}
      }
    } else {      //  non symmetric assembling 
      for (int iq = 0; iq < numPoints; iq++) {
	c[iq] *= elInfo->getDet();

	// calculate phi at QPs only once!
	for (int i = 0; i < nCol; i++) {
	  phival[i] = (*(phi->getPhi(i)))(quadrature->getLambda(iq));
	}

	for (int i = 0; i < nRow; i++) {
	  psival = (*(psi->getPhi(i)))(quadrature->getLambda(iq));
	  for (int j = 0; j < nCol; j++) {
	    (*mat)[i][j] += quadrature->getWeight(iq)*c[iq]*psival*phival[j];
	  }
	}
      }
    }

    FREE_MEMORY(phival, double, nCol);
    FREE_MEMORY(c, double, numPoints);
  }

  void Stand0::calculateElementVector(const ElInfo *elInfo, ElementVector *vec)
  {
    int numPoints = quadrature->getNumPoints();

    double *c = GET_MEMORY(double, numPoints);

    for (int iq = 0; iq < numPoints; iq++) {
      c[iq] = 0.0;
    }

    int myRank = omp_get_thread_num();
    ::std::vector<OperatorTerm*>::iterator termIt;
    for (termIt = terms[myRank].begin(); termIt != terms[myRank].end(); ++termIt) {
      (static_cast<ZeroOrderTerm*>((*termIt)))->getC(elInfo, numPoints, c);
    }

    for (int iq = 0; iq < numPoints; iq++) {
      c[iq] *= elInfo->getDet();

      for (int i = 0; i < nRow; i++) {
	double psi = (*(owner->getRowFESpace()->getBasisFcts()->getPhi(i)))
	  (quadrature->getLambda(iq));
	(*vec)[i] += quadrature->getWeight(iq) * c[iq] * psi;
      }
    }
    
    FREE_MEMORY(c, double, numPoints);
  }

  Quad0::Quad0(Operator *op, Assembler *assembler, Quadrature *quad)
    : ZeroOrderAssembler(op, assembler, quad, true)
  {
  }

  void Quad0::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
  {
    const double *psi, *phi;

    if (firstCall) {
      const BasisFunction *basFcts = owner->getRowFESpace()->getBasisFcts();
      psiFast = updateFastQuadrature(psiFast, basFcts, INIT_PHI);
      basFcts = owner->getColFESpace()->getBasisFcts();
      phiFast = updateFastQuadrature(phiFast, basFcts, INIT_PHI);
      firstCall = false;
    }

    int numPoints = quadrature->getNumPoints();
    double *c = GET_MEMORY(double, numPoints);

    for (int iq = 0; iq < numPoints; iq++) {
      c[iq] = 0.0;
    }

    int myRank = omp_get_thread_num();
    ::std::vector<OperatorTerm*>::iterator termIt;
    for (termIt = terms[myRank].begin(); termIt != terms[myRank].end(); ++termIt) {
      (static_cast<ZeroOrderTerm*>((*termIt)))->getC(elInfo, numPoints, c);
    }

    if (symmetric) {
      for (int iq = 0; iq < numPoints; iq++) {
	c[iq] *= elInfo->getDet();

	psi = psiFast->getPhi(iq);
	phi = phiFast->getPhi(iq);
	for (int i = 0; i < nRow; i++) {
	  (*mat)[i][i] += quadrature->getWeight(iq) * c[iq] * psi[i] * phi[i];
	  for (int j = i + 1; j < nCol; j++) {
	    double val = quadrature->getWeight(iq) * c[iq] * psi[i] * phi[j];
	    (*mat)[i][j] += val;
	    (*mat)[j][i] += val;
	  }
	}
      }
    } else {      /*  non symmetric assembling   */
      for (int iq = 0; iq < numPoints; iq++) {
	c[iq] *= elInfo->getDet();

	psi = psiFast->getPhi(iq);
	phi = phiFast->getPhi(iq);
	for (int i = 0; i < nRow; i++) {
	  for (int j = 0; j < nCol; j++) {
	    (*mat)[i][j] += quadrature->getWeight(iq) * c[iq] * psi[i] * phi[j];
	  }
	}
      }
    }
    FREE_MEMORY(c, double, numPoints);
  }

  void Quad0::calculateElementVector(const ElInfo *elInfo, ElementVector *vec)
  {
    if (firstCall) {
      const BasisFunction *basFcts = owner->getRowFESpace()->getBasisFcts();
      psiFast = updateFastQuadrature(psiFast, basFcts, INIT_PHI);
      basFcts = owner->getColFESpace()->getBasisFcts();
      phiFast = updateFastQuadrature(phiFast, basFcts, INIT_PHI);
      firstCall = false;
    }

    int numPoints = quadrature->getNumPoints();
    double *c = GET_MEMORY(double, numPoints);

    for (int iq = 0; iq < numPoints; iq++) {
      c[iq] = 0.0;
    }

    int myRank = omp_get_thread_num();
    ::std::vector<OperatorTerm*>::iterator termIt;
    for (termIt = terms[myRank].begin(); termIt != terms[myRank].end(); ++termIt) {
      (static_cast<ZeroOrderTerm*>((*termIt)))->getC(elInfo, numPoints, c);
    }

    for (int iq = 0; iq < numPoints; iq++) {
      c[iq] *= elInfo->getDet();

      const double *psi = psiFast->getPhi(iq);
      for (int i = 0; i < nRow; i++) {
	(*vec)[i] += quadrature->getWeight(iq) * c[iq] * psi[i];
      }
    }
    FREE_MEMORY(c, double, numPoints);
  }

  Pre0::Pre0(Operator *op, Assembler *assembler, Quadrature *quad) 
    : ZeroOrderAssembler(op, assembler, quad, true)
  {
  }

  void Pre0::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
  {
    if (firstCall) {
      q00 = Q00PsiPhi::provideQ00PsiPhi(owner->getRowFESpace()->getBasisFcts(), 
					owner->getColFESpace()->getBasisFcts(), 
					quadrature);
      q0 = Q0Psi::provideQ0Psi(owner->getRowFESpace()->getBasisFcts(),
			       quadrature);
      firstCall = false;
    }

    //    c[0] = 0.0;
    double c = 0.0;
    int myRank = omp_get_thread_num();
    int size = static_cast<int>(terms[myRank].size());

    for (int i = 0; i < size; i++) {
      (static_cast<ZeroOrderTerm*>((terms[myRank][i])))->getC(elInfo, 1, &c);
    }

    c *= elInfo->getDet();

    if (symmetric) {
      for (int i = 0; i < nRow; i++) {
	(*mat)[i][i] += c * q00->getValue(i,i);
	for (int j = i + 1; j < nCol; j++) {
	  double val = c * q00->getValue(i, j);
	  (*mat)[i][j] += val;
	  (*mat)[j][i] += val;
	}
      }
    } else {
      for (int i = 0; i < nRow; i++)
	for (int j = 0; j < nCol; j++)
	  (*mat)[i][j] += c * q00->getValue(i, j);
    }
  }

  void Pre0::calculateElementVector(const ElInfo *elInfo, ElementVector *vec)
  {
    if (firstCall) {
      q00 = Q00PsiPhi::provideQ00PsiPhi(owner->getRowFESpace()->getBasisFcts(), 
					owner->getColFESpace()->getBasisFcts(), 
					quadrature);
      q0 = Q0Psi::provideQ0Psi(owner->getRowFESpace()->getBasisFcts(),
			       quadrature);
      firstCall = false;
    }

    ::std::vector<OperatorTerm*>::iterator termIt;

    int myRank = omp_get_thread_num();
    double c = 0.0;
    for (termIt = terms[myRank].begin(); termIt != terms[myRank].end(); ++termIt) {
      (static_cast<ZeroOrderTerm*>( *termIt))->getC(elInfo, 1, &c);
    }

    c *= elInfo->getDet();

    for (int i = 0; i < nRow; i++)
      (*vec)[i] += c * q0->getValue(i);
  }

  Stand10::Stand10(Operator *op, Assembler *assembler, Quadrature *quad) 
    : FirstOrderAssembler(op, assembler, quad, false, GRD_PSI)
  {}


  void Stand10::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
  {
    DimVec<double> grdPsi(dim, DEFAULT_VALUE, 0.0);
    double *phival = GET_MEMORY(double, nCol);

    const BasisFunction *psi = owner->getRowFESpace()->getBasisFcts();
    const BasisFunction *phi = owner->getColFESpace()->getBasisFcts();

    int numPoints = quadrature->getNumPoints();

    VectorOfFixVecs<DimVec<double> > Lb(dim, numPoints, NO_INIT);
    for (int iq = 0; iq < numPoints; iq++) {
      Lb[iq].set(0.0);
    }

    int myRank = omp_get_thread_num();
    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, numPoints, Lb);
    }
  
    for (int iq = 0; iq < numPoints; iq++) {
      Lb[iq] *= elInfo->getDet();

      for (int i = 0; i < nCol; i++) {
	phival[i] = (*(phi->getPhi(i)))(quadrature->getLambda(iq));
      }

      for (int i = 0; i < nRow; i++) {
	(*(psi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPsi);
	for (int j = 0; j < nCol; j++) {
	  (*mat)[i][j] += quadrature->getWeight(iq) * (Lb[iq] * grdPsi) * phival[j];
	}
      }
    }
    FREE_MEMORY(phival, double, nCol);
  }


  Quad10::Quad10(Operator *op, Assembler *assembler, Quadrature *quad) 
    : FirstOrderAssembler(op, assembler, quad, true, GRD_PSI)
  {
  }


  void Quad10::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
  {
    VectorOfFixVecs<DimVec<double> > *grdPsi;
    const double *phi;

    if (firstCall) {
      const BasisFunction *basFcts = owner->getRowFESpace()->getBasisFcts();
      psiFast = updateFastQuadrature(psiFast, basFcts, INIT_GRD_PHI);
      basFcts = owner->getColFESpace()->getBasisFcts();
      phiFast = updateFastQuadrature(phiFast, basFcts, INIT_PHI);
      firstCall = false;
    }

    int numPoints = quadrature->getNumPoints();

    VectorOfFixVecs<DimVec<double> > Lb(dim,numPoints,NO_INIT);
    for (int iq = 0; iq < numPoints; iq++) {
      Lb[iq].set(0.0);
    }

    int myRank = omp_get_thread_num();
    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, numPoints, Lb);
    }
  
    for (int iq = 0; iq < numPoints; iq++) {
      Lb[iq] *= elInfo->getDet();

      phi    = phiFast->getPhi(iq);
      grdPsi = psiFast->getGradient(iq);

      for (int i = 0; i < nRow; i++) {
	for (int j = 0; j < nCol; j++)
	  (*mat)[i][j] += quadrature->getWeight(iq) * (Lb[iq] * (*grdPsi)[i]) * phi[j];
      }
    }
  }


  Pre10::Pre10(Operator *op, Assembler *assembler, Quadrature *quad) 
    : FirstOrderAssembler(op, assembler, quad, true, GRD_PSI)
  {
  }


  void Pre10::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
  {
    VectorOfFixVecs<DimVec<double> > Lb(dim, 1, NO_INIT);
    const int *k;
    const double *values;

    if (firstCall) {
      q10 = Q10PsiPhi::provideQ10PsiPhi(owner->getRowFESpace()->getBasisFcts(), 
					owner->getColFESpace()->getBasisFcts(), 
					quadrature);
      q1 = Q1Psi::provideQ1Psi(owner->getRowFESpace()->getBasisFcts(),
			       quadrature);
      firstCall = false;
    }

    const int **nEntries = q10->getNumberEntries();
    int myRank = omp_get_thread_num();
    Lb[0].set(0.0);

    for (int i = 0; i < static_cast<int>( terms[myRank].size()); i++) {
      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, 1, Lb);
    }

    Lb[0] *= elInfo->getDet();

    for (int i = 0; i < nRow; i++) {
      for (int j = 0; j < nCol; j++) {
	k = q10->getKVec(i, j);
	values = q10->getValVec(i, j);
	double val = 0.0;
	for (int m = 0; m < nEntries[i][j]; m++) {
	  val += values[m] * Lb[0][k[m]];
	}
	(*mat)[i][j] += val;
      }
    }
  }


  Stand01::Stand01(Operator *op, Assembler *assembler, Quadrature *quad) 
    : FirstOrderAssembler(op, assembler, quad, false, GRD_PHI)
  {}


  void Stand01::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
  {
    VectorOfFixVecs<DimVec<double> > grdPhi(dim, nCol, NO_INIT);

    const BasisFunction *psi = owner->getRowFESpace()->getBasisFcts();
    const BasisFunction *phi = owner->getColFESpace()->getBasisFcts();

    int numPoints = quadrature->getNumPoints();
    VectorOfFixVecs<DimVec<double> > Lb(dim, numPoints, NO_INIT);
    int myRank = omp_get_thread_num();

    for (int iq = 0; iq < numPoints; iq++) {
      Lb[iq].set(0.0);
    }
    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, numPoints, Lb);
    }
  
    for (int iq = 0; iq < numPoints; iq++) {
      Lb[iq] *= elInfo->getDet();

      for (int i = 0; i < nCol; i++) {
	(*(phi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPhi[i]);
      }

      for (int i = 0; i < nRow; i++) {
	double psival = (*(psi->getPhi(i)))(quadrature->getLambda(iq));
	for (int j = 0; j < nCol; j++)
	  (*mat)[i][j] += quadrature->getWeight(iq) * ((Lb[iq] * psival) * grdPhi[j]);
      }
    } 
  }

  void Stand10::calculateElementVector(const ElInfo *elInfo, ElementVector *vec)
  {
    DimVec<double> grdPsi(dim, DEFAULT_VALUE, 0.0);
    const BasisFunction *psi = owner->getRowFESpace()->getBasisFcts();
    int numPoints = quadrature->getNumPoints();
    VectorOfFixVecs<DimVec<double> > Lb(dim,numPoints,NO_INIT);
    int myRank = omp_get_thread_num();

    for (int iq = 0; iq < numPoints; iq++) {
      Lb[iq].set(0.0);
    }
    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, numPoints, Lb);
    }
  
    for (int iq = 0; iq < numPoints; iq++) {
      Lb[iq] *= elInfo->getDet();

      for (int i = 0; i < nRow; i++) {
	(*(psi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPsi);
	(*vec)[i] += quadrature->getWeight(iq) * (Lb[iq] * grdPsi);
      }
    }
  }

  Quad01::Quad01(Operator *op, Assembler *assembler, Quadrature *quad) 
    : FirstOrderAssembler(op, assembler, quad, true, GRD_PHI)
  {
  }

  void Quad01::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
  {
    VectorOfFixVecs<DimVec<double> > *grdPhi;

    if (firstCall) {
      const BasisFunction *basFcts = owner->getRowFESpace()->getBasisFcts();
      psiFast = updateFastQuadrature(psiFast, basFcts, INIT_PHI);
      basFcts = owner->getColFESpace()->getBasisFcts();
      phiFast = updateFastQuadrature(phiFast, basFcts, INIT_GRD_PHI);
      firstCall = false;
    }

    int numPoints = quadrature->getNumPoints();
    VectorOfFixVecs<DimVec<double> > Lb(dim,numPoints,NO_INIT);
    int myRank = omp_get_thread_num();

    for (int iq = 0; iq < numPoints; iq++) {
      Lb[iq].set(0.0);
    }
    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, numPoints, Lb);
    }
  
    for (int iq = 0; iq < numPoints; iq++) {
      Lb[iq] *= elInfo->getDet();

      const double *psi = psiFast->getPhi(iq);
      grdPhi = phiFast->getGradient(iq);

      for (int i = 0; i < nRow; i++) {
	for (int j = 0; j < nCol; j++)
	  (*mat)[i][j] += quadrature->getWeight(iq) * (Lb[iq] * (*grdPhi)[j]) * psi[i];
      }
    }
  }

  void Quad10::calculateElementVector(const ElInfo *elInfo, ElementVector *vec)
  {
    VectorOfFixVecs<DimVec<double> > *grdPsi;

    if (firstCall) {
      const BasisFunction *basFcts = owner->getRowFESpace()->getBasisFcts();
      psiFast = updateFastQuadrature(psiFast, basFcts, INIT_GRD_PHI);
      basFcts = owner->getColFESpace()->getBasisFcts();
      phiFast = updateFastQuadrature(phiFast, basFcts, INIT_PHI);
      firstCall = false;
    }

    int numPoints = quadrature->getNumPoints();
    VectorOfFixVecs<DimVec<double> > Lb(dim,numPoints,NO_INIT);
    int myRank = omp_get_thread_num();

    for (int iq = 0; iq < numPoints; iq++) {
      Lb[iq].set(0.0);
    }
    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, numPoints, Lb);
    }
  
    for (int iq = 0; iq < numPoints; iq++) {

      Lb[iq] *= elInfo->getDet();
      grdPsi = psiFast->getGradient(iq);

      for (int i = 0; i < nRow; i++) {
	(*vec)[i] += quadrature->getWeight(iq) * (Lb[iq] * (*grdPsi)[i]);
      }
    }
  }

  Pre01::Pre01(Operator *op, Assembler *assembler, Quadrature *quad) 
    : FirstOrderAssembler(op, assembler, quad, true, GRD_PHI)
  {
  }

  void Pre01::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
  {
    VectorOfFixVecs<DimVec<double> > Lb(dim,1,NO_INIT);

    const int *l;
    const double *values;

    if (firstCall) {
      q01 = Q01PsiPhi::provideQ01PsiPhi(owner->getRowFESpace()->getBasisFcts(), 
					owner->getColFESpace()->getBasisFcts(), 
					quadrature);
      q1 = Q1Psi::provideQ1Psi(owner->getRowFESpace()->getBasisFcts(),
			       quadrature);
      firstCall = false;
    }

    const int **nEntries = q01->getNumberEntries();
    int myRank = omp_get_thread_num();
    Lb[0].set(0.0);

    for (int i = 0; i < static_cast<int>( terms[myRank].size()); i++) {
      (static_cast<FirstOrderTerm*>((terms[myRank][i])))->getLb(elInfo, 1, Lb);
    }

    Lb[0] *= elInfo->getDet();

    for (int i = 0; i < nRow; i++) {
      for (int j = 0; j < nCol; j++) {
	l = q01->getLVec(i, j);
	values = q01->getValVec(i, j);
	double val = 0.0;
	for (int m = 0; m < nEntries[i][j]; m++) {
	  val += values[m] * Lb[0][l[m]];
	}
	(*mat)[i][j] += val;
      }
    }
  }

  void Pre10::calculateElementVector(const ElInfo *elInfo, ElementVector *vec)
  {
    VectorOfFixVecs<DimVec<double> > Lb(dim,1,NO_INIT);

    const int *k;
    const double *values;

    if (firstCall) {
      q10 = Q10PsiPhi::provideQ10PsiPhi(owner->getRowFESpace()->getBasisFcts(), 
					owner->getColFESpace()->getBasisFcts(), 
					quadrature);
      q1 = Q1Psi::provideQ1Psi(owner->getRowFESpace()->getBasisFcts(),
			       quadrature);
      firstCall = false;
    }

    const int *nEntries = q1->getNumberEntries();
    int myRank = omp_get_thread_num();
    Lb[0].set(0.0);

    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
      (static_cast<FirstOrderTerm*>(terms[myRank][i]))->getLb(elInfo, 1, Lb);
    }

    Lb[0] *= elInfo->getDet();

    for (int i = 0; i < nRow; i++) {
      k = q1->getKVec(i);
      values = q1->getValVec(i);
      double val = 0.0;
      for (int m = 0; m < nEntries[i]; m++) {
	val += values[m] * Lb[0][k[m]];
      }
      (*vec)[i] += val;
    }
  }

  Pre2::Pre2(Operator *op, Assembler *assembler, Quadrature *quad) 
    : SecondOrderAssembler(op, assembler, quad, true)
  {
    q11 = Q11PsiPhi::provideQ11PsiPhi(owner->getRowFESpace()->getBasisFcts(), 
				      owner->getColFESpace()->getBasisFcts(), 
				      quadrature);
    tmpLALt.resize(omp_get_max_threads());
    for (int i = 0; i < omp_get_max_threads(); i++) {
      tmpLALt[i] = NEW DimMat<double>*;
      *(tmpLALt[i]) = NEW DimMat<double>(dim, NO_INIT);
    }
  }

  Pre2::~Pre2()
  {
    for (int i = 0; i < static_cast<int>(tmpLALt.size()); i++) {
      DELETE *(tmpLALt[i]);
      DELETE tmpLALt[i];
    }
  }

  void Pre2::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
  {
    const int **nEntries;
    const int *k, *l;
    const double *values;

    int myRank = omp_get_thread_num();
    DimMat<double> **LALt = tmpLALt[myRank];
    DimMat<double> &tmpMat = *LALt[0];
    tmpMat.set(0.0);

    for (int i = 0; i < static_cast<int>( terms[myRank].size()); i++) {
      (static_cast<SecondOrderTerm*>(terms[myRank][i]))->getLALt(elInfo, 1, LALt);
    }

    tmpMat *= elInfo->getDet();

    nEntries = q11->getNumberEntries();

    if (symmetric) {
      for (int i = 0; i < nRow; i++) {
	k = q11->getKVec(i, i);
	l = q11->getLVec(i, i);
	values = q11->getValVec(i, i);
	double val = 0.0;
	for (int m = 0; m < nEntries[i][i]; m++) {
	  val += values[m] * tmpMat[k[m]][l[m]];
	}
	(*mat)[i][i] += val;

	for (int j = i + 1; j < nCol; j++) {
	  k = q11->getKVec(i, j);
	  l = q11->getLVec(i, j);
	  values = q11->getValVec(i, j);
	  val = 0.0;
	  for (int m = 0; m < nEntries[i][j]; m++) {
	    val += values[m] * tmpMat[k[m]][l[m]];
	  }
	  (*mat)[i][j] += val;
	  (*mat)[j][i] += val;
	}
      }
    } else {  /*  A not symmetric or psi != phi        */
      for (int i = 0; i < nRow; i++) {
	for (int j = 0; j < nCol; j++) {
	  k = q11->getKVec(i, j);
	  l = q11->getLVec(i, j);
	  values = q11->getValVec(i, j);
	  double val = 0.0;
	  for (int m = 0; m < nEntries[i][j]; m++) {
	    val += values[m] * tmpMat[k[m]][l[m]];
	  }
	  (*mat)[i][j] += val;
	}
      }
    }
  }

  Quad2::Quad2(Operator *op, Assembler *assembler, Quadrature *quad) 
    : SecondOrderAssembler(op, assembler, quad, true)
  {}

  void Quad2::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
  {
    double val;
    VectorOfFixVecs<DimVec<double> > *grdPsi, *grdPhi;

    if (firstCall) {
      const BasisFunction *basFcts = owner->getRowFESpace()->getBasisFcts();
      psiFast = updateFastQuadrature(psiFast, basFcts, INIT_GRD_PHI);
      basFcts = owner->getColFESpace()->getBasisFcts();
      phiFast = updateFastQuadrature(phiFast, basFcts, INIT_GRD_PHI);
      firstCall = false;
    }

    int nPoints = quadrature->getNumPoints();
    DimMat<double> **LALt = NEW DimMat<double>*[nPoints];
    int myRank = omp_get_thread_num();

    for (int i = 0; i < nPoints;i++) {
      LALt[i] = NEW DimMat<double>(dim, NO_INIT);
    }
    for (int iq = 0; iq < nPoints; iq++) {
      LALt[iq]->set(0.0);
    }
    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
      (static_cast<SecondOrderTerm*>(terms[myRank][i]))->getLALt(elInfo, nPoints, LALt);
    }

    if (symmetric) {
      for (int iq = 0; iq < nPoints; iq++) {
	(*LALt[iq]) *= elInfo->getDet();

	grdPsi = psiFast->getGradient(iq);
	grdPhi = phiFast->getGradient(iq);

	for (int i = 0; i < nRow; i++) {
	  (*mat)[i][i] += quadrature->getWeight(iq) * 
	    ((*grdPsi)[i] * ((*LALt[iq]) * (*grdPhi)[i]));

	  for (int j = i + 1; j < nCol; j++) {
	    val = quadrature->getWeight(iq) * ((*grdPsi)[i] * ((*LALt[iq]) * (*grdPhi)[j]));
	    (*mat)[i][j] += val;
	    (*mat)[j][i] += val;
	  }
	}
      }
    }
    else {      /*  non symmetric assembling   */
      for (int iq = 0; iq < nPoints; iq++) {
	(*LALt[iq]) *= elInfo->getDet();

	grdPsi = psiFast->getGradient(iq);
	grdPhi = phiFast->getGradient(iq);

	for (int i = 0; i < nRow; i++) {
	  for (int j = 0; j < nCol; j++) {
	    (*mat)[i][j] += quadrature->getWeight(iq) *
	      ((*grdPsi)[i] * ((*LALt[iq]) * (*grdPhi)[j]));
	  }
	}
      }
    }
  
    for (int i = 0;i < nPoints; i++) 
      DELETE LALt[i];

    DELETE [] LALt; 
  }

  Stand2::Stand2(Operator *op, Assembler *assembler, Quadrature *quad) 
    : SecondOrderAssembler(op, assembler, quad, false)
  {}

  void Stand2::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix *mat)
  {
    double val;
    DimVec<double> grdPsi(dim, NO_INIT);
    VectorOfFixVecs<DimVec<double> > grdPhi(dim, nCol, NO_INIT);

    const BasisFunction *psi = owner->getRowFESpace()->getBasisFcts();
    const BasisFunction *phi = owner->getColFESpace()->getBasisFcts();

    int nPoints = quadrature->getNumPoints();

    DimMat<double> **LALt = NEW DimMat<double>*[nPoints];
    for (int iq = 0; iq < nPoints; iq++) {
      LALt[iq] = NEW DimMat<double>(dim, NO_INIT);
      LALt[iq]->set(0.0);
    }

    int myRank = omp_get_thread_num();

    for (int i = 0; i < static_cast<int>(terms[myRank].size()); i++) {
      (static_cast<SecondOrderTerm*>(terms[myRank][i]))->getLALt(elInfo, nPoints, LALt);
    }

    if (symmetric) {
      for (int iq = 0; iq < nPoints; iq++) {
	(*LALt[iq]) *= elInfo->getDet();

	for (int i = 0; i < nCol; i++) {
	  (*(phi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPhi[i]);
	}

	for (int i = 0; i < nRow; i++) {
	  (*(psi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPsi);
	  (*mat)[i][i] += quadrature->getWeight(iq) * 
	    (grdPsi * ((*LALt[iq]) * grdPhi[i]));

	  for (int j = i + 1; j < nCol; j++) {
	    val = quadrature->getWeight(iq) * (grdPsi * ((*LALt[iq]) * grdPhi[j]));
	    (*mat)[i][j] += val;
	    (*mat)[j][i] += val;
	  }
	}
      }
    } else {      /*  non symmetric assembling   */
      for (int iq = 0; iq < nPoints; iq++) {
	(*LALt[iq]) *= elInfo->getDet();

	for (int i = 0; i < nCol; i++) {
	  (*(phi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPhi[i]);
	}

	for (int i = 0; i < nRow; i++) {
	  (*(psi->getGrdPhi(i)))(quadrature->getLambda(iq), grdPsi);
	  for (int j = 0; j < nCol; j++) {
	    (*mat)[i][j] += quadrature->getWeight(iq) *
	      (grdPsi * ((*LALt[iq]) * grdPhi[j]));
	  }
	}
      }
    }

    for (int iq = 0; iq < nPoints; iq++) 
      DELETE LALt[iq];

    DELETE [] LALt;
  }

  Assembler::Assembler(Operator  *op,
		       const FiniteElemSpace *rowFESpace_,
		       const FiniteElemSpace *colFESpace_) 
    : operat(op),
      rowFESpace(rowFESpace_),
      colFESpace(colFESpace_ ? colFESpace_ : rowFESpace_),
      nRow(rowFESpace->getBasisFcts()->getNumber()),
      nCol(colFESpace->getBasisFcts()->getNumber()),
      remember(true),
      rememberElMat(false),
      rememberElVec(false),
      elementMatrix(NULL),
      elementVector(NULL),
      lastMatEl(NULL),
      lastVecEl(NULL),
      lastTraverseId(-1)
  
  {}

  void Assembler::calculateElementMatrix(const ElInfo *elInfo, 
					 ElementMatrix *userMat,
					 double factor)
  {
    FUNCNAME("Assembler::calculateElementMatrix()");

    if (remember && ((factor != 1.0) || (operat->uhOld))) {
      rememberElMat = true;
    }
  
    if (rememberElMat && !elementMatrix)
      elementMatrix = NEW ElementMatrix(nRow, nCol);

    Element *el = elInfo->getElement();

    
    checkForNewTraverse();

    checkQuadratures();
    
    if ((el != lastMatEl && el != lastVecEl) || !operat->isOptimized()) {
      initElement(elInfo);
    }

    if (el != lastMatEl || !operat->isOptimized()) {
      if (rememberElMat) {
	elementMatrix->set(0.0);
      }
      lastMatEl = el;
    } else {
      if (rememberElMat) {
	axpy(factor, *elementMatrix, *userMat);
	return;
      }
    }
  
    ElementMatrix *mat = rememberElMat ? elementMatrix : userMat;

    if (secondOrderAssembler)
      secondOrderAssembler->calculateElementMatrix(elInfo, mat);
    if (firstOrderAssemblerGrdPsi)
      firstOrderAssemblerGrdPsi->calculateElementMatrix(elInfo, mat);
    if (firstOrderAssemblerGrdPhi)
      firstOrderAssemblerGrdPhi->calculateElementMatrix(elInfo, mat);
    if (zeroOrderAssembler)
      zeroOrderAssembler->calculateElementMatrix(elInfo, mat);

    if (rememberElMat && userMat) {
      axpy(factor, *elementMatrix, *userMat);
    }      
  }

  void Assembler::calculateElementVector(const ElInfo *elInfo, 
					 ElementVector *userVec,
					 double factor)
  {
    FUNCNAME("Assembler::calculateElementVector()");

    if (remember && factor != 1.0) {
      rememberElVec = true;
    }

    if (rememberElVec && !elementVector)
      elementVector = NEW ElementVector(nRow);

    Element *el = elInfo->getElement();

    checkForNewTraverse();

    checkQuadratures();

    if ((el != lastMatEl && el != lastVecEl) || !operat->isOptimized()) {
      initElement(elInfo);
    }

    
    if (el != lastVecEl || !operat->isOptimized()) {
      if (rememberElVec) {
	elementVector->set(0.0);
      }
      lastVecEl = el;
    } else {
      if (rememberElVec) {
	axpy(factor, *elementVector, *userVec);
	return;
      }
    }
    
    ElementVector *vec = rememberElVec ? elementVector : userVec;

    if (operat->uhOld && remember) {
      matVecAssemble(elInfo, vec);
      if (rememberElVec) {
	axpy(factor, *elementVector, *userVec);
      }
      return;
    } 
    
    if (firstOrderAssemblerGrdPsi)
      firstOrderAssemblerGrdPsi->calculateElementVector(elInfo, vec);
    if (zeroOrderAssembler)
      zeroOrderAssembler->calculateElementVector(elInfo, vec);
        
    if (rememberElVec) {
      axpy(factor, *elementVector, *userVec);
    }      
  }

  void Assembler::matVecAssemble(const ElInfo *elInfo, ElementVector *vec)
  {
    FUNCNAME("Assembler::matVecAssemble()");

    Element *el = elInfo->getElement(); 
    const BasisFunction *basFcts = rowFESpace->getBasisFcts();
    int nBasFcts = basFcts->getNumber();
    double *uhOldLoc = new double[nBasFcts];

    operat->uhOld->getLocalVector(el, uhOldLoc);
    
    if (el != lastMatEl) {
      calculateElementMatrix(elInfo, NULL);
    }
    
    for (int i = 0; i < nBasFcts; i++) {
      double val = 0.0;
      for (int j = 0; j < nBasFcts; j++) {
	val += (*elementMatrix)[i][j] * uhOldLoc[j];
      }
      (*vec)[i] += val;
    }
    

    delete [] uhOldLoc;       
  }

  void Assembler::initElement(const ElInfo *elInfo, Quadrature *quad)
  {
    checkQuadratures();

    if (secondOrderAssembler) 
      secondOrderAssembler->initElement(elInfo, quad);
    if (firstOrderAssemblerGrdPsi)
      firstOrderAssemblerGrdPsi->initElement(elInfo, quad);
    if (firstOrderAssemblerGrdPhi)
      firstOrderAssemblerGrdPhi->initElement(elInfo, quad);
    if (zeroOrderAssembler)
      zeroOrderAssembler->initElement(elInfo, quad);
  }

  OptimizedAssembler::OptimizedAssembler(Operator  *op,
					 Quadrature *quad2,
					 Quadrature *quad1GrdPsi,
					 Quadrature *quad1GrdPhi,
					 Quadrature *quad0,
					 const FiniteElemSpace *rowFESpace_,
					 const FiniteElemSpace *colFESpace_) 
    : Assembler(op, rowFESpace_, colFESpace_)
  {
    bool opt = (rowFESpace_ == colFESpace_);

    // create sub assemblers
    secondOrderAssembler = 
      SecondOrderAssembler::getSubAssembler(op, this, quad2, opt);

    firstOrderAssemblerGrdPsi = 
      FirstOrderAssembler::getSubAssembler(op, this, quad1GrdPsi, GRD_PSI, opt);

    firstOrderAssemblerGrdPhi = 
      FirstOrderAssembler::getSubAssembler(op, this, quad1GrdPhi, GRD_PHI, opt);

    zeroOrderAssembler = 
      ZeroOrderAssembler::getSubAssembler(op, this, quad0, opt);
  }

  StandardAssembler::StandardAssembler(Operator  *op,
				       Quadrature *quad2,
				       Quadrature *quad1GrdPsi,
				       Quadrature *quad1GrdPhi,
				       Quadrature *quad0,
				       const FiniteElemSpace *rowFESpace_,
				       const FiniteElemSpace *colFESpace_) 
    : Assembler(op, rowFESpace_, colFESpace_)
  {
    remember = false;

    // create sub assemblers
    secondOrderAssembler = 
      SecondOrderAssembler::getSubAssembler(op, this, quad2, false);
    firstOrderAssemblerGrdPsi = 
      FirstOrderAssembler::getSubAssembler(op, this, quad1GrdPsi, GRD_PSI, false);
    firstOrderAssemblerGrdPhi = 
      FirstOrderAssembler::getSubAssembler(op, this, quad1GrdPhi, GRD_PHI, false);
    zeroOrderAssembler = 
      ZeroOrderAssembler::getSubAssembler(op, this, quad0, false);
  }

  ElementMatrix *Assembler::initElementMatrix(ElementMatrix *elMat, 
					      const ElInfo *elInfo)
  {
    if (!elMat) {
      elMat = NEW ElementMatrix(nRow, nCol);
    }

    elMat->set(0.0);
         
    rowFESpace->getBasisFcts()->getLocalIndicesVec(elInfo->getElement(),
						   rowFESpace->getAdmin(),
						   &(elMat->rowIndices));


    if (rowFESpace == colFESpace) {
      elMat->colIndices = elMat->rowIndices;
    } else {
      colFESpace->getBasisFcts()->getLocalIndicesVec(elInfo->getElement(),
						     colFESpace->getAdmin(),
						     &(elMat->colIndices));
    }
    
    return elMat;
  }

  ElementVector *Assembler::initElementVector(ElementVector *elVec, 
					      const ElInfo *elInfo)
  {
    if (!elVec) {
      elVec = NEW ElementVector(nRow);
    }

    elVec->set(0.0);
  
    DOFAdmin *rowAdmin = rowFESpace->getAdmin();

    Element *element = elInfo->getElement();

    rowFESpace->getBasisFcts()->getLocalIndicesVec(element, rowAdmin, &(elVec->dofIndices));

    return elVec;
  }

  void Assembler::checkQuadratures()
  { 
    if (secondOrderAssembler) {
      // create quadrature
      if (!secondOrderAssembler->getQuadrature()) {
	int dim = rowFESpace->getMesh()->getDim();
	int degree = operat->getQuadratureDegree(2);
	Quadrature *quadrature = Quadrature::provideQuadrature(dim, degree);
	secondOrderAssembler->setQuadrature(quadrature);
      }
    }
    if (firstOrderAssemblerGrdPsi) {
      // create quadrature
      if (!firstOrderAssemblerGrdPsi->getQuadrature()) {
	int dim = rowFESpace->getMesh()->getDim();
	int degree = operat->getQuadratureDegree(1, GRD_PSI);
	Quadrature *quadrature = Quadrature::provideQuadrature(dim, degree);
	firstOrderAssemblerGrdPsi->setQuadrature(quadrature);
      }
    }
    if (firstOrderAssemblerGrdPhi) {
      // create quadrature
      if (!firstOrderAssemblerGrdPhi->getQuadrature()) {
	int dim = rowFESpace->getMesh()->getDim();
	int degree = operat->getQuadratureDegree(1, GRD_PHI);
	Quadrature *quadrature = Quadrature::provideQuadrature(dim, degree);
	firstOrderAssemblerGrdPhi->setQuadrature(quadrature);
      }
    }
    if (zeroOrderAssembler) {
      // create quadrature
      if (!zeroOrderAssembler->getQuadrature()) {
	int dim = rowFESpace->getMesh()->getDim();
	int degree = operat->getQuadratureDegree(0);
	Quadrature *quadrature = Quadrature::provideQuadrature(dim, degree);
	zeroOrderAssembler->setQuadrature(quadrature);
      }
    }
  }

}