namespace AMDiS {

  template<typename VectorType>
  BiCGStab_M<VectorType>::BiCGStab_M(std::string name) : OEMSolver<VectorType>(name) {}

  template<typename VectorType>
  BiCGStab_M<VectorType>::~BiCGStab_M() {}

  template<typename VectorType>
  void BiCGStab_M<VectorType>::init()
  {
    r    = this->vectorCreator->create();
    rt   = this->vectorCreator->create();
    p    = this->vectorCreator->create();
    v    = this->vectorCreator->create();
    t    = this->vectorCreator->create();
    ph   = this->vectorCreator->create();
    sh   = this->vectorCreator->create();
    xmin = this->vectorCreator->create();
  }

  template<typename VectorType>
  void BiCGStab_M<VectorType>::exit()
  {
    this->vectorCreator->free(r);
    this->vectorCreator->free(rt);
    this->vectorCreator->free(p);
    this->vectorCreator->free(v);
    this->vectorCreator->free(t);
    this->vectorCreator->free(ph);
    this->vectorCreator->free(sh);
    this->vectorCreator->free(xmin);
  }

  template<typename VectorType>
  int BiCGStab_M<VectorType>::solveSystem(MatVecMultiplier<VectorType> *matVec,
					  VectorType *x, VectorType *b, bool reuseMatrix)
  {
    FUNCNAME("BiCGStab_M::solveSystem");
    double old_res = -1.0;
    int    iter, imin;
    double n2b, normrmin, rho, rho1, omega, alpha = 0.0, beta, rtv;

    const double TOL_0 = 1e-30, TOL_INF = 1e+30;

    double save_tolerance = this->tolerance;

    // Check for all zero right hand side vector => all zero solution
    n2b = norm(b);                // norm of rhs vector b
    if (n2b < TOL_0)
      {
	INFO(this->info,2)("b == 0, x = 0 is the solution of the linear system\n");
	set(*x, 0.0);               // solution is all zeros
	this->residual = 0.0;             // residual is zero
	return(0);                  // no iterations need to be performed
      }

    if (this->relative) this->tolerance *= n2b;

    // Set up for the method
    *xmin    = *x;                // iterate which has minimal residual so far
    imin     = 0;                 // iteration at which xmin was computed
    matVec->matVec(NoTranspose, *x, *r);
    *r      *= -1.0;
    *r      += *b;                // zero-th order residual
    this->residual = norm(r);           // norm of the residual

    START_INFO();
    if (SOLVE_INFO(0, this->residual, &old_res) == 1)
      {
	if (this->relative) this->tolerance = save_tolerance;
	return(0);                  // initial guess is a good enough solution
      }

    *rt      = *r;                // shadow residual
    normrmin = this->residual;          // norm of residual from xmin
    rho      = 1.0;
    omega    = 1.0;

    // Loop over maxit iterations (unless convergence or failure)
    for (iter = 1; iter <= this->max_iter; iter++)
      {
	rho1 = rho;
	rho  = *r * *rt;
	if (abs(rho) < TOL_0 || abs(rho) > TOL_INF)
	  {
	    BREAK_INFO("R and RT have become orthogonal", iter, this->residual, &old_res);
	    break;
	  }

	if (iter == 1)
	  *p = *r;
	else
	  {
	    beta = (rho/rho1) * (alpha/omega);
	    if (abs(beta) < TOL_0 || abs(beta) > TOL_INF)
	      {
		BREAK_INFO("beta has become too small or too large to continue "
			   "computing", iter, this->residual, &old_res);
		break;
	      }
	    axpy(-omega, *v, *p);
	    *p *= beta;
	    *p += *r;
	  }

	*ph = *p;
	if (this->leftPrecon)
	  this->leftPrecon->precon(ph);

	matVec->matVec(NoTranspose, *ph, *v);
    
	rtv = *v * *rt;
	if (abs(rtv) < TOL_0 || abs(rtv) > TOL_INF)
	  {
	    BREAK_INFO("V and RT have become orthogonal", iter, this->residual, &old_res);
	    break;
	  }

	alpha = rho / rtv;
	if (abs(alpha) > TOL_INF)
	  {
	    BREAK_INFO("alpha has become too large to continue computing",
		       iter, this->residual, &old_res);
	    break;
	  }

	axpy(alpha, *ph, *x);       // form the "half" iterate
	axpy(-alpha, *v, *r);       // and its residual
	this->residual = norm(r);

	if (this->residual < normrmin)    // update minimal norm quantities
	  {
	    normrmin = this->residual;
	    *xmin    = *x;
	    imin     = iter;
	  }

	if (abs(alpha) < TOL_0)
	  {
	    BREAK_INFO("Stagnation of the method", iter, this->residual, &old_res);
	    break;                    // stagnation of the method
	  }

	*sh = *r;                   // residual associated with xhalf
	if (this->leftPrecon)
	  this->leftPrecon->precon(sh);
	matVec->matVec(NoTranspose, *sh, *t);

	omega = (*t * *r) / (*t * *t);
	if (abs(omega) > TOL_INF)
	  {
	    BREAK_INFO("omega has become too large to continue computing",
		       iter, this->residual, &old_res);
	    break;
	  }

	axpy(omega, *sh, *x);       // update x
	axpy(-omega, *t, *r);
	this->residual = norm(r);
	if (SOLVE_INFO(iter, this->residual, &old_res) == 1)
	  {
	    if (this->relative) this->tolerance = save_tolerance;
	    return(iter);             // check for convergence
	  }

	if (this->residual < normrmin)    // update minimal norm quantities
	  {
	    normrmin = this->residual;
	    *xmin = *x;
	    imin = iter;
	  }

	if (abs(omega) < TOL_0)
	  {
	    BREAK_INFO("T and S have become orthogonal", iter, this->residual, &old_res);
	    break;                    // stagnation of the method
	  }
      }                             // end for (iter = 1; iter <= this->max_iter; iter++)

    // returned solution is first with minimal residual
    *x       = *xmin;
    iter     = imin;
    this->residual = normrmin;

    if (this->relative) this->tolerance = save_tolerance;

    return(iter);
  }

}