MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/cg_solve.hpp - third_party/github.com/llvm/llvm-test-suite - Git at Google

 #ifndef _cg_solve_hpp_
 #define _cg_solve_hpp_

 //@HEADER
 // ************************************************************************
 //
 // MiniFE: Simple Finite Element Assembly and Solve
 // Copyright (2006-2013) Sandia Corporation
 //
 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
 // license for use of this work by or on behalf of the U.S. Government.
 //
 // This library is free software; you can redistribute it and/or modify
 // it under the terms of the GNU Lesser General Public License as
 // published by the Free Software Foundation; either version 2.1 of the
 // License, or (at your option) any later version.
 //
 // This library is distributed in the hope that it will be useful, but
 // WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 // Lesser General Public License for more details.
 //
 // You should have received a copy of the GNU Lesser General Public
 // License along with this library; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 // USA
 //
 // ************************************************************************
 //@HEADER

 #include <cmath>
 #include <limits>

 #include <Vector_functions.hpp>
 #include <mytimer.hpp>

 #include <outstream.hpp>

 namespace miniFE {

 template<typename Scalar>
 void print_vec(const std::vector<Scalar>& vec, const std::string& name)
 {
   for(size_t i=0; i<vec.size(); ++i) {
     std::cout << name << "["<<i<<"]: " << vec[i] << std::endl;
   }
 }

 template<typename VectorType>
 bool breakdown(typename VectorType::ScalarType inner,
                const VectorType& v,
                const VectorType& w)
 {
   typedef typename VectorType::ScalarType Scalar;
   typedef typename TypeTraits<Scalar>::magnitude_type magnitude;

 //This is code that was copied from Aztec, and originally written
 //by my hero, Ray Tuminaro.
 //
 //Assuming that inner = <v,w> (inner product of v and w),
 //v and w are considered orthogonal if
 //  |inner| < 100 * ||v||_2 * ||w||_2 * epsilon

   magnitude vnorm = std::sqrt(dot(v,v));
   magnitude wnorm = std::sqrt(dot(w,w));
   return std::abs(inner) <= 100*vnorm*wnorm*std::numeric_limits<magnitude>::epsilon();
 }

 template<typename OperatorType,
          typename VectorType,
          typename Matvec>
 void
 cg_solve(OperatorType& A,
          const VectorType& b,
          VectorType& x,
          Matvec matvec,
          typename OperatorType::LocalOrdinalType max_iter,
          typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& tolerance,
          typename OperatorType::LocalOrdinalType& num_iters,
          typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& normr,
          timer_type* my_cg_times)
 {
   typedef typename OperatorType::ScalarType ScalarType;
   typedef typename OperatorType::GlobalOrdinalType GlobalOrdinalType;
   typedef typename OperatorType::LocalOrdinalType LocalOrdinalType;
   typedef typename TypeTraits<ScalarType>::magnitude_type magnitude_type;

   timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0, tMATVECDOT = 0;
   timer_type total_time = mytimer();

   int myproc = 0;
 #ifdef HAVE_MPI
   MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
 #ifdef USE_MPI_PCONTROL
   MPI_Pcontrol(1);
 #endif
 #endif

   if (!A.has_local_indices) {
     std::cerr << "miniFE::cg_solve ERROR, A.has_local_indices is false, needs to be true. This probably means "
        << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::cg_solve."
        << std::endl;
     return;
   }

   size_t nrows = A.rows.size();
   LocalOrdinalType ncols = A.num_cols;

   VectorType r(b.startIndex, nrows);
   VectorType p(0, ncols);
   VectorType Ap(b.startIndex, nrows);

   normr = 0;
   magnitude_type rtrans = 0;
   magnitude_type oldrtrans = 0;

   LocalOrdinalType print_freq = max_iter/10;
   if (print_freq>50) print_freq = 50;
   if (print_freq<1)  print_freq = 1;

   ScalarType one = 1.0;
   ScalarType zero = 0.0;

   TICK(); waxpby(one, x, zero, x, p); TOCK(tWAXPY);

 //  print_vec(p.coefs, "p");

   TICK();
   matvec(A, p, Ap);
   TOCK(tMATVEC);

   TICK(); waxpby(one, b, -one, Ap, r); TOCK(tWAXPY);

   TICK(); rtrans = dot(r, r); TOCK(tDOT);

 //std::cout << "rtrans="<<rtrans<<std::endl;

   normr = std::sqrt(rtrans);

   if (myproc == 0) {
     std::cout << "Initial Residual = "<< normr << std::endl;
   }

   magnitude_type brkdown_tol = std::numeric_limits<magnitude_type>::epsilon();

 #ifdef MINIFE_DEBUG
   std::ostream& os = outstream();
   os << "brkdown_tol = " << brkdown_tol << std::endl;
 #endif


   for(LocalOrdinalType k=1; k <= max_iter && normr > tolerance; ++k) {
     if (k == 1) {
       TICK(); waxpby(one, r, zero, r, p); TOCK(tWAXPY);
     }
     else {
       oldrtrans = rtrans;
       TICK(); rtrans = dot(r, r); TOCK(tDOT);
       magnitude_type beta = rtrans/oldrtrans;
       TICK(); waxpby(one, r, beta, p, p); TOCK(tWAXPY);
     }

     normr = std::sqrt(rtrans);

     if (myproc == 0 && (k%print_freq==0 || k==max_iter)) {
       std::cout << "Iteration = "<<k<<"   Residual = "<<normr<<std::endl;
     }

     magnitude_type alpha = 0;
     magnitude_type p_ap_dot = 0;

 #ifdef MINIFE_FUSED
     TICK();
     p_ap_dot = matvec_and_dot(A, p, Ap);
     TOCK(tMATVECDOT);
 #else
     TICK(); matvec(A, p, Ap); TOCK(tMATVEC);

     TICK(); p_ap_dot = dot(Ap, p); TOCK(tDOT);
 #endif

 #ifdef MINIFE_DEBUG
     os << "iter " << k << ", p_ap_dot = " << p_ap_dot;
     os.flush();
 #endif
     if (p_ap_dot < brkdown_tol) {
       if (p_ap_dot < 0 || breakdown(p_ap_dot, Ap, p)) {
         std::cerr << "miniFE::cg_solve ERROR, numerical breakdown!"<<std::endl;
 #ifdef MINIFE_DEBUG
         os << "ERROR, numerical breakdown!"<<std::endl;
 #endif
         //update the timers before jumping out.
         my_cg_times[WAXPY] = tWAXPY;
         my_cg_times[DOT] = tDOT;
         my_cg_times[MATVEC] = tMATVEC;
         my_cg_times[TOTAL] = mytimer() - total_time;
         return;
       }
       else brkdown_tol = 0.1 * p_ap_dot;
     }
     alpha = rtrans/p_ap_dot;
 #ifdef MINIFE_DEBUG
     os << ", rtrans = " << rtrans << ", alpha = " << alpha << std::endl;
 #endif

 #ifdef MINIFE_FUSED
     TICK();
     fused_waxpby(one, x, alpha, p, x, one, r, -alpha, Ap, r);
     TOCK(tWAXPY);
 #else
     TICK(); waxpby(one, x, alpha, p, x);
             waxpby(one, r, -alpha, Ap, r); TOCK(tWAXPY);
 #endif

     num_iters = k;
   }

 #ifdef HAVE_MPI
 #ifdef USE_MPI_PCONTROL
   MPI_Pcontrol(0);
 #endif
 #endif

   my_cg_times[WAXPY] = tWAXPY;
   my_cg_times[DOT] = tDOT;
   my_cg_times[MATVEC] = tMATVEC;
   my_cg_times[MATVECDOT] = tMATVECDOT;
   my_cg_times[TOTAL] = mytimer() - total_time;
 }

 }//namespace miniFE

 #endif
	#ifndef _cg_solve_hpp_
	#define _cg_solve_hpp_

	//@HEADER
	// ************************************************************************
	//
	// MiniFE: Simple Finite Element Assembly and Solve
	// Copyright (2006-2013) Sandia Corporation
	//
	// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
	// license for use of this work by or on behalf of the U.S. Government.
	//
	// This library is free software; you can redistribute it and/or modify
	// it under the terms of the GNU Lesser General Public License as
	// published by the Free Software Foundation; either version 2.1 of the
	// License, or (at your option) any later version.
	//
	// This library is distributed in the hope that it will be useful, but
	// WITHOUT ANY WARRANTY; without even the implied warranty of
	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	// Lesser General Public License for more details.
	//
	// You should have received a copy of the GNU Lesser General Public
	// License along with this library; if not, write to the Free Software
	// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
	// USA
	//
	// ************************************************************************
	//@HEADER

	#include <cmath>
	#include <limits>

	#include <Vector_functions.hpp>
	#include <mytimer.hpp>

	#include <outstream.hpp>

	namespace miniFE {

	template<typename Scalar>
	void print_vec(const std::vector<Scalar>& vec, const std::string& name)
	{
	for(size_t i=0; i<vec.size(); ++i) {
	std::cout << name << "["<<i<<"]: " << vec[i] << std::endl;
	}
	}

	template<typename VectorType>
	bool breakdown(typename VectorType::ScalarType inner,
	const VectorType& v,
	const VectorType& w)
	{
	typedef typename VectorType::ScalarType Scalar;
	typedef typename TypeTraits<Scalar>::magnitude_type magnitude;

	//This is code that was copied from Aztec, and originally written
	//by my hero, Ray Tuminaro.
	//
	//Assuming that inner = <v,w> (inner product of v and w),
	//v and w are considered orthogonal if
	// \|inner\| < 100 * \|\|v\|\|_2 * \|\|w\|\|_2 * epsilon

	magnitude vnorm = std::sqrt(dot(v,v));
	magnitude wnorm = std::sqrt(dot(w,w));
	return std::abs(inner) <= 100vnormwnorm*std::numeric_limits<magnitude>::epsilon();
	}

	template<typename OperatorType,
	typename VectorType,
	typename Matvec>
	void
	cg_solve(OperatorType& A,
	const VectorType& b,
	VectorType& x,
	Matvec matvec,
	typename OperatorType::LocalOrdinalType max_iter,
	typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& tolerance,
	typename OperatorType::LocalOrdinalType& num_iters,
	typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& normr,
	timer_type* my_cg_times)
	{
	typedef typename OperatorType::ScalarType ScalarType;
	typedef typename OperatorType::GlobalOrdinalType GlobalOrdinalType;
	typedef typename OperatorType::LocalOrdinalType LocalOrdinalType;
	typedef typename TypeTraits<ScalarType>::magnitude_type magnitude_type;

	timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0, tMATVECDOT = 0;
	timer_type total_time = mytimer();

	int myproc = 0;
	#ifdef HAVE_MPI
	MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
	#ifdef USE_MPI_PCONTROL
	MPI_Pcontrol(1);
	#endif
	#endif

	if (!A.has_local_indices) {
	std::cerr << "miniFE::cg_solve ERROR, A.has_local_indices is false, needs to be true. This probably means "
	<< "miniFE::make_local_matrix(A) was not called prior to calling miniFE::cg_solve."
	<< std::endl;
	return;
	}

	size_t nrows = A.rows.size();
	LocalOrdinalType ncols = A.num_cols;

	VectorType r(b.startIndex, nrows);
	VectorType p(0, ncols);
	VectorType Ap(b.startIndex, nrows);

	normr = 0;
	magnitude_type rtrans = 0;
	magnitude_type oldrtrans = 0;

	LocalOrdinalType print_freq = max_iter/10;
	if (print_freq>50) print_freq = 50;
	if (print_freq<1) print_freq = 1;

	ScalarType one = 1.0;
	ScalarType zero = 0.0;

	TICK(); waxpby(one, x, zero, x, p); TOCK(tWAXPY);

	// print_vec(p.coefs, "p");

	TICK();
	matvec(A, p, Ap);
	TOCK(tMATVEC);

	TICK(); waxpby(one, b, -one, Ap, r); TOCK(tWAXPY);

	TICK(); rtrans = dot(r, r); TOCK(tDOT);

	//std::cout << "rtrans="<<rtrans<<std::endl;

	normr = std::sqrt(rtrans);

	if (myproc == 0) {
	std::cout << "Initial Residual = "<< normr << std::endl;
	}

	magnitude_type brkdown_tol = std::numeric_limits<magnitude_type>::epsilon();

	#ifdef MINIFE_DEBUG
	std::ostream& os = outstream();
	os << "brkdown_tol = " << brkdown_tol << std::endl;
	#endif


	for(LocalOrdinalType k=1; k <= max_iter && normr > tolerance; ++k) {
	if (k == 1) {
	TICK(); waxpby(one, r, zero, r, p); TOCK(tWAXPY);
	}
	else {
	oldrtrans = rtrans;
	TICK(); rtrans = dot(r, r); TOCK(tDOT);
	magnitude_type beta = rtrans/oldrtrans;
	TICK(); waxpby(one, r, beta, p, p); TOCK(tWAXPY);
	}

	normr = std::sqrt(rtrans);

	if (myproc == 0 && (k%print_freq==0 \|\| k==max_iter)) {
	std::cout << "Iteration = "<<k<<" Residual = "<<normr<<std::endl;
	}

	magnitude_type alpha = 0;
	magnitude_type p_ap_dot = 0;

	#ifdef MINIFE_FUSED
	TICK();
	p_ap_dot = matvec_and_dot(A, p, Ap);
	TOCK(tMATVECDOT);
	#else
	TICK(); matvec(A, p, Ap); TOCK(tMATVEC);

	TICK(); p_ap_dot = dot(Ap, p); TOCK(tDOT);
	#endif

	#ifdef MINIFE_DEBUG
	os << "iter " << k << ", p_ap_dot = " << p_ap_dot;
	os.flush();
	#endif
	if (p_ap_dot < brkdown_tol) {
	if (p_ap_dot < 0 \|\| breakdown(p_ap_dot, Ap, p)) {
	std::cerr << "miniFE::cg_solve ERROR, numerical breakdown!"<<std::endl;
	#ifdef MINIFE_DEBUG
	os << "ERROR, numerical breakdown!"<<std::endl;
	#endif
	//update the timers before jumping out.
	my_cg_times[WAXPY] = tWAXPY;
	my_cg_times[DOT] = tDOT;
	my_cg_times[MATVEC] = tMATVEC;
	my_cg_times[TOTAL] = mytimer() - total_time;
	return;
	}
	else brkdown_tol = 0.1 * p_ap_dot;
	}
	alpha = rtrans/p_ap_dot;
	#ifdef MINIFE_DEBUG
	os << ", rtrans = " << rtrans << ", alpha = " << alpha << std::endl;
	#endif

	#ifdef MINIFE_FUSED
	TICK();
	fused_waxpby(one, x, alpha, p, x, one, r, -alpha, Ap, r);
	TOCK(tWAXPY);
	#else
	TICK(); waxpby(one, x, alpha, p, x);
	waxpby(one, r, -alpha, Ap, r); TOCK(tWAXPY);
	#endif

	num_iters = k;
	}

	#ifdef HAVE_MPI
	#ifdef USE_MPI_PCONTROL
	MPI_Pcontrol(0);
	#endif
	#endif

	my_cg_times[WAXPY] = tWAXPY;
	my_cg_times[DOT] = tDOT;
	my_cg_times[MATVEC] = tMATVEC;
	my_cg_times[MATVECDOT] = tMATVECDOT;
	my_cg_times[TOTAL] = mytimer() - total_time;
	}

	}//namespace miniFE

	#endif