SingleSource/Benchmarks/CoyoteBench/fftbench.cpp - third_party/github.com/llvm/llvm-test-suite - Git at Google

 /*
     fftbench
     Standard C++ version
     10 May 2003

     Written by Scott Robert Ladd.
     No rights reserved. This is public domain software, for use by anyone.

     A number-crunching benchmark that can be used as a fitness test for
     evolving optimal compiler options via genetic algorithm.

     A simplified version of the FFT component from my standard libcoyote
     library, this benchmark test FFT performance on complex<double> values.

     Note that the code herein is design for the purpose of testing
     computational performance; error handling and other such "niceties"
     is virtually non-existent.

     Actual benchmark results can be found at:
             http://www.coyotegulch.com

     Please do not use this information or algorithm in any way that might
     upset the balance of the universe or to produce imaginary friends for
     imaginary numbers.
 */

 #include <cmath>
 #include <complex>
 #include <ctime>
 #include <cstring>
 #include <iostream>
 #include <iomanip>
 #include <stdexcept>

 using namespace std;

 //---------------------------------------------------------------------------
 // number of loops to perform
 #if defined(ACOVEA)
 #if defined(arch_pentium4)
 static const int TEST_SIZE   =   262144;
 #else
 static const int TEST_SIZE   =   131072;
 #endif
 #else
 #ifdef SMALL_PROBLEM_SIZE
 static const int TEST_SIZE   =  50000;
 #else
 static const int TEST_SIZE   =  2097152/4;
 #endif
 #endif

 //---------------------------------------------------------------------------
 // embedded random number generator; ala Park and Miller
 static double random_double()
 {
     static       long seed = 1325;
     static const long IA   = 16807;
     static const long IM   = 2147483647;
     static const double AM = 4.65661287525E-9;
     static const long IQ   = 127773;
     static const long IR   = 2836;
     static const long MASK = 123459876;

     long k;
     double result;

     seed ^= MASK;
     k = seed / IQ;
     seed = IA * (seed - k * IQ) - IR * k;

     if (seed < 0)
         seed += IM;

     result = AM * seed;
     seed ^= MASK;

     return result;
 }

 //---------------------------------------------------------------------------
 template <class T>
 class polynomial
 {
     public:
         // creation constructor, uninitialized (coefficients will be unknown values)
         polynomial(size_t degree);

         // creation constructor, with initialization from array
         polynomial(size_t degree, const T * coefficients);

         // creation constructor, with single-value initialization constructor
         polynomial(size_t degree, const T & value);

         // copy constructor
         polynomial(const polynomial<T> & source);

         // destructor
         virtual ~polynomial();

         // assignment
         polynomial<T> & operator = (const polynomial<T> & source);

         // initialize to specific value
         void initialize(const T & value = T(0));

         // increase polynomial length
         polynomial<T> & stretch(size_t degrees);

         // interrogate for degree
         size_t degree() const
         {
             return m_degree;
         }

         // get specific coefficient
         T get(size_t term) const;

         T & operator [] (size_t term);

         // evaluate for a specific value
         T operator() (const T & x) const;

         // unitary operators
         polynomial<T> operator - () const;
         polynomial<T> operator + () const;

         // binary mathematical operators
         polynomial<T> operator + (const polynomial<T> & poly) const;
         polynomial<T> operator - (const polynomial<T> & poly) const;

         // constant required by FFT routines
         static const complex<T> PI2I;

         // returns largest power of two that holds n
         static size_t log2(size_t n);

         // reverses a sequence of bits
         static size_t flip_bits(size_t k, size_t bits);

         // stretches the length of a polynomial to a power of two
         size_t stretch_fft();

         // performs a reverse-bit copy of a polynomial<T> into a new polynomial< complex<T> >
         static polynomial< complex<T> > bit_reverse(const polynomial<T> & poly);
         static polynomial< complex<T> > bit_reverse(const polynomial< complex<T> > & poly);

         // Fast Fourier Transform of polynomial<T> to polynomial< complex<T> >
         static polynomial< complex<T> > fft(const polynomial<T> & poly);

         // inverse FFT of polynomial< complex<T> > to polynomial<T>
         static polynomial< complex<T> > inverse_fft(const polynomial< complex<T> > & poly);

         // multiplication via FFT
         polynomial<T> operator * (const polynomial<T> & poly) const;

         // shorthand mathematical operators
         polynomial<T> & operator += (const polynomial<T> & poly);
         polynomial<T> & operator -= (const polynomial<T> & poly);
         polynomial<T> & operator *= (const polynomial<T> & poly);

     protected:
         // coefficients
         T * m_coeff;

         // number of terms
         size_t m_degree;

         // acquire resources
         void acquire();

         // release resources
         void release();

         // deep copy
         void deep_copy(const T * source);
 };

 // acquire resources
 template <typename T>
 inline void polynomial<T>::acquire()
 {
     m_coeff = new T [m_degree];
 }

 // release resources
 template <typename T>
 inline void polynomial<T>::release()
 {
     delete [] m_coeff;
 }

 // deep copy
 template <typename T>
 inline void polynomial<T>::deep_copy(const T * source)
 {
     for (size_t n = 0; n < m_degree; ++n)
         m_coeff[n] = source[n];
 }

 // initialize to specific value
 template <typename T>
 inline void polynomial<T>::initialize(const T & value)
 {
     for (size_t n = 0; n < m_degree; ++n)
         m_coeff[n] = value;
 }

 // creation constructor, uninitialized (coefficients will be unknown values)
 template <typename T>
 polynomial<T>::polynomial(size_t degree)
   : m_coeff(NULL),
     m_degree(degree)
 {
     acquire();
 }

 // creation constructor, with possible initialization from array
 template <typename T>
 polynomial<T>::polynomial(size_t degree, const T * coefficients)
   : m_coeff(NULL),
     m_degree(degree)
 {
     acquire();

     if (coefficients != NULL)
         deep_copy(coefficients);
 }

 // creation constructor, with single-value initialization constructor
 template <typename T>
 polynomial<T>::polynomial(size_t degree, const T & value)
   : m_coeff(NULL),
     m_degree(degree)
 {
     acquire();
     initialize(value);
 }

 // copy constructor
 template <typename T>
 polynomial<T>::polynomial(const polynomial<T> & source)
   : m_coeff(NULL),
     m_degree(source.m_degree)
 {
     acquire();
     deep_copy(source.m_coeff);
 }

 // destructor
 template <typename T>
 polynomial<T>::~polynomial()
 {
     release();
 }

 // assignment
 template <typename T>
 polynomial<T> & polynomial<T>::operator = (const polynomial<T> & source)
 {
     if (m_degree != source.m_degree)
     {
         release();

         m_degree = source.m_degree;
         acquire();
     }

     deep_copy(source.m_coeff);

     return *this;
 }

 // increase polynomial length
 template <typename T>
 polynomial<T> & polynomial<T>::stretch(size_t degrees)
 {
     if (degrees != 0)
     {
         T * old_coeff = m_coeff;
         size_t old_degree = m_degree;

         m_degree += degrees;
         acquire();

         size_t n = 0;

         for (; n < old_degree; ++n)
             m_coeff[n] = old_coeff[n];

         for (; n < m_degree; ++n)
             m_coeff[n] = T(0);
     }

     return *this;
 }

 // get specific coefficient
 template <typename T>
 T polynomial<T>::get(size_t term) const
 {
     return m_coeff[term];
 }

 template <typename T>
 T & polynomial<T>::operator [] (size_t term)
 {
     return m_coeff[term];
 }

 // evaluate for a specific value
 template <typename T>
 T polynomial<T>::operator() (const T & x) const
 {
     // using Horner's Rule
     T y = m_coeff[m_degree - 1];

     size_t i = m_degree - 2;

     while (true)
     {
         y = x * y + m_coeff[i];

         if (i > 0)
             --i;
         else
             break;
     }

     return y;
 }

 // unitary operators
 template <typename T>
 polynomial<T> polynomial<T>::operator - () const
 {
     polynomial<T> result(m_degree); // uninitialized

     for (size_t n = 0; n < m_degree; ++n)
         result.m_coeff[n] = -m_coeff[n];

     return result;
 }

 template <typename T>
 polynomial<T> polynomial<T>::operator + () const
 {
     return polynomial<T>(*this);
 }

 // binary mathematical operators
 template <typename T>
 polynomial<T> polynomial<T>::operator + (const polynomial<T> & poly) const
 {
     if (m_degree >= poly.m_degree)
     {
         polynomial<T> result(*this);

         for (size_t n = 0; n < m_degree; ++n)
             result.m_coeff[n] += poly.m_coeff[n];

         return result;
     }
     else
     {
         polynomial<T> result(poly);

         for (size_t n = 0; n < m_degree; ++n)
             result.m_coeff[n] += m_coeff[n];

         return result;
     }
 }

 template <typename T>
 polynomial<T> polynomial<T>::operator - (const polynomial<T> & poly) const
 {
     if (m_degree >= poly.m_degree)
     {
         polynomial<T> result(*this);

         for (size_t n = 0; n < m_degree; ++n)
             result.m_coeff[n] -= poly.m_coeff[n];

         return result;
     }
     else
     {
         polynomial<T> result(poly);

         for (size_t n = 0; n < m_degree; ++n)
             result.m_coeff[n] -= m_coeff[n];

         return result;
     }
 }

 // constant required by FFT routines
 template <typename T>
 const complex<T> polynomial<T>::PI2I(0.0,6.283185307179586);

 // returns largest power of two that holds n
 template <typename T>
 size_t polynomial<T>::log2(size_t n)
 {
     // returns 1 if n == 0!
     size_t x = 1, c = 0;

     while (x < n)
     {
         ++c;
         x <<= 1;

         if (x == 0)
             break;
     }

     return c;
 }

 // reverses a sequence of bits
 template <typename T>
 size_t polynomial<T>::flip_bits(size_t k, size_t bits)
 {
     size_t lm = 1 << (bits - 1);
     size_t rm = 1;
     size_t  r = 0;

     while (lm)
     {
         if (k & rm)
             r |= (lm);

         lm >>= 1;
         rm <<= 1;
     }

     return r;
 }

 // stretches the length of a polynomial to a power of two
 template <class T>
 size_t polynomial<T>::stretch_fft()
 {
     size_t n = 1;

     while (true)
     {
         if (m_degree <= n)
             break;

         n <<= 1;

         if (n == 0)
             throw overflow_error("overflow in fft polynomial stretch");
     }

     n <<= 1;
     n -= m_degree;

     if (n > 0)
         stretch(n);

     return n;
 }

 // performs a reverse-bit copy of a polynomial<T> into a new polynomial< complex<T> >
 template <typename T>
 polynomial< complex<T> > polynomial<T>::bit_reverse(const polynomial<T> & poly)
 {
     size_t b = log2(poly.degree());

     polynomial< complex<T> > result(poly.degree());

     for (size_t n = 0; n < poly.degree(); ++n)
         result[flip_bits(n,b)] = poly.get(n);

     return result;
 }

 // performs a reverse-bit copy of a polynomial<T> into a new polynomial< complex<T> >
 template <typename T>
 polynomial< complex<T> > polynomial<T>::bit_reverse(const polynomial< complex<T> > & poly)
 {
     size_t b = log2(poly.degree());

     polynomial< complex<T> > result(poly.degree());

     for (size_t n = 0; n < poly.degree(); ++n)
         result[flip_bits(n,b)] = poly.get(n);

     return result;
 }

 // Fast Fourier Transform of polynomial<T> to polynomial< complex<T> >
 template <typename T>
 polynomial< complex<T> > polynomial<T>::fft(const polynomial<T> & poly)
 {
     size_t nl = log2(poly.degree());
     size_t j, k, m, m2, s;
     complex<T> wm, w, t, u;

     polynomial< complex<T> > result = bit_reverse(poly);

     m  = 2;
     m2 = 1;

     for (s = 0; s < nl; ++s)
     {
         wm = exp(PI2I / complex<T>(m));
         w  = 1.0;

         for (j = 0; j <= (m2 - 1); ++j)
         {
             for (k = j; k <= poly.degree() - 1; k += m)
             {
                 t = w * result[k + m2];
                 u = result[k];
                 result[k] = u + t;
                 result[k + m2] = u - t;
             }

             w *= wm;
         }

         m  <<= 1;
         m2 <<= 1;
     }

     return result;
 }

 // inverse FFT of polynomial< complex<T> > to polynomial<T>
 template <typename T>
 polynomial< complex<T> > polynomial<T>::inverse_fft(const polynomial< complex<T> > & poly)
 {
     size_t nl = log2(poly.degree());
     size_t j, k, m, m2, s;
     complex<T> wm, w, t, u;

     polynomial< complex<T> > result = bit_reverse(poly);

     m  = 2;
     m2 = 1;

     for (s = 0; s < nl; ++s)
     {
         wm = exp(-PI2I / complex<T>(m));
         w  = 1.0;

         for (j = 0; j <= (m2 - 1); ++j)
         {
             for (k = j; k <= poly.degree() - 1; k += m)
             {
                 t = w * result[k + m2];
                 u = result[k];
                 result[k] = u + t;
                 result[k + m2] = u - t;
             }

             w *= wm;
         }

         m  <<= 1;
         m2 <<= 1;
     }

     for (j = 0; j < poly.degree(); ++j)
         result[j] /= double(poly.degree());

     return result;
 }

 // multiplication by FFT
 template <typename T>
 polynomial<T> polynomial<T>::operator * (const polynomial<T> & poly) const
 {
 #if defined(BRUTE_FORCE)
     // brute force algorithm
     if (m_degree != poly.m_degree)
         throw domain_error("can not multiply two polynomials with different lengths");

     polynomial<T> result(2 * m_degree - 1, T(0));

     for (size_t n1 = 0; n1 < m_degree; ++n1)
         for (size_t n2 = 0; n2 < m_degree; ++n2)
             result.m_coeff[n1 + n2] += m_coeff[n1] * poly.m_coeff[n2];
 #else
     // duplicate p1 and p2 to preserve original objects
     polynomial<T> a1(*this);
     polynomial<T> a2(poly);

     // expand polynomials to next-largest power of two
     if (a1.degree() > a2.degree())
         a2.stretch(a1.stretch_fft());
     else
         a1.stretch(a2.stretch_fft());

     // FFT polynomials
     polynomial< complex<T> > dft1 = fft(a1);
     polynomial< complex<T> > dft2 = fft(a2);

     // multiply coefficients
     size_t n2 = a1.degree();

     for (size_t k = 0; k < n2; ++k)
         dft1[k] *= dft2[k];

     // inverse DFT to obtain result
     dft2 = inverse_fft(dft1);

     // convert back to complex<T>
     --n2;
     polynomial<T> result(n2);

     for (size_t k = 0; k < n2; ++k)
         result[k] = dft2[k].real();
 #endif

     return result;
 }

 // shorthand mathematical operators
 template <typename T>
 polynomial<T> & polynomial<T>::operator += (const polynomial<T> & poly)
 {
     size_t length = (m_degree <= poly.m_degree) ? m_degree : poly.m_degree;

     for (size_t n = 0; n < length; ++n)
         m_coeff[n] += poly.m_coeff[n];

     return *this;
 }

 template <typename T>
 polynomial<T> & polynomial<T>::operator -= (const polynomial<T> & poly)
 {
     size_t length = (m_degree <= poly.m_degree) ? m_degree : poly.m_degree;

     for (size_t n = 0; n < length; ++n)
         m_coeff[n] -= poly.m_coeff[n];

     return *this;
 }

 template <typename T>
 polynomial<T> & polynomial<T>::operator *= (const polynomial<T> & poly)
 {
     *this = (*this) * poly;
 }

 //---------------------------------------------------------------------------
 // Entry point
 int main(int argc, char ** argv)
 {
     // are we testing using a genetic algorithm?
     bool ga_testing = false;

     if (argc > 1)
     {
         for (int i = 1; i < argc; ++i)
         {
             if (!strcmp(argv[1],"-ga"))
             {
                 ga_testing = true;
                 break;
             }
         }
     }

     // generate random polynomials
     polynomial<double> poly1(TEST_SIZE);
     polynomial<double> poly2(TEST_SIZE);
     polynomial<double> poly3(TEST_SIZE * 2 - 1);

     for (int n = 0; n < TEST_SIZE; ++n)
     {
         poly1[n] = random_double();
         poly2[n] = random_double();
     }

     // get starting time
     //struct timespec start, stop;
     //clock_gettime(CLOCK_REALTIME,&start);

     // what we're timing
     poly3 = poly1 * poly2;

     // get final time
     //clock_gettime(CLOCK_REALTIME,&stop);
     double run_time = 0;
     //(stop.tv_sec - start.tv_sec) + (double)(stop.tv_nsec - start.tv_nsec) / 1000000000.0;

     // report runtime
     if (ga_testing)
         cout << run_time;
     else
         cout << "\nfftbench (Std. C++) run time: " << run_time << "\n\n";

     cout.flush();

     return 0;
 }
	/*
	fftbench
	Standard C++ version
	10 May 2003

	Written by Scott Robert Ladd.
	No rights reserved. This is public domain software, for use by anyone.

	A number-crunching benchmark that can be used as a fitness test for
	evolving optimal compiler options via genetic algorithm.

	A simplified version of the FFT component from my standard libcoyote
	library, this benchmark test FFT performance on complex<double> values.

	Note that the code herein is design for the purpose of testing
	computational performance; error handling and other such "niceties"
	is virtually non-existent.

	Actual benchmark results can be found at:
	http://www.coyotegulch.com

	Please do not use this information or algorithm in any way that might
	upset the balance of the universe or to produce imaginary friends for
	imaginary numbers.
	*/

	#include <cmath>
	#include <complex>
	#include <ctime>
	#include <cstring>
	#include <iostream>
	#include <iomanip>
	#include <stdexcept>

	using namespace std;

	//---------------------------------------------------------------------------
	// number of loops to perform
	#if defined(ACOVEA)
	#if defined(arch_pentium4)
	static const int TEST_SIZE = 262144;
	#else
	static const int TEST_SIZE = 131072;
	#endif
	#else
	#ifdef SMALL_PROBLEM_SIZE
	static const int TEST_SIZE = 50000;
	#else
	static const int TEST_SIZE = 2097152/4;
	#endif
	#endif

	//---------------------------------------------------------------------------
	// embedded random number generator; ala Park and Miller
	static double random_double()
	{
	static long seed = 1325;
	static const long IA = 16807;
	static const long IM = 2147483647;
	static const double AM = 4.65661287525E-9;
	static const long IQ = 127773;
	static const long IR = 2836;
	static const long MASK = 123459876;

	long k;
	double result;

	seed ^= MASK;
	k = seed / IQ;
	seed = IA * (seed - k * IQ) - IR * k;

	if (seed < 0)
	seed += IM;

	result = AM * seed;
	seed ^= MASK;

	return result;
	}

	//---------------------------------------------------------------------------
	template <class T>
	class polynomial
	{
	public:
	// creation constructor, uninitialized (coefficients will be unknown values)
	polynomial(size_t degree);

	// creation constructor, with initialization from array
	polynomial(size_t degree, const T * coefficients);

	// creation constructor, with single-value initialization constructor
	polynomial(size_t degree, const T & value);

	// copy constructor
	polynomial(const polynomial<T> & source);

	// destructor
	virtual ~polynomial();

	// assignment
	polynomial<T> & operator = (const polynomial<T> & source);

	// initialize to specific value
	void initialize(const T & value = T(0));

	// increase polynomial length
	polynomial<T> & stretch(size_t degrees);

	// interrogate for degree
	size_t degree() const
	{
	return m_degree;
	}

	// get specific coefficient
	T get(size_t term) const;

	T & operator [] (size_t term);

	// evaluate for a specific value
	T operator() (const T & x) const;

	// unitary operators
	polynomial<T> operator - () const;
	polynomial<T> operator + () const;

	// binary mathematical operators
	polynomial<T> operator + (const polynomial<T> & poly) const;
	polynomial<T> operator - (const polynomial<T> & poly) const;

	// constant required by FFT routines
	static const complex<T> PI2I;

	// returns largest power of two that holds n
	static size_t log2(size_t n);

	// reverses a sequence of bits
	static size_t flip_bits(size_t k, size_t bits);

	// stretches the length of a polynomial to a power of two
	size_t stretch_fft();

	// performs a reverse-bit copy of a polynomial<T> into a new polynomial< complex<T> >
	static polynomial< complex<T> > bit_reverse(const polynomial<T> & poly);
	static polynomial< complex<T> > bit_reverse(const polynomial< complex<T> > & poly);

	// Fast Fourier Transform of polynomial<T> to polynomial< complex<T> >
	static polynomial< complex<T> > fft(const polynomial<T> & poly);

	// inverse FFT of polynomial< complex<T> > to polynomial<T>
	static polynomial< complex<T> > inverse_fft(const polynomial< complex<T> > & poly);

	// multiplication via FFT
	polynomial<T> operator * (const polynomial<T> & poly) const;

	// shorthand mathematical operators
	polynomial<T> & operator += (const polynomial<T> & poly);
	polynomial<T> & operator -= (const polynomial<T> & poly);
	polynomial<T> & operator *= (const polynomial<T> & poly);

	protected:
	// coefficients
	T * m_coeff;

	// number of terms
	size_t m_degree;

	// acquire resources
	void acquire();

	// release resources
	void release();

	// deep copy
	void deep_copy(const T * source);
	};

	// acquire resources
	template <typename T>
	inline void polynomial<T>::acquire()
	{
	m_coeff = new T [m_degree];
	}

	// release resources
	template <typename T>
	inline void polynomial<T>::release()
	{
	delete [] m_coeff;
	}

	// deep copy
	template <typename T>
	inline void polynomial<T>::deep_copy(const T * source)
	{
	for (size_t n = 0; n < m_degree; ++n)
	m_coeff[n] = source[n];
	}

	// initialize to specific value
	template <typename T>
	inline void polynomial<T>::initialize(const T & value)
	{
	for (size_t n = 0; n < m_degree; ++n)
	m_coeff[n] = value;
	}

	// creation constructor, uninitialized (coefficients will be unknown values)
	template <typename T>
	polynomial<T>::polynomial(size_t degree)
	: m_coeff(NULL),
	m_degree(degree)
	{
	acquire();
	}

	// creation constructor, with possible initialization from array
	template <typename T>
	polynomial<T>::polynomial(size_t degree, const T * coefficients)
	: m_coeff(NULL),
	m_degree(degree)
	{
	acquire();

	if (coefficients != NULL)
	deep_copy(coefficients);
	}

	// creation constructor, with single-value initialization constructor
	template <typename T>
	polynomial<T>::polynomial(size_t degree, const T & value)
	: m_coeff(NULL),
	m_degree(degree)
	{
	acquire();
	initialize(value);
	}

	// copy constructor
	template <typename T>
	polynomial<T>::polynomial(const polynomial<T> & source)
	: m_coeff(NULL),
	m_degree(source.m_degree)
	{
	acquire();
	deep_copy(source.m_coeff);
	}

	// destructor
	template <typename T>
	polynomial<T>::~polynomial()
	{
	release();
	}

	// assignment
	template <typename T>
	polynomial<T> & polynomial<T>::operator = (const polynomial<T> & source)
	{
	if (m_degree != source.m_degree)
	{
	release();

	m_degree = source.m_degree;
	acquire();
	}

	deep_copy(source.m_coeff);

	return *this;
	}

	// increase polynomial length
	template <typename T>
	polynomial<T> & polynomial<T>::stretch(size_t degrees)
	{
	if (degrees != 0)
	{
	T * old_coeff = m_coeff;
	size_t old_degree = m_degree;

	m_degree += degrees;
	acquire();

	size_t n = 0;

	for (; n < old_degree; ++n)
	m_coeff[n] = old_coeff[n];

	for (; n < m_degree; ++n)
	m_coeff[n] = T(0);
	}

	return *this;
	}

	// get specific coefficient
	template <typename T>
	T polynomial<T>::get(size_t term) const
	{
	return m_coeff[term];
	}

	template <typename T>
	T & polynomial<T>::operator [] (size_t term)
	{
	return m_coeff[term];
	}

	// evaluate for a specific value
	template <typename T>
	T polynomial<T>::operator() (const T & x) const
	{
	// using Horner's Rule
	T y = m_coeff[m_degree - 1];

	size_t i = m_degree - 2;

	while (true)
	{
	y = x * y + m_coeff[i];

	if (i > 0)
	--i;
	else
	break;
	}

	return y;
	}

	// unitary operators
	template <typename T>
	polynomial<T> polynomial<T>::operator - () const
	{
	polynomial<T> result(m_degree); // uninitialized

	for (size_t n = 0; n < m_degree; ++n)
	result.m_coeff[n] = -m_coeff[n];

	return result;
	}

	template <typename T>
	polynomial<T> polynomial<T>::operator + () const
	{
	return polynomial<T>(*this);
	}

	// binary mathematical operators
	template <typename T>
	polynomial<T> polynomial<T>::operator + (const polynomial<T> & poly) const
	{
	if (m_degree >= poly.m_degree)
	{
	polynomial<T> result(*this);

	for (size_t n = 0; n < m_degree; ++n)
	result.m_coeff[n] += poly.m_coeff[n];

	return result;
	}
	else
	{
	polynomial<T> result(poly);

	for (size_t n = 0; n < m_degree; ++n)
	result.m_coeff[n] += m_coeff[n];

	return result;
	}
	}

	template <typename T>
	polynomial<T> polynomial<T>::operator - (const polynomial<T> & poly) const
	{
	if (m_degree >= poly.m_degree)
	{
	polynomial<T> result(*this);

	for (size_t n = 0; n < m_degree; ++n)
	result.m_coeff[n] -= poly.m_coeff[n];

	return result;
	}
	else
	{
	polynomial<T> result(poly);

	for (size_t n = 0; n < m_degree; ++n)
	result.m_coeff[n] -= m_coeff[n];

	return result;
	}
	}

	// constant required by FFT routines
	template <typename T>
	const complex<T> polynomial<T>::PI2I(0.0,6.283185307179586);

	// returns largest power of two that holds n
	template <typename T>
	size_t polynomial<T>::log2(size_t n)
	{
	// returns 1 if n == 0!
	size_t x = 1, c = 0;

	while (x < n)
	{
	++c;
	x <<= 1;

	if (x == 0)
	break;
	}

	return c;
	}

	// reverses a sequence of bits
	template <typename T>
	size_t polynomial<T>::flip_bits(size_t k, size_t bits)
	{
	size_t lm = 1 << (bits - 1);
	size_t rm = 1;
	size_t r = 0;

	while (lm)
	{
	if (k & rm)
	r \|= (lm);

	lm >>= 1;
	rm <<= 1;
	}

	return r;
	}

	// stretches the length of a polynomial to a power of two
	template <class T>
	size_t polynomial<T>::stretch_fft()
	{
	size_t n = 1;

	while (true)
	{
	if (m_degree <= n)
	break;

	n <<= 1;

	if (n == 0)
	throw overflow_error("overflow in fft polynomial stretch");
	}

	n <<= 1;
	n -= m_degree;

	if (n > 0)
	stretch(n);

	return n;
	}

	// performs a reverse-bit copy of a polynomial<T> into a new polynomial< complex<T> >
	template <typename T>
	polynomial< complex<T> > polynomial<T>::bit_reverse(const polynomial<T> & poly)
	{
	size_t b = log2(poly.degree());

	polynomial< complex<T> > result(poly.degree());

	for (size_t n = 0; n < poly.degree(); ++n)
	result[flip_bits(n,b)] = poly.get(n);

	return result;
	}

	// performs a reverse-bit copy of a polynomial<T> into a new polynomial< complex<T> >
	template <typename T>
	polynomial< complex<T> > polynomial<T>::bit_reverse(const polynomial< complex<T> > & poly)
	{
	size_t b = log2(poly.degree());

	polynomial< complex<T> > result(poly.degree());

	for (size_t n = 0; n < poly.degree(); ++n)
	result[flip_bits(n,b)] = poly.get(n);

	return result;
	}

	// Fast Fourier Transform of polynomial<T> to polynomial< complex<T> >
	template <typename T>
	polynomial< complex<T> > polynomial<T>::fft(const polynomial<T> & poly)
	{
	size_t nl = log2(poly.degree());
	size_t j, k, m, m2, s;
	complex<T> wm, w, t, u;

	polynomial< complex<T> > result = bit_reverse(poly);

	m = 2;
	m2 = 1;

	for (s = 0; s < nl; ++s)
	{
	wm = exp(PI2I / complex<T>(m));
	w = 1.0;

	for (j = 0; j <= (m2 - 1); ++j)
	{
	for (k = j; k <= poly.degree() - 1; k += m)
	{
	t = w * result[k + m2];
	u = result[k];
	result[k] = u + t;
	result[k + m2] = u - t;
	}

	w *= wm;
	}

	m <<= 1;
	m2 <<= 1;
	}

	return result;
	}

	// inverse FFT of polynomial< complex<T> > to polynomial<T>
	template <typename T>
	polynomial< complex<T> > polynomial<T>::inverse_fft(const polynomial< complex<T> > & poly)
	{
	size_t nl = log2(poly.degree());
	size_t j, k, m, m2, s;
	complex<T> wm, w, t, u;

	polynomial< complex<T> > result = bit_reverse(poly);

	m = 2;
	m2 = 1;

	for (s = 0; s < nl; ++s)
	{
	wm = exp(-PI2I / complex<T>(m));
	w = 1.0;

	for (j = 0; j <= (m2 - 1); ++j)
	{
	for (k = j; k <= poly.degree() - 1; k += m)
	{
	t = w * result[k + m2];
	u = result[k];
	result[k] = u + t;
	result[k + m2] = u - t;
	}

	w *= wm;
	}

	m <<= 1;
	m2 <<= 1;
	}

	for (j = 0; j < poly.degree(); ++j)
	result[j] /= double(poly.degree());

	return result;
	}

	// multiplication by FFT
	template <typename T>
	polynomial<T> polynomial<T>::operator * (const polynomial<T> & poly) const
	{
	#if defined(BRUTE_FORCE)
	// brute force algorithm
	if (m_degree != poly.m_degree)
	throw domain_error("can not multiply two polynomials with different lengths");

	polynomial<T> result(2 * m_degree - 1, T(0));

	for (size_t n1 = 0; n1 < m_degree; ++n1)
	for (size_t n2 = 0; n2 < m_degree; ++n2)
	result.m_coeff[n1 + n2] += m_coeff[n1] * poly.m_coeff[n2];
	#else
	// duplicate p1 and p2 to preserve original objects
	polynomial<T> a1(*this);
	polynomial<T> a2(poly);

	// expand polynomials to next-largest power of two
	if (a1.degree() > a2.degree())
	a2.stretch(a1.stretch_fft());
	else
	a1.stretch(a2.stretch_fft());

	// FFT polynomials
	polynomial< complex<T> > dft1 = fft(a1);
	polynomial< complex<T> > dft2 = fft(a2);

	// multiply coefficients
	size_t n2 = a1.degree();

	for (size_t k = 0; k < n2; ++k)
	dft1[k] *= dft2[k];

	// inverse DFT to obtain result
	dft2 = inverse_fft(dft1);

	// convert back to complex<T>
	--n2;
	polynomial<T> result(n2);

	for (size_t k = 0; k < n2; ++k)
	result[k] = dft2[k].real();
	#endif

	return result;
	}

	// shorthand mathematical operators
	template <typename T>
	polynomial<T> & polynomial<T>::operator += (const polynomial<T> & poly)
	{
	size_t length = (m_degree <= poly.m_degree) ? m_degree : poly.m_degree;

	for (size_t n = 0; n < length; ++n)
	m_coeff[n] += poly.m_coeff[n];

	return *this;
	}

	template <typename T>
	polynomial<T> & polynomial<T>::operator -= (const polynomial<T> & poly)
	{
	size_t length = (m_degree <= poly.m_degree) ? m_degree : poly.m_degree;

	for (size_t n = 0; n < length; ++n)
	m_coeff[n] -= poly.m_coeff[n];

	return *this;
	}

	template <typename T>
	polynomial<T> & polynomial<T>::operator *= (const polynomial<T> & poly)
	{
	this = (this) * poly;
	}

	//---------------------------------------------------------------------------
	// Entry point
	int main(int argc, char ** argv)
	{
	// are we testing using a genetic algorithm?
	bool ga_testing = false;

	if (argc > 1)
	{
	for (int i = 1; i < argc; ++i)
	{
	if (!strcmp(argv[1],"-ga"))
	{
	ga_testing = true;
	break;
	}
	}
	}

	// generate random polynomials
	polynomial<double> poly1(TEST_SIZE);
	polynomial<double> poly2(TEST_SIZE);
	polynomial<double> poly3(TEST_SIZE * 2 - 1);

	for (int n = 0; n < TEST_SIZE; ++n)
	{
	poly1[n] = random_double();
	poly2[n] = random_double();
	}

	// get starting time
	//struct timespec start, stop;
	//clock_gettime(CLOCK_REALTIME,&start);

	// what we're timing
	poly3 = poly1 * poly2;

	// get final time
	//clock_gettime(CLOCK_REALTIME,&stop);
	double run_time = 0;
	//(stop.tv_sec - start.tv_sec) + (double)(stop.tv_nsec - start.tv_nsec) / 1000000000.0;

	// report runtime
	if (ga_testing)
	cout << run_time;
	else
	cout << "\nfftbench (Std. C++) run time: " << run_time << "\n\n";

	cout.flush();

	return 0;
	}