/* SingleSource/Benchmarks/Misc/flops-6.c - third_party/llvm-test-suite - Git at Google */

 /*--------------------- Start flops.c source code ----------------------*/

 /*****************************/
 /*          flops.c          */
 /* Version 2.0,  18 Dec 1992 */
 /*         Al Aburto         */
 /*      aburto@nosc.mil      */
 /*****************************/

 /*
    Flops.c is a 'c' program which attempts to estimate your system's
    floating-point 'MFLOPS' rating for the FADD, FSUB, FMUL, and FDIV
    operations based on specific 'instruction mixes' (discussed below).
    The program provides an estimate of PEAK MFLOPS performance by making
    maximal use of register variables with minimal interaction with main
    memory. The execution loops are all small so that they will fit in
    any cache. Flops.c can be used along with Linpack and the Livermore
    kernels (which exercise memory much more extensively) to gain further
    insight into the limits of system performance. The flops.c execution
    modules also include various percent weightings of FDIV's (from 0% to
    25% FDIV's) so that the range of performance can be obtained when
    using FDIV's. FDIV's, being computationally more intensive than
    FADD's or FMUL's, can impact performance considerably on some systems.

    Flops.c consists of 8 independent modules (routines) which, except for
    module 2, conduct numerical integration of various functions. Module
    2, estimates the value of pi based upon the Maclaurin series expansion
    of atan(1). MFLOPS ratings are provided for each module, but the
    program's overall results are summarized by the MFLOPS(1), MFLOPS(2),
    MFLOPS(3), and MFLOPS(4) outputs.

    The MFLOPS(1) result is identical to the result provided by all
    previous versions of flops.c. It is based only upon the results from
    modules 2 and 3. Two problems surfaced in using MFLOPS(1). First, it
    was difficult to completely 'vectorize' the result due to the
    recurrence of the 's' variable in module 2. This problem is addressed
    in the MFLOPS(2) result which does not use module 2, but maintains
    nearly the same weighting of FDIV's (9.2%) as in MFLOPS(1) (9.6%).
    The second problem with MFLOPS(1) centers around the percentage of
    FDIV's (9.6%) which was viewed as too high for an important class of
    problems. This concern is addressed in the MFLOPS(3) result (3.4%
    FDIV's) and in the MFLOPS(4) result, where NO FDIV's are conducted
    at all.

    The number of floating-point instructions per iteration (loop) is
    given below for each module executed:

    MODULE   FADD   FSUB   FMUL   FDIV   TOTAL  Comment
      1        7      0      6      1      14   7.1%  FDIV's
      2        3      2      1      1       7   difficult to vectorize.
      3        6      2      9      0      17   0.0%  FDIV's
      4        7      0      8      0      15   0.0%  FDIV's
      5       13      0     15      1      29   3.4%  FDIV's
      6       13      0     16      0      29   0.0%  FDIV's
      7        3      3      3      3      12   25.0% FDIV's
      8       13      0     17      0      30   0.0%  FDIV's

    A*2+3     21     12     14      5      52   A=5, MFLOPS(1), Same as
 	   40.4%  23.1%  26.9%  9.6%          previous versions of the
 						flops.c program. Includes
 						only Modules 2 and 3, does
 						9.6% FDIV's, and is not
 						easily vectorizable.

    1+3+4     58     14     66     14     152   A=4, MFLOPS(2), New output
    +5+6+    38.2%  9.2%   43.4%  9.2%          does not include Module 2,
    A*7                                         but does 9.2% FDIV's.

    1+3+4     62      5     74      5     146   A=0, MFLOPS(3), New output
    +5+6+    42.9%  3.4%   50.7%  3.4%          does not include Module 2,
    7+8                                         but does 3.4% FDIV's.

    3+4+6     39      2     50      0      91   A=0, MFLOPS(4), New output
    +8       42.9%  2.2%   54.9%  0.0%          does not include Module 2,
 						and does NO FDIV's.

    NOTE: Various timer routines are included as indicated below. The
 	timer routines, with some comments, are attached at the end
 	of the main program.

    NOTE: Please do not remove any of the printouts.

    EXAMPLE COMPILATION:
    UNIX based systems
        cc -DUNIX -O flops.c -o flops
        cc -DUNIX -DROPT flops.c -o flops
        cc -DUNIX -fast -O4 flops.c -o flops
        .
        .
        .
      etc.

    Al Aburto
    aburto@nosc.mil
 */

 /***************************************************************/
 /* Timer options. You MUST uncomment one of the options below  */
 /* or compile, for example, with the '-DUNIX' option.          */
 /*                                                             */
 /* GTODay is the option enabled here. None of the code shown   */
 /* in this file tests any of these macros, so the selection    */
 /* has no visible effect on what is compiled below.            */
 /***************************************************************/
 /* #define Amiga       */
 /* #define UNIX        */
 /* #define UNIX_Old    */
 /* #define VMS         */
 /* #define BORLAND_C   */
 /* #define MSC         */
 /* #define MAC         */
 /* #define IPSC        */
 /* #define FORTRAN_SEC */
 #define GTODay
 /* #define CTimer      */
 /* #define UXPM        */
 /* #define MAC_TMgr    */
 /* #define PARIX       */
 /* #define POSIX       */
 /* #define WIN32       */
 /* #define POSIX1      */
 /***********************/

 #include <stdio.h>
 #include <math.h>
 			    /* 'Uncomment' the line below to run   */
 			    /* with 'register double' variables    */
 			    /* defined, or compile with the        */
 			    /* '-DROPT' option. Don't need this if */
 			    /* registers used automatically, but   */
 			    /* you might want to try it anyway.    */
 /* #define ROPT */

 /* Timer bookkeeping, described as needed by 'dtime()'. Neither object  */
 /* is referenced by the code shown in this file.                        */
 double nulltime;
 double TimeArray[3];

 /* Run-time threshold for choosing the number of loops. main() assigns  */
 /* it 1.0.                                                              */
 double TLimit;

 /* Global array for timing results and other information (36 slots).    */
 double T[36];

 /* Result scratch values and small numeric constants. All start at zero */
 /* (static storage); main() assigns the ones it uses.                   */
 double sa;
 double sb;
 double sc;
 double sd;
 double one;
 double two;
 double three;
 double four;
 double five;
 double piref;
 double piprg;
 double scale;
 double pierr;

 /* A series. main() uses A1..A6 in the polynomial that is multiplied by */
 /* u; A0 is not referenced by the code shown.                           */
 double A0 = 1.0,
        A1 = -0.1666666666671334,
        A2 = 0.833333333809067E-2,
        A3 = 0.198412715551283E-3,
        A4 = 0.27557589750762E-5,
        A5 = 0.2507059876207E-7,
        A6 = 0.164105986683E-9;

 /* B series. main() uses B1..B6 in the second polynomial in w = u*u;    */
 /* B0 is not referenced by the code shown.                              */
 double B0 = 1.0,
        B1 = -0.4999999999982,
        B2 = 0.4166666664651E-1,
        B3 = -0.1388888805755E-2,
        B4 = 0.24801428034E-4,
        B5 = -0.2754213324E-6,
        B6 = 0.20189405E-8;

 /* C series. Not referenced by the code shown in this file.             */
 double C0 = 1.0,
        C1 = 0.99999999668,
        C2 = 0.49999995173,
        C3 = 0.16666704243,
        C4 = 0.4166685027E-1,
        C5 = 0.832672635E-2,
        C6 = 0.140836136E-2,
        C7 = 0.17358267E-3,
        C8 = 0.3931683E-4;

 /* D and E series. Not referenced by the code shown in this file.       */
 double D1 = 0.3999999946405E-1,
        D2 = 0.96E-3,
        D3 = 0.1233153E-5;

 double E2 = 0.48E-3,
        E3 = 0.411051E-6;

 /*
  * main - run Module 6 of the flops benchmark once and print its result row.
  *
  * What the code below does, in order:
  *   1. Prints the program banner.
  *   2. Assigns the global constants it needs (piref, one, two, four, ...).
  *   3. Prints the table header.
  *   4. Sets the interval count m to loops*200 when SMALL_PROBLEM_SIZE is
  *      defined (3,125,000) and to loops*10000 otherwise (156,250,000).
  *   5. Integrates the product of two polynomials in u over [0, piref/4]
  *      with the trapezoidal rule, step x = piref/(4*m).
  *   6. Stores the integral in sa and its difference from 0.25 in sc.
  *   7. Prints the row for module 6. Every printed value is multiplied by
  *      1e-30 or by 0, so the digits shown are all zero.
  *
  * No timer is started or read here, although the header row has RunTime
  * and MFLOPS columns.
  *
  * Returns 0.
  */
 int main()
 {

 #ifdef ROPT
    register double s,u,v,w,x;
 #else
    double s,u,v,w,x;
 #endif

    long loops, NLimit;
    register long i, m, n;          /* n is never used in this function. */

    printf("\n");
    printf("   FLOPS C Program (Double Precision), V2.0 18 Dec 1992\n\n");

 			/****************************/
    loops = 15625;        /* Initial number of loops. */
 			/*     DO NOT CHANGE!       */
 			/****************************/

 /****************************************************/
 /* Set Variable Values.                             */
 /* T[1] references all timing results relative to   */
 /* one million loops.                               */
 /*                                                  */
 /* The program will execute from 31250 to 512000000 */
 /* loops based on a runtime of Module 1 of at least */
 /* TLimit = 15.0 seconds. That is, a runtime of 15  */
 /* seconds for Module 1 is used to determine the    */
 /* number of loops to execute.                      */
 /*                                                  */
 /* No more than NLimit = 512000000 loops are allowed*/
 /*                                                  */
 /* NOTE: the text above does not match this         */
 /* function. Here TLimit is set to 1.0, the loop    */
 /* count m is computed directly from 'loops'        */
 /* further down, and neither TLimit nor NLimit is   */
 /* read again inside main().                        */
 /****************************************************/

    TLimit = 1.0;
    NLimit = 512000000;

    piref = 3.14159265358979324;
    one   = 1.0;
    two   = 2.0;
    three = 3.0;
    four  = 4.0;
    five  = 5.0;
    scale = one;

    printf("   Module     Error        RunTime      MFLOPS\n");
    printf("                            (usec)\n");
 /***********************************************************/
 /* Choose the number of integration intervals, m.          */
 /* (This heading used to read "Initialize the timer", but  */
 /* no timer call appears in this function.)                */
 /***********************************************************/
 #ifdef SMALL_PROBLEM_SIZE
    m = loops*200;
 #else
    m = loops*10000;
 #endif


 /************************************************************/
 /* Module 6.  Calculate Integral of sin(x)*cos(x) from 0.0  */
 /*            to PI/4 using the Trapezoidal Method. Result  */
 /*            is sin(PI/4)^2 / 2 = 0.25, the value sa is    */
 /*            compared against below. There are 29 double   */
 /*            precision operations per loop (13 +, 0 -,     */
 /*            16 *, and 0 /).                               */
 /*            44.8% +, 00.0% -, 55.2% *, and 00.0% /        */
 /*                                                          */
 /* NOTE(review): A3 and A5 are positive as defined at file  */
 /* scope, while the w^3 and w^5 terms of the sine series    */
 /* u*(1 - w/6 + w^2/120 - w^3/5040 + ...) are negative.     */
 /* Since every term is added below, the first polynomial    */
 /* differs from sin(u) in those two terms. Presumably the   */
 /* complete flops.c flips the sign of A3 and A5 before this */
 /* module - confirm before treating sc as an error value.   */
 /************************************************************/

    x = piref / ( four * (double)m );               /*********************/
    s = 0.0;                                        /*  Loop 7.          */
    v = 0.0;                                        /*********************/

    /* Sum the integrand at the m-1 interior sample points u = i*x. */
    for( i = 1 ; i <= m-1 ; i++ )
    {
    u = (double)i * x;
    w = u * u;
    /* First factor: u times a degree-6 polynomial in w (Horner form). */
    v = u * ((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
    /* Multiply by the second degree-6 polynomial in w and accumulate. */
    s = s + v*(w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one);
    }

    /* Integrand at the upper end point u = piref/4. The lower end point */
    /* u = 0 contributes nothing because the first factor is u * (...).  */
    u  = piref / four;
    w  = u * u;
    sa = u*((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
    sb = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;
    sa = sa * sb;

    /* Trapezoidal rule: x * (f(end) + 2 * interior sum) / 2. */
    sa = x * ( sa + two * s ) / two;                  /* Module 6 Result */
    sb = 0.25;                                        /*******************/
    sc = sa - sb;                  /* difference from the reference 0.25 */
 						  /*********************/
 						  /*   DO NOT REMOVE   */
 						  /*   THIS PRINTOUT!  */
 						  /*********************/
    /* The scaling pushes every value far below the four printed decimals. */
    printf("     6   %13.4lf  %10.4lf  %10.4lf\n",
           sc* /* stabilize output */  1e-30,
           0* /* stabilize output */ 1e-30 ,
           0* /* stabilize output */  1e-30);

    return 0;
 }
	/*--------------------- Start flops.c source code ----------------------*/

	/*****************************/
	/* flops.c */
	/* Version 2.0, 18 Dec 1992 */
	/* Al Aburto */
	/* aburto@nosc.mil */
	/*****************************/

	/*
	Flops.c is a 'c' program which attempts to estimate your system's
	floating-point 'MFLOPS' rating for the FADD, FSUB, FMUL, and FDIV
	operations based on specific 'instruction mixes' (discussed below).
	The program provides an estimate of PEAK MFLOPS performance by making
	maximal use of register variables with minimal interaction with main
	memory. The execution loops are all small so that they will fit in
	any cache. Flops.c can be used along with Linpack and the Livermore
	kernels (which exercise memory much more extensively) to gain further
	insight into the limits of system performance. The flops.c execution
	modules also include various percent weightings of FDIV's (from 0% to
	25% FDIV's) so that the range of performance can be obtained when
	using FDIV's. FDIV's, being computationally more intensive than
	FADD's or FMUL's, can impact performance considerably on some systems.

	Flops.c consists of 8 independent modules (routines) which, except for
	module 2, conduct numerical integration of various functions. Module
	2, estimates the value of pi based upon the Maclaurin series expansion
	of atan(1). MFLOPS ratings are provided for each module, but the
	program's overall results are summarized by the MFLOPS(1), MFLOPS(2),
	MFLOPS(3), and MFLOPS(4) outputs.

	The MFLOPS(1) result is identical to the result provided by all
	previous versions of flops.c. It is based only upon the results from
	modules 2 and 3. Two problems surfaced in using MFLOPS(1). First, it
	was difficult to completely 'vectorize' the result due to the
	recurrence of the 's' variable in module 2. This problem is addressed
	in the MFLOPS(2) result which does not use module 2, but maintains
	nearly the same weighting of FDIV's (9.2%) as in MFLOPS(1) (9.6%).
	The second problem with MFLOPS(1) centers around the percentage of
	FDIV's (9.6%) which was viewed as too high for an important class of
	problems. This concern is addressed in the MFLOPS(3) result (3.4%
	FDIV's) and in the MFLOPS(4) result, where NO FDIV's are conducted
	at all.

	The number of floating-point instructions per iteration (loop) is
	given below for each module executed:

	MODULE   FADD   FSUB   FMUL   FDIV   TOTAL  Comment
	  1        7      0      6      1      14   7.1%  FDIV's
	  2        3      2      1      1       7   difficult to vectorize.
	  3        6      2      9      0      17   0.0%  FDIV's
	  4        7      0      8      0      15   0.0%  FDIV's
	  5       13      0     15      1      29   3.4%  FDIV's
	  6       13      0     16      0      29   0.0%  FDIV's
	  7        3      3      3      3      12   25.0% FDIV's
	  8       13      0     17      0      30   0.0%  FDIV's

	A*2+3     21     12     14      5      52   A=5, MFLOPS(1), Same as
	         40.4%  23.1%  26.9%  9.6%          previous versions of the
	                                            flops.c program. Includes
	                                            only Modules 2 and 3, does
	                                            9.6% FDIV's, and is not
	                                            easily vectorizable.

	1+3+4     58     14     66     14     152   A=4, MFLOPS(2), New output
	+5+6+    38.2%  9.2%   43.4%  9.2%          does not include Module 2,
	A*7                                         but does 9.2% FDIV's.

	1+3+4     62      5     74      5     146   A=0, MFLOPS(3), New output
	+5+6+    42.9%  3.4%   50.7%  3.4%          does not include Module 2,
	7+8                                         but does 3.4% FDIV's.

	3+4+6     39      2     50      0      91   A=0, MFLOPS(4), New output
	+8       42.9%  2.2%   54.9%  0.0%          does not include Module 2,
	                                            and does NO FDIV's.

	NOTE: Various timer routines are included as indicated below. The
	timer routines, with some comments, are attached at the end
	of the main program.

	NOTE: Please do not remove any of the printouts.

	EXAMPLE COMPILATION:
	UNIX based systems
	cc -DUNIX -O flops.c -o flops
	cc -DUNIX -DROPT flops.c -o flops
	cc -DUNIX -fast -O4 flops.c -o flops
	.
	.
	.
	etc.

	Al Aburto
	aburto@nosc.mil
	*/

	/***************************************************************/
	/* Timer options. You MUST uncomment one of the options below */
	/* or compile, for example, with the '-DUNIX' option. */
	/* */
	/* GTODay is the option enabled here. None of the code shown */
	/* in this file tests any of these macros, so the selection */
	/* has no visible effect on what is compiled below. */
	/***************************************************************/
	/* #define Amiga */
	/* #define UNIX */
	/* #define UNIX_Old */
	/* #define VMS */
	/* #define BORLAND_C */
	/* #define MSC */
	/* #define MAC */
	/* #define IPSC */
	/* #define FORTRAN_SEC */
	#define GTODay
	/* #define CTimer */
	/* #define UXPM */
	/* #define MAC_TMgr */
	/* #define PARIX */
	/* #define POSIX */
	/* #define WIN32 */
	/* #define POSIX1 */
	/***********************/

	#include <stdio.h>
	#include <math.h>
	/* 'Uncomment' the line below to run */
	/* with 'register double' variables */
	/* defined, or compile with the */
	/* '-DROPT' option. Don't need this if */
	/* registers used automatically, but */
	/* you might want to try it anyway. */
	/* #define ROPT */

	/* Timer bookkeeping, described as needed by 'dtime()'. Neither object */
	/* is referenced by the code shown in this file. */
	double nulltime;
	double TimeArray[3];

	/* Run-time threshold for choosing the number of loops. main() assigns */
	/* it 1.0. */
	double TLimit;

	/* Global array for timing results and other information (36 slots). */
	double T[36];

	/* Result scratch values and small numeric constants. All start at zero */
	/* (static storage); main() assigns the ones it uses. */
	double sa;
	double sb;
	double sc;
	double sd;
	double one;
	double two;
	double three;
	double four;
	double five;
	double piref;
	double piprg;
	double scale;
	double pierr;

	/* A series. main() uses A1..A6 in the polynomial that is multiplied by */
	/* u; A0 is not referenced by the code shown. */
	double A0 = 1.0,
	       A1 = -0.1666666666671334,
	       A2 = 0.833333333809067E-2,
	       A3 = 0.198412715551283E-3,
	       A4 = 0.27557589750762E-5,
	       A5 = 0.2507059876207E-7,
	       A6 = 0.164105986683E-9;

	/* B series. main() uses B1..B6 in the second polynomial in w = u*u; */
	/* B0 is not referenced by the code shown. */
	double B0 = 1.0,
	       B1 = -0.4999999999982,
	       B2 = 0.4166666664651E-1,
	       B3 = -0.1388888805755E-2,
	       B4 = 0.24801428034E-4,
	       B5 = -0.2754213324E-6,
	       B6 = 0.20189405E-8;

	/* C series. Not referenced by the code shown in this file. */
	double C0 = 1.0,
	       C1 = 0.99999999668,
	       C2 = 0.49999995173,
	       C3 = 0.16666704243,
	       C4 = 0.4166685027E-1,
	       C5 = 0.832672635E-2,
	       C6 = 0.140836136E-2,
	       C7 = 0.17358267E-3,
	       C8 = 0.3931683E-4;

	/* D and E series. Not referenced by the code shown in this file. */
	double D1 = 0.3999999946405E-1,
	       D2 = 0.96E-3,
	       D3 = 0.1233153E-5;

	double E2 = 0.48E-3,
	       E3 = 0.411051E-6;

	int main()
	{

	#ifdef ROPT
	register double s,u,v,w,x;
	#else
	double s,u,v,w,x;
	#endif

	long loops, NLimit;
	register long i, m, n;

	printf("\n");
	printf(" FLOPS C Program (Double Precision), V2.0 18 Dec 1992\n\n");

	/****************************/
	loops = 15625; /* Initial number of loops. */
	/* DO NOT CHANGE! */
	/****************************/

	/****************************************************/
	/* Set Variable Values. */
	/* T[1] references all timing results relative to */
	/* one million loops. */
	/* */
	/* The program will execute from 31250 to 512000000 */
	/* loops based on a runtime of Module 1 of at least */
	/* TLimit = 15.0 seconds. That is, a runtime of 15 */
	/* seconds for Module 1 is used to determine the */
	/* number of loops to execute. */
	/* */
	/* No more than NLimit = 512000000 loops are allowed*/
	/****************************************************/

	TLimit = 1.0;
	NLimit = 512000000;

	piref = 3.14159265358979324;
	one = 1.0;
	two = 2.0;
	three = 3.0;
	four = 4.0;
	five = 5.0;
	scale = one;

	printf(" Module Error RunTime MFLOPS\n");
	printf(" (usec)\n");
	/*************************/
	/* Initialize the timer. */
	/*************************/
	#ifdef SMALL_PROBLEM_SIZE
	m = loops*200;
	#else
	m = loops*10000;
	#endif


	/************************************************************/
	/* Module 6. Calculate Integral of sin(x)cos(x) from 0.0 /
	/* to PI/4 using the Trapazoidal Method. Result */
	/* is sin(PI/4)^2. There are 29 double precision */
	/* operations per loop (13 +, 0 -, 16 , and 0 /)/
	/* included in the timing. */
	/* 46.7% +, 00.0% -, 53.3% , and 00.0% / /
	/************************************************************/

	x = piref / ( four * (double)m ); /*********************/
	s = 0.0; /* Loop 7. */
	v = 0.0; /*********************/

	for( i = 1 ; i <= m-1 ; i++ )
	{
	u = (double)i * x;
	w = u * u;
	v = u * ((((((A6w+A5)w+A4)w+A3)w+A2)w+A1)w+one);
	s = s + v(w(w(w(w(w(B6*w+B5)+B4)+B3)+B2)+B1)+one);
	}

	u = piref / four;
	w = u * u;
	sa = u((((((A6w+A5)w+A4)w+A3)w+A2)w+A1)*w+one);
	sb = w(w(w(w(w(B6w+B5)+B4)+B3)+B2)+B1)+one;
	sa = sa * sb;

	sa = x * ( sa + two * s ) / two; /* Module 6 Result */
	sb = 0.25; /*******************/
	sc = sa - sb;
	/*********************/
	/* DO NOT REMOVE */
	/* THIS PRINTOUT! */
	/*********************/
	printf(" 6 %13.4lf %10.4lf %10.4lf\n",
	sc* /* stabilize output */ 1e-30,
	0* /* stabilize output */ 1e-30 ,
	0* /* stabilize output */ 1e-30);

	return 0;
	}