| /************************************************************************* |
| * * |
| * N A S P A R A L L E L B E N C H M A R K S 3.3 * |
| * * |
| * S E R I A L V E R S I O N * |
| * * |
| * I S * |
| * * |
| ************************************************************************* |
| * * |
| * This benchmark is a serial version of the NPB IS code. * |
| * Refer to NAS Technical Reports 95-020 for details. * |
| * * |
| * Permission to use, copy, distribute and modify this software * |
| * for any purpose with or without fee is hereby granted. We * |
| * request, however, that all derived work reference the NAS * |
| * Parallel Benchmarks 3.3. This software is provided "as is" * |
| * without express or implied warranty. * |
| * * |
| * Information on NPB 3.3, including the technical report, the * |
| * original specifications, source code, results and information * |
| * on how to submit new results, is available at: * |
| * * |
| * http://www.nas.nasa.gov/Software/NPB/ * |
| * * |
| * Send comments or suggestions to npb@nas.nasa.gov * |
| * * |
| * NAS Parallel Benchmarks Group * |
| * NASA Ames Research Center * |
| * Mail Stop: T27A-1 * |
| * Moffett Field, CA 94035-1000 * |
| * * |
| * E-mail: npb@nas.nasa.gov * |
| * Fax: (650) 604-3957 * |
| * * |
| ************************************************************************* |
| * * |
| * Author: M. Yarrow * |
| * H. Jin * |
| * * |
| *************************************************************************/ |
| |
| #include <stdlib.h> |
| #include <stdio.h> |
| |
| #define NPBVERSION "3.3" |
| |
| #ifdef SMALL_PROBLEM_SIZE |
| #define CLASS 'W' |
| #else |
| #define CLASS 'B' |
| #endif |
| |
| /*****************************************************************/ |
| /* For serial IS, buckets are not really req'd to solve NPB1 IS */ |
| /* spec, but their use on some machines improves performance, on */ |
| /* other machines the use of buckets compromises performance, */ |
| /* probably because it is extra computation which is not req'd. */ |
| /* (Note: Mechanism not understood, probably cache related) */ |
| /* Example: SP2-66MhzWN: 50% speedup with buckets */ |
| /* Example: SGI Indy5000: 50% slowdown with buckets */ |
| /* Example: SGI O2000: 400% slowdown with buckets (Wow!) */ |
| /*****************************************************************/ |
| /* To disable the use of buckets, comment out the following line */ |
| #define USE_BUCKETS |
| |
| |
| /******************/ |
| /* default values */ |
| /******************/ |
| #ifndef CLASS |
| #define CLASS 'S' |
| #endif |
| |
| |
| /*************/ |
| /* CLASS S */ |
| /*************/ |
| #if CLASS == 'S' |
| #define TOTAL_KEYS_LOG_2 16 |
| #define MAX_KEY_LOG_2 11 |
| #define NUM_BUCKETS_LOG_2 9 |
| #endif |
| |
| |
| /*************/ |
| /* CLASS W */ |
| /*************/ |
| #if CLASS == 'W' |
| #define TOTAL_KEYS_LOG_2 20 |
| #define MAX_KEY_LOG_2 16 |
| #define NUM_BUCKETS_LOG_2 10 |
| #endif |
| |
| /*************/ |
| /* CLASS A */ |
| /*************/ |
| #if CLASS == 'A' |
| #define TOTAL_KEYS_LOG_2 23 |
| #define MAX_KEY_LOG_2 19 |
| #define NUM_BUCKETS_LOG_2 10 |
| #endif |
| |
| |
| /*************/ |
| /* CLASS B */ |
| /*************/ |
| #if CLASS == 'B' |
| #define TOTAL_KEYS_LOG_2 25 |
| #define MAX_KEY_LOG_2 21 |
| #define NUM_BUCKETS_LOG_2 10 |
| #endif |
| |
| |
| /*************/ |
| /* CLASS C */ |
| /*************/ |
| #if CLASS == 'C' |
| #define TOTAL_KEYS_LOG_2 27 |
| #define MAX_KEY_LOG_2 23 |
| #define NUM_BUCKETS_LOG_2 10 |
| #endif |
| |
| |
| /*************/ |
| /* CLASS D */ |
| /*************/ |
| #if CLASS == 'D' |
| #define TOTAL_KEYS_LOG_2 31 |
| #define MAX_KEY_LOG_2 27 |
| #define NUM_BUCKETS_LOG_2 10 |
| #endif |
| |
| |
| #if CLASS == 'D' |
| #define TOTAL_KEYS (1L << TOTAL_KEYS_LOG_2) |
| #else |
| #define TOTAL_KEYS (1 << TOTAL_KEYS_LOG_2) |
| #endif |
| #define MAX_KEY (1 << MAX_KEY_LOG_2) |
| #define NUM_BUCKETS (1 << NUM_BUCKETS_LOG_2) |
| #define NUM_KEYS TOTAL_KEYS |
| #define SIZE_OF_BUFFERS NUM_KEYS |
| |
| |
| #define MAX_ITERATIONS 10 |
| #define TEST_ARRAY_SIZE 5 |
| |
| |
| /*************************************/ |
| /* Typedef: if necessary, change the */ |
| /* size of int here by changing the */ |
| /* int type to, say, long */ |
| /*************************************/ |
| #if CLASS == 'D' |
| typedef long INT_TYPE; |
| #else |
| typedef int INT_TYPE; |
| #endif |
| |
| |
| /********************/ |
| /* Some global info */ |
| /********************/ |
| INT_TYPE *key_buff_ptr_global; /* used by full_verify to get */ |
| /* copies of rank info */ |
| |
| int passed_verification; |
| |
| |
| /************************************/ |
| /* These are the three main arrays. */ |
| /* See SIZE_OF_BUFFERS def above */ |
| /************************************/ |
| INT_TYPE key_array[SIZE_OF_BUFFERS], |
| key_buff1[MAX_KEY], |
| key_buff2[SIZE_OF_BUFFERS], |
| partial_verify_vals[TEST_ARRAY_SIZE]; |
| |
| #ifdef USE_BUCKETS |
| INT_TYPE bucket_size[NUM_BUCKETS], |
| bucket_ptrs[NUM_BUCKETS]; |
| #endif |
| |
| |
| /**********************/ |
| /* Partial verif info */ |
| /**********************/ |
| INT_TYPE test_index_array[TEST_ARRAY_SIZE], |
| test_rank_array[TEST_ARRAY_SIZE], |
| |
| S_test_index_array[TEST_ARRAY_SIZE] = |
| {48427,17148,23627,62548,4431}, |
| S_test_rank_array[TEST_ARRAY_SIZE] = |
| {0,18,346,64917,65463}, |
| |
| W_test_index_array[TEST_ARRAY_SIZE] = |
| {357773,934767,875723,898999,404505}, |
| W_test_rank_array[TEST_ARRAY_SIZE] = |
| {1249,11698,1039987,1043896,1048018}, |
| |
| A_test_index_array[TEST_ARRAY_SIZE] = |
| {2112377,662041,5336171,3642833,4250760}, |
| A_test_rank_array[TEST_ARRAY_SIZE] = |
| {104,17523,123928,8288932,8388264}, |
| |
| B_test_index_array[TEST_ARRAY_SIZE] = |
| {41869,812306,5102857,18232239,26860214}, |
| B_test_rank_array[TEST_ARRAY_SIZE] = |
| {33422937,10244,59149,33135281,99}, |
| |
| C_test_index_array[TEST_ARRAY_SIZE] = |
| {44172927,72999161,74326391,129606274,21736814}, |
| C_test_rank_array[TEST_ARRAY_SIZE] = |
| {61147,882988,266290,133997595,133525895}, |
| |
| D_test_index_array[TEST_ARRAY_SIZE] = |
| {1317351170,995930646,1157283250,1503301535,1453734525}, |
| D_test_rank_array[TEST_ARRAY_SIZE] = |
| {1,36538729,1978098519,2145192618,2147425337}; |
| |
| |
| |
| /***********************/ |
| /* function prototypes */ |
| /***********************/ |
| double randlc( double *X, double *A ); |
| |
| void full_verify( void ); |
| |
| void c_print_results( char *name, |
| char class, |
| int n1, |
| int n2, |
| int n3, |
| int niter, |
| char *optype, |
| int passed_verification, |
| char *npbversion); |
| |
| |
| |
| /* |
| * FUNCTION RANDLC (X, A) |
| * |
| * This routine returns a uniform pseudorandom double precision number in the |
| * range (0, 1) by using the linear congruential generator |
| * |
| * x_{k+1} = a x_k (mod 2^46) |
| * |
| * where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers |
| * before repeating. The argument A is the same as 'a' in the above formula, |
| * and X is the same as x_0. A and X must be odd double precision integers |
| * in the range (1, 2^46). The returned value RANDLC is normalized to be |
| * between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain |
| * the new seed x_1, so that subsequent calls to RANDLC using the same |
| * arguments will generate a continuous sequence. |
| * |
| * This routine should produce the same results on any computer with at least |
| * 48 mantissa bits in double precision floating point data. On Cray systems, |
| * double precision should be disabled. |
| * |
| * David H. Bailey October 26, 1990 |
| * |
| * IMPLICIT DOUBLE PRECISION (A-H, O-Z) |
| * SAVE KS, R23, R46, T23, T46 |
| * DATA KS/0/ |
| * |
| * If this is the first call to RANDLC, compute R23 = 2 ^ -23, R46 = 2 ^ -46, |
| * T23 = 2 ^ 23, and T46 = 2 ^ 46. These are computed in loops, rather than |
| * by merely using the ** operator, in order to insure that the results are |
| * exact on all systems. This code assumes that 0.5D0 is represented exactly. |
| */ |
| |
| |
| /*****************************************************************/ |
| /************* R A N D L C ************/ |
| /************* ************/ |
| /************* portable random number generator ************/ |
| /*****************************************************************/ |
| |
| double randlc( double *X, double *A ) |
| { |
| static int KS=0; |
| static double R23, R46, T23, T46; |
| double T1, T2, T3, T4; |
| double A1; |
| double A2; |
| double X1; |
| double X2; |
| double Z; |
| int i, j; |
| |
| if (KS == 0) |
| { |
| R23 = 1.0; |
| R46 = 1.0; |
| T23 = 1.0; |
| T46 = 1.0; |
| |
| for (i=1; i<=23; i++) |
| { |
| R23 = 0.50 * R23; |
| T23 = 2.0 * T23; |
| } |
| for (i=1; i<=46; i++) |
| { |
| R46 = 0.50 * R46; |
| T46 = 2.0 * T46; |
| } |
| KS = 1; |
| } |
| |
| /* Break A into two parts such that A = 2^23 * A1 + A2 and set X = N. */ |
| |
| T1 = R23 * *A; |
| j = T1; |
| A1 = j; |
| A2 = *A - T23 * A1; |
| |
| /* Break X into two parts such that X = 2^23 * X1 + X2, compute |
| Z = A1 * X2 + A2 * X1 (mod 2^23), and then |
| X = 2^23 * Z + A2 * X2 (mod 2^46). */ |
| |
| T1 = R23 * *X; |
| j = T1; |
| X1 = j; |
| X2 = *X - T23 * X1; |
| T1 = A1 * X2 + A2 * X1; |
| |
| j = R23 * T1; |
| T2 = j; |
| Z = T1 - T23 * T2; |
| T3 = T23 * Z + A2 * X2; |
| j = R46 * T3; |
| T4 = j; |
| *X = T3 - T46 * T4; |
| return(R46 * *X); |
| } |
| |
| |
| |
| |
| /*****************************************************************/ |
| /************* C R E A T E _ S E Q ************/ |
| /*****************************************************************/ |
| |
| void create_seq( double seed, double a ) |
| { |
| double x; |
| int i, k; |
| |
| k = MAX_KEY/4; |
| |
| for (i=0; i<NUM_KEYS; i++) |
| { |
| x = randlc(&seed, &a); |
| x += randlc(&seed, &a); |
| x += randlc(&seed, &a); |
| x += randlc(&seed, &a); |
| |
| key_array[i] = k*x; |
| } |
| } |
| |
| |
| |
| |
| /*****************************************************************/ |
| /************* F U L L _ V E R I F Y ************/ |
| /*****************************************************************/ |
| |
| |
| void full_verify( void ) |
| { |
| INT_TYPE i, j; |
| |
| |
| |
| /* Now, finally, sort the keys: */ |
| |
| #ifdef USE_BUCKETS |
| |
| /* key_buff2[] already has the proper information, so do nothing */ |
| |
| #else |
| |
| /* Copy keys into work array; keys in key_array will be reassigned. */ |
| for( i=0; i<NUM_KEYS; i++ ) |
| key_buff2[i] = key_array[i]; |
| |
| #endif |
| |
| for( i=0; i<NUM_KEYS; i++ ) |
| key_array[--key_buff_ptr_global[key_buff2[i]]] = key_buff2[i]; |
| |
| |
| /* Confirm keys correctly sorted: count incorrectly sorted keys, if any */ |
| |
| j = 0; |
| for( i=1; i<NUM_KEYS; i++ ) |
| if( key_array[i-1] > key_array[i] ) |
| j++; |
| |
| |
| if( j != 0 ) |
| { |
| printf( "Full_verify: number of keys out of sort: %ld\n", |
| (long)j ); |
| } |
| else |
| passed_verification++; |
| |
| |
| } |
| |
| /*****************************************************************/ |
| /************* C _ P R I N T _ R E S U L T S ************/ |
| /*****************************************************************/ |
| |
| void c_print_results( char *name, |
| char class, |
| int n1, |
| int n2, |
| int n3, |
| int niter, |
| char *optype, |
| int passed_verification, |
| char *npbversion) |
| { |
| printf( "\n\n %s Benchmark Completed\n", name ); |
| |
| printf( " Class = %c\n", class ); |
| |
| if( n3 == 0 ) { |
| long nn = n1; |
| if ( n2 != 0 ) nn *= n2; |
| printf( " Size = %12ld\n", nn ); /* as in IS */ |
| } |
| else |
| printf( " Size = %4dx%4dx%4d\n", n1,n2,n3 ); |
| |
| printf( " Iterations = %12d\n", niter ); |
| |
| printf( " Operation type = %24s\n", optype); |
| |
| if( passed_verification < 0 ) |
| printf( " Verification = NOT PERFORMED\n" ); |
| else if( passed_verification ) |
| printf( " Verification = SUCCESSFUL\n" ); |
| else |
| printf( " Verification = UNSUCCESSFUL\n" ); |
| |
| printf( " Version = %12s\n", npbversion ); |
| |
| #ifdef SMP |
| evalue = getenv("MP_SET_NUMTHREADS"); |
| printf( " MULTICPUS = %s\n", evalue ); |
| #endif |
| |
| printf( "\n\n" ); |
| printf( " Please send all errors/feedbacks to:\n\n" ); |
| printf( " NPB Development Team\n" ); |
| printf( " npb@nas.nasa.gov\n\n" ); |
| } |
| |
| |
| /*****************************************************************/ |
| /************* R A N K ****************/ |
| /*****************************************************************/ |
| |
| |
| void rank( int iteration ) |
| { |
| |
| INT_TYPE i, k; |
| |
| INT_TYPE *key_buff_ptr, *key_buff_ptr2; |
| |
| #ifdef USE_BUCKETS |
| int shift = MAX_KEY_LOG_2 - NUM_BUCKETS_LOG_2; |
| INT_TYPE key; |
| #endif |
| |
| |
| key_array[iteration] = iteration; |
| key_array[iteration+MAX_ITERATIONS] = MAX_KEY - iteration; |
| |
| |
| /* Determine where the partial verify test keys are, load into */ |
| /* top of array bucket_size */ |
| for( i=0; i<TEST_ARRAY_SIZE; i++ ) |
| partial_verify_vals[i] = key_array[test_index_array[i]]; |
| |
| #ifdef USE_BUCKETS |
| |
| /* Initialize */ |
| for( i=0; i<NUM_BUCKETS; i++ ) |
| bucket_size[i] = 0; |
| |
| /* Determine the number of keys in each bucket */ |
| for( i=0; i<NUM_KEYS; i++ ) |
| bucket_size[key_array[i] >> shift]++; |
| |
| |
| /* Accumulative bucket sizes are the bucket pointers */ |
| bucket_ptrs[0] = 0; |
| for( i=1; i< NUM_BUCKETS; i++ ) |
| bucket_ptrs[i] = bucket_ptrs[i-1] + bucket_size[i-1]; |
| |
| |
| /* Sort into appropriate bucket */ |
| for( i=0; i<NUM_KEYS; i++ ) |
| { |
| key = key_array[i]; |
| key_buff2[bucket_ptrs[key >> shift]++] = key; |
| } |
| |
| key_buff_ptr2 = key_buff2; |
| |
| #else |
| |
| key_buff_ptr2 = key_array; |
| |
| #endif |
| |
| /* Clear the work array */ |
| for( i=0; i<MAX_KEY; i++ ) |
| key_buff1[i] = 0; |
| |
| |
| /* Ranking of all keys occurs in this section: */ |
| |
| key_buff_ptr = key_buff1; |
| |
| /* In this section, the keys themselves are used as their |
| own indexes to determine how many of each there are: their |
| individual population */ |
| |
| for( i=0; i<NUM_KEYS; i++ ) |
| key_buff_ptr[key_buff_ptr2[i]]++; /* Now they have individual key */ |
| /* population */ |
| |
| /* To obtain ranks of each key, successively add the individual key |
| population */ |
| |
| |
| for( i=0; i<MAX_KEY-1; i++ ) |
| key_buff_ptr[i+1] += key_buff_ptr[i]; |
| |
| |
| /* This is the partial verify test section */ |
| /* Observe that test_rank_array vals are */ |
| /* shifted differently for different cases */ |
| for( i=0; i<TEST_ARRAY_SIZE; i++ ) |
| { |
| k = partial_verify_vals[i]; /* test vals were put here */ |
| if( 0 < k && k <= NUM_KEYS-1 ) |
| { |
| INT_TYPE key_rank = key_buff_ptr[k-1]; |
| int failed = 0; |
| |
| switch( CLASS ) |
| { |
| case 'S': |
| if( i <= 2 ) |
| { |
| if( key_rank != test_rank_array[i]+iteration ) |
| failed = 1; |
| else |
| passed_verification++; |
| } |
| else |
| { |
| if( key_rank != test_rank_array[i]-iteration ) |
| failed = 1; |
| else |
| passed_verification++; |
| } |
| break; |
| case 'W': |
| if( i < 2 ) |
| { |
| if( key_rank != test_rank_array[i]+(iteration-2) ) |
| failed = 1; |
| else |
| passed_verification++; |
| } |
| else |
| { |
| if( key_rank != test_rank_array[i]-iteration ) |
| failed = 1; |
| else |
| passed_verification++; |
| } |
| break; |
| case 'A': |
| if( i <= 2 ) |
| { |
| if( key_rank != test_rank_array[i]+(iteration-1) ) |
| failed = 1; |
| else |
| passed_verification++; |
| } |
| else |
| { |
| if( key_rank != test_rank_array[i]-(iteration-1) ) |
| failed = 1; |
| else |
| passed_verification++; |
| } |
| break; |
| case 'B': |
| if( i == 1 || i == 2 || i == 4 ) |
| { |
| if( key_rank != test_rank_array[i]+iteration ) |
| failed = 1; |
| else |
| passed_verification++; |
| } |
| else |
| { |
| if( key_rank != test_rank_array[i]-iteration ) |
| failed = 1; |
| else |
| passed_verification++; |
| } |
| break; |
| case 'C': |
| if( i <= 2 ) |
| { |
| if( key_rank != test_rank_array[i]+iteration ) |
| failed = 1; |
| else |
| passed_verification++; |
| } |
| else |
| { |
| if( key_rank != test_rank_array[i]-iteration ) |
| failed = 1; |
| else |
| passed_verification++; |
| } |
| break; |
| case 'D': |
| if( i < 2 ) |
| { |
| if( key_rank != test_rank_array[i]+iteration ) |
| failed = 1; |
| else |
| passed_verification++; |
| } |
| else |
| { |
| if( key_rank != test_rank_array[i]-iteration ) |
| failed = 1; |
| else |
| passed_verification++; |
| } |
| break; |
| } |
| if( failed == 1 ) |
| printf( "Failed partial verification: " |
| "iteration %d, test key %d\n", |
| iteration, (int)i ); |
| } |
| } |
| |
| |
| |
| |
| /* Make copies of rank info for use by full_verify: these variables |
| in rank are local; making them global slows down the code, probably |
| since they cannot be made register by compiler */ |
| |
| if( iteration == MAX_ITERATIONS ) |
| key_buff_ptr_global = key_buff_ptr; |
| |
| } |
| |
| |
| /*****************************************************************/ |
| /************* M A I N ****************/ |
| /*****************************************************************/ |
| |
| int main( int argc, char **argv ) |
| { |
| |
| int i, iteration; |
| |
| FILE *fp; |
| |
| /* Initialize the verification arrays if a valid class */ |
| for( i=0; i<TEST_ARRAY_SIZE; i++ ) |
| switch( CLASS ) |
| { |
| case 'S': |
| test_index_array[i] = S_test_index_array[i]; |
| test_rank_array[i] = S_test_rank_array[i]; |
| break; |
| case 'A': |
| test_index_array[i] = A_test_index_array[i]; |
| test_rank_array[i] = A_test_rank_array[i]; |
| break; |
| case 'W': |
| test_index_array[i] = W_test_index_array[i]; |
| test_rank_array[i] = W_test_rank_array[i]; |
| break; |
| case 'B': |
| test_index_array[i] = B_test_index_array[i]; |
| test_rank_array[i] = B_test_rank_array[i]; |
| break; |
| case 'C': |
| test_index_array[i] = C_test_index_array[i]; |
| test_rank_array[i] = C_test_rank_array[i]; |
| break; |
| case 'D': |
| test_index_array[i] = D_test_index_array[i]; |
| test_rank_array[i] = D_test_rank_array[i]; |
| break; |
| }; |
| |
| |
| |
| /* Printout initial NPB info */ |
| printf |
| ( "\n\n NAS Parallel Benchmarks (NPB3.3-SER) - IS Benchmark\n\n" ); |
| printf( " Size: %ld (class %c)\n", (long)TOTAL_KEYS, CLASS ); |
| printf( " Iterations: %d\n", MAX_ITERATIONS ); |
| |
| /* Generate random number sequence and subsequent keys on all procs */ |
| create_seq( 314159265.00, /* Random number gen seed */ |
| 1220703125.00 ); /* Random number gen mult */ |
| |
| /* Do one interation for free (i.e., untimed) to guarantee initialization of |
| all data and code pages and respective tables */ |
| rank( 1 ); |
| |
| /* Start verification counter */ |
| passed_verification = 0; |
| |
| if( CLASS != 'S' ) printf( "\n iteration\n" ); |
| |
| |
| /* This is the main iteration */ |
| for( iteration=1; iteration<=MAX_ITERATIONS; iteration++ ) |
| { |
| if( CLASS != 'S' ) printf( " %d\n", iteration ); |
| rank( iteration ); |
| } |
| |
| |
| /* This tests that keys are in sequence: sorting of last ranked key seq |
| occurs here, but is an untimed operation */ |
| full_verify(); |
| |
| |
| /* The final printout */ |
| if( passed_verification != 5*MAX_ITERATIONS + 1 ) |
| passed_verification = 0; |
| c_print_results( "IS", |
| CLASS, |
| (int)(TOTAL_KEYS/64), |
| 64, |
| 0, |
| MAX_ITERATIONS, |
| "keys ranked", |
| passed_verification, |
| NPBVERSION); |
| |
| |
| return 0; |
| /**************************/ |
| } /* E N D P R O G R A M */ |
| /**************************/ |
| |
| |
| |
| |