blob: 5560f457c8a97b99a60c2f6fea34e58f22a25752 [file] [log] [blame]
/* Efficient version of attenuate fluxes which determines the change in angular
* flux along a particular track across a fine axial region and tallies the
* contribution to the scalar flux in the fine axial region. This function
* assumes a quadratic source, which is calculated on the fly using neighboring
* source values.
* This version decomposes the work into many for loops for efficient SIMD
* instructions and to reduce register pressure. For a more descriptive
* (but less efficient) version of the code in terms of the underlying physics,
* see alt_attenuate_fluxes which solves the problem in a more naive,
* straightforward manner.
*
* Inputs, as used by the statements visible in this listing:
*   track     - supplies z_height, p_weight and the angular flux vectors
*               f_psi / b_psi; the selected psi vector is overwritten
*   forward   - not referenced by any statement visible here (see the
*               NOTE(review) at the psi selection below)
*   QSR       - source region; fine_source, fine_flux, sigT and (under
*               OPENMP) locks are read, fine_flux[fine_id] is accumulated into
*   I_in      - input parameters, copied by value into a local
*   params_in - parameters, copied by value into a local; only expTable is
*               used here
*   ds        - multiplied by sigT[g] to form tau[g]
*   mu        - multiplies the q1 terms; mu * mu multiplies the q2 terms
*   az_weight - multiplied by track->p_weight to form the tally weight
*   A         - caller-provided scratch arrays, each indexed by energy group
*
* NOTE(review): this listing shows no braces, no `else` ahead of the third
* source-fitting loop, and no #endif after the #ifdef INTEL / #elif IBM and
* #ifdef OPENMP groups. Confirm the block structure against the repository
* copy before relying on it. */
void attenuate_fluxes( Track * track, bool forward, Source * QSR, Input * I_in,
Params * params_in, float ds, float mu, float az_weight,
AttenuateVars * A )
// work on by-value copies of the input and parameter structs
Input I = *I_in;
Params params = *params_in;
// unload attenuate vars
float * restrict q0 = A->q0;
float * restrict q1 = A->q1;
float * restrict q2 = A->q2;
float * restrict sigT = A->sigT;
float * restrict tau = A->tau;
float * restrict sigT2 = A->sigT2;
float * restrict expVal = A->expVal;
float * restrict reuse = A->reuse;
float * restrict flux_integral = A->flux_integral;
float * restrict tally = A->tally;
float * restrict t1 = A->t1;
float * restrict t2 = A->t2;
float * restrict t3 = A->t3;
float * restrict t4 = A->t4;
// compute fine axial interval spacing
float dz = I.height / (I.fai * I.decomp_assemblies_ax * I.cai);
// compute z height in cell, measured from the midpoint of the interval
// of width dz that contains track->z_height
float zin = track->z_height - dz *
( (int)( track->z_height / dz ) + 0.5f );
// compute fine axial region ID
int fine_id = (int) ( track->z_height / dz ) % I.fai;
// compute weight (azimuthal * polar)
// NOTE: real app would also have volume weight component
float weight = track->p_weight * az_weight;
float mu2 = mu * mu;
// load fine source region flux vector
float * FSR_flux = QSR -> fine_flux[fine_id];
// first fine region: the fit uses fine_id, fine_id+1 and fine_id+2
if( fine_id == 0 )
// adjust z height to account for edge
zin -= dz;
// cycle over energy groups
#ifdef INTEL
#pragma simd
#elif defined IBM
#pragma simd_level(10)
for( int g = 0; g < I.n_egroups; g++)
// load neighboring sources
float y1 = QSR->fine_source[fine_id][g];
float y2 = QSR->fine_source[fine_id+1][g];
float y3 = QSR->fine_source[fine_id+2][g];
// do quadratic "fitting"
float c0 = y2;
float c1 = (y1 - y3) / (2.f*dz);
float c2 = (y1 - 2.f*y2 + y3) / (2.f*dz*dz);
// calculate q0, q1, q2
q0[g] = c0 + c1*zin + c2*zin*zin;
q1[g] = c1 + 2.f*c2*zin;
q2[g] = c2;
// last fine region: the fit uses fine_id-2, fine_id-1 and fine_id
else if ( fine_id == I.fai - 1 )
// adjust z height to account for edge
zin += dz;
// cycle over energy groups
#ifdef INTEL
#pragma simd
#elif defined IBM
#pragma simd_level(10)
for( int g = 0; g < I.n_egroups; g++)
// load neighboring sources
float y1 = QSR->fine_source[fine_id-2][g];
float y2 = QSR->fine_source[fine_id-1][g];
float y3 = QSR->fine_source[fine_id][g];
// do quadratic "fitting"
float c0 = y2;
float c1 = (y1 - y3) / (2.f*dz);
float c2 = (y1 - 2.f*y2 + y3) / (2.f*dz*dz);
// calculate q0, q1, q2
q0[g] = c0 + c1*zin + c2*zin*zin;
q1[g] = c1 + 2.f*c2*zin;
q2[g] = c2;
// remaining case: the fit is centered on fine_id (fine_id-1 .. fine_id+1)
// NOTE(review): no `else` line is visible ahead of this loop - confirm
// cycle over energy groups
#ifdef INTEL
#pragma simd
#elif defined IBM
#pragma simd_level(10)
for( int g = 0; g < I.n_egroups; g++)
// load neighboring sources
float y1 = QSR->fine_source[fine_id-1][g];
float y2 = QSR->fine_source[fine_id][g];
float y3 = QSR->fine_source[fine_id+1][g];
// do quadratic "fitting"
float c0 = y2;
float c1 = (y1 - y3) / (2.f*dz);
float c2 = (y1 - 2.f*y2 + y3) / (2.f*dz*dz);
// calculate q0, q1, q2
q0[g] = c0 + c1*zin + c2*zin*zin;
q1[g] = c1 + 2.f*c2*zin;
q2[g] = c2;
// cycle over energy groups
#ifdef INTEL
#pragma simd
#elif defined IBM
#pragma simd_level(10)
for( int g = 0; g < I.n_egroups; g++)
// load total cross section
sigT[g] = QSR->sigT[g];
// calculate common values for efficiency
tau[g] = sigT[g] * ds;
sigT2[g] = sigT[g] * sigT[g];
// cycle over energy groups: table lookup of the exponential term
#ifdef INTEL
#pragma simd
#elif defined IBM
#pragma simd_level(10)
for( int g = 0; g < I.n_egroups; g++)
expVal[g] = interpolateTable( params.expTable, tau[g] );
// Flux Integral
// Re-used Term
// NOTE(review): as written the division applies only to 2.f * expVal[g],
// whereas alt_attenuate_fluxes divides the whole sum
// (tau * (tau - 2) + 2 * expVal) by sigT * sigT2 - confirm which is intended
#ifdef INTEL
#pragma simd
#elif defined IBM
#pragma simd_level(10)
for( int g = 0; g < I.n_egroups; g++)
reuse[g] = tau[g] * (tau[g] - 2.f) + 2.f * expVal[g]
/ (sigT[g] * sigT2[g]);
// select the angular flux vector to attenuate
// NOTE(review): as listed, psi is assigned f_psi and then immediately
// b_psi, and `forward` is never tested; a condition on `forward` appears to
// be missing from this listing
float * psi;
psi = track->f_psi;
psi = track->b_psi;
//#pragma vector nontemporal
#ifdef INTEL
#pragma simd
#elif defined IBM
#pragma simd_level(10)
for( int g = 0; g < I.n_egroups; g++)
// add contribution to new source flux
flux_integral[g] = (q0[g] * tau[g] + (sigT[g] * psi[g] - q0[g])
* expVal[g]) / sigT2[g] + q1[g] * mu * reuse[g] + q2[g] * mu2
* (tau[g] * (tau[g] * (tau[g] - 3.f) + 6.f) - 6.f * expVal[g])
/ (3.f * sigT2[g] * sigT2[g]);
#ifdef INTEL
#pragma simd
#elif defined IBM
#pragma simd_level(10)
for( int g = 0; g < I.n_egroups; g++)
// Prepare tally
tally[g] = weight * flux_integral[g];
// accumulate the tally into the region's flux vector; when OPENMP is
// defined the update is bracketed by the per-fine-region lock
#ifdef OPENMP
omp_set_lock(QSR->locks + fine_id);
#ifdef INTEL
#pragma simd
#elif defined IBM
#pragma simd_level(10)
for( int g = 0; g < I.n_egroups; g++)
FSR_flux[g] += tally[g];
#ifdef OPENMP
omp_unset_lock(QSR->locks + fine_id);
// The outgoing angular flux is assembled from four per-group terms
// Term 1
#ifdef INTEL
#pragma simd
#elif defined IBM
#pragma simd_level(10)
for( int g = 0; g < I.n_egroups; g++)
t1[g] = q0[g] * expVal[g] / sigT[g];
// Term 2
#ifdef INTEL
#pragma simd
#elif defined IBM
#pragma simd_level(10)
for( int g = 0; g < I.n_egroups; g++)
t2[g] = q1[g] * mu * (tau[g] - expVal[g]) / sigT2[g];
// Term 3
#ifdef INTEL
#pragma simd
#elif defined IBM
#pragma simd_level(10)
for( int g = 0; g < I.n_egroups; g++)
t3[g] = q2[g] * mu2 * reuse[g];
// Term 4
#ifdef INTEL
#pragma simd
#elif defined IBM
#pragma simd_level(10)
for( int g = 0; g < I.n_egroups; g++)
t4[g] = psi[g] * (1.f - expVal[g]);
// Total psi: overwrite the angular flux with the sum of the four terms
#ifdef INTEL
#pragma simd
#elif defined IBM
#pragma simd_level(10)
for( int g = 0; g < I.n_egroups; g++)
psi[g] = t1[g] + t2[g] + t3[g] + t4[g];
/* Single direction transport sweep.
* For every 2D track i, polar angle j, segment n and z-stacked track k, the
* visible code splits the 3D segment length into pieces that each stay
* within one fine axial interval and calls attenuate_fluxes (axial_exp == 2)
* or attenuate_FSR_fluxes (axial_exp == 0) once per piece. On exit the local
* segments_processed counter is stored into I->segments_processed.
*
*   params - reads polar_angles, tracks_2D and tracks
*   I      - reads sizes and options; segments_processed is written
*
* NOTE(review): this listing has no braces and no else / #else / #endif
* lines, and several statements end in a trailing comma or operator with
* their continuation absent (they are flagged below). Confirm against the
* repository copy. */
void transport_sweep( Params * params, Input * I )
if(I->mype==0) printf("Starting transport sweep ...\n");
// calculate the height of a node's domain and of each FSR
double node_delta_z = I->height / I->decomp_assemblies_ax;
double fine_delta_z = node_delta_z / (I->cai * I->fai);
/* loop over tracks (implicitly azimuthal angles, tracks in azimuthal
* angles, polar angles, and z stacked rays) */
//print_Input_struct( I );
// NOTE(review): no statement visible here increments segments_processed,
// so as listed the value stored at the end is 0 - confirm
long segments_processed = 0;
#pragma omp parallel default(none) \
shared( I, params, node_delta_z, fine_delta_z ) \
reduction(+ : segments_processed )
#ifdef OPENMP
int thread = omp_get_thread_num();
int nthreads = omp_get_num_threads();
// seed for rand_r, derived from the wall-clock time and the thread number
unsigned int seed = time(NULL) * (thread+1);
//print_Input_struct( I );
#ifdef PAPI
int eventset = PAPI_NULL;
int num_papi_events;
#pragma omp critical
counter_init(&eventset, &num_papi_events, I);
// Scratch storage for the attenuation kernels: one allocation of
// 14 * n_egroups floats, handed out as 14 consecutive arrays of n_egroups.
// NOTE(review): the malloc result is not checked and no matching free is
// visible in this listing
AttenuateVars A;
float * ptr = (float * ) malloc( I->n_egroups * 14 * sizeof(float));
A.q0 = ptr;
ptr += I->n_egroups;
A.q1 = ptr;
ptr += I->n_egroups;
A.q2 = ptr;
ptr += I->n_egroups;
A.sigT = ptr;
ptr += I->n_egroups;
A.tau = ptr;
ptr += I->n_egroups;
A.sigT2 = ptr;
ptr += I->n_egroups;
A.expVal = ptr;
ptr += I->n_egroups;
A.reuse = ptr;
ptr += I->n_egroups;
A.flux_integral = ptr;
ptr += I->n_egroups;
A.tally = ptr;
ptr += I->n_egroups;
A.t1 = ptr;
ptr += I->n_egroups;
A.t2 = ptr;
ptr += I->n_egroups;
A.t3 = ptr;
ptr += I->n_egroups;
A.t4 = ptr;
#pragma omp for schedule( dynamic )
for (long i = 0; i < I->ntracks_2D; i++)
// print progress
// NOTE(review): the `i % 50` printf below presumably belongs to the
// non-OPENMP branch; the separating #else / #endif is not visible
#ifdef OPENMP
if(I->mype==0 && thread == 0)
printf("\rAttenuating Tracks... (%.0lf%% completed)",
(i / ( (double)I->ntracks_2D / (double) nthreads ))
/ (double) nthreads * 100.0);
if( i % 50 == 0)
printf("%s%ld%s%ld\n","2D Tracks Completed = ", i," / ",
I->ntracks_2D );
// treat positive-z traveling rays first
bool pos_z_dir = true;
for( int j = 0; j < I->n_polar_angles; j++)
// from the middle polar index onward rays are treated as negative-z
if( j == I->n_polar_angles / 2 )
pos_z_dir = false;
float p_angle = params->polar_angles[j];
float mu = cos(p_angle);
// start with all z stacked rays
int begin_stacked = 0;
int end_stacked = I->z_stacked;
for( int n = 0; n < params->tracks_2D[i].n_segments; n++)
// calculate distance traveled in cell if segment completed
float s_full = params->tracks_2D[i].segments[n].length
/ sin(p_angle);
// allocate variable for distance traveled in an FSR
float ds = 0;
// loop over remaining z-stacked rays
for( int k = begin_stacked; k < end_stacked; k++)
// initialize s to full length
float s = s_full;
// select current track
Track * track = &params->tracks[i][j][k];
// set flag for completion of segment
bool seg_complete = false;
// calculate interval
// NOTE(review): both calls below end after the first argument and no
// `else` separates them in this listing
int curr_interval;
if( pos_z_dir)
curr_interval = get_pos_interval(track->z_height,
curr_interval = get_neg_interval(track->z_height,
while( !seg_complete )
// flag to reset z position
bool reset = false;
/* calculate new height based on s
* (distance traveled in FSR) */
float z = track->z_height + s * cos(p_angle);
// check if still in same FSR (fine axial interval)
// NOTE(review): both calls below are truncated in the same way
int new_interval;
if( pos_z_dir )
new_interval = get_pos_interval(z,
new_interval = get_neg_interval(z,
if( new_interval == curr_interval )
seg_complete = true;
ds = s;
// otherwise, we need to recalculate distances
// correct z
// NOTE(review): the two assignments to z below are identical as listed
if( pos_z_dir )
z = fine_delta_z * (float) curr_interval;
z = fine_delta_z * (float) curr_interval;
// calculate distance travelled in FSR (ds)
ds = (z - track->z_height) / cos(p_angle);
// update track length remaining
s -= ds;
/* check remaining track length to protect
* against potential roundoff errors */
if( s <= 0 )
seg_complete = true;
// check if out of bounds or track complete
if( z <= 0 || z >= node_delta_z )
// mark segment as completed
seg_complete = true;
// remember to no longer treat this track
// NOTE(review): no statement adjusting begin_stacked / end_stacked is
// visible under this condition
if ( pos_z_dir )
// reset z height
reset = true;
// pick a random FSR (cache miss expected)
// NOTE(review): both declarations end at the % operator; the modulus
// operand and the #else / #endif lines are not visible
#ifdef OPENMP
long QSR_id = rand_r(&seed) %
long QSR_id = rand() %
/* update sources and fluxes from attenuation
* over FSR */
// NOTE(review): both calls list 8 arguments while the definitions in this
// file take 9 (a Source * third); that argument line appears to be missing
if( I->axial_exp == 2 )
attenuate_fluxes( track, true,
I, params, ds, mu,
params->tracks_2D[i].az_weight, &A );
else if( I->axial_exp == 0 )
attenuate_FSR_fluxes( track, true,
I, params, ds, mu,
params->tracks_2D[i].az_weight, &A );
printf("Error: invalid axial expansion order");
printf("\n Please input 0 or 2\n");
// update with new z height or reset if finished
if( n == params->tracks_2D[i].n_segments - 1
|| reset)
if( pos_z_dir)
track->z_height = I->axial_z_sep * k;
track->z_height = I->axial_z_sep * (k+1);
track->z_height = z;
#ifdef OPENMP
if(thread == 0 && I->mype==0) printf("\n");
#ifdef PAPI
if( thread == 0 )
center_print("PAPI COUNTER RESULTS", 79);
printf("Count \tSmybol \tDescription\n");
#pragma omp barrier
counter_stop(&eventset, num_papi_events, I);
// publish the reduced segment count
I->segments_processed = segments_processed;
/* Runs one full two-way transport sweep. The function is declared void, so
* nothing is returned; the local segments_processed counter is stored into
* I->segments_processed at the end.
* For every 2D track i and polar angle j the visible code (1) walks each
* z-stacked track forward, splitting segments at fine axial interval
* boundaries and recording each piece's length and source pointer in
* seg_dist / seg_src, then (2) walks the recorded pieces in reverse order
* with forward == false and -mu, and (3) resets each track's z_height to its
* starting value.
*
*   params - reads polar_angles, tracks_2D, tracks and sources
*   I      - reads sizes and options; segments_processed is written
*
* NOTE(review): this listing has no braces and no else / #else / #endif
* lines, and some statements are truncated or absent (flagged below).
* Confirm against the repository copy. */
void two_way_transport_sweep( Params * params, Input * I )
if(I->mype==0) printf("Starting transport sweep ...\n");
// calculate the height of a node's domain and of each FSR
double node_delta_z = I->height / I->decomp_assemblies_ax;
int num_intervals = (I->cai * I->fai);
double fine_delta_z = node_delta_z / num_intervals;
/* loop over tracks (implicitly azimuthal angles, tracks in azimuthal
* angles, polar angles, and z stacked rays) */
// NOTE(review): no statement visible here increments segments_processed
long segments_processed = 0;
#pragma omp parallel default(none) \
shared( I, params, node_delta_z, fine_delta_z, num_intervals ) \
reduction(+ : segments_processed )
#ifdef OPENMP
int thread = omp_get_thread_num();
int nthreads = omp_get_num_threads();
// seed for rand_r, derived from the wall-clock time and the thread number
unsigned int seed = time(NULL) * (thread+1);
//print_Input_struct( I );
#ifdef PAPI
int eventset = PAPI_NULL;
int num_papi_events;
#pragma omp critical
counter_init(&eventset, &num_papi_events, I);
// Scratch storage for the attenuation kernels: one allocation of
// 14 * n_egroups floats, handed out as 14 consecutive arrays of n_egroups.
// NOTE(review): the malloc result is not checked and no matching free is
// visible in this listing
AttenuateVars A;
float * ptr = (float * ) malloc( I->n_egroups * 14 * sizeof(float));
A.q0 = ptr;
ptr += I->n_egroups;
A.q1 = ptr;
ptr += I->n_egroups;
A.q2 = ptr;
ptr += I->n_egroups;
A.sigT = ptr;
ptr += I->n_egroups;
A.tau = ptr;
ptr += I->n_egroups;
A.sigT2 = ptr;
ptr += I->n_egroups;
A.expVal = ptr;
ptr += I->n_egroups;
A.reuse = ptr;
ptr += I->n_egroups;
A.flux_integral = ptr;
ptr += I->n_egroups;
A.tally = ptr;
ptr += I->n_egroups;
A.t1 = ptr;
ptr += I->n_egroups;
A.t2 = ptr;
ptr += I->n_egroups;
A.t3 = ptr;
ptr += I->n_egroups;
A.t4 = ptr;
#pragma omp for schedule( dynamic )
for (long i = 0; i < I->ntracks_2D; i++)
// print progress
// NOTE(review): the `i % 50` printf below presumably belongs to the
// non-OPENMP branch; the separating #else / #endif is not visible
#ifdef OPENMP
if(I->mype==0 && thread == 0)
printf("\rAttenuating Tracks... (%.0lf%% completed)",
(i / ( (double)I->ntracks_2D / (double) nthreads ))
/ (double) nthreads * 100.0);
if( i % 50 == 0)
printf("%s%ld%s%ld\n","2D Tracks Completed = ", i," / ",
I->ntracks_2D );
// allocate arrays for segment storage FIXME
// one entry per z-stacked track: piece lengths, source pointers, number of
// stored pieces and current capacity
double ** seg_dist = malloc( I->z_stacked * sizeof(double *) );
Source *** seg_src = malloc( I->z_stacked * sizeof(Source**) );
int * seg_idx = malloc( I->z_stacked * sizeof(int) );
int * seg_size = malloc( I->z_stacked * sizeof(int) );
// fill matrix with arrays FIXME
for( int k = 0; k < I->z_stacked; k++)
seg_size[k] = 2 * I->segments_per_track;
seg_dist[k] = malloc( seg_size[k] * sizeof(double) );
seg_src[k] = malloc( seg_size[k] * sizeof(Source *) );
seg_idx[k] = 0;
// treat positive-z traveling rays first
bool pos_z_dir = true;
for( int j = 0; j < I->n_polar_angles; j++)
// from the middle polar index onward rays are treated as negative-z
if( j == I->n_polar_angles / 2 )
pos_z_dir = false;
float p_angle = params->polar_angles[j];
float mu = cos(p_angle);
// start with all z stacked rays
int begin_stacked = 0;
int end_stacked = I->z_stacked;
// reset segment indexes
for( int k = 0; k < I->z_stacked; k++)
seg_idx[k] = 0;
for( int n = 0; n < params->tracks_2D[i].n_segments; n++)
// calculate distance traveled in cell if segment completed
float s_full = params->tracks_2D[i].segments[n].length
/ sin(p_angle);
// allocate variable for distance traveled in an FSR
// NOTE(review): this ds is shadowed by the `float ds;` declared further
// down among the tracking variables
float ds = 0;
// loop over remaining z-stacked rays
// NOTE(review): no statement visible here increments tracks_completed
int tracks_completed = 0;
for( int k = begin_stacked; k < end_stacked; k++)
// select current track
Track * track = &params->tracks[i][j][k];
// determine current axial interval
// NOTE(review): the (int) cast binds to track->z_height alone, so the
// height is truncated before the division - confirm this is intended
int interval = (int) track->z_height / fine_delta_z;
// calculate distance to domain boundary
// NOTE(review): no `else` is visible between the two assignments
float bound_dist;
if( pos_z_dir)
bound_dist = (node_delta_z - track->z_height) / mu;
bound_dist = -track->z_height / mu;
// determine track length
float s;
if( s_full < bound_dist )
s = s_full;
// note completion of track
s = bound_dist;
// set flag for completion of segment
bool seg_complete = false;
while( !seg_complete )
// initialize tracking variables
long QSR_id = interval + num_intervals * n;
float ds;
float z;
// calculate z height of next fine axial interval
float fai_z_height;
if( pos_z_dir )
fai_z_height = (interval + 1) * fine_delta_z ;
fai_z_height = interval * fine_delta_z;
// calculate z distance to next fine axial interval
float z_dist_to_fai =
fai_z_height - track->z_height;
/* calculate total distance (s) to fine axial
* interval */
float s_dist_to_fai = z_dist_to_fai / mu;
// determine if a fine axial interval is crossed
// NOTE(review): no statement changing `interval` is visible in the
// crossing branch
if( s_dist_to_fai < s )
if( pos_z_dir )
ds = s_dist_to_fai;
z = track->z_height + z_dist_to_fai;
ds = s;
z = track->z_height + s * mu;
/* shorten remaining segment length and check if
* completed (accounting for potential roundoff) */
s -= ds;
if( s <= 0 || interval < 0
|| interval >= num_intervals)
seg_complete = true;
// pick a random FSR (cache miss expected)
// this overwrites the QSR_id computed from interval and n above
// NOTE(review): the rand_r line ends at the % operator; its operand and
// the #else / #endif lines are not visible
#ifdef OPENMP
QSR_id = rand_r(&seed) %
QSR_id = rand() % I->n_source_regions_per_node;
/* update sources and fluxes from attenuation
* over FSR */
// NOTE(review): both calls list 8 arguments while the definitions in this
// file take 9 (a Source * third); that argument line appears to be missing
if( I->axial_exp == 2 )
attenuate_fluxes( track, true,
I, params, ds, mu,
params->tracks_2D[i].az_weight, &A );
else if( I->axial_exp == 0 )
attenuate_FSR_fluxes( track, true,
I, params, ds, mu,
params->tracks_2D[i].az_weight, &A );
printf("Error: invalid axial expansion order");
printf("\n Please input 0 or 2\n");
// update track height
track->z_height = z;
// save segment length and source FIXME
// NOTE(review): no increment of seg_idx[k] is visible after the store
seg_dist[k][seg_idx[k]] = ds;
seg_src[k][seg_idx[k]] = &params->sources[QSR_id];
// check if array needs to grow FIXME
// capacity is doubled; the realloc results overwrite the original
// pointers without being checked
if( seg_idx[k] >= seg_size[k] )
seg_size[k] *= 2;
seg_dist[k] = (double *) realloc( seg_dist[k],
seg_size[k] * sizeof(double) );
seg_src[k] = (Source **) realloc( seg_src[k],
seg_size[k] * sizeof(Source *) );
// drop completed tracks from the range of z-stacked tracks still treated
// NOTE(review): both adjustments appear back to back here; presumably a
// pos_z_dir condition selected one of them
end_stacked -= tracks_completed;
begin_stacked += tracks_completed;
// loop over all z stacked rays again
// backward pass: replay the recorded pieces from last to first
for( int k = 0; k < I->z_stacked; k++ )
for( int n = seg_idx[k]-1; n >= 0; n--)
// load distance
float ds = seg_dist[k][n];
// select current track
Track * track = &params->tracks[i][j][k];
// update sources and fluxes from attenuation over FSR
// NOTE(review): the Source * argument is absent here as well
if( I->axial_exp == 2 )
attenuate_fluxes( track, false,
I, params, ds, -mu,
params->tracks_2D[i].az_weight, &A );
else if( I->axial_exp == 0 )
attenuate_FSR_fluxes( track, false,
I, params, ds, -mu,
params->tracks_2D[i].az_weight, &A );
// update z height
track->z_height -= ds * mu;
/* Update all tracks with correct starting z location again
* NOTE: this is only here to account for roundoff error */
for( int k = 0; k < I->z_stacked; k++)
Track * track = &params->tracks[i][j][k];
if( pos_z_dir)
track->z_height = I->axial_z_sep * k;
track->z_height = I->axial_z_sep * (k+1);
// free memory
// NOTE(review): the loop below has no visible body and no free() call for
// seg_dist, seg_src, seg_idx or seg_size appears in this listing
for( int k = 0; k < I->z_stacked; k++)
#ifdef OPENMP
if(thread == 0 && I->mype==0) printf("\n");
#ifdef PAPI
if( thread == 0 )
center_print("PAPI COUNTER RESULTS", 79);
printf("Count \tSmybol \tDescription\n");
#pragma omp barrier
counter_stop(&eventset, num_papi_events, I);
//printf("Number of segments processed: %ld\n", segments_processed);
// publish the reduced segment count
I->segments_processed = segments_processed;
/* Returns the integer number of the axial interval for tracks traveling in
 * the positive direction.
 *
 *   z  - axial height
 *   dz - axial interval width; must be non-zero
 *
 * The result is z / dz truncated toward zero by the conversion to int, so a
 * height lying exactly on an interval boundary maps to the interval that
 * starts at that boundary. */
int get_pos_interval( float z, float dz)
{
	int interval = (int) (z/dz);
	return interval;
}
/* returns integer number for axial interval for tracks traveling in the
* negative direction */
int get_neg_interval( float z, float dz)
int interval = (int) ( ceilf( z / dz ) );
return interval;
/* Computes a fine axial interval index from height z and interval width dz.
* As listed, interval is z / dz converted to int by the initialization,
* lower_z is computed but not used afterwards, and two return statements
* follow one another.
* NOTE(review): pos_dir is never referenced and the function braces are not
* visible; a condition on pos_dir choosing between `interval + 1` and
* `interval` appears to be missing from this listing - confirm against the
* repository copy. */
int calc_next_fai( float z, float dz, bool pos_dir)
int interval = z/dz;
float lower_z = dz * (float) interval;
return interval + 1;
return interval;
/* Determines the change in angular flux along a particular track across a fine
* axial region and tallies the contribution to the scalar flux in the fine
* axial region. This function assumes a quadratic source, which is calculated
* on the fly using neighboring source values.
* This legacy function is unused since it is less efficient than the current
* attenuate_fluxes function. However, it provides a more straightforward
* description of the underlying physical problem.
*
* Inputs, as used by the statements visible in this listing:
*   track     - supplies z_height, p_weight and the angular flux vectors
*               f_psi / b_psi; the selected psi vector is updated in place
*   forward   - not referenced by any statement visible here
*   QSR       - source region; fine_source, fine_flux and sigT are read and
*               fine_flux[fine_id] is accumulated into
*   I         - input parameters (height, fai, cai, decomp_assemblies_ax,
*               n_egroups)
*   params    - only expTable is used
*   ds        - multiplied by sigT to form tau
*   mu        - multiplies the q1 terms; mu * mu multiplies the q2 terms
*   az_weight - multiplied by track->p_weight to form the tally weight
*
* In the first and last fine region the visible code uses a two-point linear
* fit (q2 = 0); otherwise a three-point quadratic fit.
*
* NOTE(review): this listing has no braces and no `else` lines; confirm the
* block structure against the repository copy. */
void alt_attenuate_fluxes( Track * track, bool forward, Source * QSR, Input * I,
Params * params, float ds, float mu, float az_weight )
// compute fine axial interval spacing
float dz = I->height / (I->fai * I->decomp_assemblies_ax * I->cai);
// compute z height in cell, measured from the midpoint of the interval
// of width dz that contains track->z_height
float zin = track->z_height - dz * ( (int)( track->z_height / dz ) + 0.5 );
// compute fine axial region ID
int fine_id = (int) ( track->z_height / dz ) % I->fai;
// compute weight (azimuthal * polar)
// NOTE: real app would also have volume weight component
float weight = track->p_weight * az_weight;
float mu2 = mu * mu;
// load fine source region flux vector
float * FSR_flux = QSR -> fine_flux[fine_id];
// cycle over energy groups
for( int g = 0; g < I->n_egroups; g++)
// load total cross section
float sigT = QSR->sigT[g];
// define source parameters
float q0, q1, q2;
// calculate source components
if( fine_id == 0 )
// load neighboring sources
float y2 = QSR->fine_source[fine_id][g];
float y3 = QSR->fine_source[fine_id+1][g];
// do linear "fitting"
float c0 = y2;
float c1 = (y3 - y2) / dz;
// calculate q0, q1, q2
q0 = c0 + c1*zin;
q1 = c1;
q2 = 0;
else if( fine_id == I->fai - 1 )
// load neighboring sources
float y1 = QSR->fine_source[fine_id-1][g];
float y2 = QSR->fine_source[fine_id][g];
// do linear "fitting"
float c0 = y2;
float c1 = (y2 - y1) / dz;
// calculate q0, q1, q2
q0 = c0 + c1*zin;
q1 = c1;
q2 = 0;
// remaining case: three-point fit centered on fine_id
// NOTE(review): no `else` line is visible ahead of these statements
// load neighboring sources
float y1 = QSR->fine_source[fine_id-1][g];
float y2 = QSR->fine_source[fine_id][g];
float y3 = QSR->fine_source[fine_id+1][g];
// do quadratic "fitting"
float c0 = y2;
float c1 = (y1 - y3) / (2*dz);
float c2 = (y1 - 2*y2 + y3) / (2*dz*dz);
// calculate q0, q1, q2
q0 = c0 + c1*zin + c2*zin*zin;
q1 = c1 + 2*c2*zin;
q2 = c2;
// calculate common values for efficiency
float tau = sigT * ds;
float sigT2 = sigT * sigT;
// compute exponential ( 1 - exp(-x) ) using table lookup
float expVal = interpolateTable( params->expTable, tau );
// load correct angular flux vector
// NOTE(review): as listed, psi is assigned f_psi and then immediately
// b_psi; a condition on `forward` appears to be missing from this listing
float * psi;
psi = track->f_psi;
psi = track->b_psi;
// add contribution to new source flux
float flux_integral = (q0 * tau + (sigT * psi[g] - q0) * expVal)
/ sigT2
+ q1 * mu * (tau * (tau - 2) + 2 * expVal)
/ (sigT * sigT2)
+ q2 * mu2 * (tau * (tau * (tau - 3) + 6) - 6 * expVal)
/ (3 * sigT2 * sigT2);
// the shared flux vector is updated with an atomic add
#pragma omp atomic
FSR_flux[g] += weight * flux_integral;
// update angular flux
psi[g] = psi[g] * (1.0 - expVal) + q0 * expVal / sigT
+ q1 * mu * (tau - expVal) / sigT2 + q2 * mu2 *
(tau * (tau - 2) + 2 * expVal) / (sigT2 * sigT);
/* Determines the change in angular flux along a particular track across a fine
* axial region and tallies the contribution to the scalar flux in the fine
* axial region. This function assumes a constant source.
*
* Inputs, as used by the statements visible in this listing:
*   track     - supplies z_height, p_weight and the angular flux vectors
*               f_psi / b_psi; the selected psi vector is updated in place
*   forward   - not referenced by any statement visible here
*   FSR       - source region; sigT and fine_source are read and
*               fine_flux[fine_id] is accumulated into
*   I         - input parameters (height, fai, cai, decomp_assemblies_ax,
*               n_egroups)
*   params_in - copied by value into a local; only expTable is used
*   ds        - multiplied by sigT[g] to form tau[g]
*   mu        - folded into the tally weight
*   az_weight - multiplied by track->p_weight and mu to form the tally weight
*   A         - scratch arrays; tally, expVal, sigT and tau are used
*
* NOTE(review): this listing has no braces and no #endif lines, and the two
* #ifdef OPENMP groups near the end contain no visible statements of their
* own (attenuate_fluxes takes and releases FSR locks at the corresponding
* place). Confirm against the repository copy. */
void attenuate_FSR_fluxes( Track * track, bool forward, Source * FSR, Input * I,
Params * params_in, float ds, float mu, float az_weight,
AttenuateVars *A)
// unpack attenuate vars struct
float * restrict tally = A->tally;
float * restrict expVal = A->expVal;
float * restrict sigT = A->sigT;
float * restrict tau = A->tau;
Params params = * params_in;
// compute fine axial interval spacing
float dz = I->height / (I->fai * I->decomp_assemblies_ax * I->cai);
// compute z height in cell
// NOTE(review): zin is not used by any later statement visible here
float zin = track->z_height - dz *
( (int)( track->z_height / dz ) + 0.5f );
// compute fine axial region ID
int fine_id = (int) ( track->z_height / dz ) % I->fai;
// compute weight (azimuthal * polar), additionally scaled by mu here
// NOTE: real app would also have volume weight component
float weight = track->p_weight * az_weight * mu;
// load fine source region flux vector
float * FSR_flux = FSR -> fine_flux[fine_id];
// cycle over energy groups
#ifdef INTEL
#pragma simd
#elif defined IBM
#pragma simd_level(10)
for( int g = 0; g < I->n_egroups; g++)
// load total cross section
sigT[g] = FSR->sigT[g];
tau[g] = sigT[g] * ds;
// compute exponential ( 1 - exp(-x) ) using table lookup
#ifdef INTEL
#pragma simd
#elif defined IBM
#pragma simd_level(10)
for(int g = 0; g < I->n_egroups; g++)
expVal[g] = interpolateTable( params.expTable, tau[g] );
// select the angular flux vector to attenuate
// NOTE(review): as listed, psi is assigned f_psi and then immediately
// b_psi; a condition on `forward` appears to be missing from this listing
float * psi;
psi = track->f_psi;
psi = track->b_psi;
#ifdef INTEL
#pragma simd
#elif defined IBM
#pragma simd_level(10)
for( int g = 0; g < I->n_egroups; g++)
// compute angular flux attenuation
float q = FSR->fine_source[fine_id][g] / sigT[g];
float delta_psi = (psi[g] - q) * expVal[g];
// add contribution to new source flux
tally[g] = weight * delta_psi;
// update angular flux
psi[g] -= delta_psi;
// accumulate the tally into the region's flux vector
#ifdef OPENMP
#ifdef INTEL
#pragma simd
#elif defined IBM
#pragma simd_level(10)
for( int g = 0; g < I->n_egroups; g++)
FSR_flux[g] += tally[g];
#ifdef OPENMP
/* Renormalizes scalar and angular flux for next transport sweep iteration.
* Calculation requires multiple pair-wise sums and a reduction across all
* nodes.
*
*   params - sources (fine_flux, vol, XS) are read and fine_flux is scaled;
*            tracks' f_psi and b_psi are scaled
*   I      - sizes (n_source_regions_per_node, fai, n_egroups, ntracks_2D,
*            n_polar_angles, z_stacked) and mype
*   grid   - cart_comm_3d is the communicator for the MPI_Allreduce
*
* Visible steps: sum fine_flux * vol * XS[g][0] over groups, fine regions
* and source regions; combine across nodes when MPI is defined; then scale
* every fine_flux entry by norm_factor * 4 * M_PI * fai / vol and every
* boundary angular flux by norm_factor, where norm_factor is 1 / total.
*
* NOTE(review): this listing has no braces and no #else / #endif lines, one
* call is truncated, and the statements after "free allocated memory" are
* not visible. Confirm against the repository copy. */
void renormalize_flux( Params params, Input I, CommGrid grid )
if( I.mype == 0 ) printf("Renormalizing Flux...\n");
float node_fission_rate = 0;
#ifdef OPENMP
#pragma omp parallel default(none) shared(params, I, grid) \
reduction(+ : node_fission_rate)
// tally total fission rate (pair-wise sum)
// NOTE(review): the malloc results below are not checked
float * fission_rates = malloc( I.n_source_regions_per_node
* sizeof(float) );
float * fine_fission_rates = malloc( I.fai * sizeof(float) );
float * g_fission_rates = malloc( I.n_egroups * sizeof(float) );
// accumulate total fission rate on node domain
// NOTE(review): under the omp for, each thread fills only the entries of
// its own fission_rates array for the iterations it executes; the other
// entries of that array are never written in the code visible here
#pragma omp for schedule(dynamic)
for( int i = 0; i < I.n_source_regions_per_node; i++)
Source src = params.sources[i];
for( int j = 0; j < I.fai; j++)
for( int g = 0; g < I.n_egroups; g++)
g_fission_rates[g] = src.fine_flux[j][g] * src.vol
* src.XS[g][0];
fine_fission_rates[j] = pairwise_sum( g_fission_rates,
I.n_egroups );
fission_rates[i] = pairwise_sum( fine_fission_rates, I.fai );
// NOTE(review): the call below ends after its first argument; the element
// count argument is not visible in this listing
node_fission_rate = pairwise_sum(fission_rates,
// free allocated memory
// NOTE(review): no free() calls are visible in this listing
#ifdef OPENMP
#ifdef MPI
// accumulate total fission rate by MPI Allreduce
float total_fission_rate = 0;
MPI_Allreduce( &node_fission_rate, // Send Buffer
&total_fission_rate, // Receive Buffer
1, // Element Count
MPI_FLOAT, // Element Type
MPI_SUM, // Reduction Operation Type
grid.cart_comm_3d ); // MPI Communicator
// NOTE(review): presumably the non-MPI branch; the #else is not visible
float total_fission_rate = node_fission_rate;
// normalize fluxes by fission reaction rate
float norm_factor = 1.0 / total_fission_rate;
// NOTE(review): norm_factor is listed in private(...); an OpenMP private
// variable starts uninitialized in each thread, so the value computed above
// would not reach the loop body - confirm whether firstprivate or shared
// was intended
#pragma omp parallel for default(none) \
shared(I, params) private(norm_factor) schedule(dynamic)
for( int i = 0; i < I.n_source_regions_per_node; i++)
Source * src = &params.sources[i];
float adjust = norm_factor * 4 * M_PI * I.fai / src->vol;
for( int k = 0; k < I.fai; k++)
for( int g = 0; g < I.n_egroups; g++)
src->fine_flux[k][g] *= adjust;
// normalize boundary fluxes by same factor
// NOTE(review): the same private(norm_factor) concern applies here
#pragma omp parallel for default(none) \
shared(I, params) private(norm_factor) schedule(dynamic)
for( long i = 0; i < I.ntracks_2D; i++)
for( int j = 0; j < I.n_polar_angles; j++)
for( int k = 0; k < I.z_stacked; k++)
for( int g = 0; g < I.n_egroups; g++)
params.tracks[i][j][k].f_psi[g] *= norm_factor;
params.tracks[i][j][k].b_psi[g] *= norm_factor;
if( I.mype == 0 ) printf("Renormalizing Flux Complete.\n");
/* Updates sources for next iteration by computing scattering and fission
* components. Calculation includes multiple pair-wise sums and reductions
* across all nodes.
*
*   params - sources are read (fine_flux, XS, scattering_matrix,
*            fine_source) and fine_source[j][g] is assigned the new source
*   I      - sizes (n_source_regions_per_node, fai, n_egroups)
*   keff   - the fission source is multiplied by 1 / keff
*
* Returns the pairwise sum, over source regions, fine regions and groups, of
* the squared relative change ((new - old) / old)^2 of each fine source.
*
* NOTE(review): this listing has no braces, one expression is truncated,
* and no free() calls are visible after the "free memory" comment. Confirm
* against the repository copy. */
float update_sources( Params params, Input I, float keff )
// source residual
float residual;
// calculate inverse multiplication factor for efficiency
float inverse_k = 1.0 / keff;
// allocate residual arrays
// NOTE(review): none of the malloc results below are checked
float * group_res = (float *) malloc(I.n_egroups * sizeof(float));
float * fine_res = (float *) malloc(I.fai * sizeof(float));
float * residuals = (float *) malloc(I.n_source_regions_per_node
* sizeof(float));
// allocate arrays for summation
float * fission_rates = malloc(I.n_egroups * sizeof(float));
float * scatter_rates = malloc(I.n_egroups * sizeof(float));
// cycle through all coarse axial intervals to update source
for( long i = 0; i < I.n_source_regions_per_node; i++)
// src is a by-value copy of the Source struct
// NOTE(review): the store to src.fine_source[j][g] below reaches
// params.sources[i] only if fine_source is a pointer member - confirm
Source src = params.sources[i];
// cycle through all fine axial regions to calculate new source
for( int j = 0; j < I.fai; j++)
// calculate total fission source and scattering source
float fission_source;
float scatter_source;
// compute total fission source
for( int g = 0; g < I.n_egroups; g++ )
fission_rates[g] = src.fine_flux[j][g] * src.XS[g][0];
fission_source = pairwise_sum( fission_rates, (long) I.n_egroups);
// normalize fission source by multiplication factor
fission_source *= inverse_k;
// compute scattering and new total source for each group
for( int g = 0; g < I.n_egroups; g++ )
for( int g2 = 0; g2 < I.n_egroups; g2++ )
// compute scatter source originating from g2 -> g
// NOTE(review): the statement below ends at the * operator; its right-hand
// operand is not visible in this listing
scatter_rates[g2] = src.scattering_matrix[g][g2] *
scatter_source = pairwise_sum(scatter_rates,
(long) I.n_egroups);
// compute new total source
float chi = src.XS[g][2];
// calculate new fine source
float newSrc = (fission_source * chi + scatter_source)
/ (4.0 * M_PI);
// calculate residual
// the squared change is divided by oldSrc squared, so a zero old source
// gives a non-finite residual
float oldSrc = src.fine_source[j][g];
group_res[g] = (newSrc - oldSrc) * (newSrc - oldSrc)
/ (oldSrc * oldSrc);
/* calculate new source in fine axial interval assuming
* isotropic source components */
src.fine_source[j][g] = newSrc;
fine_res[j] = pairwise_sum(group_res, (long) I.n_egroups);
residuals[i] = pairwise_sum(fine_res, (long) I.fai);
// calculate source residual
residual = pairwise_sum(residuals, I.n_source_regions_per_node);
// free memory
// NOTE(review): no free() calls for the five arrays allocated above are
// visible in this listing
// NOTE: See code around line 600 of CPUSolver.cpp in ClosedMOC/ OpenMOC
return residual;
/* Computes global k-effective using multiple pair-wise summations and finally
* a reduction across all nodes.
*
*   params - sources (XS, fine_flux) and leakage are read
*   I      - sizes (n_source_regions_per_node, fai, n_egroups)
*   grid   - cart_comm_3d is the communicator for the MPI reductions
*
* Visible steps: sum XS[g][1] * fine_flux over groups, fine regions and
* source regions (absorption); repeat with XS[g][0] (fission); then form
* fission / (absorption + leakage), from the MPI_Reduce results when MPI is
* defined and from the node-local values otherwise.
*
* NOTE(review): this listing has no braces and no #else / #endif lines, and
* no free() calls are visible after the "free memory" comment. Confirm
* against the repository copy. */
float compute_keff(Params params, Input I, CommGrid grid)
// allocate temporary memory
// NOTE(review): none of the malloc results below are checked
float * sigma = malloc( I.n_egroups * sizeof(float) );
float * group_rates = malloc( I.n_egroups * sizeof(float) );
float * fine_rates = malloc( I.fai * sizeof(float) );
float * QSR_rates = malloc( I.n_source_regions_per_node * sizeof(float) );
// compute total absorption rate, looping over source regions
for( long i = 0; i < I.n_source_regions_per_node; i++)
// load absorption XS data
Source src = params.sources[i];
for( int g = 0; g < I.n_egroups; g++)
sigma[g] = src.XS[g][1];
for( int j = 0; j < I.fai; j++ )
// calculate absorption rates
float * fine_flux = src.fine_flux[j];
for( int g = 0; g < I.n_egroups; g++)
group_rates[g] = sigma[g] * fine_flux[g];
// sum absorption over all energy groups
fine_rates[j] = pairwise_sum( group_rates, (long) I.n_egroups );
// sum absorption over all fine axial intervals
QSR_rates[i] = pairwise_sum( fine_rates, (long) I.fai );
// sum absorption over all source regions in a node
float node_abs = pairwise_sum( QSR_rates, I.n_source_regions_per_node);
// compute total fission rate, looping over source regions
for( long i = 0; i < I.n_source_regions_per_node; i++)
// load nuSigmaF XS data
Source src = params.sources[i];
for( int g = 0; g < I.n_egroups; g++)
sigma[g] = src.XS[g][0];
for( int j = 0; j < I.fai; j++ )
// calculate fission rates
float * fine_flux = src.fine_flux[j];
for( int g = 0; g < I.n_egroups; g++)
group_rates[g] = sigma[g] * fine_flux[g];
// sum fission over all energy groups
fine_rates[j] = pairwise_sum( group_rates, (long) I.n_egroups );
// sum fission over all fine axial intervals
QSR_rates[i] = pairwise_sum( fine_rates, (long) I.fai );
// sum fission over all source regions in a node
float node_fission = pairwise_sum( QSR_rates, I.n_source_regions_per_node);
// MPI Reduction
float tot_abs = 0;
float tot_fission = 0;
float leakage = 0;
#ifdef MPI
// NOTE(review): the reductions below target root 0; the receive buffers on
// other ranks keep the 0 they were initialized with above - confirm how
// keff is used on those ranks
// Total Absorption Reduction
MPI_Reduce( &node_abs, // Send Buffer
&tot_abs, // Receive Buffer
1, // Element Count
MPI_FLOAT, // Element Type
MPI_SUM, // Reduction Operation Type
0, // Master Rank
grid.cart_comm_3d ); // MPI Communicator
// Total Fission Reduction
MPI_Reduce( &node_fission, // Send Buffer
&tot_fission, // Receive Buffer
1, // Element Count
MPI_FLOAT, // Element Type
MPI_SUM, // Reduction Operation Type
0, // Master Rank
grid.cart_comm_3d ); // MPI Communicator
// Total Leakage Reduction
MPI_Reduce( params.leakage, // Send Buffer
&leakage, // Receive Buffer
1, // Element Count
MPI_FLOAT, // Element Type
MPI_SUM, // Reduction Operation Type
0, // Master Rank
grid.cart_comm_3d ); // MPI Communicator
// calculate keff
// NOTE(review): two declarations of keff follow one another; presumably the
// second is the non-MPI branch and the #else / #endif lines are not visible
float keff = tot_fission/ (tot_abs + leakage);
float keff = node_fission / (node_abs + *params.leakage);
// free memory
// NOTE(review): no free() calls for sigma, group_rates, fine_rates or
// QSR_rates are visible in this listing
return keff;
/* Interpolates a formed exponential table to compute ( 1 - exp(-x) )
* at the desired x value.
*
*   table - lookup table; maxVal, dx, N and values are read. values holds
*           pairs: entry 2 * interval is used as a slope and entry
*           2 * interval + 1 as an intercept
*   x     - evaluation point
*
* Returns 1.0f when x > table.maxVal, otherwise slope * x + intercept for
* the selected interval.
*
* NOTE(review): this listing has no braces or `else` lines; confirm the
* block structure against the repository copy. */
float interpolateTable( Table table, float x)
// check to ensure value is in domain
if( x > table.maxVal )
return 1.0f;
// NOTE(review): the term added before truncation is 0.5f * table.dx, not
// 0.5f, so it does not round x / dx to the nearest index - confirm intent
int interval = (int) ( x / table.dx + 0.5f * table.dx );
// print diagnostics for an out-of-range index
// NOTE(review): no exit or return is visible after the diagnostics, so as
// listed the out-of-range index is still used to read table.values below
if( interval >= table.N || interval < 0)
printf( "Interval = %d\n", interval);
printf( "N = %d\n", table.N);
printf( "x = %f\n", x);
printf( "dx = %f\n", table.dx);
// evaluate the linear piece stored for this interval
float slope = table.values[ 2 * interval ];
float intercept = table.values[ 2 * interval + 1 ];
float val = slope * x + intercept;
return val;