blob: 62dd295b900894023b4bcfa9190b9f747f7ea882 [file] [log] [blame]
/*
* Copyright (C) 2017, UChicago Argonne, LLC
* All Rights Reserved
*
* Hardware/Hybrid Cosmology Code (HACC), Version 1.0
*
* Salman Habib, Adrian Pope, Hal Finkel, Nicholas Frontiere, Katrin Heitmann,
* Vitali Morozov, Jeffrey Emberson, Thomas Uram, Esteban Rangel
* (Argonne National Laboratory)
*
* David Daniel, Patricia Fasel, Chung-Hsing Hsu, Zarija Lukic, James Ahrens
* (Los Alamos National Laboratory)
*
* George Zagaris
* (Kitware)
*
* OPEN SOURCE LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer. Software changes,
* modifications, or derivative works, should be noted with comments and
* the author and organization’s name.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the names of UChicago Argonne, LLC or the Department of Energy
* nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* 4. The software and the end-user documentation included with the
* redistribution, if any, must include the following acknowledgment:
*
* "This product includes software produced by UChicago Argonne, LLC under
* Contract No. DE-AC02-06CH11357 with the Department of Energy."
*
* *****************************************************************************
* DISCLAIMER
* THE SOFTWARE IS SUPPLIED "AS IS" WITHOUT WARRANTY OF ANY KIND. NEITHER THE
* UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR
* UCHICAGO ARGONNE, LLC, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY,
* EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE
* ACCURARY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, DATA, APPARATUS,
* PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE
* PRIVATELY OWNED RIGHTS.
*
* *****************************************************************************
*/
#include "HACCKernels.h"
#include <ctime>
#include <cstdlib>
#include <limits>
#include <vector>
#include <iostream>
#ifdef _OPENMP
#include <omp.h>
#endif
// This number of iterations, which can be changed via the command line, is set
// so that the benchmark will run for a few seconds per polynomial degree of
// the force kernel on a single core of a modern CPU.
int NumIters = 2000;
// The interaction lists range is size between a few hundred and a few thousand.
int IListMin = 250;
int IListMax = 2250;
// The number of particles to update, which represents the number of particles
// per leaf node of the force evaluation tree in HACC, varies between tends of
// particles to around a hundred particles depending on the platform. These
// numbers represent the high side of the production range.
int PMin = 75;
int PMax = 150;
// The softening length and maximum separation similar to those used in HACC
// high-resolution configurations.
float SofteningLen = 0.1;
float MaxSep = 3.2;
// In this benchmark we offset the positions of the particles being updated
// from the particles in the interaction list so that some of the interactions
// will be filtered for being out of range. 0.1 yields ~5% of interactions
// filtered for being out of range.
float OffsetAdjFrac = 0.1;
// A simple random-number generator, see: https://en.wikipedia.org/wiki/Xorshift
static unsigned int rand32(unsigned int &state) {
unsigned int x = state;
x ^= x << 13;
x ^= x >> 17;
x ^= x << 5;
return (state = x);
}
static float randflt(unsigned int &state) {
return ((float) rand32(state)) / ((float) 0xffffffff);
}
void run(GravityForceKernelFunc GravityForceKernel, const char *Desc) {
#ifndef VERIFICATION_OUTPUT_ONLY
std::clock_t Start, End;
#endif
std::cout << "Gravity Short-Range-Force Kernel (" << Desc << "): ";
#ifndef VERIFICATION_OUTPUT_ONLY
Start = std::clock();
#endif
float ax, ay, az;
// We use lastprivate for (ax,ay,az) so that the reported output, which can
// be used for validation, does not depend on the order in which parallel
// loop iterations are executed.
// Because each iteration has a different amount of work, dynamic or guided
// scheduling is used here. guided gives the implementation more scheduling
// freedom.
#ifdef _OPENMP
#pragma omp parallel for schedule(guided) lastprivate(ax,ay,az)
#endif
for (int i = 0; i < NumIters; ++i) {
// Set the random seed used by each iteration to be a function of the
// iteration number only. This allows information from any fixed iteration
// (e.g. first or last) to be used for numerical validation.
unsigned int seed = i+1;
ax = ay = az = 0.0f;
int ILParticleCount = IListMin + rand32(seed) % (IListMax - IListMin);
int ParticleCount = PMin + rand32(seed) % (PMax - PMin);
std::vector<float> px(ParticleCount), py(ParticleCount),
pz(ParticleCount);
std::vector<float> x(ILParticleCount), y(ILParticleCount),
z(ILParticleCount), mass(ILParticleCount);
// Fill the particle arrays and the interaction list. The interaction-list
// particles are offset in the x direction based on OffsetAdjFrac.
for (int j = 0; j < ParticleCount; ++j) {
px[j] = randflt(seed)*0.5*MaxSep;
py[j] = randflt(seed)*0.5*MaxSep;
pz[j] = randflt(seed)*0.5*MaxSep;
}
for (int j = 0; j < ILParticleCount; ++j) {
x[j] = randflt(seed)*0.5*MaxSep + (0.5+OffsetAdjFrac)*MaxSep;
y[j] = randflt(seed)*0.5*MaxSep;
z[j] = randflt(seed)*0.5*MaxSep;
mass[j] = 1.0f + randflt(seed);
}
for (int j = 0; j < ParticleCount; ++j)
GravityForceKernel(ILParticleCount, &x[0], &y[0], &z[0], &mass[0],
px[j], py[j], pz[j], MaxSep*MaxSep,
SofteningLen*SofteningLen, ax, ay, az);
}
#ifndef VERIFICATION_OUTPUT_ONLY
End = std::clock();
#endif
std::cout << ax << " " << ay << " " << az;
#ifndef VERIFICATION_OUTPUT_ONLY
std::cout << ": ";
std::cout << ((float)(End - Start))/CLOCKS_PER_SEC << " s\n";
#else
std::cout << "\n";
#endif
}
int main(int argc, char *argv[]) {
#if defined(_OPENMP) && !defined(VERIFICATION_OUTPUT_ONLY)
std::cout << "Maximum OpenMP Threads: " << omp_get_max_threads() << "\n";
#endif
if (argc > 1)
NumIters = atoi(argv[1]);
std::cout << "Iterations: " << NumIters << "\n";
run(GravityForceKernel4, "4th Order");
run(GravityForceKernel5, "5th Order");
run(GravityForceKernel6, "6th Order");
return 0;
}