blob: fdcec2749e68df89ac996771b7faaa2c3c003ce6 [file] [log] [blame]
/*
* Copyright (C) 2017, UChicago Argonne, LLC
* All Rights Reserved
*
* Hardware/Hybrid Cosmology Code (HACC), Version 1.0
*
* Salman Habib, Adrian Pope, Hal Finkel, Nicholas Frontiere, Katrin Heitmann,
* Vitali Morozov, Jeffrey Emberson, Thomas Uram, Esteban Rangel
* (Argonne National Laboratory)
*
* David Daniel, Patricia Fasel, Chung-Hsing Hsu, Zarija Lukic, James Ahrens
* (Los Alamos National Laboratory)
*
* George Zagaris
* (Kitware)
*
* OPEN SOURCE LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer. Software changes,
* modifications, or derivative works, should be noted with comments and
* the author and organization’s name.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the names of UChicago Argonne, LLC or the Department of Energy
* nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* 4. The software and the end-user documentation included with the
* redistribution, if any, must include the following acknowledgment:
*
* "This product includes software produced by UChicago Argonne, LLC under
* Contract No. DE-AC02-06CH11357 with the Department of Energy."
*
* *****************************************************************************
* DISCLAIMER
* THE SOFTWARE IS SUPPLIED "AS IS" WITHOUT WARRANTY OF ANY KIND. NEITHER THE
* UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR
* UCHICAGO ARGONNE, LLC, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY,
* EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE
* ACCURARY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, DATA, APPARATUS,
* PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE
* PRIVATELY OWNED RIGHTS.
*
* *****************************************************************************
*/
#include "HACCKernels.h"
#include <cmath>
extern const float PolyCoefficients4[] = {
0.263729f, -0.0686285f, 0.00882248f, -0.000592487f, 0.0000164622f
};
extern const float PolyCoefficients5[] = {
0.269327f, -0.0750978f, 0.0114808f, -0.00109313f, 0.0000605491f,
-0.00000147177f
};
extern const float PolyCoefficients6[] = {
0.271431f, -0.0783394f, 0.0133122f, -0.00159485f, 0.000132336f,
-0.00000663394f, 0.000000147305f
};
// HACC's gravity short-range-force kernel represents the part of the 1/r^2
// gravitational force that is not computed by the long-range grid solver. This
// kernel computes the acceleration of a target particle from all of the other
// particles in the provided interaction lists. It is assumed that the target
// particle has unit mass while the interaction-list can contain pseudo
// particles with larger mass values. Beyond a distance of MaxSep, the
// inter-particle force should be completely accounted for by the long-range
// grid solver (and thus we filter out such interactions here). Closer than
// MaxSep, we directly compute the inter-particle force, subtracting the
// long-range part of the force (as fit to a polynomial of the specified
// degree). A softening length, SofteningLen, is also used, as is standard in
// N-body codes.
template <int PolyOrder, const float (&PolyCoefficients)[PolyOrder+1]>
static void GravityForceKernel(int n, float *RESTRICT x, float *RESTRICT y,
float *RESTRICT z, float *RESTRICT mass,
float x0, float y0, float z0,
float MaxSepSqrd, float SofteningLenSqrd,
float &RESTRICT ax, float &RESTRICT ay,
float &RESTRICT az) {
float lax = 0.0f, lay = 0.0f, laz = 0.0f;
// As written below, the mass array is conditionally accessed (i.e. accessed
// only if the interaction is not filtered by the distance checks). This will
// tend to inhibit vectorization on architectures without masked vector loads.
// With OpenMP 4+, we can explicitly inform the compiler that vectorization is
// safe.
//
// For the test suite: Clang does not report a high-enough version of OpenMP
// to enable the pragma below. Moreover, vectorization is desirable regardless
// of whether OpenMP is enabled (even if Clang's reported version were high
// enough), so we also use the Clang loop pragma to assume vectorization safety.
#if _OPENMP >= 201307
#pragma omp simd reduction(+:lax,lay,laz)
#elif defined(clang)
#pragma clang loop vectorize(assume_safety)
#endif
for (int i = 0; i < n; ++i) {
float dx = x[i] - x0, dy = y[i] - y0, dz = z[i] - z0;
float r2 = dx * dx + dy * dy + dz * dz;
if (r2 >= MaxSepSqrd || r2 == 0.0f)
continue;
float r2s = r2 + SofteningLenSqrd;
float f = PolyCoefficients[PolyOrder];
for (int p = 1; p <= PolyOrder; ++p)
f = PolyCoefficients[PolyOrder-p] + r2*f;
f = (1.0f / (r2s * std::sqrt(r2s)) - f) * mass[i];
lax += f * dx;
lay += f * dy;
laz += f * dz;
}
ax += lax;
ay += lay;
az += laz;
}
void GravityForceKernel4(int n, float *RESTRICT x, float *RESTRICT y,
float *RESTRICT z, float *RESTRICT mass,
float x0, float y0, float z0,
float MaxSepSqrd, float SofteningLenSqrd,
float &RESTRICT ax, float &RESTRICT ay,
float &RESTRICT az) {
GravityForceKernel<4, PolyCoefficients4>(n, x, y, z, mass, x0, y0, z0,
MaxSepSqrd, SofteningLenSqrd,
ax, ay, az);
}
void GravityForceKernel5(int n, float *RESTRICT x, float *RESTRICT y,
float *RESTRICT z, float *RESTRICT mass,
float x0, float y0, float z0,
float MaxSepSqrd, float SofteningLenSqrd,
float &RESTRICT ax, float &RESTRICT ay,
float &RESTRICT az) {
GravityForceKernel<5, PolyCoefficients5>(n, x, y, z, mass, x0, y0, z0,
MaxSepSqrd, SofteningLenSqrd,
ax, ay, az);
}
void GravityForceKernel6(int n, float *RESTRICT x, float *RESTRICT y,
float *RESTRICT z, float *RESTRICT mass,
float x0, float y0, float z0,
float MaxSepSqrd, float SofteningLenSqrd,
float &RESTRICT ax, float &RESTRICT ay,
float &RESTRICT az) {
GravityForceKernel<6, PolyCoefficients6>(n, x, y, z, mass, x0, y0, z0,
MaxSepSqrd, SofteningLenSqrd,
ax, ay, az);
}