// blob: ca2c175bda22825ac2d1a00965db20f435ef990b [file] [log] [blame]
/*
* Copyright (c) 2011-2013, Los Alamos National Security, LLC.
* All rights Reserved.
*
* Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced
* under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
* Laboratory (LANL), which is operated by Los Alamos National Security, LLC
* for the U.S. Department of Energy. The U.S. Government has rights to use,
* reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
* ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
* ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
* to produce derivative works, such modified software should be clearly marked,
* so as not to confuse it with the version available from LANL.
*
* Additionally, redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the Los Alamos National Security, LLC, Los Alamos
* National Laboratory, LANL, the U.S. Government, nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
* NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
* SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* CLAMR -- LA-CC-11-094
* This research code is being developed as part of the
* 2011 X Division Summer Workshop for the express purpose
* of a collaborative code for development of ideas in
* the implementation of AMR codes for Exascale platforms
*
* AMR implementation of the Wave code previously developed
* as a demonstration code for regular grids on Exascale platforms
* as part of the Supercomputing Challenge and Los Alamos
* National Laboratory
*
* Authors: Bob Robey XCP-2 brobey@lanl.gov
* Neal Davis davis68@lanl.gov, davis68@illinois.edu
* David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com
* Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com
*
*/
#ifndef STATE_H_
#define STATE_H_
#include <list>
#include "MallocPlus.h"
#include "mesh.h"
#include "crux.h"
#ifdef HAVE_OPENCL
#include "ezcl/ezcl.h"
#endif
//#include "l7/l7.h"
/*
 * Integer status codes, kept as preprocessor macros so they remain usable
 * from both ordinary expressions and #if tests.
 * NOTE(review): the names suggest "no error", "NaN detected" and
 * "mass conservation violated"; presumably these are the values passed as
 * error_status to State::print_rollback_log -- confirm against the callers.
 */
#define STATUS_OK 0
#define STATUS_NAN 1
#define STATUS_MASS_LOSS 2
/*
 * Precision configuration.
 *
 * Exactly one of FULL_PRECISION, MIXED_PRECISION or MINIMUM_PRECISION is
 * expected to be supplied by the build. When none is given, FULL_PRECISION
 * is selected. If several are defined, the #if chain below picks the first
 * match in the order MINIMUM, MIXED, FULL.
 */
#if !defined(FULL_PRECISION) && !defined(MIXED_PRECISION) && !defined(MINIMUM_PRECISION)
#define FULL_PRECISION
#endif

/*
 * NO_CL_DOUBLE forces single precision everywhere. MINIMUM_PRECISION is only
 * defined here when the build has not already defined it: redefining a macro
 * with a different replacement list (e.g. after -DMINIMUM_PRECISION, which
 * defines it as 1) is diagnosed by the preprocessor.
 */
#ifdef NO_CL_DOUBLE
#undef FULL_PRECISION
#undef MIXED_PRECISION
#ifndef MINIMUM_PRECISION
#define MINIMUM_PRECISION
#endif
#endif

/*
 * Per-mode definitions:
 *   state_t           -- physics state variables, ncell in size
 *   real_t            -- intermediate calculations
 *   CONSERVATION_EPS  -- NOTE(review): presumably the tolerance of the mass
 *                        conservation check; confirm at the use sites
 *   cl_*_t            -- matching OpenCL host types (HAVE_OPENCL only)
 *   MPI_*_T, L7_*_T   -- matching MPI / L7 datatype handles (HAVE_MPI only)
 */
#if defined(MINIMUM_PRECISION)

typedef float state_t;   // stored state: single precision
typedef float real_t;    // intermediates: single precision
#define CONSERVATION_EPS 15.0
#ifdef HAVE_OPENCL
typedef cl_float  cl_state_t;    // for gpu physics state variables
typedef cl_float4 cl_state4_t;   // for gpu physics state variables
typedef cl_float  cl_real_t;     // for intermediate gpu values
typedef cl_float2 cl_real2_t;    // for intermediate gpu values
typedef cl_float4 cl_real4_t;    // for intermediate gpu values
#endif
#ifdef HAVE_MPI
#define MPI_STATE_T MPI_FLOAT    // MPI datatype matching state_t
#define MPI_REAL_T MPI_FLOAT     // MPI datatype matching real_t
#define L7_STATE_T L7_FLOAT
#define L7_REAL_T L7_FLOAT
#endif

#elif defined(MIXED_PRECISION) // intermediate values calculated high precision and stored as floats

typedef float  state_t;  // stored state: single precision
typedef double real_t;   // intermediates: double precision
#define CONSERVATION_EPS .02
#ifdef HAVE_OPENCL
typedef cl_float   cl_state_t;   // for gpu physics state variables
typedef cl_float4  cl_state4_t;  // for gpu physics state variables
typedef cl_double  cl_real_t;    // for intermediate gpu values
typedef cl_double2 cl_real2_t;   // for intermediate gpu values
typedef cl_double4 cl_real4_t;   // for intermediate gpu values
#endif
#ifdef HAVE_MPI
#define MPI_STATE_T MPI_FLOAT    // MPI datatype matching state_t
#define MPI_REAL_T MPI_DOUBLE    // MPI datatype matching real_t
#define L7_STATE_T L7_FLOAT
#define L7_REAL_T L7_DOUBLE
#endif

#elif defined(FULL_PRECISION)

typedef double state_t;  // stored state: double precision
typedef double real_t;   // intermediates: double precision
#define CONSERVATION_EPS .02
#ifdef HAVE_OPENCL
typedef cl_double  cl_state_t;   // for gpu physics state variables
typedef cl_double4 cl_state4_t;  // for gpu physics state variables
typedef cl_double  cl_real_t;    // for intermediate gpu values
typedef cl_double2 cl_real2_t;   // for intermediate gpu values
typedef cl_double4 cl_real4_t;   // for intermediate gpu values
#endif
#ifdef HAVE_MPI
#define MPI_STATE_T MPI_DOUBLE   // MPI datatype matching state_t
#define MPI_REAL_T MPI_DOUBLE    // MPI datatype matching real_t
#define L7_STATE_T L7_DOUBLE
#define L7_REAL_T L7_DOUBLE
#endif

#endif

/*
 * Pair of intermediate-precision values. In every mode the two members have
 * the same type as real_t, so the struct is defined once here instead of
 * being repeated in each branch above.
 */
typedef struct
{
   real_t s0;
   real_t s1;
} real2_t;
/* do_calc is declared with C linkage; its definition is not in this header. */
extern "C" {
void do_calc(void);
}
/*
 * Summation variants.
 * NOTE(review): presumably the values accepted by the enhanced_precision_sum
 * argument of State::mass_sum -- confirm against the callers.
 */
enum SUM_TYPE
{
   SUM_REGULAR = 0,
   SUM_KAHAN   = 1
};
/*
 * Sign rule passed to State::symmetry_check.
 * The enumerator list has no trailing comma: a trailing comma is only
 * accepted from C++11 on, and the other enums in this header do not use one.
 */
enum SIGN_RULE {
   DIAG_RULE,
   X_RULE,
   Y_RULE
};
/*
 * Timer slots. Each enumerator is an index into the cpu_timers and
 * gpu_timers arrays of class State; STATE_TIMER_SIZE is the slot count and
 * must stay last.
 */
enum state_timers
{
   STATE_TIMER_APPLY_BCS         = 0,
   STATE_TIMER_SET_TIMESTEP      = 1,
   STATE_TIMER_FINITE_DIFFERENCE = 2,
   STATE_TIMER_REFINE_POTENTIAL  = 3,
   STATE_TIMER_CALC_MPOT         = 4,
   STATE_TIMER_REZONE_ALL        = 5,
   STATE_TIMER_MASS_SUM          = 6,
   STATE_TIMER_READ              = 7,
   STATE_TIMER_WRITE             = 8,
   STATE_TIMER_SIZE              = 9
};

/* Alias used in the timer accessor signatures. */
typedef state_timers state_timer_category;
// NOTE(review): a namespace-scope using-directive in a header exposes all of
// std to every file that includes it. It is left in place because translation
// units including this header may rely on it for unqualified std names.
using namespace std;
class State {
public:
MallocPlus state_memory;
MallocPlus gpu_state_memory;
Mesh *mesh;
state_t *H;
state_t *U;
state_t *V;
#ifdef HAVE_OPENCL
cl_mem dev_H;
cl_mem dev_U;
cl_mem dev_V;
cl_mem dev_mass_sum;
cl_mem dev_deltaT;
cl_event apply_BCs_event;
cl_mem dev_mpot;
//cl_mem dev_ioffset;
cl_mem dev_result;
#endif
double cpu_timers[STATE_TIMER_SIZE];
long long gpu_timers[STATE_TIMER_SIZE];
// constructor -- allocates state arrays to size ncells
State(Mesh *mesh_in);
void init(int do_gpu_calc);
void terminate(void);
/* Memory routines for linked list of state arrays */
void allocate(size_t ncells);
void allocate_from_backup_file(FILE *fp);
void allocate_for_rollback(State *state_to_copy);
void resize(size_t ncells);
void memory_reset_ptrs(void);
#ifdef HAVE_OPENCL
void allocate_device_memory(size_t ncells);
#endif
void resize_old_device_memory(size_t ncells);
/* Accessor routines */
double get_cpu_timer(state_timer_category category) {return(cpu_timers[category]); };
/* Convert nanoseconds to msecs */
double get_gpu_timer(state_timer_category category) {return((double)(gpu_timers[category])*1.0e-9); };
/* Boundary routines -- not currently used */
void add_boundary_cells(void);
void apply_boundary_conditions(void);
void apply_boundary_conditions_local(void);
void apply_boundary_conditions_ghost(void);
void remove_boundary_cells(void);
/*******************************************************************
* set_timestep
* Input
* H, U, V -- from state object
* celltype, level, lev_delta
* Output
* mindeltaT returned
*******************************************************************/
double set_timestep(double g, double sigma);
#ifdef HAVE_OPENCL
double gpu_set_timestep(double sigma);
#endif
/*******************************************************************
* calc finite difference
* will add ghost region to H, U, V and fill at start of routine
* Input
* H, U, V -- from state object
* nlft, nrht, nbot, ntop, level, celltype -- from mesh object
* Output
* H, U, V
*******************************************************************/
void calc_finite_difference(double deltaT);
void calc_finite_difference_via_faces(double deltaT);
#ifdef HAVE_OPENCL
void gpu_calc_finite_difference(double deltaT);
#endif
/*******************************************************************
* calc refine potential -- state has responsibility to calc initial
* refinement potential array that is then passed to mesh for
* smoothing and enforcing refinement ruiles
* Input
* H, U, V -- from state object
* Output
* mpot
* ioffset
* count
*******************************************************************/
size_t calc_refine_potential(vector<int> &mpot, int &icount, int &jcount);
#ifdef HAVE_OPENCL
size_t gpu_calc_refine_potential(int &icount, int &jcount);
#endif
/*******************************************************************
* rezone all -- most of call is done in mesh
* Input
* Mesh and state variables
* Output
* New mesh and state variables on refined mesh
*******************************************************************/
void rezone_all(int icount, int jcount, vector<int> mpot);
#ifdef HAVE_OPENCL
void gpu_rezone_all(int icount, int jcount, bool localStencil);
#endif
/*******************************************************************
* load balance -- most of call is done in mesh, but pointers are
* reset to newly allocated state arrays
* Input
* Mesh and state variables
* Output
* New mesh and state variables on refined mesh
*******************************************************************/
#ifdef HAVE_MPI
void do_load_balance_local(size_t &numcells);
#ifdef HAVE_OPENCL
void gpu_do_load_balance_local(size_t &numcells);
#endif
#endif
/*******************************************************************
* mass sum -- Conservation of mass check
* Input
* H from state object
* Precision type for sum
* Output
* total mass is returned
*******************************************************************/
double mass_sum(int enhanced_precision_sum);
#ifdef HAVE_OPENCL
double gpu_mass_sum(int enhanced_precision_sum);
#endif
void fill_circle(double circ_radius, double fill_value, double background);
void state_reorder(vector<int> iorder);
void symmetry_check(const char *string, vector<int> sym_index, double eps,
SIGN_RULE sign_rule, int &flag);
void output_timing_info(int do_cpu_calc, int do_gpu_calc, double total_elapsed_time);
/* state comparison routines */
#ifdef HAVE_OPENCL
void compare_state_gpu_global_to_cpu_global(const char* string, int cycle, uint ncells);
#endif
void compare_state_cpu_local_to_cpu_global(State *state_global, const char* string, int cycle, uint ncells, uint ncells_global, int *nsizes, int *ndispl);
#ifdef HAVE_OPENCL
void compare_state_all_to_gpu_local(State *state_global, uint ncells, uint ncells_global, int mype, int ncycle, int *nsizes, int *ndispl);
#endif
void output_timer_block(mesh_device_types device_type, double elapsed_time,
double mesh_time, double compute_time, double total_elapsed_time, double speedup_ratio);
void timer_output(state_timer_category category, mesh_device_types device_type, int timer_level);
void print(void);
size_t get_checkpoint_size(void);
void store_checkpoint(Crux *crux);
void restore_checkpoint(Crux *crux);
//Added to for second print for every interation: Brian Atkinson (5-29-14)
void print(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage);
void print_local(int ncycle);
void print_failure_log(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage, bool got_nan);
void print_rollback_log(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage, int backup_attempt, int num_of_attempts, int error_status);
private:
State(const State&); // To block copy constructor so copies are not made inadvertently
void print_object_info(void);
};
#endif // ifndef STATE_H_