// blob: 141234aa00caf03f1e0b0492a35bf55b5dbccae5 [file] [log] [blame]
// ************************************************************************
//
// miniAMR: stencil computations with boundary exchange and AMR.
//
// Copyright (2014) Sandia Corporation. Under the terms of Contract
// DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
// retains certain rights in this software.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation; either version 2.1 of the
// License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
// Questions? Contact Courtenay T. Vaughan (ctvaugh@sandia.gov)
// Richard F. Barrett (rfbarre@sandia.gov)
//
// ************************************************************************
#include <stdio.h>
#include <math.h>
#include "block.h"
#include "proto.h"
#include "timer.h"
// Profiling output.
void profile(void)
{
int i;
double total_gflops, gflops_rank, total_fp_ops, total_fp_adds,
total_fp_divs;
object *op;
char *version = "1.0 provisional";
FILE *fp;
calculate_results();
total_fp_divs = ((double) total_blocks)*((double) x_block_size)*
((double) y_block_size)*((double) z_block_size);
if (stencil == 7)
total_fp_adds = 6*total_fp_divs;
else
total_fp_adds = 26*total_fp_divs;
total_fp_ops = total_fp_divs + total_fp_adds;
total_gflops = total_fp_ops/(average[38]*1024.0*1024.0*1024.0);
gflops_rank = total_gflops/((double) num_pes);
if (!my_pe) {
if (report_perf & 1) {
fp = fopen("results.yaml", "w");
fprintf(fp, "code: miniAMR\n");
fprintf(fp, "version: %s\n", version);
fprintf(fp, "ranks: %d\n", num_pes);
fprintf(fp, "init_block_x: %d\n", init_block_x);
fprintf(fp, "init_block_y: %d\n", init_block_y);
fprintf(fp, "init_block_z: %d\n", init_block_z);
fprintf(fp, "x_block_size: %d\n", x_block_size);
fprintf(fp, "y_block_size: %d\n", y_block_size);
fprintf(fp, "z_block_size: %d\n", z_block_size);
fprintf(fp, "permute: %d\n", permute);
fprintf(fp, "max_blocks_allowed: %d\n", max_num_blocks);
fprintf(fp, "code: %d\n", code);
fprintf(fp, "num_refine: %d\n", num_refine);
fprintf(fp, "block_change: %d\n", block_change);
fprintf(fp, "refine_ghost: %d\n", refine_ghost);
fprintf(fp, "uniform_refine: %d\n", uniform_refine);
fprintf(fp, "num_objects: %d\n", num_objects);
for (i = 0; i < num_objects; i++) {
op = &objects[i];
fprintf(fp, "obj%dtype: %d\n", i, op->type);
fprintf(fp, "obj%dbounce: %d\n", i, op->bounce);
fprintf(fp, "obj%dcenter_x: %lf\n", i, op->cen[0]);
fprintf(fp, "obj%dcenter_y: %lf\n", i, op->cen[1]);
fprintf(fp, "obj%dcenter_z: %lf\n", i, op->cen[2]);
fprintf(fp, "obj%dmove_x: %lf\n", i, op->move[0]);
fprintf(fp, "obj%dmove_y: %lf\n", i, op->move[1]);
fprintf(fp, "obj%dmove_z: %lf\n", i, op->move[2]);
fprintf(fp, "obj%dsize_x: %lf\n", i, op->size[0]);
fprintf(fp, "obj%dsize_y: %lf\n", i, op->size[1]);
fprintf(fp, "obj%dsize_z: %lf\n", i, op->size[2]);
fprintf(fp, "obj%dinc_x: %lf\n", i, op->inc[0]);
fprintf(fp, "obj%dinc_y: %lf\n", i, op->inc[1]);
fprintf(fp, "obj%dinc_z: %lf\n", i, op->inc[2]);
}
fprintf(fp, "num_tsteps: %d\n", num_tsteps);
fprintf(fp, "stages_per_timestep: %d\n", stages_per_ts);
fprintf(fp, "checksum_freq: %d\n", checksum_freq);
fprintf(fp, "refine_freq: %d\n", refine_freq);
fprintf(fp, "plot_freq: %d\n", plot_freq);
fprintf(fp, "num_vars: %d\n", num_vars);
fprintf(fp, "stencil: %d\n", stencil);
fprintf(fp, "comm_vars: %d\n", comm_vars);
fprintf(fp, "error_tol: %d\n", error_tol);
fprintf(fp, "total_time: %lf\n", average[0]);
fprintf(fp, "memory_used: %lf\n", average[111]);
fprintf(fp, "compute_time: %lf\n", average[38]);
fprintf(fp, "total_gflops: %lf\n", total_gflops);
fprintf(fp, "ave_gflops: %lf\n", gflops_rank);
fprintf(fp, "total_comm: %lf\n", average[37]);
fprintf(fp, " total_exch_same: %lf\n", average[5]);
fprintf(fp, " total_exch_diff: %lf\n", average[6]);
fprintf(fp, " total_apply_bc: %lf\n", average[7]);
fprintf(fp, " total_face_exch_same: %lf\n", average[75]);
fprintf(fp, " total_face_exch_diff: %lf\n", average[76]);
fprintf(fp, " total_face_bc_apply: %lf\n", average[74]);
fprintf(fp, " x_comm: %lf\n", average[10]);
fprintf(fp, " x_exch_same: %lf\n", average[14]);
fprintf(fp, " x_exch_diff: %lf\n", average[15]);
fprintf(fp, " x_apply_bc: %lf\n", average[16]);
fprintf(fp, " x_face_exch_same: %lf\n", average[84]);
fprintf(fp, " x_face_exch_diff: %lf\n", average[85]);
fprintf(fp, " x_face_bc_apply: %lf\n", average[83]);
fprintf(fp, " y_comm: %lf\n", average[19]);
fprintf(fp, " y_exch_same: %lf\n", average[23]);
fprintf(fp, " y_exch_diff: %lf\n", average[24]);
fprintf(fp, " y_apply_bc: %lf\n", average[25]);
fprintf(fp, " y_face_exch_same: %lf\n", average[93]);
fprintf(fp, " y_face_exch_diff: %lf\n", average[94]);
fprintf(fp, " y_face_bc_apply: %lf\n", average[92]);
fprintf(fp, " z_comm: %lf\n", average[28]);
fprintf(fp, " z_exch_same: %lf\n", average[32]);
fprintf(fp, " z_exch_diff: %lf\n", average[33]);
fprintf(fp, " z_apply_bc: %lf\n", average[34]);
fprintf(fp, " z_face_exch_same: %lf\n", average[102]);
fprintf(fp, " z_face_exch_diff: %lf\n", average[103]);
fprintf(fp, " z_face_bc_apply: %lf\n", average[101]);
fprintf(fp, "gridsum_time: %lf\n", average[39]);
fprintf(fp, " gridsum_calc: %lf\n", average[41]);
fprintf(fp, "refine_time: %lf\n", average[42]);
fprintf(fp, " total_blocks_ts: %lf\n",
((double) total_blocks)/((double) (num_tsteps*stages_per_ts)));
fprintf(fp, " total_blocks_ts_min: %d\n", nb_min);
fprintf(fp, " total_blocks_ts_max: %d\n", nb_max);
fprintf(fp, " blocks_split: %lf\n", average[104]);
fprintf(fp, " blocks_reformed: %lf\n", average[105]);
fprintf(fp, " time_compare_obj: %lf\n", average[43]);
fprintf(fp, " time_mark_refine: %lf\n", average[44]);
fprintf(fp, " time_split_block: %lf\n", average[46]);
fprintf(fp, " time_total_coarsen: %lf\n", average[47]);
fprintf(fp, " time_misc: %lf\n", average[45]);
fprintf(fp, "plot_time: %lf\n", average[67]);
fclose(fp);
}
if (report_perf & 2) {
fp = fopen("results.txt", "w");
fprintf(fp, "\n ================ Start report ===================\n\n");
fprintf(fp, " Mantevo miniAMR\n");
fprintf(fp, " version %s\n\n", version);
fprintf(fp, "serial run on 1 rank\n");
fprintf(fp, "initial blocks per rank %d x %d x %d\n", init_block_x,
init_block_y, init_block_z);
fprintf(fp, "block size %d x %d x %d\n", x_block_size, y_block_size,
z_block_size);
if (permute)
fprintf(fp, "Order of exchanges permuted\n");
fprintf(fp, "Maximum number of blocks per rank is %d\n",
max_num_blocks);
if (target_active)
fprintf(fp, "Target number of blocks per rank is %d\n",
target_active);
if (target_max)
fprintf(fp, "Target max number of blocks per rank is %d\n",
target_max);
if (target_min)
fprintf(fp, "Target min number of blocks per rank is %d\n",
target_min);
if (code)
fprintf(fp, "Code set to code %d\n", code);
fprintf(fp, "Number of levels of refinement is %d\n", num_refine);
fprintf(fp, "Blocks can change by %d levels per refinement step\n",
block_change);
if (refine_ghost)
fprintf(fp, "Ghost cells will be used determine is block is refined\n");
if (uniform_refine)
fprintf(fp, "\nBlocks will be uniformly refined\n");
else {
fprintf(fp, "\nBlocks will be refined by %d objects\n\n", num_objects);
for (i = 0; i < num_objects; i++) {
op = &objects[i];
if (op->type == 0)
fprintf(fp, "Object %d is the surface of a rectangle\n", i);
else if (op->type == 1)
fprintf(fp, "Object %d is the volume of a rectangle\n", i);
else if (op->type == 2)
fprintf(fp, "Object %d is the surface of a spheroid\n", i);
else if (op->type == 3)
fprintf(fp, "Object %d is the volume of a spheroid\n", i);
else if (op->type == 4)
fprintf(fp, "Object %d is the surface of x+ hemispheroid\n", i);
else if (op->type == 5)
fprintf(fp, "Object %d is the volume of x+ hemispheroid\n", i);
else if (op->type == 6)
fprintf(fp, "Object %d is the surface of x- hemispheroid\n", i);
else if (op->type == 7)
fprintf(fp, "Object %d is the volume of x- hemispheroid\n", i);
else if (op->type == 8)
fprintf(fp, "Object %d is the surface of y+ hemispheroid\n", i);
else if (op->type == 9)
fprintf(fp, "Object %d is the volume of y+ hemispheroid\n", i);
else if (op->type == 10)
fprintf(fp, "Object %d is the surface of y- hemispheroid\n", i);
else if (op->type == 11)
fprintf(fp, "Object %d is the volume of y- hemispheroid\n", i);
else if (op->type == 12)
fprintf(fp, "Object %d is the surface of z+ hemispheroid\n", i);
else if (op->type == 13)
fprintf(fp, "Object %d is the volume of z+ hemispheroid\n", i);
else if (op->type == 14)
fprintf(fp, "Object %d is the surface of z- hemispheroid\n", i);
else if (op->type == 15)
fprintf(fp, "Object %d is the volume of z- hemispheroid\n", i);
else if (op->type == 20)
fprintf(fp, "Object %d is the surface of x axis cylinder\n", i);
else if (op->type == 21)
fprintf(fp, "Object %d is the volune of x axis cylinder\n", i);
else if (op->type == 22)
fprintf(fp, "Object %d is the surface of y axis cylinder\n", i);
else if (op->type == 23)
fprintf(fp, "Object %d is the volune of y axis cylinder\n", i);
else if (op->type == 24)
fprintf(fp, "Object %d is the surface of z axis cylinder\n", i);
else if (op->type == 25)
fprintf(fp, "Object %d is the volune of z axis cylinder\n", i);
if (op->bounce == 0)
fprintf(fp, "Oject may leave mesh\n");
else
fprintf(fp, "Oject center will bounce off of walls\n");
fprintf(fp, "Center starting at %lf %lf %lf\n",
op->orig_cen[0], op->orig_cen[1], op->orig_cen[2]);
fprintf(fp, "Center end at %lf %lf %lf\n",
op->cen[0], op->cen[1], op->cen[2]);
fprintf(fp, "Moving at %lf %lf %lf per timestep\n",
op->orig_move[0], op->orig_move[1], op->orig_move[2]);
fprintf(fp, " Rate relative to smallest cell size %lf %lf %lf\n",
op->orig_move[0]*((double) (mesh_size[0]*x_block_size)),
op->orig_move[1]*((double) (mesh_size[1]*y_block_size)),
op->orig_move[2]*((double) (mesh_size[2]*z_block_size)));
fprintf(fp, "Initial size %lf %lf %lf\n",
op->orig_size[0], op->orig_size[1], op->orig_size[2]);
fprintf(fp, "Final size %lf %lf %lf\n",
op->size[0], op->size[1], op->size[2]);
fprintf(fp, "Size increasing %lf %lf %lf per timestep\n",
op->inc[0], op->inc[1], op->inc[2]);
fprintf(fp, " Rate relative to smallest cell size %lf %lf %lf\n\n",
op->inc[0]*((double) (mesh_size[0]*x_block_size)),
op->inc[1]*((double) (mesh_size[1]*y_block_size)),
op->inc[2]*((double) (mesh_size[2]*z_block_size)));
}
}
fprintf(fp, "\nNumber of timesteps is %d\n", num_tsteps);
fprintf(fp, "Communicaion/computation stages per timestep is %d\n",
stages_per_ts);
fprintf(fp, "Will perform checksums every %d stages\n", checksum_freq);
fprintf(fp, "Will refine every %d timesteps\n", refine_freq);
if (plot_freq)
fprintf(fp, "Will plot results every %d timesteps\n", plot_freq);
else
fprintf(fp, "Will not plot results\n");
fprintf(fp, "Calculate on %d variables with %d point stencil\n",
num_vars, stencil);
fprintf(fp, "Communicate %d variables at a time\n", comm_vars);
fprintf(fp, "Error tolorance for variable sums is 10^(-%d)\n", error_tol);
fprintf(fp, "\nTotal time for test: (sec): %lf\n\n", average[0]);
fprintf(fp, "\nNumber of malloc calls: %lf\n", average[110]);
fprintf(fp, "\nAmount malloced: %lf\n", average[111]);
fprintf(fp, "---------------------------------------------\n");
fprintf(fp, " Computational Performance\n");
fprintf(fp, "---------------------------------------------\n\n");
fprintf(fp, " Time: ave, stddev, min, max (sec): %lf\n\n",
average[38]);
fprintf(fp, " total GFLOPS: %lf\n", total_gflops);
fprintf(fp, " Average GFLOPS per rank: %lf\n\n", gflops_rank);
fprintf(fp, " Total floating point ops: %lf\n\n", total_fp_ops);
fprintf(fp, " Adds: %lf\n", total_fp_adds);
fprintf(fp, " Divides: %lf\n\n", total_fp_divs);
fprintf(fp, "---------------------------------------------\n");
fprintf(fp, " Interblock communication\n");
fprintf(fp, "---------------------------------------------\n\n");
fprintf(fp, " Time: ave, stddev, min, max (sec): %lf\n\n",
average[37]);
for (i = 0; i < 4; i++) {
if (i == 0)
fprintf(fp, "\nTotal communication:\n\n");
else if (i == 1)
fprintf(fp, "\nX direction communication statistics:\n\n");
else if (i == 2)
fprintf(fp, "\nY direction communication statistics:\n\n");
else
fprintf(fp, "\nZ direction communication statistics:\n\n");
fprintf(fp, " average stddev minimum maximum\n");
fprintf(fp, " Total time : %lf\n", average[1+9*i]);
fprintf(fp, " Exchange same level : %lf\n", average[5+9*i]);
fprintf(fp, " Exchange diff level : %lf\n", average[6+9*i]);
fprintf(fp, " Apply BC : %lf\n", average[7+9*i]);
fprintf(fp, " Faces exchanged same : %lf\n", average[75+9*i]);
fprintf(fp, " Faces exchanged diff : %lf\n", average[76+9*i]);
fprintf(fp, " Faces with BC applied : %lf\n", average[74+9*i]);
}
fprintf(fp, "\n---------------------------------------------\n");
fprintf(fp, " Gridsum performance\n");
fprintf(fp, "---------------------------------------------\n\n");
fprintf(fp, " Time: ave, stddev, min, max (sec): %lf\n\n",
average[39]);
fprintf(fp, " calc: ave, stddev, min, max (sec): %lf\n\n",
average[41]);
fprintf(fp, " total number: %d\n", total_red);
fprintf(fp, " number per timestep: %d\n\n", num_vars);
fprintf(fp, "---------------------------------------------\n");
fprintf(fp, " Mesh Refinement\n");
fprintf(fp, "---------------------------------------------\n\n");
fprintf(fp, " Time: ave, stddev, min, max (sec): %lf\n\n",
average[42]);
fprintf(fp, " Number of refinement steps: %d\n\n", nrs);
fprintf(fp, " Total blocks : %ld\n", total_blocks);
fprintf(fp, " Blocks/timestep ave, min, max : %lf %d %d\n",
((double) total_blocks)/((double) (num_tsteps*stages_per_ts)),
nb_min, nb_max);
fprintf(fp, " Max blocks on a processor at any time: %d\n",
global_max_b);
fprintf(fp, " total blocks split : %lf\n", average[104]);
fprintf(fp, " total blocks reformed : %lf\n\n", average[105]);
fprintf(fp, " Time:\n");
fprintf(fp, " compare objects : %lf\n", average[43]);
fprintf(fp, " mark refine/coarsen : %lf\n", average[44]);
fprintf(fp, " split blocks : %lf\n", average[46]);
fprintf(fp, " total coarsen blocks: %lf\n", average[47]);
fprintf(fp, " misc time : %lf\n", average[45]);
if (target_active) {
fprintf(fp, " total target active : %lf\n", average[52]);
fprintf(fp, " reduce blocks : %lf\n", average[53]);
fprintf(fp, " decide and comm : %lf\n", average[54]);
fprintf(fp, " coarsen blocks : %lf\n", average[58]);
fprintf(fp, " add blocks : %lf\n", average[59]);
fprintf(fp, " decide and comm : %lf\n", average[60]);
fprintf(fp, " split blocks : %lf\n", average[61]);
}
fprintf(fp, "---------------------------------------------\n");
fprintf(fp, " Plot\n");
fprintf(fp, "---------------------------------------------\n\n");
fprintf(fp, " Time: ave, stddev, min, max (sec): %lf\n\n",
average[67]);
fprintf(fp, " Number of plot steps: %d\n", nps);
fprintf(fp, "\n ================== End report ===================\n");
fclose(fp);
}
if (report_perf & 4) {
printf("\n ================ Start report ===================\n\n");
printf(" Mantevo miniAMR\n");
printf(" version %s\n\n", version);
printf("serial run on 1 rank\n");
printf("initial blocks per rank %d x %d x %d\n", init_block_x,
init_block_y, init_block_z);
printf("block size %d x %d x %d\n", x_block_size, y_block_size,
z_block_size);
if (permute)
printf("Order of exchanges permuted\n");
printf("Maximum number of blocks per rank is %d\n", max_num_blocks);
if (target_active)
printf("Target number of blocks per rank is %d\n", target_active);
if (target_max)
printf("Target max number of blocks per rank is %d\n", target_max);
if (target_min)
printf("Target min number of blocks per rank is %d\n", target_min);
if (code)
printf("Code set to code %d\n", code);
printf("Number of levels of refinement is %d\n", num_refine);
printf("Blocks can change by %d levels per refinement step\n",
block_change);
if (refine_ghost)
printf("Ghost cells will be used determine is block is refined\n");
if (uniform_refine)
printf("\nBlocks will be uniformly refined\n");
else {
printf("\nBlocks will be refined by %d objects\n\n", num_objects);
for (i = 0; i < num_objects; i++) {
op = &objects[i];
if (op->type == 0)
printf("Object %d is the surface of a rectangle\n", i);
else if (op->type == 1)
printf("Object %d is the volume of a rectangle\n", i);
else if (op->type == 2)
printf("Object %d is the surface of a spheroid\n", i);
else if (op->type == 3)
printf("Object %d is the volume of a spheroid\n", i);
else if (op->type == 4)
printf("Object %d is the surface of x+ hemispheroid\n", i);
else if (op->type == 5)
printf("Object %d is the volume of x+ hemispheroid\n", i);
else if (op->type == 6)
printf("Object %d is the surface of x- hemispheroid\n", i);
else if (op->type == 7)
printf("Object %d is the volume of x- hemispheroid\n", i);
else if (op->type == 8)
printf("Object %d is the surface of y+ hemispheroid\n", i);
else if (op->type == 9)
printf("Object %d is the volume of y+ hemispheroid\n", i);
else if (op->type == 10)
printf("Object %d is the surface of y- hemispheroid\n", i);
else if (op->type == 11)
printf("Object %d is the volume of y- hemispheroid\n", i);
else if (op->type == 12)
printf("Object %d is the surface of z+ hemispheroid\n", i);
else if (op->type == 13)
printf("Object %d is the volume of z+ hemispheroid\n", i);
else if (op->type == 14)
printf("Object %d is the surface of z- hemispheroid\n", i);
else if (op->type == 15)
printf("Object %d is the volume of z- hemispheroid\n", i);
else if (op->type == 20)
printf("Object %d is the surface of x axis cylinder\n", i);
else if (op->type == 21)
printf("Object %d is the volune of x axis cylinder\n", i);
else if (op->type == 22)
printf("Object %d is the surface of y axis cylinder\n", i);
else if (op->type == 23)
printf("Object %d is the volune of y axis cylinder\n", i);
else if (op->type == 24)
printf("Object %d is the surface of z axis cylinder\n", i);
else if (op->type == 25)
printf("Object %d is the volune of z axis cylinder\n", i);
if (op->bounce == 0)
printf("Oject may leave mesh\n");
else
printf("Oject center will bounce off of walls\n");
printf("Center starting at %lf %lf %lf\n",
op->orig_cen[0], op->orig_cen[1], op->orig_cen[2]);
printf("Center end at %lf %lf %lf\n",
op->cen[0], op->cen[1], op->cen[2]);
printf("Moving at %lf %lf %lf per timestep\n",
op->orig_move[0], op->orig_move[1], op->orig_move[2]);
printf(" Rate relative to smallest cell size %lf %lf %lf\n",
op->orig_move[0]*((double) (mesh_size[0]*x_block_size)),
op->orig_move[1]*((double) (mesh_size[1]*y_block_size)),
op->orig_move[2]*((double) (mesh_size[2]*z_block_size)));
printf("Initial size %lf %lf %lf\n",
op->orig_size[0], op->orig_size[1], op->orig_size[2]);
printf("Final size %lf %lf %lf\n",
op->size[0], op->size[1], op->size[2]);
printf("Size increasing %lf %lf %lf per timestep\n",
op->inc[0], op->inc[1], op->inc[2]);
printf(" Rate relative to smallest cell size %lf %lf %lf\n\n",
op->inc[0]*((double) (mesh_size[0]*x_block_size)),
op->inc[1]*((double) (mesh_size[1]*y_block_size)),
op->inc[2]*((double) (mesh_size[2]*z_block_size)));
}
}
printf("\nNumber of timesteps is %d\n", num_tsteps);
printf("Communicaion/computation stages per timestep is %d\n",
stages_per_ts);
printf("Will perform checksums every %d stages\n", checksum_freq);
printf("Will refine every %d timesteps\n", refine_freq);
if (plot_freq)
printf("Will plot results every %d timesteps\n", plot_freq);
else
printf("Will not plot results\n");
printf("Calculate on %d variables with %d point stencil\n",
num_vars, stencil);
printf("Communicate %d variables at a time\n", comm_vars);
printf("Error tolorance for variable sums is 10^(-%d)\n", error_tol);
/*
printf("\nTotal time for test (sec): %lf\n\n", average[0]);
printf("\nNumber of malloc calls: %lf\n", average[110]);
printf("\nAmount malloced: %lf\n", average[111]);
printf("---------------------------------------------\n");
printf(" Computational Performance\n");
printf("---------------------------------------------\n\n");
printf(" Time: ave, stddev, min, max (sec): %lf\n\n", average[38]);
printf(" total GFLOPS: %lf\n", total_gflops);
printf(" Average GFLOPS per rank: %lf\n\n", gflops_rank);
printf(" Total floating point ops: %lf\n\n", total_fp_ops);
printf(" Adds: %lf\n", total_fp_adds);
printf(" Divides: %lf\n\n", total_fp_divs);
printf("---------------------------------------------\n");
printf(" Interblock communication\n");
printf("---------------------------------------------\n\n");
printf(" Time: ave, stddev, min, max (sec): %lf\n\n", average[37]);
for (i = 0; i < 4; i++) {
if (i == 0)
printf("\nTotal communication:\n\n");
else if (i == 1)
printf("\nX direction communication statistics:\n\n");
else if (i == 2)
printf("\nY direction communication statistics:\n\n");
else
printf("\nZ direction communication statistics:\n\n");
printf(" Total time : %lf\n", average[1+9*i]);
printf(" Exchange same level : %lf\n", average[5+9*i]);
printf(" Exchange diff level : %lf\n", average[6+9*i]);
printf(" Apply BC : %lf\n", average[7+9*i]);
printf(" Faces exchanged same : %lf\n", average[75+9*i]);
printf(" Faces exchanged diff : %lf\n", average[76+9*i]);
printf(" Faces with BC applied : %lf\n", average[74+9*i]);
}
printf("\n---------------------------------------------\n");
printf(" Gridsum performance\n");
printf("---------------------------------------------\n\n");
printf(" Time: ave, stddev, min, max (sec): %lf\n\n", average[39]);
printf(" calc: ave, stddev, min, max (sec): %lf\n\n",
average[41]);
printf(" total number: %d\n", total_red);
printf(" number per timestep: %d\n\n", num_vars);
printf("---------------------------------------------\n");
printf(" Mesh Refinement\n");
printf("---------------------------------------------\n\n");
printf(" Time: ave, stddev, min, max (sec): %lf\n\n", average[42]);
printf(" Number of refinement steps: %d\n\n", nrs);
printf(" Total blocks : %ld\n", total_blocks);
printf(" Blocks/timestep ave, min, max : %lf %d %d\n",
((double) total_blocks)/((double) (num_tsteps*stages_per_ts)),
nb_min, nb_max);
printf(" Max blocks on a processor at any time: %d\n",
global_max_b);
printf(" total blocks split : %lf\n", average[104]);
printf(" total blocks reformed : %lf\n\n", average[105]);
printf(" Time:\n");
printf(" compare objects : %lf\n", average[43]);
printf(" mark refine/coarsen : %lf\n", average[44]);
printf(" split blocks : %lf\n", average[46]);
printf(" total coarsen blocks: %lf\n", average[47]);
printf(" misc time : %lf\n", average[45]);
if (target_active) {
printf(" total target active : %lf\n", average[52]);
printf(" reduce blocks : %lf\n", average[53]);
printf(" decide and comm : %lf\n", average[54]);
printf(" coarsen blocks : %lf\n", average[58]);
printf(" add blocks : %lf\n", average[59]);
printf(" decide and comm : %lf\n", average[60]);
printf(" split blocks : %lf\n", average[61]);
}
printf("---------------------------------------------\n");
printf(" Plot\n");
printf("---------------------------------------------\n\n");
printf(" Time: ave, stddev, min, max (sec): %lf\n\n", average[67]);
printf(" Number of plot steps: %d\n", nps);
printf("\n ================== End report ===================\n");
*/
}
}
}
// Collect the raw timers and counters into the global average[] array that
// profile() prints from.
//
// Slot layout as used here:
//   0            total run timer
//   1, 5, 6, 7   communication totals (all / same level / diff level / BC),
//                summed over the three directions
//   10+9*i ...   per-direction communication timers (i = 0..2); profile()
//                labels these X, Y, Z
//   37..67       individual phase timers (comm, calc, checksum, refine,
//                target, plot)
//   74..76       face counters summed over directions; 83+9*i.. per direction
//   104, 105     blocks split / reformed
//   110..115     malloc call counts and sizes
// Every slot not listed is published as 0.0.
void calculate_results(void)
{
    enum { NUM_RESULTS = 128 };
    // Zero-fill first: only some slots are assigned below, but all of them
    // are copied into average[].  Without this the unassigned slots would
    // carry indeterminate stack contents.
    double results[NUM_RESULTS] = {0.0};
    int i;

    results[0] = timer_all;

    // Communication timers: store each direction and accumulate the totals.
    for (i = 0; i < 3; i++) {
        results[10+9*i] = timer_comm_dir[i];
        results[14+9*i] = timer_comm_same[i];
        results[15+9*i] = timer_comm_diff[i];
        results[16+9*i] = timer_comm_bc[i];

        results[1] += results[10+9*i];
        results[5] += results[14+9*i];
        results[6] += results[15+9*i];
        results[7] += results[16+9*i];
    }

    results[37] = timer_comm_all;
    results[38] = timer_calc_all;
    results[39] = timer_cs_all;
    results[41] = timer_cs_calc;
    results[42] = timer_refine_all;
    results[43] = timer_refine_co;
    results[44] = timer_refine_mr;
    results[45] = timer_refine_cc;
    results[46] = timer_refine_sb;
    results[47] = timer_cb_all;
    results[52] = timer_target_all;
    results[53] = timer_target_rb;
    results[54] = timer_target_dc;
    results[58] = timer_target_cb;
    results[59] = timer_target_ab;
    results[60] = timer_target_da;
    results[61] = timer_target_sb;
    results[67] = timer_plot;

    // Face counters: store each direction and accumulate the totals.
    for (i = 0; i < 3; i++) {
        results[83+9*i] = (double) counter_bc[i];
        results[84+9*i] = (double) counter_same[i];
        results[85+9*i] = (double) counter_diff[i];

        results[74] += results[83+9*i];
        results[75] += results[84+9*i];
        results[76] += results[85+9*i];
    }

    results[104] = (double) num_refined;
    results[105] = (double) num_reformed;

    // Allocation statistics: overall, during initialization, and the
    // difference between the two.
    results[110] = (double) counter_malloc;
    results[111] = size_malloc;
    results[112] = (double) counter_malloc_init;
    results[113] = size_malloc_init;
    results[114] = (double) (counter_malloc - counter_malloc_init);
    results[115] = size_malloc - size_malloc_init;

    // Serial build: the "average" of each value is the value itself.
    for (i = 0; i < NUM_RESULTS; i++)
        average[i] = results[i];
}
// Reset every profiling timer and counter to zero so that a fresh set of
// measurements can be accumulated.
void init_profile(void)
{
    int dir;

    // Whole-run, communication, computation and checksum timers.
    timer_all = timer_comm_all = 0.0;
    timer_calc_all = 0.0;
    timer_cs_all = timer_cs_calc = 0.0;

    // Refinement and coarsening timers.
    timer_refine_all = 0.0;
    timer_refine_co = timer_refine_mr = 0.0;
    timer_refine_cc = timer_refine_sb = 0.0;
    timer_cb_all = 0.0;

    // Target-block-count timers.
    timer_target_all = 0.0;
    timer_target_rb = timer_target_dc = 0.0;
    timer_target_cb = timer_target_ab = 0.0;
    timer_target_da = timer_target_sb = 0.0;

    timer_plot = 0.0;

    // Per-direction communication timers and face counters.
    for (dir = 0; dir < 3; dir++) {
        timer_comm_dir[dir] = 0.0;
        timer_comm_same[dir] = 0.0;
        timer_comm_diff[dir] = 0.0;
        timer_comm_bc[dir] = 0.0;

        counter_bc[dir] = 0;
        counter_same[dir] = 0;
        counter_diff[dir] = 0;
    }

    // Scalar event counters.
    total_blocks = 0;
    nrs = 0;
    nps = 0;
    num_refined = 0;
    num_reformed = 0;
    total_red = 0;
}