blob: b237bf6b439e4cad363b227aefb759f56165c6bb [file] [log] [blame]
/*
* Copyright (c) 2011-2012, Los Alamos National Security, LLC.
* All rights Reserved.
*
* Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced
* under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
* Laboratory (LANL), which is operated by Los Alamos National Security, LLC
* for the U.S. Department of Energy. The U.S. Government has rights to use,
* reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
* ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
* ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
* to produce derivative works, such modified software should be clearly marked,
* so as not to confuse it with the version available from LANL.
*
* Additionally, redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the Los Alamos National Security, LLC, Los Alamos
* National Laboratory, LANL, the U.S. Government, nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
* NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
* SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* CLAMR -- LA-CC-11-094
* This research code is being developed as part of the
* 2011 X Division Summer Workshop for the express purpose
* of a collaborative code for development of ideas in
* the implementation of AMR codes for Exascale platforms
*
* AMR implementation of the Wave code previously developed
* as a demonstration code for regular grids on Exascale platforms
* as part of the Supercomputing Challenge and Los Alamos
* National Laboratory
*
* Authors: Bob Robey XCP-2 brobey@lanl.gov
* Neal Davis davis68@lanl.gov, davis68@illinois.edu
* David Nicholaeff dnic@lanl.gov, mtrxknight@aol.com
* Dennis Trujillo dptrujillo@lanl.gov, dptru10@gmail.com
*
*/
#ifdef HAVE_MPI
#include "mpi.h"
#endif
#include <sys/time.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <list>
#include <algorithm>
#include "partition.h"
#include "KDTree.h"
#include "mesh.h"
#ifdef HAVE_MPI
#include "s7.h"
#endif
#include "zorder.h"
#include "timer.h"
#include "hsfc.h"
#ifndef DEBUG
#define DEBUG 0
#endif
typedef unsigned int uint;
int measure_type;
int meas_count = 0;
double meas_sum_average = 0.0;
extern bool localStencil;
extern int initial_order;
extern int cycle_reorder;
void Mesh::partition_measure(void)
{
if (measure_type != NO_PARTITION_MEASURE){
int ntX = TILE_SIZE;
static double offtile_ratio = 0.0;
uint num_groups = (ncells + TILE_SIZE - 1)/TILE_SIZE;
if (measure_type == WITH_DUPLICATES) {
int i = 0;
#ifdef _OPENMP
#pragma omp for reduction(+:offtile_ratio)
#endif
for (uint group_id=0; group_id < num_groups; group_id ++){
int start_idx = group_id * ntX;
int end_idx = (group_id + 1) * ntX;
int offtile =0;
for (uint ic = 0; ic < TILE_SIZE; ic++, i++){
if (i >= ncells) continue;
//taken from wave_kern_calc.cl 'setup tile' kernel
if (nlft[i] < start_idx || nlft[i] >= end_idx) offtile++;
if (level[nlft[i]] > level[i] &&
(ntop[nlft[i]] < start_idx || ntop[nlft[i]] >= end_idx) ) offtile++;
if (nrht[i] < start_idx || nrht[i] >= end_idx) offtile++;
if (level[nrht[i]] > level[i] &&
(ntop[nrht[i]] < start_idx || ntop[nrht[i]] >= end_idx) ) offtile++;
if (nbot[i] < start_idx || nbot[i] >= end_idx) offtile++;
if (level[nbot[i]] > level[i] &&
(nrht[nbot[i]] < start_idx || nrht[nbot[i]] >= end_idx) ) offtile++;
if (ntop[i] < start_idx || ntop[i] >= end_idx) offtile++;
if (level[ntop[i]] > level[i] &&
(nrht[ntop[i]] < start_idx || nrht[ntop[i]] >= end_idx) ) offtile++;
}
offtile_ratio += (double)offtile/(double)(TILE_SIZE);
//printf("DEBUG Ratio of surface area to volume is equal to %d / %d ratio is %lf\n", offtile, TILE_SIZE, (double)offtile/(double)TILE_SIZE);
}
} else if (measure_type == WITHOUT_DUPLICATES) {
int i = 0;
#ifdef _OPENMP
#pragma omp for reduction(+:offtile_ratio)
#endif
for (uint group_id=0; group_id < num_groups; group_id ++){
list<int> offtile_list;
int start_idx = group_id * ntX;
int end_idx = (group_id + 1) * ntX;
for (uint ic = 0; ic < TILE_SIZE; ic++, i++){
if (i >= ncells) continue;
if (nlft[i] < start_idx || nlft[i] >= end_idx) offtile_list.push_back(nlft[i]);
if (level[nlft[i]] > level[i] &&
(ntop[nlft[i]] < start_idx || ntop[nlft[i]] >= end_idx) ) offtile_list.push_back(ntop[nlft[i]]);
if (nrht[i] < start_idx || nrht[i] >= end_idx) offtile_list.push_back(nrht[i]);
if (level[nrht[i]] > level[i] &&
(ntop[nrht[i]] < start_idx || ntop[nrht[i]] >= end_idx) ) offtile_list.push_back(ntop[nrht[i]]);
if (nbot[i] < start_idx || nbot[i] >= end_idx) offtile_list.push_back(nbot[i]);
if (level[nbot[i]] > level[i] &&
(nrht[nbot[i]] < start_idx || nrht[nbot[i]] >= end_idx) ) offtile_list.push_back(nrht[nbot[i]]);
if (ntop[i] < start_idx || ntop[i] >= end_idx) offtile_list.push_back(ntop[i]);
if (level[ntop[i]] > level[i] &&
(nrht[ntop[i]] < start_idx || nrht[ntop[i]] >= end_idx) ) offtile_list.push_back(nrht[ntop[i]]);
}
offtile_list.sort();
offtile_list.unique();
offtile_ratio += (double)offtile_list.size()/(double)(TILE_SIZE);
//printf("DEBUG Ratio of surface area to volume is equal to %d / %d ratio is %lf\n", offtile, TILE_SIZE, (double)offtile/(double)TILE_SIZE);
}
} else if (measure_type == CVALUE) {
int i = 0;
#ifdef _OPENMP
#pragma omp for reduction(+:offtile_ratio)
#endif
for (uint group_id=0; group_id < num_groups; group_id ++){
list<int> offtile_list;
int start_idx = group_id * ntX;
int end_idx = (group_id + 1) * ntX;
for (uint ic = 0; ic < TILE_SIZE; ic++, i++){
if (i >= ncells) continue;
if (nlft[i] < start_idx || nlft[i] >= end_idx) offtile_list.push_back(nlft[i]);
if (level[nlft[i]] > level[i] &&
(ntop[nlft[i]] < start_idx || ntop[nlft[i]] >= end_idx) ) offtile_list.push_back(ntop[nlft[i]]);
if (nrht[i] < start_idx || nrht[i] >= end_idx) offtile_list.push_back(nrht[i]);
if (level[nrht[i]] > level[i] &&
(ntop[nrht[i]] < start_idx || ntop[nrht[i]] >= end_idx) ) offtile_list.push_back(ntop[nrht[i]]);
if (nbot[i] < start_idx || nbot[i] >= end_idx) offtile_list.push_back(nbot[i]);
if (level[nbot[i]] > level[i] &&
(nrht[nbot[i]] < start_idx || nrht[nbot[i]] >= end_idx) ) offtile_list.push_back(nrht[nbot[i]]);
if (ntop[i] < start_idx || ntop[i] >= end_idx) offtile_list.push_back(ntop[i]);
if (level[ntop[i]] > level[i] &&
(nrht[ntop[i]] < start_idx || nrht[ntop[i]] >= end_idx) ) offtile_list.push_back(nrht[ntop[i]]);
}
offtile_list.sort();
offtile_list.unique();
offtile_ratio += (double)offtile_list.size()/(4*sqrt((double)(TILE_SIZE)));
//printf("DEBUG Ratio of surface area to volume is equal to %d / %d ratio is %lf\n", offtile, TILE_SIZE, (double)offtile/(double)TILE_SIZE);
}
} else if (measure_type == CSTARVALUE) {
int i = 0;
#ifdef _OPENMP
#pragma omp for reduction(+:offtile_ratio)
#endif
for (uint group_id=0; group_id < num_groups; group_id ++){
list<int> offtile_list;
list<int> offtile_cache_lines; // Assumes memory is aligned
int cache_line_size = 4; // Some could be 8, or more?
int start_idx = group_id * ntX;
int end_idx = (group_id + 1) * ntX;
for (uint ic = 0; ic < TILE_SIZE; ic++, i++){
if (i >= ncells) continue;
if (nlft[i] < start_idx || nlft[i] >= end_idx) {
offtile_list.push_back(nlft[i]);
offtile_cache_lines.push_back(nlft[i]/cache_line_size);
}
if (level[nlft[i]] > level[i] && (ntop[nlft[i]] < start_idx || ntop[nlft[i]] >= end_idx) ) {
offtile_list.push_back(ntop[nlft[i]]);
offtile_cache_lines.push_back(ntop[nlft[i]]/cache_line_size);
}
if (nrht[i] < start_idx || nrht[i] >= end_idx) {
offtile_list.push_back(nrht[i]);
offtile_cache_lines.push_back(nrht[i]/cache_line_size);
}
if (level[nrht[i]] > level[i] && (ntop[nrht[i]] < start_idx || ntop[nrht[i]] >= end_idx) ) {
offtile_list.push_back(ntop[nrht[i]]);
offtile_cache_lines.push_back(ntop[nrht[i]]/cache_line_size);
}
if (nbot[i] < start_idx || nbot[i] >= end_idx) {
offtile_list.push_back(nbot[i]);
offtile_cache_lines.push_back(nbot[i]/cache_line_size);
}
if (level[nbot[i]] > level[i] && (nrht[nbot[i]] < start_idx || nrht[nbot[i]] >= end_idx) ) {
offtile_list.push_back(nrht[nbot[i]]);
offtile_cache_lines.push_back(nrht[nbot[i]]/cache_line_size);
}
if (ntop[i] < start_idx || ntop[i] >= end_idx) {
offtile_list.push_back(ntop[i]);
offtile_cache_lines.push_back(ntop[i]/cache_line_size);
}
if (level[ntop[i]] > level[i] && (nrht[ntop[i]] < start_idx || nrht[ntop[i]] >= end_idx) ) {
offtile_list.push_back(nrht[ntop[i]]);
offtile_cache_lines.push_back(nrht[ntop[i]]/cache_line_size);
}
}
offtile_list.sort();
offtile_list.unique();
offtile_cache_lines.sort();
offtile_cache_lines.unique();
double s_ngeom = (double)(offtile_list.size());
double q_ngeom = (double)(offtile_cache_lines.size());
double ngeom = (double)(TILE_SIZE);
double cover = (double)(cache_line_size);
// offtile_ratio += (s_ngeom * q_ngeom) / (4*sqrt(ngeom)*2*(1+(ngeom+cache_line_size-1)/cache_line_size));
// offtile_ratio += (q_ngeom) / (2*sqrt(ngeom)+2*((sqrt(ngeom)+cover-1)/cover));
// offtile_ratio += (q_ngeom) / ( (8*sqrt(ngeom)+cover-1)/cover );
ngeom = sqrt(ngeom);
offtile_ratio += (s_ngeom*q_ngeom*cover) / ( 4 * ngeom * (8*ngeom+cover-1) );
//printf("DEBUG Ratio of surface area to volume is equal to %d / %d ratio is %lf\n", offtile, TILE_SIZE, (double)offtile/(double)TILE_SIZE);
}
}
// printf("DEBUG Ratio of surface area to volume is equal to %d / %d \n", offtile, ontile);
#ifdef _OPENMP
#pragma omp master
{
#endif
meas_count ++;
meas_sum_average += offtile_ratio/(double)num_groups;
// printf("DEBUG %d icount %d sum_average %lf\n",__LINE__,icount, sum_average);
#ifdef _OPENMP
}
#endif
} // if PARTITION TYPE
}
void Mesh::print_partition_measure()
{
if (meas_count != 0) {
if (measure_type == NO_PARTITION_MEASURE) {
if (mype == 0) printf("No Partition Measure\n");
} else if (measure_type == WITH_DUPLICATES) {
parallel_output("Average surface area to volume ratio ", meas_sum_average/(double)meas_count, 0, "with duplicates");
} else if (measure_type == WITHOUT_DUPLICATES) {
parallel_output("Average surface area to volume ratio ", meas_sum_average/(double)meas_count, 0, "without duplicates");
} else if (measure_type == CVALUE) {
parallel_output("Partition Quality Avg C value ", meas_sum_average/(double)meas_count, 0, "");
} else if (measure_type == CSTARVALUE){
parallel_output("Partition Quality Avg C* value ", meas_sum_average/(double)meas_count, 0, "");
}
}
if (numpe > 1){
parallel_output("The MPI surface area to volume ratio ", offtile_ratio_local, 0, "without duplicates");
}
}
void Mesh::print_partition_type()
{
if (mype == 0) {
if (initial_order == ORIGINAL_ORDER) {
printf("Initial order is naive.");
} else if (initial_order == HILBERT_SORT) {
printf("Initial order is Hilbert sort.");
} else if (initial_order == HILBERT_PARTITION) {
printf("Initial order is Hilbert partitionr.");
} else if (initial_order == ZORDER) {
printf("Initial order is Z order.");
}
if (cycle_reorder == ORIGINAL_ORDER) {
printf(" No cycle reorder.");
} else if (cycle_reorder == HILBERT_SORT) {
printf(" Cycle reorder is Hilbert sort.");
} else if (cycle_reorder == HILBERT_PARTITION) {
printf(" Cycle reorder is Hilbert partition.");
} else if (cycle_reorder == ZORDER) {
printf(" Cycle reorder is Z order.");
}
if (localStencil) {
printf(" Local Stencil is on.\n");
} else {
printf("\n");
}
}
}
void Mesh::partition_cells(
int numpe, //
vector<int> &z_order, // Resulting index ordering.
enum partition_method method) // Assigned partitioning method.
{
int *info; //
double iscale, //
jscale; //
int imax, // Maximum x-index.
jmax; // Maximum y-index.
vector<int> z_index; // Ordered curve from hsfc.
vector<int> i_scaled; // x-indices normalized to a scale of [0, 1] for hsfc.
vector<int> j_scaled; // y-indices normalized to a scale of [0, 1] for hsfc.
vector<double> iunit; //
vector<double> junit; //
struct timeval tstart_cpu;
cpu_timer_start(&tstart_cpu);
// Initialize ordered curve index.
z_index.resize(ncells, 0);
//z_order.resize(ncells, 0);
if (parallel) {
#ifdef HAVE_MPI
nsizes.resize(numpe);
ndispl.resize(numpe);
MPI_Allgather(&ncells, 1, MPI_INT, &nsizes[0], 1, MPI_INT, MPI_COMM_WORLD);
ndispl[0]=0;
for (int ip=1; ip<numpe; ip++){
ndispl[ip] = ndispl[ip-1] + nsizes[ip-1];
}
noffset=0;
for (int ip=0; ip<mype; ip++){
noffset += nsizes[ip];
}
#endif
} else {
// Adjust the number of required work items to the number of cells.
proc.resize(ncells);
// Decompose the domain equitably.
calc_distribution(numpe);
noffset = 0;
}
// Partition cells according to one of several possible orderings.
int have_spatial_variables=0;
switch (method)
{ case ORIGINAL_ORDER:
// Set z_order to the current cell order.
for (uint ic = 0; ic < ncells; ++ic)
{ z_order[ic] = ic; }
cpu_timers[MESH_TIMER_PARTITION] += cpu_timer_stop(tstart_cpu);
return;
break;
case HILBERT_SORT:
// Resort the curve by Hilbert order.
have_spatial_variables = 1;
if (x.size() < ncells) {
calc_spatial_coordinates(0);
have_spatial_variables = 0;
}
calc_centerminmax();
iunit.resize(ncells);
junit.resize(ncells);
// Get the range of values in the x- and y-directions and make the scale square.
iscale = 1.0 / (xcentermax - xcentermin);
jscale = 1.0 / (ycentermax - ycentermin);
// Scale the indices to a normalized [0, 1] range for hsfc.
for (uint ic = 0; ic < ncells; ++ic){
iunit[ic] = (x[ic] + 0.5 * dx[ic] - xcentermin) * iscale;
junit[ic] = (y[ic] + 0.5 * dy[ic] - ycentermin) * jscale;
}
if (have_spatial_variables == 0){
x.clear();
dx.clear();
y.clear();
dy.clear();
}
if (parallel){
#ifdef HAVE_MPI
info = (int *)malloc(sizeof(int) * 3 * ncells_global);
vector<double>iunit_global(ncells_global);
vector<double>junit_global(ncells_global);
vector<int>z_order_global(ncells_global);
MPI_Allgatherv(&iunit[0], ncells, MPI_DOUBLE, &iunit_global[0], &nsizes[0], &ndispl[0], MPI_DOUBLE, MPI_COMM_WORLD);
MPI_Allgatherv(&junit[0], ncells, MPI_DOUBLE, &junit_global[0], &nsizes[0], &ndispl[0], MPI_DOUBLE, MPI_COMM_WORLD);
// Sort the mesh into an ordered space-filling curve from hsfc.
hsfc2sort(ncells_global, &iunit_global[0], &junit_global[0], 0, info, 1);
// Copy the cell order information from info into z_order.
for (uint ic = 0; ic < ncells_global; ++ic)
{ z_order_global[ic] = info[ic]; }
free(info);
// Order the mesh according to the calculated order (note that z_order is for both curves).
vector<int> int_global(ncells_global);
vector<int> int_global_new(ncells_global);
// gather, reorder and scatter i
MPI_Allgatherv(&i[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
for (int ic = 0; ic<(int)ncells_global; ic++){
int_global_new[ic] = int_global[z_order_global[ic]];
}
MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &i[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
// gather, reorder and scatter j
MPI_Allgatherv(&j[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
for (int ic = 0; ic<(int)ncells_global; ic++){
int_global_new[ic] = int_global[z_order_global[ic]];
}
MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &j[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
// gather, reorder and scatter level
MPI_Allgatherv(&level[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
for (int ic = 0; ic<(int)ncells_global; ic++){
int_global_new[ic] = int_global[z_order_global[ic]];
}
MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &level[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
// It is faster just to recalculate these variables instead of communicating them
if (mesh_memory.get_memory_size(celltype) >= ncells) {
calc_celltype(mesh_memory.get_memory_size(celltype));
}
if (have_spatial_variables) {
calc_spatial_coordinates(0);
}
if (mesh_memory.get_memory_size(nlft) >= ncells) {
vector<int> inv_z_order(ncells_global);
for (int ic = 0; ic<(int)ncells_global; ic++){
inv_z_order[z_order_global[ic]] = ic;
}
MPI_Allgatherv(&nlft[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
for (int ic = 0; ic<(int)ncells_global; ic++){
int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]];
}
MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &nlft[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Allgatherv(&nrht[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
for (int ic = 0; ic<(int)ncells_global; ic++){
int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]];
}
MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &nrht[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Allgatherv(&nbot[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
for (int ic = 0; ic<(int)ncells_global; ic++){
int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]];
}
MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &nbot[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Allgatherv(&ntop[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
for (int ic = 0; ic<(int)ncells_global; ic++){
int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]];
}
MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &ntop[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
}
MPI_Scatterv(&z_order_global[0], &nsizes[0], &ndispl[0], MPI_INT, &z_order[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
#endif
} else {
info = (int *)malloc(sizeof(int) * 3 * ncells);
// Sort the mesh into an ordered space-filling curve from hsfc.
hsfc2sort(ncells, &iunit[0], &junit[0], 0, info, 1);
// Copy the cell order information from info into z_order.
for (uint ic = 0; ic < ncells; ++ic)
{ z_order[ic] = info[ic]; }
free(info);
// Order the mesh according to the calculated order (note that z_order is for both curves).
vector<int> int_local(ncells);
mesh_memory.set_memory_attribute(nlft, 0x100);
mesh_memory.set_memory_attribute(nrht, 0x100);
mesh_memory.set_memory_attribute(nbot, 0x100);
mesh_memory.set_memory_attribute(ntop, 0x100);
mesh_memory.memory_reorder_all(&z_order[0]);
memory_reset_ptrs();
if (x.size() >= ncells) {
vector<spatial_t> real_local(ncells);
for (int ic = 0; ic<(int)ncells; ic++){
real_local[ic] = x[ic];
}
for (int ic = 0; ic<(int)ncells; ic++){
x[ic] = real_local[z_order[ic]];
}
for (int ic = 0; ic<(int)ncells; ic++){
real_local[ic] = dx[ic];
}
for (int ic = 0; ic<(int)ncells; ic++){
dx[ic] = real_local[z_order[ic]];
}
for (int ic = 0; ic<(int)ncells; ic++){
real_local[ic] = y[ic];
}
for (int ic = 0; ic<(int)ncells; ic++){
y[ic] = real_local[z_order[ic]];
}
for (int ic = 0; ic<(int)ncells; ic++){
real_local[ic] = dy[ic];
}
for (int ic = 0; ic<(int)ncells; ic++){
dy[ic] = real_local[z_order[ic]];
}
}
}
break;
case ZORDER:
// Resort the curve by z-order.
if (parallel) {
#ifdef HAVE_MPI
vector<int>i_global(ncells_global);
vector<int>j_global(ncells_global);
vector<int>level_global(ncells_global);
vector<int>z_index_global(ncells_global);
vector<int>z_order_global(ncells_global);
MPI_Allgatherv(&i[0], ncells, MPI_REAL, &i_global[0], &nsizes[0], &ndispl[0], MPI_REAL, MPI_COMM_WORLD);
MPI_Allgatherv(&j[0], ncells, MPI_REAL, &j_global[0], &nsizes[0], &ndispl[0], MPI_REAL, MPI_COMM_WORLD);
MPI_Allgatherv(&level[0], ncells, MPI_REAL, &level_global[0], &nsizes[0], &ndispl[0], MPI_REAL, MPI_COMM_WORLD);
i_scaled.resize(ncells_global);
j_scaled.resize(ncells_global);
//
imax = 0;
jmax = 0;
for (uint ic = 0; ic < ncells_global; ++ic)
{ if (i_global[ic] > imax) imax = i_global[ic];
if (j_global[ic] > jmax) jmax = j_global[ic]; }
//
iscale = 16.0 / (double)imax;
jscale = 16.0 / (double)jmax;
//
for (uint ic = 0; ic < ncells_global; ++ic)
{ i_scaled[ic]=(int) ( (double)i_global[ic]*iscale);
j_scaled[ic]=(int) ( (double)j_global[ic]*jscale); }
//
calc_zorder(ncells_global, &i_scaled[0], &j_scaled[0], &level_global[0], levmx, ibase, &z_index_global[0], &z_order_global[0]);
// Order the mesh according to the calculated order (note that z_order is for both curves).
vector<int> int_global(ncells_global);
vector<int> int_global_new(ncells_global);
// gather, reorder and scatter i
MPI_Allgatherv(&i[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
for (int ic = 0; ic<(int)ncells_global; ic++){
int_global_new[ic] = int_global[z_order_global[ic]];
}
MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &i[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
// gather, reorder and scatter j
MPI_Allgatherv(&j[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
for (int ic = 0; ic<(int)ncells_global; ic++){
int_global_new[ic] = int_global[z_order_global[ic]];
}
MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &j[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
// gather, reorder and scatter level
MPI_Allgatherv(&level[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
for (int ic = 0; ic<(int)ncells_global; ic++){
int_global_new[ic] = int_global[z_order_global[ic]];
}
MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &level[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
// It is faster just to recalculate these variables instead of communicating them
if (mesh_memory.get_memory_size(celltype) >= ncells) {
calc_celltype(mesh_memory.get_memory_size(celltype));
}
if (x.size() >= ncells) {
calc_spatial_coordinates(0);
}
if (mesh_memory.get_memory_size(nlft) >= ncells) {
vector<int> inv_z_order(ncells_global);
for (int ic = 0; ic<(int)ncells_global; ic++){
inv_z_order[z_order_global[ic]] = ic;
}
MPI_Allgatherv(&nlft[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
for (int ic = 0; ic<(int)ncells_global; ic++){
int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]];
}
MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &nlft[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Allgatherv(&nrht[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
for (int ic = 0; ic<(int)ncells_global; ic++){
int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]];
}
MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &nrht[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Allgatherv(&nbot[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
for (int ic = 0; ic<(int)ncells_global; ic++){
int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]];
}
MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &nbot[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Allgatherv(&ntop[0], ncells, MPI_INT, &int_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
for (int ic = 0; ic<(int)ncells_global; ic++){
int_global_new[ic] = inv_z_order[int_global[z_order_global[ic]]];
}
MPI_Scatterv(&int_global_new[0], &nsizes[0], &ndispl[0], MPI_INT, &ntop[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
}
MPI_Scatterv(&z_order_global[0], &nsizes[0], &ndispl[0], MPI_REAL, &z_order[0], ncells, MPI_REAL, 0, MPI_COMM_WORLD);
#endif
} else {
i_scaled.resize(ncells);
j_scaled.resize(ncells);
//
imax = 0;
jmax = 0;
for (uint ic = 0; ic < ncells; ++ic)
{ if (i[ic] > imax) imax = i[ic];
if (j[ic] > jmax) jmax = j[ic]; }
//
iscale = 16.0 / (double)imax;
jscale = 16.0 / (double)jmax;
//
for (uint ic = 0; ic < ncells; ++ic)
{ i_scaled[ic]=(int) ( (double)i[ic]*iscale);
j_scaled[ic]=(int) ( (double)j[ic]*jscale); }
//
calc_zorder(ncells, &i_scaled[0], &j_scaled[0], &level[0], levmx, ibase, &z_index[0], &z_order[0]);
// Order the mesh according to the calculated order (note that z_order is for both curves).
vector<int> int_local(ncells);
mesh_memory.set_memory_attribute(nlft, 0x100);
mesh_memory.set_memory_attribute(nrht, 0x100);
mesh_memory.set_memory_attribute(nbot, 0x100);
mesh_memory.set_memory_attribute(ntop, 0x100);
mesh_memory.memory_reorder_all(&z_order[0]);
memory_reset_ptrs();
if (x.size() >= ncells) {
vector<spatial_t> real_local(ncells);
for (int ic = 0; ic<(int)ncells; ic++){
real_local[ic] = x[ic];
}
for (int ic = 0; ic<(int)ncells; ic++){
x[ic] = real_local[z_order[ic]];
}
for (int ic = 0; ic<(int)ncells; ic++){
real_local[ic] = dx[ic];
}
for (int ic = 0; ic<(int)ncells; ic++){
dx[ic] = real_local[z_order[ic]];
}
for (int ic = 0; ic<(int)ncells; ic++){
real_local[ic] = y[ic];
}
for (int ic = 0; ic<(int)ncells; ic++){
y[ic] = real_local[z_order[ic]];
}
for (int ic = 0; ic<(int)ncells; ic++){
real_local[ic] = dy[ic];
}
for (int ic = 0; ic<(int)ncells; ic++){
dy[ic] = real_local[z_order[ic]];
}
}
}
break;
default:
// Note that HILBERT_PARTITION is not currently supported due to redundancy with HILBERT_SORT.
break;
}
// Output ordered mesh information.
if (DEBUG)
{ printf("orig index i j lev nlft nrht nbot ntop xlow xhigh ylow yhigh z index z order\n");
for (uint ic=0; ic<ncells; ic++){
printf(" %6d %4d %4d %4d %4d %4d %4d %4d ", index[ic], j[ic], i[ic], level[ic], nlft[ic], nrht[ic], nbot[ic], ntop[ic]);
printf(" %8.2lf %8.2lf %8.2lf %8.2lf", x[ic], x[ic]+dx[ic], y[ic], y[ic]+dy[ic]);
printf(" %6d %5d\n", z_index[ic], z_order[ic]); } }
cpu_timers[MESH_TIMER_PARTITION] += cpu_timer_stop(tstart_cpu);
}
// The distribution needs to be modified in order to spread out extra cells equitably among the work items.
void Mesh::calc_distribution(int numpe)
{
uint lsize = 0; //
for (int ip = 0; ip < numpe; ++ip) {
lsize += proc.size()/numpe;
if (ip < (int)proc.size()%numpe) lsize++;
for (int ic = 0; ic < (int)lsize; ic++) {
proc[ic] = ip;
}
}
}