| #include <stdlib.h> |
| #include <stdio.h> |
| #include <math.h> |
| |
| #include "utilities.h" |
| #include "HYPRE_struct_ls.h" |
| #include "krylov.h" |
| |
| /*-------------------------------------------------------------------------- |
| * Test driver for structured matrix interface (structured storage) |
| *--------------------------------------------------------------------------*/ |
| |
| /*---------------------------------------------------------------------- |
| * Standard 7-point laplacian in 3D with grid and anisotropy determined |
| * as command line arguments. Do `driver -help' for usage info. |
| *----------------------------------------------------------------------*/ |
| |
| int |
| main( int argc, |
| char *argv[] ) |
| { |
| int arg_index; |
| int print_usage; |
| int nx, ny, nz; |
| int P, Q, R; |
| int bx, by, bz; |
| double cx, cy, cz; |
| int solver_id; |
| |
| int A_num_ghost[6] = {0, 0, 0, 0, 0, 0}; |
| |
| HYPRE_StructMatrix A; |
| HYPRE_StructVector b; |
| HYPRE_StructVector x; |
| |
| HYPRE_StructSolver solver; |
| HYPRE_StructSolver precond; |
| int num_iterations; |
| int time_index; |
| double final_res_norm; |
| |
| int num_procs, myid; |
| |
| int p, q, r; |
| int dim; |
| int n_pre, n_post; |
| int nblocks, volume; |
| |
| int **iupper; |
| int **ilower; |
| |
| int istart[3]; |
| |
| int **offsets; |
| |
| HYPRE_StructGrid grid; |
| HYPRE_StructStencil stencil; |
| |
| int *stencil_indices; |
| double *values; |
| |
| int i, s, d; |
| int ix, iy, iz, ib; |
| |
| /*----------------------------------------------------------- |
| * Initialize some stuff |
| *-----------------------------------------------------------*/ |
| |
| /* Initialize MPI */ |
| MPI_Init(&argc, &argv); |
| |
| MPI_Comm_size(MPI_COMM_WORLD, &num_procs ); |
| MPI_Comm_rank(MPI_COMM_WORLD, &myid ); |
| |
| /*----------------------------------------------------------- |
| * Set defaults |
| *-----------------------------------------------------------*/ |
| |
| dim = 3; |
| |
| nx = 10; |
| ny = 10; |
| nz = 10; |
| |
| P = num_procs; |
| Q = 1; |
| R = 1; |
| |
| bx = 1; |
| by = 1; |
| bz = 1; |
| |
| cx = 1.0; |
| cy = 1.0; |
| cz = 1.0; |
| |
| n_pre = 1; |
| n_post = 1; |
| |
| solver_id = 0; |
| |
| istart[0] = -17; |
| istart[1] = 0 ; |
| istart[2] = 32; |
| |
| /*----------------------------------------------------------- |
| * Parse command line |
| *-----------------------------------------------------------*/ |
| |
| print_usage = 0; |
| arg_index = 1; |
| while (arg_index < argc) |
| { |
| if ( strcmp(argv[arg_index], "-n") == 0 ) |
| { |
| arg_index++; |
| nx = atoi(argv[arg_index++]); |
| ny = atoi(argv[arg_index++]); |
| nz = atoi(argv[arg_index++]); |
| } |
| else if ( strcmp(argv[arg_index], "-P") == 0 ) |
| { |
| arg_index++; |
| P = atoi(argv[arg_index++]); |
| Q = atoi(argv[arg_index++]); |
| R = atoi(argv[arg_index++]); |
| } |
| else if ( strcmp(argv[arg_index], "-b") == 0 ) |
| { |
| arg_index++; |
| bx = atoi(argv[arg_index++]); |
| by = atoi(argv[arg_index++]); |
| bz = atoi(argv[arg_index++]); |
| } |
| else if ( strcmp(argv[arg_index], "-c") == 0 ) |
| { |
| arg_index++; |
| cx = atof(argv[arg_index++]); |
| cy = atof(argv[arg_index++]); |
| cz = atof(argv[arg_index++]); |
| } |
| else if ( strcmp(argv[arg_index], "-v") == 0 ) |
| { |
| arg_index++; |
| n_pre = atoi(argv[arg_index++]); |
| n_post = atoi(argv[arg_index++]); |
| } |
| else if ( strcmp(argv[arg_index], "-d") == 0 ) |
| { |
| arg_index++; |
| dim = atoi(argv[arg_index++]); |
| } |
| else if ( strcmp(argv[arg_index], "-solver") == 0 ) |
| { |
| arg_index++; |
| solver_id = atoi(argv[arg_index++]); |
| } |
| else if ( strcmp(argv[arg_index], "-help") == 0 ) |
| { |
| print_usage = 1; |
| break; |
| } |
| else |
| { |
| break; |
| } |
| } |
| |
| /*----------------------------------------------------------- |
| * Print usage info |
| *-----------------------------------------------------------*/ |
| |
| if ( (print_usage) && (myid == 0) ) |
| { |
| printf("\n"); |
| printf("Usage: %s [<options>]\n", argv[0]); |
| printf("\n"); |
| printf(" -n <nx> <ny> <nz> : problem size per block\n"); |
| printf(" -P <Px> <Py> <Pz> : processor topology\n"); |
| printf(" -b <bx> <by> <bz> : blocking per processor\n"); |
| printf(" -c <cx> <cy> <cz> : diffusion coefficients\n"); |
| printf(" -v <n_pre> <n_post> : number of pre and post relaxations\n"); |
| printf(" -d <dim> : problem dimension (2 or 3)\n"); |
| printf(" -solver <ID> : solver ID (default = 0)\n"); |
| printf(" 0 - SMG\n"); |
| printf(" 1 - CG with SMG precond\n"); |
| printf(" 2 - CG with diagonal scaling\n"); |
| printf(" 3 - CG\n"); |
| printf("\n"); |
| } |
| |
| if ( print_usage ) |
| { |
| exit(1); |
| } |
| |
| /*----------------------------------------------------------- |
| * Check a few things |
| *-----------------------------------------------------------*/ |
| |
| if ((P*Q*R) != num_procs) |
| { |
| printf("Error: Invalid number of processors or processor topology \n"); |
| exit(1); |
| } |
| |
| /*----------------------------------------------------------- |
| * Print driver parameters |
| *-----------------------------------------------------------*/ |
| |
| if (myid == 0) |
| { |
| printf("Running with these driver parameters:\n"); |
| printf(" (nx, ny, nz) = (%d, %d, %d)\n", nx, ny, nz); |
| printf(" (Px, Py, Pz) = (%d, %d, %d)\n", P, Q, R); |
| printf(" (bx, by, bz) = (%d, %d, %d)\n", bx, by, bz); |
| printf(" (cx, cy, cz) = (%f, %f, %f)\n", cx, cy, cz); |
| printf(" (n_pre, n_post) = (%d, %d)\n", n_pre, n_post); |
| printf(" dim = %d\n", dim); |
| printf(" solver ID = %d\n", solver_id); |
| } |
| |
| /*----------------------------------------------------------- |
| * Synchronize so that timings make sense |
| *-----------------------------------------------------------*/ |
| |
| MPI_Barrier(MPI_COMM_WORLD); |
| |
| time_index = hypre_InitializeTiming("Struct Interface"); |
| hypre_BeginTiming(time_index); |
| |
| /*----------------------------------------------------------- |
| * Set up the grid structure |
| *-----------------------------------------------------------*/ |
| |
| switch (dim) |
| { |
| case 1: |
| volume = nx; |
| nblocks = bx; |
| stencil_indices = hypre_CTAlloc(int, 2); |
| offsets = hypre_CTAlloc(int*, 2); |
| offsets[0] = hypre_CTAlloc(int, 1); |
| offsets[0][0] = -1; |
| offsets[1] = hypre_CTAlloc(int, 1); |
| offsets[1][0] = 0; |
| /* compute p from P and myid */ |
| p = myid % P; |
| break; |
| case 2: |
| volume = nx*ny; |
| nblocks = bx*by; |
| stencil_indices = hypre_CTAlloc(int, 3); |
| offsets = hypre_CTAlloc(int*, 3); |
| offsets[0] = hypre_CTAlloc(int, 2); |
| offsets[0][0] = -1; |
| offsets[0][1] = 0; |
| offsets[1] = hypre_CTAlloc(int, 2); |
| offsets[1][0] = 0; |
| offsets[1][1] = -1; |
| offsets[2] = hypre_CTAlloc(int, 2); |
| offsets[2][0] = 0; |
| offsets[2][1] = 0; |
| /* compute p,q from P,Q and myid */ |
| p = myid % P; |
| q = (( myid - p)/P) % Q; |
| break; |
| case 3: |
| volume = nx*ny*nz; |
| nblocks = bx*by*bz; |
| stencil_indices = hypre_CTAlloc(int, 4); |
| offsets = hypre_CTAlloc(int*, 4); |
| offsets[0] = hypre_CTAlloc(int, 3); |
| offsets[0][0] = -1; |
| offsets[0][1] = 0; |
| offsets[0][2] = 0; |
| offsets[1] = hypre_CTAlloc(int, 3); |
| offsets[1][0] = 0; |
| offsets[1][1] = -1; |
| offsets[1][2] = 0; |
| offsets[2] = hypre_CTAlloc(int, 3); |
| offsets[2][0] = 0; |
| offsets[2][1] = 0; |
| offsets[2][2] = -1; |
| offsets[3] = hypre_CTAlloc(int, 3); |
| offsets[3][0] = 0; |
| offsets[3][1] = 0; |
| offsets[3][2] = 0; |
| /* compute p,q,r from P,Q,R and myid */ |
| p = myid % P; |
| q = (( myid - p)/P) % Q; |
| r = ( myid - p - P*q)/( P*Q ); |
| break; |
| } |
| |
| ilower = hypre_CTAlloc(int*, nblocks); |
| iupper = hypre_CTAlloc(int*, nblocks); |
| for (i = 0; i < nblocks; i++) |
| { |
| ilower[i] = hypre_CTAlloc(int, dim); |
| iupper[i] = hypre_CTAlloc(int, dim); |
| } |
| |
| for (i = 0; i < dim; i++) |
| { |
| A_num_ghost[2*i] = 1; |
| A_num_ghost[2*i + 1] = 1; |
| } |
| |
| /* compute ilower and iupper from (p,q,r), (bx,by,bz), and (nx,ny,nz) */ |
| ib = 0; |
| switch (dim) |
| { |
| case 1: |
| for (ix = 0; ix < bx; ix++) |
| { |
| ilower[ib][0] = istart[0]+ nx*(bx*p+ix); |
| iupper[ib][0] = istart[0]+ nx*(bx*p+ix+1) - 1; |
| ib++; |
| } |
| break; |
| case 2: |
| for (iy = 0; iy < by; iy++) |
| for (ix = 0; ix < bx; ix++) |
| { |
| ilower[ib][0] = istart[0]+ nx*(bx*p+ix); |
| iupper[ib][0] = istart[0]+ nx*(bx*p+ix+1) - 1; |
| ilower[ib][1] = istart[1]+ ny*(by*q+iy); |
| iupper[ib][1] = istart[1]+ ny*(by*q+iy+1) - 1; |
| ib++; |
| } |
| break; |
| case 3: |
| for (iz = 0; iz < bz; iz++) |
| for (iy = 0; iy < by; iy++) |
| for (ix = 0; ix < bx; ix++) |
| { |
| ilower[ib][0] = istart[0]+ nx*(bx*p+ix); |
| iupper[ib][0] = istart[0]+ nx*(bx*p+ix+1) - 1; |
| ilower[ib][1] = istart[1]+ ny*(by*q+iy); |
| iupper[ib][1] = istart[1]+ ny*(by*q+iy+1) - 1; |
| ilower[ib][2] = istart[2]+ nz*(bz*r+iz); |
| iupper[ib][2] = istart[2]+ nz*(bz*r+iz+1) - 1; |
| ib++; |
| } |
| break; |
| } |
| |
| HYPRE_StructGridCreate(MPI_COMM_WORLD, dim, &grid); |
| for (ib = 0; ib < nblocks; ib++) |
| { |
| HYPRE_StructGridSetExtents(grid, ilower[ib], iupper[ib]); |
| } |
| HYPRE_StructGridAssemble(grid); |
| |
| /*----------------------------------------------------------- |
| * Set up the stencil structure |
| *-----------------------------------------------------------*/ |
| |
| HYPRE_StructStencilCreate(dim, dim + 1, &stencil); |
| for (s = 0; s < dim + 1; s++) |
| { |
| HYPRE_StructStencilSetElement(stencil, s, offsets[s]); |
| } |
| |
| /*----------------------------------------------------------- |
| * Set up the matrix structure |
| *-----------------------------------------------------------*/ |
| |
| HYPRE_StructMatrixCreate(MPI_COMM_WORLD, grid, stencil, &A); |
| HYPRE_StructMatrixSetSymmetric(A, 1); |
| HYPRE_StructMatrixSetNumGhost(A, A_num_ghost); |
| HYPRE_StructMatrixInitialize(A); |
| /*----------------------------------------------------------- |
| * Fill in the matrix elements |
| *-----------------------------------------------------------*/ |
| |
| values = hypre_CTAlloc(double, (dim +1)*volume); |
| |
| /* Set the coefficients for the grid */ |
| for (i = 0; i < (dim + 1)*volume; i += (dim + 1)) |
| { |
| for (s = 0; s < (dim + 1); s++) |
| { |
| stencil_indices[s] = s; |
| switch (dim) |
| { |
| case 1: |
| values[i ] = -cx; |
| values[i+1] = 2.0*(cx); |
| break; |
| case 2: |
| values[i ] = -cx; |
| values[i+1] = -cy; |
| values[i+2] = 2.0*(cx+cy); |
| break; |
| case 3: |
| values[i ] = -cx; |
| values[i+1] = -cy; |
| values[i+2] = -cz; |
| values[i+3] = 2.0*(cx+cy+cz); |
| break; |
| } |
| } |
| } |
| for (ib = 0; ib < nblocks; ib++) |
| { |
| HYPRE_StructMatrixSetBoxValues(A, ilower[ib], iupper[ib], (dim+1), |
| stencil_indices, values); |
| } |
| |
| /* Zero out stencils reaching to real boundary */ |
| for (i = 0; i < volume; i++) |
| { |
| values[i] = 0.0; |
| } |
| for (d = 0; d < dim; d++) |
| { |
| for (ib = 0; ib < nblocks; ib++) |
| { |
| if( ilower[ib][d] == istart[d] ) |
| { |
| i = iupper[ib][d]; |
| iupper[ib][d] = istart[d]; |
| stencil_indices[0] = d; |
| HYPRE_StructMatrixSetBoxValues(A, ilower[ib], iupper[ib], |
| 1, stencil_indices, values); |
| iupper[ib][d] = i; |
| } |
| } |
| } |
| |
| HYPRE_StructMatrixAssemble(A); |
| #if 0 |
| HYPRE_StructMatrixPrint("driver.out.A", A, 0); |
| #endif |
| |
| hypre_TFree(values); |
| |
| /*----------------------------------------------------------- |
| * Set up the linear system |
| *-----------------------------------------------------------*/ |
| |
| values = hypre_CTAlloc(double, volume); |
| |
| HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &b); |
| HYPRE_StructVectorInitialize(b); |
| for (i = 0; i < volume; i++) |
| { |
| values[i] = 1.0; |
| } |
| for (ib = 0; ib < nblocks; ib++) |
| { |
| HYPRE_StructVectorSetBoxValues(b, ilower[ib], iupper[ib], values); |
| } |
| HYPRE_StructVectorAssemble(b); |
| #if 0 |
| HYPRE_StructVectorPrint("driver.out.b", b, 0); |
| #endif |
| |
| HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &x); |
| HYPRE_StructVectorInitialize(x); |
| for (i = 0; i < volume; i++) |
| { |
| values[i] = 0.0; |
| } |
| for (ib = 0; ib < nblocks; ib++) |
| { |
| HYPRE_StructVectorSetBoxValues(x, ilower[ib], iupper[ib], values); |
| } |
| HYPRE_StructVectorAssemble(x); |
| #if 0 |
| HYPRE_StructVectorPrint("driver.out.x0", x, 0); |
| #endif |
| |
| hypre_TFree(values); |
| |
| hypre_EndTiming(time_index); |
| hypre_PrintTiming("Struct Interface", MPI_COMM_WORLD); |
| hypre_FinalizeTiming(time_index); |
| hypre_ClearTiming(); |
| |
| /*----------------------------------------------------------- |
| * Solve the system using SMG |
| *-----------------------------------------------------------*/ |
| |
| if (solver_id == 0) |
| { |
| time_index = hypre_InitializeTiming("SMG Setup"); |
| hypre_BeginTiming(time_index); |
| |
| HYPRE_StructSMGCreate(MPI_COMM_WORLD, &solver); |
| HYPRE_StructSMGSetMemoryUse(solver, 0); |
| HYPRE_StructSMGSetMaxIter(solver, 50); |
| HYPRE_StructSMGSetTol(solver, 1.0e-06); |
| HYPRE_StructSMGSetRelChange(solver, 0); |
| HYPRE_StructSMGSetNumPreRelax(solver, n_pre); |
| HYPRE_StructSMGSetNumPostRelax(solver, n_post); |
| HYPRE_StructSMGSetLogging(solver, 1); |
| HYPRE_StructSMGSetup(solver, A, b, x); |
| |
| hypre_EndTiming(time_index); |
| hypre_PrintTiming("Setup phase times", MPI_COMM_WORLD); |
| hypre_FinalizeTiming(time_index); |
| hypre_ClearTiming(); |
| |
| time_index = hypre_InitializeTiming("SMG Solve"); |
| hypre_BeginTiming(time_index); |
| |
| HYPRE_StructSMGSolve(solver, A, b, x); |
| |
| hypre_EndTiming(time_index); |
| hypre_PrintTiming("Solve phase times", MPI_COMM_WORLD); |
| hypre_FinalizeTiming(time_index); |
| hypre_ClearTiming(); |
| |
| HYPRE_StructSMGGetNumIterations(solver, &num_iterations); |
| HYPRE_StructSMGGetFinalRelativeResidualNorm(solver, &final_res_norm); |
| HYPRE_StructSMGDestroy(solver); |
| } |
| |
| /*----------------------------------------------------------- |
| * Solve the system using CG |
| *-----------------------------------------------------------*/ |
| |
| if (solver_id > 0) |
| { |
| time_index = hypre_InitializeTiming("PCG Setup"); |
| hypre_BeginTiming(time_index); |
| |
| HYPRE_StructPCGCreate(MPI_COMM_WORLD, &solver); |
| HYPRE_PCGSetMaxIter( (HYPRE_Solver)solver, 50 ); |
| HYPRE_PCGSetTol( (HYPRE_Solver)solver, 1.0e-06 ); |
| HYPRE_PCGSetTwoNorm( (HYPRE_Solver)solver, 1 ); |
| HYPRE_PCGSetRelChange( (HYPRE_Solver)solver, 0 ); |
| HYPRE_PCGSetLogging( (HYPRE_Solver)solver, 1 ); |
| |
| if (solver_id == 1) |
| { |
| /* use symmetric SMG as preconditioner */ |
| HYPRE_StructSMGCreate(MPI_COMM_WORLD, &precond); |
| HYPRE_StructSMGSetMemoryUse(precond, 0); |
| HYPRE_StructSMGSetMaxIter(precond, 1); |
| HYPRE_StructSMGSetTol(precond, 0.0); |
| HYPRE_StructSMGSetZeroGuess(precond); |
| HYPRE_StructSMGSetNumPreRelax(precond, n_pre); |
| HYPRE_StructSMGSetNumPostRelax(precond, n_post); |
| HYPRE_StructSMGSetLogging(precond, 0); |
| HYPRE_PCGSetPrecond((HYPRE_Solver) solver, |
| (HYPRE_PtrToSolverFcn) HYPRE_StructSMGSolve, |
| (HYPRE_PtrToSolverFcn) HYPRE_StructSMGSetup, |
| (HYPRE_Solver) precond); |
| } |
| |
| else if (solver_id == 2) |
| { |
| /* use diagonal scaling as preconditioner */ |
| precond = NULL; |
| HYPRE_PCGSetPrecond((HYPRE_Solver) solver, |
| (HYPRE_PtrToSolverFcn) HYPRE_StructDiagScale, |
| (HYPRE_PtrToSolverFcn) HYPRE_StructDiagScaleSetup, |
| (HYPRE_Solver) precond); |
| } |
| |
| HYPRE_PCGSetup((HYPRE_Solver)solver, |
| (HYPRE_Matrix)A, (HYPRE_Vector)b, (HYPRE_Vector)x); |
| |
| hypre_EndTiming(time_index); |
| hypre_PrintTiming("Setup phase times", MPI_COMM_WORLD); |
| hypre_FinalizeTiming(time_index); |
| hypre_ClearTiming(); |
| |
| time_index = hypre_InitializeTiming("PCG Solve"); |
| hypre_BeginTiming(time_index); |
| |
| HYPRE_PCGSolve((HYPRE_Solver)solver, |
| (HYPRE_Matrix)A, (HYPRE_Vector)b, (HYPRE_Vector)x); |
| |
| hypre_EndTiming(time_index); |
| hypre_PrintTiming("Solve phase times", MPI_COMM_WORLD); |
| hypre_FinalizeTiming(time_index); |
| hypre_ClearTiming(); |
| |
| HYPRE_PCGGetNumIterations((HYPRE_Solver)solver, &num_iterations); |
| HYPRE_PCGGetFinalRelativeResidualNorm((HYPRE_Solver)solver, |
| &final_res_norm); |
| HYPRE_StructPCGDestroy(solver); |
| |
| if (solver_id == 1) |
| { |
| HYPRE_StructSMGDestroy(precond); |
| } |
| } |
| |
| /*----------------------------------------------------------- |
| * Print the solution and other info |
| *-----------------------------------------------------------*/ |
| |
| #if 0 |
| HYPRE_StructVectorPrint("driver.out.x", x, 0); |
| #endif |
| |
| if (myid == 0) |
| { |
| printf("\n"); |
| printf("Iterations = %d\n", num_iterations); |
| printf("Final Relative Residual Norm = %e\n", final_res_norm); |
| printf("\n"); |
| } |
| |
| /*----------------------------------------------------------- |
| * Finalize things |
| *-----------------------------------------------------------*/ |
| |
| HYPRE_StructGridDestroy(grid); |
| HYPRE_StructStencilDestroy(stencil); |
| HYPRE_StructMatrixDestroy(A); |
| HYPRE_StructVectorDestroy(b); |
| HYPRE_StructVectorDestroy(x); |
| |
| for (i = 0; i < nblocks; i++) |
| { |
| hypre_TFree(iupper[i]); |
| hypre_TFree(ilower[i]); |
| } |
| hypre_TFree(ilower); |
| hypre_TFree(iupper); |
| hypre_TFree(stencil_indices); |
| |
| for ( i = 0; i < (dim + 1); i++) |
| hypre_TFree(offsets[i]); |
| hypre_TFree(offsets); |
| |
| /* Finalize MPI */ |
| MPI_Finalize(); |
| |
| return (0); |
| } |