C subroutine writeout
C Writes summarized results.
C Modification: The number of MPI tasks (numMPItasks) and OpenMP
C threads (nThreads) is now passed in from sphot for the efficiency
C calculation. (bmb 3/12/01)
subroutine writeout(numMPItasks, nThreads)
include ''
include ''
include ''
include ''
include ''
include ''
integer nThreads, numMPItasks, ntracSum
INTEGER*4 nescgp(negrps)
real*8 enesc(negrps)
real*8 wcut, wmin, wmax
real*8 wlost, wesc, wrr
real*8 wabs, wcen, epgain, etot
integer*4 npart, nphtot, nploss
integer*4 nlost, nesc, nrr, nabs, ncen
integer*4 nscat, nsplt, ntrac
common /mjcstuff/ nescgp, enesc, wcut, wmin, wmax,
& wlost, wesc, wrr, wabs, wcen, epgain, etot,
& npart, nphtot, nploss, nlost, nesc, nrr, nabs,
& ncen, nscat, nsplt, ntrac
real*8 escsum,escsumsq, escave,escstdev
real*8 tsum,tsumsq,tave,tstd
real*8 maxCopyGlobTime
real*8 avg_enescSum
parameter (nout1=11)
parameter (nout2=12)
if (print_flag .gt. 0) then
do 2000 i = 1, NRuns
npart = ng_npart(i)
nphtot = ng_nphtot(i)
nploss = ng_nploss(i)
etot = g_etot(i)
epgain = g_epgain(i)
nlost = ng_nlost(i)
nesc = ng_nesc(i)
nrr = ng_nrr(i)
ncen = ng_ncen(i)
nabs = ng_nabs(i)
wlost = g_wlost(i)
wesc = g_wesc(i)
wrr = g_wrr(i)
wcen = g_wcen(i)
wabs = g_wabs(i)
nscat = ng_nscat(i)
nsplt = ng_nsplt(i)
ntrac = ng_ntrac(i)
texec = g_texec(i)
do 903 j = 1,negrps
nescgp(j) = ng_nescgp(i,j)
enesc(j) = g_enesc(i,j)
903 continue
CALL wroutput(i,tgen,texec,nout1)
2000 continue
! CALL second(progEndTime)
loopWallClockTime = loopEndTime - loopBeginTime
progWallClockTime = progEndTime - progBeginTime
loopTotalCPUTime = 0.0D0
progTotalCPUTime = loopBeginTime - progBeginTime +
. progEndTime - loopEndTime
if (print_flag .gt. 0) then
escsum = 0.0d0
escsumsq = 0.0d0
tsum = 0.0d0
tsumsq = 0.0d0
ovrhsum = 0.0d0
ovrhsumsq= 0.0d0
partSum = 0.0d0
partSumSq = 0.0d0
tracSum = 0.0d0
tracSumSq = 0.0d0
c print *, "numMPItasks = ", numMPItasks
c print *, "loopTime = ", loopWallClockTime
c print *, "progTime = ", progWallClockTime
write(nout1,500) numMPItasks
500 format(//,'numMPItasks = ', i8)
write(nout1,501) loopWallClockTime
501 format('loopTime = ', f10.6)
write(nout1,502) progWallClockTime
502 format('progTime = ', f10.6)
9905 format(//////,'SUMMARY:',/,'-------',/,
1 'Run # # part. # tracks esc. prob.',
2 ' time 1st ranf',' last ranf'
3 )
do 1100 i= 1, NRuns
loopTotalCPUTime = loopTotalCPUTime + g_texec(i)
progTotalCPUTime = progTotalCPUTime + g_texec(i)
escsum = escsum + g_ffesc(i)
tsum = tsum + g_texec(i)
ovrhsum = ovrhsum + g_overheadTime(i)
partSum = partSum + ng_nphtot(i)
tracSum = tracSum + ng_ntrac(i)
write(nout1,9904) i, ng_nphtot(i), ng_ntrac(i), g_ffesc(i),
. g_texec(i), g_firstRanf(i), g_lastRanf(i)
9904 format(i8,i9,i12,f12.6,f9.4,f10.7,f10.7)
1100 continue
9979 format('-------------------------------------------',
1 '------------------------')
escave = escsum / NRuns
tave = tsum / NRuns
ovrhave = ovrhsum / NRuns
iparticleAve = partSum / NRuns
itracAve = tracSum / NRuns
do 1200 i = 1, NRuns
escsumsq = escsumsq + abs(g_ffesc(i)-escave) ** 2
tsumsq = tsumsq + abs(g_texec(i)-tave) ** 2
ovrhsumsq = ovrhsumsq + abs(g_overheadTime(i)-ovrhave) ** 2
partSumSq = partSumSq + abs(ng_nphtot(i)-iparticleAve) ** 2
tracSumSq = tracSumSq + abs(ng_ntrac(i)-itracAve) ** 2
1200 continue
escstdev = sqrt(escsumsq/NRuns)
tstd = sqrt(tsumsq/NRuns)
ovrhstd = sqrt(ovrhsumsq/NRuns)
ipartStDev = sqrt(partSumSq/NRuns)
itracStDev = sqrt(tracSumSq/NRuns)
partRatio = ipartStDev / float(iparticleAve)
tracRatio = itracStDev / float(itracAve)
escRatio = escstdev / escAve
tRatio = tStd / tave
write(nout1,9917) iparticleAve, itracAve, escAve, tave
9917 format('ave: ',i9,i12,f12.6,f9.4)
write(nout1,9918) ipartStDev, itracStDev, escStDev, tStd
9918 format('std: ',i9,i12,f12.8,f9.4)
write(nout1,9919) partRatio, tracRatio, escRatio, tRatio
9919 format('ratio: ',f8.5,f12.7,f12.8,f9.6)
maxCopyGlobTime = copyGlobTime(1)
do 1300 i = 2, numMPItasks
if (maxCopyGlobTime .lt. copyGlobTime(i)) then
maxCopyGlobTime = copyGlobTime(i)
1300 continue
c...Loop speedup = sum of all execute() loops / loop wall-clock time
loopSpeedup = loopTotalCPUTime / loopWallClockTime
singleCPUTime = progTotalCPUTime - allocateTime
. - seedGenTime - maxCopyGlobTime
progSpeedup = singleCPUTime / progWallClockTime
if (nThreads .eq. 0) then nThreads = 1
efficiency = progSpeedUp / (numMPItasks * nThreads)
write(nout1,9903) allocateTime, seedGenTime, maxCopyGlobTime,
. singleCPUTime, numMPItasks, nThreads,
. loopSpeedup, progSpeedup, efficiency
9903 format(//,
. 'allocate time: ',f15.6,/,
. 'random number seed generation time: ',f15.6,/,
. 'max. global copy time: ',f15.6,/,
. 'estimated single CPU time: ',f15.2,/,
. 'number of MPI tasks: ',i15, /,
. 'number of threads per MPI task: ',i15, /,
. 'loop speedup: ',f15.4,/,
. 'program speedup: ',f15.4,/,
. 'efficiency: ',f15.4
. )