m***@manchester.ac.uk
2007-11-05 19:36:49 UTC
Folks, I'm stuck as to why my simple Fortran example below doesn't
work! My understanding is that parent.exe should spawn some copies of
'./child.exe', but it appears to hang at MPI_COMM_SPAWN. This is using
mpich2 with the Intel ifort compiler.
All enlightenment welcome ;)
parent.f90
-------------
!> Parent side of an MPI dynamic-process example.
!>
!> Every rank of MPI_COMM_WORLD reports itself and then takes part in a
!> single collective MPI_COMM_SPAWN call that asks for `maxpes` copies of
!> `./child.exe`.  The spawned processes are reachable through `intercomm`.
program main
  use mpi
  implicit none

  integer, parameter :: maxpes = 2                    ! number of children requested
  integer, parameter :: root = 0                      ! rank whose spawn arguments are used
  character(len=*), parameter :: cmd = './child.exe'  ! executable to spawn

  integer :: myid, numprocs, ierr
  integer :: intercomm                                ! parent <-> children intercommunicator
  integer :: errcodes(maxpes)                         ! one launch status per requested child

  ! --- initialise top level
  call MPI_INIT(ierr)
  call MPI_COMM_RANK(MPI_COMM_WORLD, myid, ierr)
  call MPI_COMM_SIZE(MPI_COMM_WORLD, numprocs, ierr)

  call MPI_BARRIER(MPI_COMM_WORLD, ierr)
  write(*,'("parent spawning: ",I3,"/",I3)') myid, numprocs

  ! Collective over MPI_COMM_WORLD: spawn up to maxpes more MPI processes
  ! running "cmd".  The child takes no command-line arguments and no launch
  ! hints, so the predefined MPI_ARGV_NULL and MPI_INFO_NULL are passed.
  ! The original passed a zero-length string array and an uninitialised
  ! info handle, neither of which is a valid argument for this call.
  call MPI_COMM_SPAWN(cmd, MPI_ARGV_NULL, maxpes, MPI_INFO_NULL, root, &
                      MPI_COMM_WORLD, intercomm, errcodes, ierr)

  write(*,'("parent done: ",I3,"/",I3)') myid, numprocs

  ! --- quit
  call MPI_FINALIZE(ierr)
end program main
child.f90
----------
!> Child / stand-alone worker: midpoint-rule estimate of pi.
!>
!> Each rank integrates 4/(1+x^2) over a strided subset of the n
!> sub-intervals of [0,1]; the partial results are summed onto rank 0,
!> which prints the estimate, its error and the elapsed wall-clock time.
!> The program also reports the communicator returned by
!> MPI_COMM_GET_PARENT and prints 'null' when that is MPI_COMM_NULL.
program main
  use mpi
  implicit none

  ! kind(1.0d0) is the double precision kind, matching MPI_DOUBLE_PRECISION.
  integer, parameter :: dp = kind(1.0d0)
  real(dp), parameter :: pi25dt = 3.141592653589793238462643_dp  ! reference value of pi
  integer, parameter :: n = 700000000                            ! number of sub-intervals

  real(dp) :: mypi, pi, h, partial_sum, x, tick1, tick2
  integer :: myid, numprocs, i, ierr
  integer :: parent_comm   ! renamed from MPI_PARENT: the MPI_ prefix is reserved for the library

  call MPI_INIT(ierr)
  call MPI_COMM_RANK(MPI_COMM_WORLD, myid, ierr)
  call MPI_COMM_SIZE(MPI_COMM_WORLD, numprocs, ierr)

  call MPI_COMM_GET_PARENT(parent_comm, ierr)
  ! Three output items need three data descriptors.  The original format had
  ! only two, so format reversion printed the handle through I2 on a second
  ! "child " line.
  write(*,'("child ",I2,"/",I2,1x,I0)') myid, numprocs, parent_comm
  if (parent_comm == MPI_COMM_NULL) write(*,'(A)') 'null'

  call MPI_BARRIER(MPI_COMM_WORLD, ierr)
  tick1 = MPI_Wtime()

  h = 1.0_dp / n
  partial_sum = 0.0_dp
  ! Rank r handles sub-intervals r+1, r+1+numprocs, ...
  do i = myid + 1, n, numprocs
    x = h * (real(i, dp) - 0.5_dp)   ! midpoint of sub-interval i
    partial_sum = partial_sum + f(x)
  end do
  mypi = h * partial_sum

  ! collect all the partial sums on rank 0
  call MPI_REDUCE(mypi, pi, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, &
                  MPI_COMM_WORLD, ierr)

  call MPI_BARRIER(MPI_COMM_WORLD, ierr)
  tick2 = MPI_Wtime()

  ! node 0 prints the answer.
  if (myid == 0) then
    write(*,'(" pi is approximately: ",F18.16," Error is: ",F18.16,' // &
            '" wall clock time=",F10.3,"secs on ",I3,"PEs")') &
      pi, abs(pi - pi25dt), tick2 - tick1, numprocs
  end if

  call MPI_FINALIZE(ierr)

contains

  !> Function to integrate: 4/(1+a^2), whose integral over [0,1] is pi.
  pure function f(a) result(y)
    real(dp), intent(in) :: a
    real(dp) :: y
    y = 4.0_dp / (1.0_dp + a*a)
  end function f

end program main
Makefile
----------
# Build the spawning parent and the worker it launches.
PARENT=parent.exe
CHILD=child.exe
# MPI compiler wrapper; defined before the rules that use it.
FC=mpif90

# 'all' names no file, so mark it phony.
.PHONY: all
all: ${PARENT} ${CHILD}

# Recipe lines must begin with a tab character.
${PARENT}: parent.f90
	${FC} parent.f90 -o ${PARENT}

${CHILD}: child.f90
	${FC} child.f90 -o ${CHILD}
***@ratty:~/Fortran/MPI/spawn$ mpirun -np 2 ./child.exe
child 0/ 2
child **/
null
child 1/ 2
child **/
null
pi is approximately: 3.1415926535897229 Error is:
0.0000000000000702 wall clock time= 5.343secs on 2PEs
***@ratty:~/Fortran/MPI/spawn$ mpirun -np 2 ./parent.exe
parent spawning: 0/ 2
parent spawning: 1/ 2
{hangs for at least 5 mins...}
work! My understanding is that parent.exe should spawn some copies of
'./child.exe', but it appears to hang at MPI_COMM_SPAWN. This is using
mpich2 with the Intel ifort compiler.
All enlightenment welcome ;)
parent.f90
-------------
!> Parent side of an MPI dynamic-process example.
!>
!> Every rank of MPI_COMM_WORLD reports itself and then takes part in a
!> single collective MPI_COMM_SPAWN call that asks for `maxpes` copies of
!> `./child.exe`.  The spawned processes are reachable through `intercomm`.
program main
  use mpi
  implicit none

  integer, parameter :: maxpes = 2                    ! number of children requested
  integer, parameter :: root = 0                      ! rank whose spawn arguments are used
  character(len=*), parameter :: cmd = './child.exe'  ! executable to spawn

  integer :: myid, numprocs, ierr
  integer :: intercomm                                ! parent <-> children intercommunicator
  integer :: errcodes(maxpes)                         ! one launch status per requested child

  ! --- initialise top level
  call MPI_INIT(ierr)
  call MPI_COMM_RANK(MPI_COMM_WORLD, myid, ierr)
  call MPI_COMM_SIZE(MPI_COMM_WORLD, numprocs, ierr)

  call MPI_BARRIER(MPI_COMM_WORLD, ierr)
  write(*,'("parent spawning: ",I3,"/",I3)') myid, numprocs

  ! Collective over MPI_COMM_WORLD: spawn up to maxpes more MPI processes
  ! running "cmd".  The child takes no command-line arguments and no launch
  ! hints, so the predefined MPI_ARGV_NULL and MPI_INFO_NULL are passed.
  ! The original passed a zero-length string array and an uninitialised
  ! info handle, neither of which is a valid argument for this call.
  call MPI_COMM_SPAWN(cmd, MPI_ARGV_NULL, maxpes, MPI_INFO_NULL, root, &
                      MPI_COMM_WORLD, intercomm, errcodes, ierr)

  write(*,'("parent done: ",I3,"/",I3)') myid, numprocs

  ! --- quit
  call MPI_FINALIZE(ierr)
end program main
child.f90
----------
!> Child / stand-alone worker: midpoint-rule estimate of pi.
!>
!> Each rank integrates 4/(1+x^2) over a strided subset of the n
!> sub-intervals of [0,1]; the partial results are summed onto rank 0,
!> which prints the estimate, its error and the elapsed wall-clock time.
!> The program also reports the communicator returned by
!> MPI_COMM_GET_PARENT and prints 'null' when that is MPI_COMM_NULL.
program main
  use mpi
  implicit none

  ! kind(1.0d0) is the double precision kind, matching MPI_DOUBLE_PRECISION.
  integer, parameter :: dp = kind(1.0d0)
  real(dp), parameter :: pi25dt = 3.141592653589793238462643_dp  ! reference value of pi
  integer, parameter :: n = 700000000                            ! number of sub-intervals

  real(dp) :: mypi, pi, h, partial_sum, x, tick1, tick2
  integer :: myid, numprocs, i, ierr
  integer :: parent_comm   ! renamed from MPI_PARENT: the MPI_ prefix is reserved for the library

  call MPI_INIT(ierr)
  call MPI_COMM_RANK(MPI_COMM_WORLD, myid, ierr)
  call MPI_COMM_SIZE(MPI_COMM_WORLD, numprocs, ierr)

  call MPI_COMM_GET_PARENT(parent_comm, ierr)
  ! Three output items need three data descriptors.  The original format had
  ! only two, so format reversion printed the handle through I2 on a second
  ! "child " line.
  write(*,'("child ",I2,"/",I2,1x,I0)') myid, numprocs, parent_comm
  if (parent_comm == MPI_COMM_NULL) write(*,'(A)') 'null'

  call MPI_BARRIER(MPI_COMM_WORLD, ierr)
  tick1 = MPI_Wtime()

  h = 1.0_dp / n
  partial_sum = 0.0_dp
  ! Rank r handles sub-intervals r+1, r+1+numprocs, ...
  do i = myid + 1, n, numprocs
    x = h * (real(i, dp) - 0.5_dp)   ! midpoint of sub-interval i
    partial_sum = partial_sum + f(x)
  end do
  mypi = h * partial_sum

  ! collect all the partial sums on rank 0
  call MPI_REDUCE(mypi, pi, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, &
                  MPI_COMM_WORLD, ierr)

  call MPI_BARRIER(MPI_COMM_WORLD, ierr)
  tick2 = MPI_Wtime()

  ! node 0 prints the answer.
  if (myid == 0) then
    write(*,'(" pi is approximately: ",F18.16," Error is: ",F18.16,' // &
            '" wall clock time=",F10.3,"secs on ",I3,"PEs")') &
      pi, abs(pi - pi25dt), tick2 - tick1, numprocs
  end if

  call MPI_FINALIZE(ierr)

contains

  !> Function to integrate: 4/(1+a^2), whose integral over [0,1] is pi.
  pure function f(a) result(y)
    real(dp), intent(in) :: a
    real(dp) :: y
    y = 4.0_dp / (1.0_dp + a*a)
  end function f

end program main
Makefile
----------
# Build the spawning parent and the worker it launches.
PARENT=parent.exe
CHILD=child.exe
# MPI compiler wrapper; defined before the rules that use it.
FC=mpif90

# 'all' names no file, so mark it phony.
.PHONY: all
all: ${PARENT} ${CHILD}

# Recipe lines must begin with a tab character.
${PARENT}: parent.f90
	${FC} parent.f90 -o ${PARENT}

${CHILD}: child.f90
	${FC} child.f90 -o ${CHILD}
***@ratty:~/Fortran/MPI/spawn$ mpirun -np 2 ./child.exe
child 0/ 2
child **/
null
child 1/ 2
child **/
null
pi is approximately: 3.1415926535897229 Error is:
0.0000000000000702 wall clock time= 5.343secs on 2PEs
***@ratty:~/Fortran/MPI/spawn$ mpirun -np 2 ./parent.exe
parent spawning: 0/ 2
parent spawning: 1/ 2
{hangs for at least 5 mins...}