Discussion:
mpi_comm_spawn
(too old to reply)
m***@manchester.ac.uk
2007-11-05 19:36:49 UTC
Permalink
Folks, I'm stuck as to why my simple Fortran example below doesn't
work! My understanding is that parent.exe should spawn some './
child.exe' but it appears to hang at MPI_COMM_SPAWN. This is using
mpich2 with Intel ifort compiler.

All enlightenment welcome ;)

parent.f90
-------------

program main
use mpi
implicit none

integer myid, numprocs, rc, ierr

integer, PARAMETER:: maxpes=2
character*(*), PARAMETER:: cmd='./child.exe'
character*(*), PARAMETER:: args(1)=''
integer:: info
integer:: root=0
integer:: INTERCOMM
integer:: errCodes(1:maxpes)


! --- initialise top level
call MPI_INIT( ierr )
call MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
call MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ierr )

call MPI_BARRIER(MPI_COMM_WORLD, ierr)
write(*,'("parent spawning: ",I3,"/",I3)') myid, numprocs

! each(?) to spawn up to maxpes more MPI processes running mpi program
"CMD ARGS[*]" and MPI_INFO 'info'
call MPI_COMM_SPAWN(cmd, args, maxpes, info, root, MPI_COMM_WORLD,
INTERCOMM, errCodes, ierr)

write(*,'("parent done: ",I3,"/",I3)') myid, numprocs

! --- quit
call MPI_FINALIZE(rc)
stop
end


child.f90
----------

program main
use mpi
implicit none

double precision PI25DT
parameter (PI25DT = 3.141592653589793238462643d0)

double precision mypi, pi, h, sum, x, f, a, tick1, tick2
integer n, myid, numprocs, i, rc, ierr
integer:: MPI_PARENT
parameter(n=700000000)
! function to integrate
f(a) = 4.d0 / (1.d0 + a*a)

call MPI_INIT( ierr )
call MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
call MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ierr )
call MPI_COMM_GET_PARENT(MPI_PARENT, ierr)

write(*,'("child ",I2,"/",I2,1x,6X)') myid, numprocs, MPI_PARENT
if (MPI_PARENT == MPI_COMM_NULL) write(*,'(A)') 'null'

call MPI_BARRIER(MPI_COMM_WORLD, ierr)
tick1 = MPI_Wtime()

h = 1.0d0/n

sum = 0.0d0
do i = myid+1, n, numprocs
x = h * (dble(i) - 0.5d0)
sum = sum + f(x)
enddo
mypi = h * sum

! collect all the partial sums
call MPI_REDUCE(mypi,pi,1,MPI_DOUBLE_PRECISION,MPI_SUM,0, &
MPI_COMM_WORLD,ierr)
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
tick2 = MPI_Wtime()
! node 0 prints the answer.
if (myid .eq. 0) then
write(6, 97) pi, abs(pi - PI25DT), (tick2-tick1), numprocs
97 format(' pi is approximately: ', F18.16, &
' Error is: ', F18.16, ' wall clock time=',F10.3,'secs
on ', I3,'PEs')
endif

!!! end do

call MPI_FINALIZE(rc)
end



Makefile
----------

PARENT=parent.exe
CHILD=child.exe

all: ${PARENT} ${CHILD}
FC=mpif90

${PARENT}: parent.f90
${FC} parent.f90 -o ${PARENT}

${CHILD}: child.f90
${FC} child.f90 -o ${CHILD}

***@ratty:~/Fortran/MPI/spawn$ mpirun -np 2 ./child.exe
child 0/ 2
child **/
null
child 1/ 2
child **/
null
pi is approximately: 3.1415926535897229 Error is:
0.0000000000000702 wall clock time= 5.343secs on 2PEs

***@ratty:~/Fortran/MPI/spawn$ mpirun -np 2 ./parent.exe
parent spawning: 0/ 2
parent spawning: 1/ 2

{hangs for at least 5 mins...}
David Cronk
2007-11-05 19:51:17 UTC
Permalink
Just looking very quickly, I see you did not initialize your info
argument. Try replacing info with MPI_INFO_NULL.

Hope this helps.

Dave.
Post by m***@manchester.ac.uk
Folks, I'm stuck as to why my simple Fortran example below doesn't
work! My understanding is that parent.exe should spawn some './
child.exe' but it appears to hang at MPI_COMM_SPAWN. This is using
mpich2 with Intel ifort compiler.
All enlightenment welcome ;)
parent.f90
-------------
program main
use mpi
implicit none
integer myid, numprocs, rc, ierr
integer, PARAMETER:: maxpes=2
character*(*), PARAMETER:: cmd='./child.exe'
character*(*), PARAMETER:: args(1)=''
integer:: info
integer:: root=0
integer:: INTERCOMM
integer:: errCodes(1:maxpes)
! --- initialise top level
call MPI_INIT( ierr )
call MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
call MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ierr )
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
write(*,'("parent spawning: ",I3,"/",I3)') myid, numprocs
! each(?) to spawn up to maxpes more MPI processes running mpi program
"CMD ARGS[*]" and MPI_INFO 'info'
call MPI_COMM_SPAWN(cmd, args, maxpes, info, root, MPI_COMM_WORLD,
INTERCOMM, errCodes, ierr)
write(*,'("parent done: ",I3,"/",I3)') myid, numprocs
! --- quit
call MPI_FINALIZE(rc)
stop
end
child.f90
----------
program main
use mpi
implicit none
double precision PI25DT
parameter (PI25DT = 3.141592653589793238462643d0)
double precision mypi, pi, h, sum, x, f, a, tick1, tick2
integer n, myid, numprocs, i, rc, ierr
integer:: MPI_PARENT
parameter(n=700000000)
! function to integrate
f(a) = 4.d0 / (1.d0 + a*a)
call MPI_INIT( ierr )
call MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
call MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ierr )
call MPI_COMM_GET_PARENT(MPI_PARENT, ierr)
write(*,'("child ",I2,"/",I2,1x,6X)') myid, numprocs, MPI_PARENT
if (MPI_PARENT == MPI_COMM_NULL) write(*,'(A)') 'null'
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
tick1 = MPI_Wtime()
h = 1.0d0/n
sum = 0.0d0
do i = myid+1, n, numprocs
x = h * (dble(i) - 0.5d0)
sum = sum + f(x)
enddo
mypi = h * sum
! collect all the partial sums
call MPI_REDUCE(mypi,pi,1,MPI_DOUBLE_PRECISION,MPI_SUM,0, &
MPI_COMM_WORLD,ierr)
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
tick2 = MPI_Wtime()
! node 0 prints the answer.
if (myid .eq. 0) then
write(6, 97) pi, abs(pi - PI25DT), (tick2-tick1), numprocs
97 format(' pi is approximately: ', F18.16, &
' Error is: ', F18.16, ' wall clock time=',F10.3,'secs
on ', I3,'PEs')
endif
!!! end do
call MPI_FINALIZE(rc)
end
Makefile
----------
PARENT=parent.exe
CHILD=child.exe
all: ${PARENT} ${CHILD}
FC=mpif90
${PARENT}: parent.f90
${FC} parent.f90 -o ${PARENT}
${CHILD}: child.f90
${FC} child.f90 -o ${CHILD}
child 0/ 2
child **/
null
child 1/ 2
child **/
null
0.0000000000000702 wall clock time= 5.343secs on 2PEs
parent spawning: 0/ 2
parent spawning: 1/ 2
{hangs for at least 5 mins...}
--
Dr. David Cronk, Ph.D. phone: (865) 974-3735
Research Director fax: (865) 974-8296
Innovative Computing Lab http://www.cs.utk.edu/~cronk
University of Tennessee, Knoxville
m***@manchester.ac.uk
2007-11-05 20:19:01 UTC
Permalink
Post by David Cronk
Just looking very quickly, I see you did not initialize your info
argument. Try replacing info with MPI_INFO_NULL.
Hope this helps.
Dave.
Unfort it still hangs :(

Thanks, Michael
m***@manchester.ac.uk
2007-11-05 22:07:01 UTC
Permalink
Post by m***@manchester.ac.uk
Post by David Cronk
Just looking very quickly, I see you did not initialize your info
argument. Try replacing info with MPI_INFO_NULL.
Hope this helps.
Dave.
Unfort it still hangs :(
Thanks, Michael
Changing the 'args' to MPI_ARGV_NULL works -- thanks to Rajeev Thakur
at anl.gov for pointing that out

M

Loading...