Elbert
2008-03-20 01:43:55 UTC
Hi,
I have been trying a textbook example of solving the Poisson equation
using the Jacobi method with domain decomposition. The method works
fine when the communication and computation are independent, i.e. no
overlap. However I am having problems with implementing the
'overlap2d' subroutine as given in the below program.
The problem is with MPI_WAITALL, that doesn't return the appropriate
status(MPI_TAG,idx), which should be the tags given to the IRECV/ISEND
functions. You can run the attached program (tried with -np 2) and
discover the problem. Kindly let me know your thoughts on getting the
correct status.
Thanks,
Elbert
-------------jacobi_2d1.f90-----------------
PROGRAM jacobi
! An MPI parallel implementation of Jacobi method
! for a Poisson problem \/^2u=f(x,y)
IMPLICIT NONE
INCLUDE "mpif.h"
INTEGER,PARAMETER::maxn=1000,ndim=2
INTEGER::nx,sx,ex,sy,ey,i,ierr,myid,numprocs,myrank,comm2d,maxit,nbrbottom,nbrtop,s1,dims(2),ny
INTEGER::nbrleft,nbrright,coords(2),stride
DOUBLE PRECISION::a(maxn,maxn),b(maxn,maxn),f(maxn,maxn)
DOUBLE PRECISION::diff,diffw,diffnorm,t1,t2,buffer(2*maxn)
LOGICAL :: periods(2)
maxit=1000
periods(1)=.false.
periods(2)=.false.
CALL MPI_INIT(ierr)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD,myrank,ierr)
!IF (myrank == 0) THEN
! WRITE(*,*)"Enter nx:"
! READ(*,*) nx
! nx=100
!END IF
!CALL MPI_BCAST(nx,1,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)
nx=999
ny=nx
! new communicator with decomposed processes
dims(1)=0 !0 so that MPI_Dims_create supplies a suitable value
dims(2)=0
CALL MPI_DIMS_CREATE(numprocs,ndim,dims,ierr)
CALL
MPI_CART_CREATE(MPI_COMM_WORLD,ndim,dims,periods,.true.,comm2d,ierr)
CALL MPI_COMM_RANK(comm2d,myid,ierr)
CALL MPI_CART_SHIFT(comm2d,1,1,nbrleft,nbrright,ierr) !bug in
direction
CALL MPI_CART_SHIFT(comm2d,0,1,nbrbottom,nbrtop,ierr)
!decomposition of array 'nx' and output limits for 'myid' process
CALL MPI_CART_GET(comm2d,ndim,dims,periods,coords,ierr)
if(myid==0) WRITE(*,*) "Dimensions of the
decomposition:",dims(1),",",dims(2)
CALL MPI_BARRIER(comm2d,ierr)
WRITE(*,*) "I am",myid,"with
coordinates","(",coords(1),",",coords(2),")."
CALL MPE_DECOMP1D(nx,dims(1),coords(1),sy,ey)
CALL MPE_DECOMP1D(ny,dims(2),coords(2),sx,ex)
CALL twodinit(a,b,f,nx,ny,sx,ex,sy,ey)
CALL MPI_TYPE_VECTOR(ey+1-(sy-1),1,ex+1-
(sx-1),MPI_DOUBLE_PRECISION,stride,ierr)
CALL MPI_TYPE_COMMIT(stride,ierr)
!CALL MPI_BARRIER(comm2d,ierr)
CALL MPI_BUFFER_ATTACH(buffer,2*maxn*8,ierr) !allocating buffer
space
t1=MPI_WTIME()
DO i=1,maxit
CALL MPI_BARRIER(comm2d,ierr)
write(*,*) "myid=",myid,"it=",i
CALL
overlap2d(a,b,f,sx,ex,sy,ey,nx,ny,stride,comm2d,nbrbottom,nbrtop,nbrleft,nbrright)
CALL MPI_BARRIER(comm2d,ierr)
write(*,*) "myid=",myid,"it=",i
CALL
overlap2d(b,a,f,sx,ex,sy,ey,nx,ny,stride,comm2d,nbrbottom,nbrtop,nbrleft,nbrright)
! CALL
exchng2d1(a,sx,ex,sy,ey,stride,comm2d,nbrbottom,nbrtop,nbrleft,nbrright) !
communication
! CALL sweep2d(a,f,nx,ny,sx,ex,sy,ey,b) !
computation
! CALL
exchng2d1(b,sx,ex,sy,ey,stride,comm2d,nbrbottom,nbrtop,nbrleft,nbrright)
! CALL sweep2d(b,f,nx,ny,sx,ex,sy,ey,a)
diffw=diff(a,b,sx,ex,sy,ey)
CALL MPI_ALLREDUCE(diffw,diffnorm,
1,MPI_DOUBLE_PRECISION,MPI_SUM,comm2d,ierr)
IF(diffnorm < 1.0e-5) EXIT
IF(myid==0 .and. mod(REAL(i),10.0)==0.0) WRITE(*,*)
"i=",i,"sq_error=",diffnorm
END DO
t2=MPI_WTIME()
IF(myid==0) WRITE(*,*) "Converged at ",2*i,"iterations in",t2-
t1,"seconds"
!CALL MPI_BUFFER_DETACH(buffer,2*maxn*8,ierr)
CALL MPI_TYPE_FREE(stride,ierr)
CALL MPI_COMM_FREE(comm2d,ierr)
CALL MPI_FINALIZE(ierr)
END PROGRAM jacobi
SUBROUTINE
overlap2d(a,b,f,sx,ex,sy,ey,nx,ny,stride,comm2d,nbrbottom,nbrtop,nbrleft,nbrright)
IMPLICIT NONE
INCLUDE "mpif.h"
INTEGER,INTENT(IN)::comm2d,nbrbottom,nbrtop,nbrleft,nbrright,sx,ex,sy,ey,stride,nx,ny
DOUBLE PRECISION, INTENT(INOUT) :: a(sx-1:ex+1,sy-1:ey+1)
DOUBLE PRECISION, INTENT(IN) :: f(sx-1:ex+1,sy-1:ey+1)
DOUBLE PRECISION, INTENT(OUT) :: b(sx-1:ex+1,sy-1:ey+1)
INTEGER::status(MPI_STATUS_SIZE,8),ierr,req(8),dnx,i,j,k,idx
DOUBLE PRECISION::dx,dy
dnx=ex-sx+1
!1.non-blocking receives
CALL MPI_IRECV(a(sx,sy-1),dnx,MPI_DOUBLE_PRECISION,nbrbottom,
0,comm2d,req(1),ierr)
CALL MPI_IRECV(a(sx,ey+1),dnx,MPI_DOUBLE_PRECISION,nbrtop,
1,comm2d,req(2),ierr)
CALL MPI_IRECV(a(sx-1,sy),1,stride,nbrleft,2,comm2d,req(3),ierr)
CALL MPI_IRECV(a(ex+1,sy),1,stride,nbrright,3,comm2d,req(4),ierr)
!2.non-blocking sends
CALL MPI_ISEND(a(sx,ey),dnx,MPI_DOUBLE_PRECISION,nbrtop,
0,comm2d,req(5),ierr)
CALL MPI_ISEND(a(sx,sy),dnx,MPI_DOUBLE_PRECISION,nbrbottom,
1,comm2d,req(6),ierr)
CALL MPI_ISEND(a(ex,sy),1,stride,nbrright,2,comm2d,req(7),ierr)
CALL MPI_ISEND(a(sx,sy),1,stride,nbrleft,3,comm2d,req(8),ierr)
!CALL MPI_WAITALL(8,req,status,ierr)
!3.compute on local data
dx=1.0d0/DBLE(nx+1)
dy=1.0d0/DBLE(ny+1)
DO j=sy+1,ey-1
DO i=sx+1,ex-1
b(i,j)=(0.5/(dx**2+dy**2))*(dy**2*(a(i-1,j)+a(i+1,j))
+dx**2*(a(i,j-1)+a(i,j+1))-(dx*dy)**2*f(i,j))
END DO
END DO
!4.When data is received compute on corresponding boundaries
DO k=1,8
CALL MPI_WAITANY(8,req,idx,status,ierr)
WRITE(*,*) k,status(MPI_TAG,idx),idx,ierr
SELECT CASE (status(MPI_TAG,idx))
CASE(1)
j=sy
DO i=sx,ex
b(i,j)=(0.5/(dx**2+dy**2))*(dy**2*(a(i-1,j)+a(i+1,j))
+dx**2*(a(i,j-1)+a(i,j+1))-(dx*dy)**2*f(i,j))
END DO
CASE(2)
j=ey
DO i=sx,ex
b(i,j)=(0.5/(dx**2+dy**2))*(dy**2*(a(i-1,j)+a(i+1,j))
+dx**2*(a(i,j-1)+a(i,j+1))-(dx*dy)**2*f(i,j))
END DO
CASE(3)
i=sx
DO j=sy,ey
b(i,j)=(0.5/(dx**2+dy**2))*(dy**2*(a(i-1,j)+a(i+1,j))
+dx**2*(a(i,j-1)+a(i,j+1))-(dx*dy)**2*f(i,j))
END DO
CASE(4)
i=ex
DO j=sy,ey
b(i,j)=(0.5/(dx**2+dy**2))*(dy**2*(a(i-1,j)+a(i+1,j))
+dx**2*(a(i,j-1)+a(i,j+1))-(dx*dy)**2*f(i,j))
END DO
END SELECT
END DO
RETURN
END SUBROUTINE overlap2d
DOUBLE PRECISION FUNCTION diff(a,b,sx,ex,sy,ey)
IMPLICIT NONE
INTEGER,INTENT(IN)::sx,ex,sy,ey
DOUBLE PRECISION, INTENT(IN) :: a(sx-1:ex+1,sy-1:ey+1),b(sx-1:ex
+1,sy-1:ey+1)
INTEGER::i,j
DOUBLE PRECISION::sum1
sum1=0.0
DO j=sy,ey
DO i=sx,ex
sum1=sum1+(a(i,j)-b(i,j))**2
END DO
END DO
diff=sum1
RETURN
END FUNCTION diff
SUBROUTINE twodinit(a,b,f,nx,ny,sx,ex,sy,ey)
IMPLICIT NONE
INTEGER,INTENT(IN):: nx,ny,sx,ex,sy,ey
DOUBLE PRECISION, INTENT(OUT) :: a(sx-1:ex+1,sy-1:ey+1),b(sx-1:ex
+1,sy-1:ey+1),f(sx-1:ex+1,sy-1:ey+1)
INTEGER :: i,j
DO j=sy-1,ey+1
DO i=sx-1,ex+1
a(i,j)=0.0
b(i,j)=0.0
f(i,j)=0.0
END DO
END DO
! BCs
IF(sx==1) THEN
DO j=sy,ey
a(0,j)=1.0
b(0,j)=1.0
END DO
END IF
IF(ex==nx) THEN
DO j=sy,ey
a(nx+1,j)=0.0
b(nx+1,j)=0.0
END DO
END IF
IF(sy==1) THEN
DO i=sx,ex
a(i,0)=0.0
b(i,0)=0.0
END DO
END IF
IF(ey==ny) THEN
DO i=sx,ex
a(i,ny+1)=1.0
b(i,ny+1)=1.0
END DO
END IF
RETURN
END SUBROUTINE twodinit
SUBROUTINE MPE_DECOMP1D(nx,numprocs,myid,s,e)
IMPLICIT NONE
INTEGER,INTENT(OUT)::s,e
INTEGER,INTENT(IN)::nx,numprocs,myid
INTEGER::nlocal,deficit
nlocal=nx/numprocs
s=myid*nlocal+1
deficit=mod(nx,numprocs)
s=s+min(myid,deficit)
IF(myid < deficit) nlocal=nlocal+1
e=s+nlocal-1
IF(e > nx .or. myid == (numprocs-1)) e=nx
RETURN
END SUBROUTINE MPE_DECOMP1D
I have been trying a textbook example of solving the Poisson equation
using the Jacobi method with domain decomposition. The method works
fine when the communication and computation are independent, i.e. no
overlap. However I am having problems with implementing the
'overlap2d' subroutine as given in the below program.
The problem is with MPI_WAITALL, that doesn't return the appropriate
status(MPI_TAG,idx), which should be the tags given to the IRECV/ISEND
functions. You can run the attached program (tried with -np 2) and
discover the problem. Kindly let me know your thoughts on getting the
correct status.
Thanks,
Elbert
-------------jacobi_2d1.f90-----------------
PROGRAM jacobi
! An MPI parallel implementation of Jacobi method
! for a Poisson problem \/^2u=f(x,y)
IMPLICIT NONE
INCLUDE "mpif.h"
INTEGER,PARAMETER::maxn=1000,ndim=2
INTEGER::nx,sx,ex,sy,ey,i,ierr,myid,numprocs,myrank,comm2d,maxit,nbrbottom,nbrtop,s1,dims(2),ny
INTEGER::nbrleft,nbrright,coords(2),stride
DOUBLE PRECISION::a(maxn,maxn),b(maxn,maxn),f(maxn,maxn)
DOUBLE PRECISION::diff,diffw,diffnorm,t1,t2,buffer(2*maxn)
LOGICAL :: periods(2)
maxit=1000
periods(1)=.false.
periods(2)=.false.
CALL MPI_INIT(ierr)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD,myrank,ierr)
!IF (myrank == 0) THEN
! WRITE(*,*)"Enter nx:"
! READ(*,*) nx
! nx=100
!END IF
!CALL MPI_BCAST(nx,1,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)
nx=999
ny=nx
! new communicator with decomposed processes
dims(1)=0 !0 so that MPI_Dims_create supplies a suitable value
dims(2)=0
CALL MPI_DIMS_CREATE(numprocs,ndim,dims,ierr)
CALL
MPI_CART_CREATE(MPI_COMM_WORLD,ndim,dims,periods,.true.,comm2d,ierr)
CALL MPI_COMM_RANK(comm2d,myid,ierr)
CALL MPI_CART_SHIFT(comm2d,1,1,nbrleft,nbrright,ierr) !bug in
direction
CALL MPI_CART_SHIFT(comm2d,0,1,nbrbottom,nbrtop,ierr)
!decomposition of array 'nx' and output limits for 'myid' process
CALL MPI_CART_GET(comm2d,ndim,dims,periods,coords,ierr)
if(myid==0) WRITE(*,*) "Dimensions of the
decomposition:",dims(1),",",dims(2)
CALL MPI_BARRIER(comm2d,ierr)
WRITE(*,*) "I am",myid,"with
coordinates","(",coords(1),",",coords(2),")."
CALL MPE_DECOMP1D(nx,dims(1),coords(1),sy,ey)
CALL MPE_DECOMP1D(ny,dims(2),coords(2),sx,ex)
CALL twodinit(a,b,f,nx,ny,sx,ex,sy,ey)
CALL MPI_TYPE_VECTOR(ey+1-(sy-1),1,ex+1-
(sx-1),MPI_DOUBLE_PRECISION,stride,ierr)
CALL MPI_TYPE_COMMIT(stride,ierr)
!CALL MPI_BARRIER(comm2d,ierr)
CALL MPI_BUFFER_ATTACH(buffer,2*maxn*8,ierr) !allocating buffer
space
t1=MPI_WTIME()
DO i=1,maxit
CALL MPI_BARRIER(comm2d,ierr)
write(*,*) "myid=",myid,"it=",i
CALL
overlap2d(a,b,f,sx,ex,sy,ey,nx,ny,stride,comm2d,nbrbottom,nbrtop,nbrleft,nbrright)
CALL MPI_BARRIER(comm2d,ierr)
write(*,*) "myid=",myid,"it=",i
CALL
overlap2d(b,a,f,sx,ex,sy,ey,nx,ny,stride,comm2d,nbrbottom,nbrtop,nbrleft,nbrright)
! CALL
exchng2d1(a,sx,ex,sy,ey,stride,comm2d,nbrbottom,nbrtop,nbrleft,nbrright) !
communication
! CALL sweep2d(a,f,nx,ny,sx,ex,sy,ey,b) !
computation
! CALL
exchng2d1(b,sx,ex,sy,ey,stride,comm2d,nbrbottom,nbrtop,nbrleft,nbrright)
! CALL sweep2d(b,f,nx,ny,sx,ex,sy,ey,a)
diffw=diff(a,b,sx,ex,sy,ey)
CALL MPI_ALLREDUCE(diffw,diffnorm,
1,MPI_DOUBLE_PRECISION,MPI_SUM,comm2d,ierr)
IF(diffnorm < 1.0e-5) EXIT
IF(myid==0 .and. mod(REAL(i),10.0)==0.0) WRITE(*,*)
"i=",i,"sq_error=",diffnorm
END DO
t2=MPI_WTIME()
IF(myid==0) WRITE(*,*) "Converged at ",2*i,"iterations in",t2-
t1,"seconds"
!CALL MPI_BUFFER_DETACH(buffer,2*maxn*8,ierr)
CALL MPI_TYPE_FREE(stride,ierr)
CALL MPI_COMM_FREE(comm2d,ierr)
CALL MPI_FINALIZE(ierr)
END PROGRAM jacobi
SUBROUTINE
overlap2d(a,b,f,sx,ex,sy,ey,nx,ny,stride,comm2d,nbrbottom,nbrtop,nbrleft,nbrright)
IMPLICIT NONE
INCLUDE "mpif.h"
INTEGER,INTENT(IN)::comm2d,nbrbottom,nbrtop,nbrleft,nbrright,sx,ex,sy,ey,stride,nx,ny
DOUBLE PRECISION, INTENT(INOUT) :: a(sx-1:ex+1,sy-1:ey+1)
DOUBLE PRECISION, INTENT(IN) :: f(sx-1:ex+1,sy-1:ey+1)
DOUBLE PRECISION, INTENT(OUT) :: b(sx-1:ex+1,sy-1:ey+1)
INTEGER::status(MPI_STATUS_SIZE,8),ierr,req(8),dnx,i,j,k,idx
DOUBLE PRECISION::dx,dy
dnx=ex-sx+1
!1.non-blocking receives
CALL MPI_IRECV(a(sx,sy-1),dnx,MPI_DOUBLE_PRECISION,nbrbottom,
0,comm2d,req(1),ierr)
CALL MPI_IRECV(a(sx,ey+1),dnx,MPI_DOUBLE_PRECISION,nbrtop,
1,comm2d,req(2),ierr)
CALL MPI_IRECV(a(sx-1,sy),1,stride,nbrleft,2,comm2d,req(3),ierr)
CALL MPI_IRECV(a(ex+1,sy),1,stride,nbrright,3,comm2d,req(4),ierr)
!2.non-blocking sends
CALL MPI_ISEND(a(sx,ey),dnx,MPI_DOUBLE_PRECISION,nbrtop,
0,comm2d,req(5),ierr)
CALL MPI_ISEND(a(sx,sy),dnx,MPI_DOUBLE_PRECISION,nbrbottom,
1,comm2d,req(6),ierr)
CALL MPI_ISEND(a(ex,sy),1,stride,nbrright,2,comm2d,req(7),ierr)
CALL MPI_ISEND(a(sx,sy),1,stride,nbrleft,3,comm2d,req(8),ierr)
!CALL MPI_WAITALL(8,req,status,ierr)
!3.compute on local data
dx=1.0d0/DBLE(nx+1)
dy=1.0d0/DBLE(ny+1)
DO j=sy+1,ey-1
DO i=sx+1,ex-1
b(i,j)=(0.5/(dx**2+dy**2))*(dy**2*(a(i-1,j)+a(i+1,j))
+dx**2*(a(i,j-1)+a(i,j+1))-(dx*dy)**2*f(i,j))
END DO
END DO
!4.When data is received compute on corresponding boundaries
DO k=1,8
CALL MPI_WAITANY(8,req,idx,status,ierr)
WRITE(*,*) k,status(MPI_TAG,idx),idx,ierr
SELECT CASE (status(MPI_TAG,idx))
CASE(1)
j=sy
DO i=sx,ex
b(i,j)=(0.5/(dx**2+dy**2))*(dy**2*(a(i-1,j)+a(i+1,j))
+dx**2*(a(i,j-1)+a(i,j+1))-(dx*dy)**2*f(i,j))
END DO
CASE(2)
j=ey
DO i=sx,ex
b(i,j)=(0.5/(dx**2+dy**2))*(dy**2*(a(i-1,j)+a(i+1,j))
+dx**2*(a(i,j-1)+a(i,j+1))-(dx*dy)**2*f(i,j))
END DO
CASE(3)
i=sx
DO j=sy,ey
b(i,j)=(0.5/(dx**2+dy**2))*(dy**2*(a(i-1,j)+a(i+1,j))
+dx**2*(a(i,j-1)+a(i,j+1))-(dx*dy)**2*f(i,j))
END DO
CASE(4)
i=ex
DO j=sy,ey
b(i,j)=(0.5/(dx**2+dy**2))*(dy**2*(a(i-1,j)+a(i+1,j))
+dx**2*(a(i,j-1)+a(i,j+1))-(dx*dy)**2*f(i,j))
END DO
END SELECT
END DO
RETURN
END SUBROUTINE overlap2d
DOUBLE PRECISION FUNCTION diff(a,b,sx,ex,sy,ey)
IMPLICIT NONE
INTEGER,INTENT(IN)::sx,ex,sy,ey
DOUBLE PRECISION, INTENT(IN) :: a(sx-1:ex+1,sy-1:ey+1),b(sx-1:ex
+1,sy-1:ey+1)
INTEGER::i,j
DOUBLE PRECISION::sum1
sum1=0.0
DO j=sy,ey
DO i=sx,ex
sum1=sum1+(a(i,j)-b(i,j))**2
END DO
END DO
diff=sum1
RETURN
END FUNCTION diff
SUBROUTINE twodinit(a,b,f,nx,ny,sx,ex,sy,ey)
IMPLICIT NONE
INTEGER,INTENT(IN):: nx,ny,sx,ex,sy,ey
DOUBLE PRECISION, INTENT(OUT) :: a(sx-1:ex+1,sy-1:ey+1),b(sx-1:ex
+1,sy-1:ey+1),f(sx-1:ex+1,sy-1:ey+1)
INTEGER :: i,j
DO j=sy-1,ey+1
DO i=sx-1,ex+1
a(i,j)=0.0
b(i,j)=0.0
f(i,j)=0.0
END DO
END DO
! BCs
IF(sx==1) THEN
DO j=sy,ey
a(0,j)=1.0
b(0,j)=1.0
END DO
END IF
IF(ex==nx) THEN
DO j=sy,ey
a(nx+1,j)=0.0
b(nx+1,j)=0.0
END DO
END IF
IF(sy==1) THEN
DO i=sx,ex
a(i,0)=0.0
b(i,0)=0.0
END DO
END IF
IF(ey==ny) THEN
DO i=sx,ex
a(i,ny+1)=1.0
b(i,ny+1)=1.0
END DO
END IF
RETURN
END SUBROUTINE twodinit
SUBROUTINE MPE_DECOMP1D(nx,numprocs,myid,s,e)
IMPLICIT NONE
INTEGER,INTENT(OUT)::s,e
INTEGER,INTENT(IN)::nx,numprocs,myid
INTEGER::nlocal,deficit
nlocal=nx/numprocs
s=myid*nlocal+1
deficit=mod(nx,numprocs)
s=s+min(myid,deficit)
IF(myid < deficit) nlocal=nlocal+1
e=s+nlocal-1
IF(e > nx .or. myid == (numprocs-1)) e=nx
RETURN
END SUBROUTINE MPE_DECOMP1D