rudra
2010-02-04 17:38:28 UTC
Dear Friends,
I am facing a problem with mpi-parallelising my code, the attached
main.f90.
As you can see, there is a part:
1. !Initialize and check system for MPI
2. call MPI_INIT(ierr)
3. call MPI_COMM_RANK(MPI_COMM_WORLD,myid,ierr)
4. call MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr)
5. write(*,*) "node",myid
6. write(*,*) "numprocs",numprocs
7. !---------------------------------------!
8.
9. !----------loop for spin----------------!
10. ! loop 1=>up spin; !
11. ! loop 2=>down spin !
12. !---------------------------------------!
13.
14. lspin: do nsp=1,spn
15.
16. ......
17.
18. ltype: do ityp=1,ntype.
and so on,
I want to run it in 32 proc(4 node with 8proc/node) with the hope that
each node will run a seperate combination of (lspin,ltype)
But as you expected, life never goes as you want. can you plz let me
know where it betrayed me?
I have the full code attached.
the output is:
node 0
numprocs 4
WORKING FOR SPIN UP
Reading POTENTIAL PARAMETERS from POTPAR_A
node 2
numprocs 4
WORKING FOR SPIN UP
Reading POTENTIAL PARAMETERS from POTPAR_A
node 1
numprocs 4
WORKING FOR SPIN UP
Reading POTENTIAL PARAMETERS from POTPAR_A
node 3
numprocs 4
WORKING FOR SPIN UP
Reading POTENTIAL PARAMETERS from POTPAR_A
Reading POTENTIAL PARAMETERS from POTPAR_B
Reading POTENTIAL PARAMETERS from POTPAR_B
Reading POTENTIAL PARAMETERS from POTPAR_B
Reading POTENTIAL PARAMETERS from POTPAR_B
WORKING FOR ATOM 1
INFO_FCC
WORKING FOR ATOM 1
INFO_FCC
WORKING FOR ATOM 1
INFO_FCC
WORKING FOR ATOM 1
but As i told i expected (spn=1,atom=1),(spn=1,atom=2),(spn=2,atom=1),
(spn=2,atom=2) should run in four different node.
How to accomplish this? the full program is 600+ line but still i am
attaching it. hope that is not too irritating.
!-----------------------------------------------!
! DRIVER ROUTINE FOR THE ASR CODE !
! Rudra Banerjee !
!===============================================!
! NOTE !
!1)To run with openmp !
!2)will use max. 8 processor/node !
!===============================================!
! CHANGE LOG !
!1) 9.09.08 Initial developmante complete !
!2) 25.02.09 Parallalization Complete !
!3) 02.12.10 Merged with LMTO !
!4) 02.02.10 improved OP !
!===============================================!
!-----------------------------------------------!
program main
use kinds, only: RDP,i3 !
use parameters !Declaring the parameters
use shared !shared OpenMP variables
use mhop !Recursion
use mmat !Structure matrix generator
use mdos !DOS
use mband !Energy band
use mfermi !Fermi lebel
use mldos !LDOS
use cgreen !Green's function
use mpardos !Magnetic moments
use extras !other routines(util)
use mfile !generate initial files
use mprintinit
use omp_lib
use iso_fortran_env
use mpi
! implicit none
integer:: i,j,k,il,l,ikki,ok
integer::ios,nit=0,nsp,its
integer:: ienrp,iend,ie,iie,istart
integer::ityp,nconv
integer:: osd1,osd2
integer,parameter :: maxclock = 10
integer::tnumber
real(8)::t1,t2,magmm
real(8):: sum1,sum2,sum3,sum4,sum5,sum6,sum7, &
& sum8,sum9,sum10,sum11,sum12,sum13
real(8)::dsum
real(8):: qtot_a,qtot_b,qtot_a1,qtot_b1
real(8)::q,e,ef,e_band
real(8):: xcha,xchb,qold_a,qold_b
real(8),dimension(maxrec,ienum+1)::xa,ya
real(8),dimension(maxrec+1,nfn+1):: xxa,yya
real(8),dimension(ntype)::es, cs, ds, gs, ep, cp, &
& xx, dp, gp, ed, cd, dd, gd
real(8),dimension(ntype)::dels, delp, deld, rs, &
& rp, rd
real(8),dimension(2)::dsqs, dsqp, dsqd, ccs, &
& ccp, ccd, os, op, od
real(8),dimension(ntype,lorbit):: agc, agd, enu_ag, ag_op
real(8),dimension(lorbit)::cpot, enuc, dpot_sq, opc
real(8),dimension(ntype,nfn+1,spn)::sorb,porb,dorb
real(8),dimension(nfn+1)::dos,dos_a,dos_b
real(8),dimension(nfn+1)::dos_adn,dos_bdn,T_dos,dos_aa
real(8),dimension(3)::a
real(8),dimension(ntype,nfn+1,spn)::dos1
real(8),dimension(0:8,spn)::A_MOM,B_MOM
real(8),dimension(3,spn)::ECG_A,ECG_B
character(10)::potin,potout
character(2)::sps
integer::tid,nthrd
real(8):: wstart,wend,wtime
character,parameter :: esc = char(27)
character(8) :: date
character(10) :: time
character(5) :: zone
integer,dimension(8) :: values
character(2),dimension(2)::cz
character(10)::alloy_rad
real(8),dimension(2)::rz
integer,dimension(2)::atz
integer::bit,bit2
character(32) :: sys
character(15) :: nis
character(8)::confA,confB
character(80)::sptyp,reltyp,xctyp,ermsg,brief
character(8) ::DOS_A_UP,DOS_B_UP,DOS_A_DN,DOS_B_DN
logical::lexist_A,lexist_B
! integer::e_a,e_b
!************************************************************
! LMTO variables
integer::d_ialpha,d_ifmt3d,d_itrans,d_jbasdn,d_ldn,d_lmaxw, &
d_ltmax(3),d_mdn,d_mmixat,d_mmixpq,d_nbas,d_nclass,d_ndimin, &
d_ngen,d_nit,d_nitat,d_niter,d_nkdmx,d_nkxyz(3),d_nl,d_nopts, &
d_norder,d_npts,d_nrxc,d_nrxyz(3),d_nsp
double precision::d_as,d_beta,d_dele,d_deltr,d_efermi,d_kap2, &
d_emax,d_emaxc,d_emin,d_eminc,d_eps,d_facvol,d_gamma,d_ommax1(3)&
,d_ommax2(3),d_range,d_rmaxes,d_rmaxs,d_rmaxs2,d_rmines,d_rms2, &
d_rmsdel,d_tolef,d_toleto,d_tolews,d_vmtz,d_wc,d_width,d_kfit
integer::d_ialpha_b,d_ifmt3d_b,d_itrans_b,d_jbasdn_b,d_ldn_b, &
d_lmaxw_b,d_ltmax_b(3),d_mdn_b,d_mmixat_b,d_mmixpq_b,d_nbas_b, &
d_nclass_b,d_ndimin_b,d_ngen_b,d_nit_b,d_nitat_b,d_niter_b, &
d_nkdmx_b,d_nkxyz_b(3),d_nl_b,d_nopts_b,d_norder_b,d_npts_b, &
d_nrxc_b,d_nrxyz_b(3),d_nsp_b
double precision::d_as_b,d_beta_b,d_dele_b,d_deltr_b,d_efermi_b,&
d_kap2_b, d_emax_b,d_emaxc_b,d_emin_b,d_eminc_b,d_eps_b, &
d_facvol_b,d_gamma_b,d_ommax1_b(3),d_ommax2_b(3),d_range_b, &
d_rmaxes_b,d_rmaxs_b,d_rmaxs2_b,d_rmines_b,d_rms2_b,d_rmsdel_b, &
d_tolef_b,d_toleto_b,d_tolews_b,d_vmtz_b,d_wc_b,d_width_b, &
d_kfit_b
!************************************************************
!************************************************************
! MPI variables
integer::ierr,myid,numprocs
!************************************************************
call readinp()
call error_chk(e_a,e_b,alloy_rad)
call printinit(cz,sptyp,reltyp,xctyp,confA,confB,alloy_rad, &
ityp,nsp)
call genfile()
allocate(map(nasite,ntsite))
open(11,file=ASMAP,status='old',IOSTAT=ios, &
& form='unformatted',iomsg=ermsg)
if(ios/=0)then
write(*,*) ios
write(19,*) trim(ermsg)
write(*,*) trim(ermsg)
stop
endif
do i=1,nasite
read(11)(map(i,j),j=1,15)
end do
write(*,*) "Reading ASMAP Complete"
close(11)
write(9,*) "Reading ASMAP Complete"
flush(9)
call cpu_time(t1)
write(*,*) ""
write(*,"('####Entering SCF loop####')")
write(9,*) ""
write(9,"('####Entering SCF loop####')")
!$ wstart = OMP_get_wtime()
lscf: do nit=1,nscf
!----------------------------------------------------------------!
! Just decorating the output !
!----------------------------------------------------------------!
write (*,fmt='(3a)',advance='no') ' ', ESC, '[1m' ! set bold !
write(*,'("Running ASR loop",1x,i3)',advance='no') nit !
write(9,*)" " !
write(9,'("Running ASR loop",1x,i3)') nit !
write (*, '(3a)') ' ', ESC, '[0m' ! restores display defaults. !
!----------------------------------------------------------------!
!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~!
! the lmto library function !
!Function for atom A
call lm_a(oidxdn,opnu,opold,oqnu,oqold,clabl_1,lmx,w_oidn,w_opp, &
w_oqnu,w_oqold,d_ialpha,d_ifmt3d,d_itrans,d_jbasdn,d_ldn, &
d_lmaxw,d_ltmax,d_mdn,d_mmixat,d_mmixpq,d_nbas,d_nclass, &
d_ndimin,d_ngen,d_nit,d_nitat,d_niter,d_nkdmx,d_nkxyz,d_nl, &
d_nopts,d_norder,d_npts,d_nrxc,d_nrxyz,d_nsp,d_as,d_beta,d_dele,&
d_deltr,d_efermi,d_kap2,d_kfit,d_emax,d_emaxc,d_emin,d_eminc, &
d_eps,d_facvol,d_gamma,d_ommax1,d_ommax2,d_range,d_rmaxes, &
d_rmaxs,d_rmaxs2,d_rmines,d_rms2,d_rmsdel,d_tolef,d_toleto, &
d_tolews,d_vmtz,d_wc,d_width,sw13,nit)
!Function for atom B
call lm_b(oidxdn_b,opnu_b,opold_b,oqnu_b,oqold_b,clabl_1_b,lmx_b,
&
w_oidn_b,w_opp_b,w_oqnu_b,w_oqold_b,d_ialpha_b,d_ifmt3d_b, &
d_itrans_b,d_jbasdn_b,d_ldn_b,d_lmaxw_b,d_ltmax_b,d_mdn_b, &
d_mmixat_b,d_mmixpq_b,d_nbas_b,d_nclass_b,d_ndimin_b,d_ngen_b, &
d_nit_b,d_nitat_b,d_niter_b,d_nkdmx_b,d_nkxyz_b,d_nl_b, &
d_nopts_b,d_norder_b,d_npts_b,d_nrxc_b,d_nrxyz_b,d_nsp_b,d_as_b,&
d_beta_b,d_dele_b,d_deltr_b,d_efermi_b,d_kap2_b,d_kfit_b, &
d_emax_b,d_emaxc_b,d_emin_b,d_eminc_b,d_eps_b,d_facvol_b, &
d_gamma_b,d_ommax1_b,d_ommax2_b,d_range_b,d_rmaxes_b,d_rmaxs_b, &
d_rmaxs2_b,d_rmines_b,d_rms2_b,d_rmsdel_b,d_tolef_b,d_toleto_b, &
d_tolews_b,d_vmtz_b,d_wc_b,d_width_b,sw13_b,nit)
!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~!
!Initialize and check system for MPI
call MPI_INIT(ierr)
call MPI_COMM_RANK(MPI_COMM_WORLD,myid,ierr)
call MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr)
write(*,*) "node",myid
write(*,*) "numprocs",numprocs
!---------------------------------------!
!----------loop for spin----------------!
! loop 1=>up spin; !
! loop 2=>down spin !
!---------------------------------------!
lspin: do nsp=1,spn
call prspn(nsp)
do i=1,ntype
potin='POTPAR_'//ACHAR(i+16+48)
potout='POT_VAR_'//ACHAR(i+16+48)
open(i,file=potin,status='old',iostat=ios)
if(ios/=0) then
write(*,*) "error in openning file",potin
write(9,*) "error in openning file",potin
stop
endif
write(*,*) "Reading POTENTIAL PARAMETERS from ",potin
read (i,*) es(i),cs(i),ds(i),xx(i),gs(i),xx(i) !\
read (i,*) ep(i),cp(i),dp(i),xx(i),gp(i),xx(i) ! Reading up
spins
read (i,*) ed(i),cd(i),dd(i),xx(i),gd(i),xx(i) !/
if(nsp==2)then
read (i,*) es(i),cs(i),ds(i),xx(i),gs(i),xx(i) !\
read (i,*) ep(i),cp(i),dp(i),xx(i),gp(i),xx(i) ! Reading dn
spins
read (i,*) ed(i),cd(i),dd(i),xx(i),gd(i),xx(i) !/
endif
close(i)
open(i+10,file=potout,status='unknown',position='append')
write(i+10,*) es(i),cs(i),ds(i),xx(i),gs(i),xx(i)
write(i+10,*) ep(i),cp(i),dp(i),xx(i),gp(i),xx(i)
write(i+10,*) ed(i),cd(i),dd(i),xx(i),gd(i),xx(i)
close(i+10)
end do
! Reading POTENTIAL PARAMETERs from LMTO done
! \gamma -->\alpha transformation
do i=1,ntype
ds(i)=dabs(ds(i))
dp(i)=dabs(dp(i))
dd(i)=dabs(dd(i))
dels(i)=ds(i)*ds(i)
delp(i)=dp(i)*dp(i)
deld(i)=dd(i)*dd(i)
rs(i)=1.0d0-(gs(i)-alps)*((cs(i)-es(i))/dels(i))
rp(i)=1.0d0-(gp(i)-alpp)*((cp(i)-ep(i))/delp(i))
rd(i)=1.0d0-(gd(i)-alpd)*((cd(i)-ed(i))/deld(i))
dsqs(i)=rs(i)*dsqrt(dels(i))
dsqp(i)=rp(i)*dsqrt(delp(i))
dsqd(i)=rd(i)*dsqrt(deld(i))
ccs(i)=rs(i)*(cs(i)-es(i))+es(i)
ccp(i)=rp(i)*(cp(i)-ep(i))+ep(i)
ccd(i)=rd(i)*(cd(i)-ed(i))+ed(i)
os(i)=(alps-gs(i))/(dels(i)*rs(i))
op(i)=(alpp-gp(i))/(delp(i)*rp(i))
od(i)=(alpd-gd(i))/(deld(i)*rd(i))
end do
! POTPAR for different orbitals
do i=1,ntype
! POTPAR for S orbital
agc(i,1)=ccs(i)
agd(i,1)=dsqs(i)
enu_ag(i,1)=es(i)
ag_op(i,1)=os(i)
! POTPAR for P orbital
agc(i,2:4)= ccp(i)
agd(i,2:4)= dsqp(i)
enu_ag(i,2:4)=ep(i)
ag_op(i,2:4)= op(i)
! POTPAR for D orbital
agc(i,5:9)= ccd(i)
agd(i,5:9)= dsqd(i)
enu_ag(i,5:9)=ed(i)
ag_op(i,5:9)= od(i)
end do
!---------------------------------------!
! loop on atom type !
! loop 1=>ATOM_A; !
! loop 2=>ATOM_B !
!---------------------------------------!
ltype: do ityp=1,ntype
! write(*,'("WORKING FOR ATOM",1x,i1)') ityp
call pratm(ityp)
write(*,*) STRUCTURE
open(12,file=STRUCTURE,status='old',IOSTAT=ios)
do i=1,nrsite+1
do j=1,lorbit
read(12,*)(srl(j,k,i-1),k=1,lorbit)
end do
end do
close(12)
write(*,*) "Reading STRUCTURE MATRIX complete"
do l=1,lorbit
cpot(l)=agc(ityp,l)
enuc(l)=enu_ag(ityp,l)
dpot_sq(l)=agd(ityp,l)**2
opc(l)=ag_op(ityp,l)
enddo
!GENERATING POTENTIAL PARAMETERS TO BE USED IN RECURSION CODE
!$OMP PARALLEL
!$OMP SECTIONS PRIVATE(l,i)
! AP1
!$OMP SECTION
lap1: do l=1,lorbit
ap1(l)=cpot(l)+dpot_sq(l)*srl(l,l,0)
end do lap1
!= = = = = = = = = = = = = = = = = = = = = =
! AP2
!$OMP SECTION
de=(emax-emin)/dfloat(ienum)
e=emin
k=0
lap2: do i=1,ienum+1
dsum=0.0d0
do l=1,lorbit
sum1=0.0d0;sum2=0.0d0
k=k+1
sum1= (agc(1,l)/(agd(1,l)*agd(1,l)))*x + &
(agc(2,l)/(agd(2,l)*agd(2,l)))*y
sum1=(sum1+srl(l,l,0))*dpot_sq(l)
sum2=x/(agd(1,l)*agd(1,l))+y/(agd(2,l)*agd(2,l))
sum2=sum2*dpot_sq(l)
sum2=(sum2-1.0d0)*e
dsum=sum1-sum2
ap2(k)=dsum
enddo
seed(i)=e
e=e+de
end do lap2
!write(*,*) sum1,agc(2,1)
!= = = = = = = = = = = = = = = = = = = = = =
!AP3
!$OMP SECTION
de=(emax-emin)/dfloat(ienum)
e=emin
k=0
lap3: do i=1,ienum+1
dsum=0.0d0
do l=1,lorbit
sum3=0.0d0
k=k+1
sum3=(e-agc(1,l))/(agd(1,l)*agd(1,l))
sum3=sum3-(e-agc(2,l))/(agd(2,l)*agd(2,l))
sum3=sum3*(y-x)*dpot_sq(l)
ap3(k)=-sum3
end do
seed(i)=e
e=e+de
end do lap3
!= = = = = = = = = = = = = = = = = = = = = =
!AP4
!$OMP SECTION
e=emin
k=0
lap4: do i=1,ienum+1
dsum=0.0d0
do l=1,lorbit
sum4=0.0d0
k=k+1
sum4=(e-agc(1,l))/(agd(1,l)*agd(1,l))
sum4=sum4-(e-agc(2,l))/(agd(2,l)*agd(2,l))
sum4=sum4*dsqrt(x*y)*dpot_sq(l)
ap4(k)=-sum4
end do
e=e+de
end do lap4
!= = = = = = = = = = = = = = = = = = = = = =
!AP5
!$OMP SECTION
ap5=0.0d0
lap5: do i=1,lorbit
ap5(i,i)=dsqrt(dpot_sq(i))
end do lap5
!$OMP END SECTIONS
!$OMP END PARALLEL
!= = = = = = = = = = = = = = = = = = = = = =
!!!$OMP PARALLEL DO schedule(static,1)
do i=1,nrsite
call matmult(ap5,i,srl)
end do
!!!$OMP END PARALLEL DO
!=============================================
!$OMP PARALLEL
!$OMP SECTIONS PRIVATE(l,i)
!AP6
!$OMP SECTION
lap6: do l=1,lorbit
ap6(l)=cpot(l)+dpot_sq(l)*srl(l,l,0)-enuc(l)
end do lap6
!= = = = = = = = = = = = = = = = = = = = = = =
!AP7
!$OMP SECTION
lap7: do l=1,lorbit
sum7=0.0d0
sum7= ((agc(1,l)-enu_ag(1,l))/(agd(1,l)*agd(1,l)))*x + &
((agc(2,l)-enu_ag(2,l))/(agd(2,l)*agd(2,l)))*y
sum7=(sum7+srl(l,l,0))*dpot_sq(l)
ap7(l)=sum7
enddo lap7
!= = = = = = = = = = = = = = = = = = = = = = =
!AP9
!$OMP SECTION
lap9: do l=1,lorbit
sum9=0.0d0
sum9=(agc(1,l)-enu_ag(1,l))/(agd(1,l)*agd(1,l))
sum9=sum9-(agc(2,l)-enu_ag(2,l))/(agd(2,l)*agd(2,l))
sum9=sum9*(y-x)*dpot_sq(l)
ap9(l)=sum9
end do lap9
!= = = = = = = = = = = = = = = = = = = = = = =
!AP8
!$OMP SECTION
lap8: do l=1,lorbit
sum8=0.0d0
sum8=(agc(1,l)-enu_ag(1,l))/(agd(1,l)*agd(1,l))
sum8=sum8-(agc(2,l)-enu_ag(2,l))/(agd(2,l)*agd(2,l))
sum8=sum8*dsqrt(x*y)*dpot_sq(l)
ap8(l)=sum8
enddo lap8
!= = = = = = = = = = = = = = = = = = = = = = =
!AP10
!$OMP SECTION
lap10: do l=1,lorbit
ap10(l)=opc(l)
enddo lap10
!= = = = = = = = = = = = = = = = = = = = = = =
!AP11
!$OMP SECTION
lap11: do l=1,lorbit
ap11(l)=x*(agd(1,l)*ag_op(1,l)*agd(1,l))+ &
y*(agd(2,l)*ag_op(2,l)*agd(2,l))
ap11(l)=ap11(l)*(1.0d0/dpot_sq(l))
enddo lap11
!= = = = = = = = = = = = = = = = = = = = = = =
!AP13
!$OMP SECTION
lap13: do l=1,lorbit
sum13=0.0d0
sum13=agd(1,l)*ag_op(1,l)*agd(1,l)- &
agd(2,l)*ag_op(2,l)*agd(2,l)
sum13=sum13*(y-x)*(1.0d0/dpot_sq(l))
ap13(l)=sum13
end do lap13
!= = = = = = = = = = = = = = = = = = = = = = =
!AP12
!$OMP SECTION
lap12: do l=1,lorbit
ap12(l)=agd(1,l)*ag_op(1,l)*agd(1,l)- &
agd(2,l)*ag_op(2,l)*agd(2,l)
ap12(l)=ap12(l)*dsqrt(x*y)*(1.0d0/dpot_sq(l))
enddo lap12
!$OMP END SECTIONS
!$OMP END PARALLEL
!= = = = = = = = = = = = = = = = = = = = = = =
!$omp parallel
call DoParallel()
!$omp end parallel
call tdos(spec,ityp,nsp,dos, &
sorb,porb,dorb,osd1,osd2)
!***********************************************!
! Writing DOS to the files
!***********************************************!
! DOS of atom A
if(ityp==1.and.nsp==1)then !UP spin for A
open(1,file='DOS_A_UP',status='replace')
do i=1,nfn+1
write(1,*)temp(i),dos(i)
dos_aa(i)=dos(i)
enddo
close(1)
endif
if(ityp==1.and.nsp==2)then !DOWN spin for A
open(1,file='DOS_A_DN',status='replace')
do i=1,nfn+1
write(1,*)temp(i),dos(i)
dos_adn(i)=dos(i)
enddo
close(1)
endif
!DOS of atom B
if(ityp==2.and.nsp==1)then !UP spin for B
open(1,file='DOS_B_UP',status='replace')
do i=1,nfn+1
write(1,*)temp(i),dos(i)
dos_b(i)=dos(i)
enddo
close(1)
endif
if(ityp==2.and.nsp==2)then !DOWN spin for B
open(1,file='DOS_B_DN',status='replace')
do i=1,nfn+1
write(1,*)temp(i),dos(i)
dos_bdn(i)=dos(i)
enddo
close(1)
endif
end do ltype !itereation for atom type ends here
end do lspin !iteration for up/dn spin ends here
call MPI_BARRIER(MPI_COMM_WORLD,ierr)
call MPI_FINALIZE(ierr)
call ldos(dos_aa,dos_b,dos_adn,dos_bdn,temp,T_dos)
call fermi(temp,T_dos,ef)
call band(temp,T_dos,ef,e_band)
call pardos(temp,sorb,porb,dorb,ef,nit,e_band, &
qtot_a,qtot_b,qtot_a1,qtot_b1)
! open(1,file='../LMTO_A/ATOM/CHECK',status='new')
! write(1,*)ityp
! close(1)
!
! open(2,file='../LMTO_B/ATOM/CHECK',status='new')
! write(2,*)ityp
! close(2)
if(nit/=0)then
xcha=dabs(qtot_a-qold_a)
xchb=dabs(qtot_b-qold_b)
if((xcha<0.0001d0).and.(xchb<0.0001d0))then
write(*,*) "CHARGE CONVERGENCE DONE"
write(9,*) "CHARGE CONVERGENCE DONE"
flush(9)
endif
endif
qold_a=qtot_a
qold_b=qtot_b
!-------------------------------------!
!WAITING FOR lmto TO CREATE NEW FILES !
!-------------------------------------!
if (nit==nscf)go to 3000
call lm_end(oidxdn,opnu,opold,oqnu,oqold, &
clabl_1,lmx,w_oidn,w_opp,w_oqnu,w_oqold, &
ialpha,ifmt3d,itrans,jbasdn,ldn,lmaxw, &
ltmax, mdn,mmixat,mmixpq, nbas,d_nclass, ndimin,ngen,nit, &
nitat,niter,nkdmx,nkxyz,d_nl,nopts,norder,npts,nrxc,nrxyz,d_nsp,&
as, beta,dele,deltr, efermi,kap2,kfit,emax,emaxc,emin, &
eminc,eps,facvol,gamma,ommax1,ommax2,range,rmaxes, &
rmaxs,rmaxs2,rmines,rms2,rmsdel,tolef,toleto,tolews,vmtz,wc, &
width,sw13)
!30 inquire(file='CHECK_A',exist=lexist_A)
! if(.not. lexist_A) then
! goto 30
! else
! open(1,file='CHECK_A')
! close(1,status='delete')
! endif
!
!40 inquire(file='CHECK_B',exist=lexist_B)
! if(.not. lexist_B) then
! goto 40
! else
! open(1,file='CHECK_B')
! close(1,status='delete')
! endif
!
write(9,'("Done!")')
end do lscf !SCF loop ends here
3000 continue !Just one more line
deallocate(map)
call cpu_time(t2)
write(*,'(1x,"ELAPSED CPU CLOCK TIME =",1x,f10.4,1x,"s")') t2-t1
!$ wend = OMP_get_wtime()
!$ wtime = wend - wstart
write(*,'("elapsed wall total time =",1x,f10.4,1x,"s")') wtime
write(9,'(1x,"ELAPSED CPU CLOCK TIME =",1x,f10.4,1x,"s")') t2-t1
write(9,'("elapsed wall total time =",1x,f10.4,1x,"s")') wtime
close(9)
close(19)
end program main
!===============================================!
! The parallel part of the code !
!===============================================!
subroutine DoParallel()
use parameters !Declaring the parameters
use shared !Shared OpenMP variables
use mhop !Recursion
use cgreen
implicit none
integer :: il,ienrp,istart,iend,iie
real(8)::e
real(8),dimension(maxrec,ienum+1)::xa,ya
real(8),dimension(maxrec+1,nfn+1):: xxa,yya
real(8),dimension(lorbit) :: p2,p3,p4
!$omp master
write(*,'(1x,"Starting orbital loop",1x,$)')
if(verbose==1) write(9,'(1x,"Starting orbital loop",1x,$)')
!$omp end master
!$omp barrier
!$omp do
orbital: do il=1,lorbit-2,2
write(*,'(i2,$)') il
if (verbose==1) write(9,'(i2,$)') il
xa=0.00;ya=0.00
ienrp=0
e=emin-de
istart=1
iend=lorbit
! do iend=9,63,lorbit
111 continue
iie = iend-istart+1
p2(1:iie) = ap2(istart:iend)
p3(1:iie) = ap3(istart:iend)
p4(1:iie) = ap4(istart:iend)
ienrp=ienrp+1
call hop(il,e,ienrp,map,srl,ap1, &
ap6,ap7,ap8,ap9,ap10,ap11, &
ap12,ap13,p2,p3,p4,xa,ya)
istart=iend+1
iend=iend+lorbit
write(*,'(".",$)')
if (verbose==1) then
write(9,'(".",$)')
flush(9)
endif
if (iend<=npn) goto 111
call fit(xa,ya,seed,xxa,yya,temp)
call spectral(il,xxa,yya,temp,spec)
end do orbital
!$omp end do
!$omp master
write(*,'("done")')
!$omp end master
end subroutine DoParallel
I am facing a problem with mpi-parallelising my code, the attached
main.f90.
As you can see, there is a part:
1. !Initialize and check system for MPI
2. call MPI_INIT(ierr)
3. call MPI_COMM_RANK(MPI_COMM_WORLD,myid,ierr)
4. call MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr)
5. write(*,*) "node",myid
6. write(*,*) "numprocs",numprocs
7. !---------------------------------------!
8.
9. !----------loop for spin----------------!
10. ! loop 1=>up spin; !
11. ! loop 2=>down spin !
12. !---------------------------------------!
13.
14. lspin: do nsp=1,spn
15.
16. ......
17.
18. ltype: do ityp=1,ntype.
and so on,
I want to run it in 32 proc(4 node with 8proc/node) with the hope that
each node will run a seperate combination of (lspin,ltype)
But as you expected, life never goes as you want. can you plz let me
know where it betrayed me?
I have the full code attached.
the output is:
node 0
numprocs 4
WORKING FOR SPIN UP
Reading POTENTIAL PARAMETERS from POTPAR_A
node 2
numprocs 4
WORKING FOR SPIN UP
Reading POTENTIAL PARAMETERS from POTPAR_A
node 1
numprocs 4
WORKING FOR SPIN UP
Reading POTENTIAL PARAMETERS from POTPAR_A
node 3
numprocs 4
WORKING FOR SPIN UP
Reading POTENTIAL PARAMETERS from POTPAR_A
Reading POTENTIAL PARAMETERS from POTPAR_B
Reading POTENTIAL PARAMETERS from POTPAR_B
Reading POTENTIAL PARAMETERS from POTPAR_B
Reading POTENTIAL PARAMETERS from POTPAR_B
WORKING FOR ATOM 1
INFO_FCC
WORKING FOR ATOM 1
INFO_FCC
WORKING FOR ATOM 1
INFO_FCC
WORKING FOR ATOM 1
but As i told i expected (spn=1,atom=1),(spn=1,atom=2),(spn=2,atom=1),
(spn=2,atom=2) should run in four different node.
How to accomplish this? the full program is 600+ line but still i am
attaching it. hope that is not too irritating.
!-----------------------------------------------!
! DRIVER ROUTINE FOR THE ASR CODE !
! Rudra Banerjee !
!===============================================!
! NOTE !
!1)To run with openmp !
!2)will use max. 8 processor/node !
!===============================================!
! CHANGE LOG !
!1) 9.09.08 Initial developmante complete !
!2) 25.02.09 Parallalization Complete !
!3) 02.12.10 Merged with LMTO !
!4) 02.02.10 improved OP !
!===============================================!
!-----------------------------------------------!
program main
use kinds, only: RDP,i3 !
use parameters !Declaring the parameters
use shared !shared OpenMP variables
use mhop !Recursion
use mmat !Structure matrix generator
use mdos !DOS
use mband !Energy band
use mfermi !Fermi lebel
use mldos !LDOS
use cgreen !Green's function
use mpardos !Magnetic moments
use extras !other routines(util)
use mfile !generate initial files
use mprintinit
use omp_lib
use iso_fortran_env
use mpi
! implicit none
integer:: i,j,k,il,l,ikki,ok
integer::ios,nit=0,nsp,its
integer:: ienrp,iend,ie,iie,istart
integer::ityp,nconv
integer:: osd1,osd2
integer,parameter :: maxclock = 10
integer::tnumber
real(8)::t1,t2,magmm
real(8):: sum1,sum2,sum3,sum4,sum5,sum6,sum7, &
& sum8,sum9,sum10,sum11,sum12,sum13
real(8)::dsum
real(8):: qtot_a,qtot_b,qtot_a1,qtot_b1
real(8)::q,e,ef,e_band
real(8):: xcha,xchb,qold_a,qold_b
real(8),dimension(maxrec,ienum+1)::xa,ya
real(8),dimension(maxrec+1,nfn+1):: xxa,yya
real(8),dimension(ntype)::es, cs, ds, gs, ep, cp, &
& xx, dp, gp, ed, cd, dd, gd
real(8),dimension(ntype)::dels, delp, deld, rs, &
& rp, rd
real(8),dimension(2)::dsqs, dsqp, dsqd, ccs, &
& ccp, ccd, os, op, od
real(8),dimension(ntype,lorbit):: agc, agd, enu_ag, ag_op
real(8),dimension(lorbit)::cpot, enuc, dpot_sq, opc
real(8),dimension(ntype,nfn+1,spn)::sorb,porb,dorb
real(8),dimension(nfn+1)::dos,dos_a,dos_b
real(8),dimension(nfn+1)::dos_adn,dos_bdn,T_dos,dos_aa
real(8),dimension(3)::a
real(8),dimension(ntype,nfn+1,spn)::dos1
real(8),dimension(0:8,spn)::A_MOM,B_MOM
real(8),dimension(3,spn)::ECG_A,ECG_B
character(10)::potin,potout
character(2)::sps
integer::tid,nthrd
real(8):: wstart,wend,wtime
character,parameter :: esc = char(27)
character(8) :: date
character(10) :: time
character(5) :: zone
integer,dimension(8) :: values
character(2),dimension(2)::cz
character(10)::alloy_rad
real(8),dimension(2)::rz
integer,dimension(2)::atz
integer::bit,bit2
character(32) :: sys
character(15) :: nis
character(8)::confA,confB
character(80)::sptyp,reltyp,xctyp,ermsg,brief
character(8) ::DOS_A_UP,DOS_B_UP,DOS_A_DN,DOS_B_DN
logical::lexist_A,lexist_B
! integer::e_a,e_b
!************************************************************
! LMTO variables
integer::d_ialpha,d_ifmt3d,d_itrans,d_jbasdn,d_ldn,d_lmaxw, &
d_ltmax(3),d_mdn,d_mmixat,d_mmixpq,d_nbas,d_nclass,d_ndimin, &
d_ngen,d_nit,d_nitat,d_niter,d_nkdmx,d_nkxyz(3),d_nl,d_nopts, &
d_norder,d_npts,d_nrxc,d_nrxyz(3),d_nsp
double precision::d_as,d_beta,d_dele,d_deltr,d_efermi,d_kap2, &
d_emax,d_emaxc,d_emin,d_eminc,d_eps,d_facvol,d_gamma,d_ommax1(3)&
,d_ommax2(3),d_range,d_rmaxes,d_rmaxs,d_rmaxs2,d_rmines,d_rms2, &
d_rmsdel,d_tolef,d_toleto,d_tolews,d_vmtz,d_wc,d_width,d_kfit
integer::d_ialpha_b,d_ifmt3d_b,d_itrans_b,d_jbasdn_b,d_ldn_b, &
d_lmaxw_b,d_ltmax_b(3),d_mdn_b,d_mmixat_b,d_mmixpq_b,d_nbas_b, &
d_nclass_b,d_ndimin_b,d_ngen_b,d_nit_b,d_nitat_b,d_niter_b, &
d_nkdmx_b,d_nkxyz_b(3),d_nl_b,d_nopts_b,d_norder_b,d_npts_b, &
d_nrxc_b,d_nrxyz_b(3),d_nsp_b
double precision::d_as_b,d_beta_b,d_dele_b,d_deltr_b,d_efermi_b,&
d_kap2_b, d_emax_b,d_emaxc_b,d_emin_b,d_eminc_b,d_eps_b, &
d_facvol_b,d_gamma_b,d_ommax1_b(3),d_ommax2_b(3),d_range_b, &
d_rmaxes_b,d_rmaxs_b,d_rmaxs2_b,d_rmines_b,d_rms2_b,d_rmsdel_b, &
d_tolef_b,d_toleto_b,d_tolews_b,d_vmtz_b,d_wc_b,d_width_b, &
d_kfit_b
!************************************************************
!************************************************************
! MPI variables
integer::ierr,myid,numprocs
!************************************************************
call readinp()
call error_chk(e_a,e_b,alloy_rad)
call printinit(cz,sptyp,reltyp,xctyp,confA,confB,alloy_rad, &
ityp,nsp)
call genfile()
allocate(map(nasite,ntsite))
open(11,file=ASMAP,status='old',IOSTAT=ios, &
& form='unformatted',iomsg=ermsg)
if(ios/=0)then
write(*,*) ios
write(19,*) trim(ermsg)
write(*,*) trim(ermsg)
stop
endif
do i=1,nasite
read(11)(map(i,j),j=1,15)
end do
write(*,*) "Reading ASMAP Complete"
close(11)
write(9,*) "Reading ASMAP Complete"
flush(9)
call cpu_time(t1)
write(*,*) ""
write(*,"('####Entering SCF loop####')")
write(9,*) ""
write(9,"('####Entering SCF loop####')")
!$ wstart = OMP_get_wtime()
lscf: do nit=1,nscf
!----------------------------------------------------------------!
! Just decorating the output !
!----------------------------------------------------------------!
write (*,fmt='(3a)',advance='no') ' ', ESC, '[1m' ! set bold !
write(*,'("Running ASR loop",1x,i3)',advance='no') nit !
write(9,*)" " !
write(9,'("Running ASR loop",1x,i3)') nit !
write (*, '(3a)') ' ', ESC, '[0m' ! restores display defaults. !
!----------------------------------------------------------------!
!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~!
! the lmto library function !
!Function for atom A
call lm_a(oidxdn,opnu,opold,oqnu,oqold,clabl_1,lmx,w_oidn,w_opp, &
w_oqnu,w_oqold,d_ialpha,d_ifmt3d,d_itrans,d_jbasdn,d_ldn, &
d_lmaxw,d_ltmax,d_mdn,d_mmixat,d_mmixpq,d_nbas,d_nclass, &
d_ndimin,d_ngen,d_nit,d_nitat,d_niter,d_nkdmx,d_nkxyz,d_nl, &
d_nopts,d_norder,d_npts,d_nrxc,d_nrxyz,d_nsp,d_as,d_beta,d_dele,&
d_deltr,d_efermi,d_kap2,d_kfit,d_emax,d_emaxc,d_emin,d_eminc, &
d_eps,d_facvol,d_gamma,d_ommax1,d_ommax2,d_range,d_rmaxes, &
d_rmaxs,d_rmaxs2,d_rmines,d_rms2,d_rmsdel,d_tolef,d_toleto, &
d_tolews,d_vmtz,d_wc,d_width,sw13,nit)
!Function for atom B
call lm_b(oidxdn_b,opnu_b,opold_b,oqnu_b,oqold_b,clabl_1_b,lmx_b,
&
w_oidn_b,w_opp_b,w_oqnu_b,w_oqold_b,d_ialpha_b,d_ifmt3d_b, &
d_itrans_b,d_jbasdn_b,d_ldn_b,d_lmaxw_b,d_ltmax_b,d_mdn_b, &
d_mmixat_b,d_mmixpq_b,d_nbas_b,d_nclass_b,d_ndimin_b,d_ngen_b, &
d_nit_b,d_nitat_b,d_niter_b,d_nkdmx_b,d_nkxyz_b,d_nl_b, &
d_nopts_b,d_norder_b,d_npts_b,d_nrxc_b,d_nrxyz_b,d_nsp_b,d_as_b,&
d_beta_b,d_dele_b,d_deltr_b,d_efermi_b,d_kap2_b,d_kfit_b, &
d_emax_b,d_emaxc_b,d_emin_b,d_eminc_b,d_eps_b,d_facvol_b, &
d_gamma_b,d_ommax1_b,d_ommax2_b,d_range_b,d_rmaxes_b,d_rmaxs_b, &
d_rmaxs2_b,d_rmines_b,d_rms2_b,d_rmsdel_b,d_tolef_b,d_toleto_b, &
d_tolews_b,d_vmtz_b,d_wc_b,d_width_b,sw13_b,nit)
!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~!
!Initialize and check system for MPI
call MPI_INIT(ierr)
call MPI_COMM_RANK(MPI_COMM_WORLD,myid,ierr)
call MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr)
write(*,*) "node",myid
write(*,*) "numprocs",numprocs
!---------------------------------------!
!----------loop for spin----------------!
! loop 1=>up spin; !
! loop 2=>down spin !
!---------------------------------------!
lspin: do nsp=1,spn
call prspn(nsp)
do i=1,ntype
potin='POTPAR_'//ACHAR(i+16+48)
potout='POT_VAR_'//ACHAR(i+16+48)
open(i,file=potin,status='old',iostat=ios)
if(ios/=0) then
write(*,*) "error in openning file",potin
write(9,*) "error in openning file",potin
stop
endif
write(*,*) "Reading POTENTIAL PARAMETERS from ",potin
read (i,*) es(i),cs(i),ds(i),xx(i),gs(i),xx(i) !\
read (i,*) ep(i),cp(i),dp(i),xx(i),gp(i),xx(i) ! Reading up
spins
read (i,*) ed(i),cd(i),dd(i),xx(i),gd(i),xx(i) !/
if(nsp==2)then
read (i,*) es(i),cs(i),ds(i),xx(i),gs(i),xx(i) !\
read (i,*) ep(i),cp(i),dp(i),xx(i),gp(i),xx(i) ! Reading dn
spins
read (i,*) ed(i),cd(i),dd(i),xx(i),gd(i),xx(i) !/
endif
close(i)
open(i+10,file=potout,status='unknown',position='append')
write(i+10,*) es(i),cs(i),ds(i),xx(i),gs(i),xx(i)
write(i+10,*) ep(i),cp(i),dp(i),xx(i),gp(i),xx(i)
write(i+10,*) ed(i),cd(i),dd(i),xx(i),gd(i),xx(i)
close(i+10)
end do
! Reading POTENTIAL PARAMETERs from LMTO done
! \gamma -->\alpha transformation
do i=1,ntype
ds(i)=dabs(ds(i))
dp(i)=dabs(dp(i))
dd(i)=dabs(dd(i))
dels(i)=ds(i)*ds(i)
delp(i)=dp(i)*dp(i)
deld(i)=dd(i)*dd(i)
rs(i)=1.0d0-(gs(i)-alps)*((cs(i)-es(i))/dels(i))
rp(i)=1.0d0-(gp(i)-alpp)*((cp(i)-ep(i))/delp(i))
rd(i)=1.0d0-(gd(i)-alpd)*((cd(i)-ed(i))/deld(i))
dsqs(i)=rs(i)*dsqrt(dels(i))
dsqp(i)=rp(i)*dsqrt(delp(i))
dsqd(i)=rd(i)*dsqrt(deld(i))
ccs(i)=rs(i)*(cs(i)-es(i))+es(i)
ccp(i)=rp(i)*(cp(i)-ep(i))+ep(i)
ccd(i)=rd(i)*(cd(i)-ed(i))+ed(i)
os(i)=(alps-gs(i))/(dels(i)*rs(i))
op(i)=(alpp-gp(i))/(delp(i)*rp(i))
od(i)=(alpd-gd(i))/(deld(i)*rd(i))
end do
! POTPAR for different orbitals
do i=1,ntype
! POTPAR for S orbital
agc(i,1)=ccs(i)
agd(i,1)=dsqs(i)
enu_ag(i,1)=es(i)
ag_op(i,1)=os(i)
! POTPAR for P orbital
agc(i,2:4)= ccp(i)
agd(i,2:4)= dsqp(i)
enu_ag(i,2:4)=ep(i)
ag_op(i,2:4)= op(i)
! POTPAR for D orbital
agc(i,5:9)= ccd(i)
agd(i,5:9)= dsqd(i)
enu_ag(i,5:9)=ed(i)
ag_op(i,5:9)= od(i)
end do
!---------------------------------------!
! loop on atom type !
! loop 1=>ATOM_A; !
! loop 2=>ATOM_B !
!---------------------------------------!
ltype: do ityp=1,ntype
! write(*,'("WORKING FOR ATOM",1x,i1)') ityp
call pratm(ityp)
write(*,*) STRUCTURE
open(12,file=STRUCTURE,status='old',IOSTAT=ios)
do i=1,nrsite+1
do j=1,lorbit
read(12,*)(srl(j,k,i-1),k=1,lorbit)
end do
end do
close(12)
write(*,*) "Reading STRUCTURE MATRIX complete"
do l=1,lorbit
cpot(l)=agc(ityp,l)
enuc(l)=enu_ag(ityp,l)
dpot_sq(l)=agd(ityp,l)**2
opc(l)=ag_op(ityp,l)
enddo
!GENERATING POTENTIAL PARAMETERS TO BE USED IN RECURSION CODE
!$OMP PARALLEL
!$OMP SECTIONS PRIVATE(l,i)
! AP1
!$OMP SECTION
lap1: do l=1,lorbit
ap1(l)=cpot(l)+dpot_sq(l)*srl(l,l,0)
end do lap1
!= = = = = = = = = = = = = = = = = = = = = =
! AP2
!$OMP SECTION
de=(emax-emin)/dfloat(ienum)
e=emin
k=0
lap2: do i=1,ienum+1
dsum=0.0d0
do l=1,lorbit
sum1=0.0d0;sum2=0.0d0
k=k+1
sum1= (agc(1,l)/(agd(1,l)*agd(1,l)))*x + &
(agc(2,l)/(agd(2,l)*agd(2,l)))*y
sum1=(sum1+srl(l,l,0))*dpot_sq(l)
sum2=x/(agd(1,l)*agd(1,l))+y/(agd(2,l)*agd(2,l))
sum2=sum2*dpot_sq(l)
sum2=(sum2-1.0d0)*e
dsum=sum1-sum2
ap2(k)=dsum
enddo
seed(i)=e
e=e+de
end do lap2
!write(*,*) sum1,agc(2,1)
!= = = = = = = = = = = = = = = = = = = = = =
!AP3
!$OMP SECTION
de=(emax-emin)/dfloat(ienum)
e=emin
k=0
lap3: do i=1,ienum+1
dsum=0.0d0
do l=1,lorbit
sum3=0.0d0
k=k+1
sum3=(e-agc(1,l))/(agd(1,l)*agd(1,l))
sum3=sum3-(e-agc(2,l))/(agd(2,l)*agd(2,l))
sum3=sum3*(y-x)*dpot_sq(l)
ap3(k)=-sum3
end do
seed(i)=e
e=e+de
end do lap3
!= = = = = = = = = = = = = = = = = = = = = =
!AP4
!$OMP SECTION
e=emin
k=0
lap4: do i=1,ienum+1
dsum=0.0d0
do l=1,lorbit
sum4=0.0d0
k=k+1
sum4=(e-agc(1,l))/(agd(1,l)*agd(1,l))
sum4=sum4-(e-agc(2,l))/(agd(2,l)*agd(2,l))
sum4=sum4*dsqrt(x*y)*dpot_sq(l)
ap4(k)=-sum4
end do
e=e+de
end do lap4
!= = = = = = = = = = = = = = = = = = = = = =
!AP5
!$OMP SECTION
ap5=0.0d0
lap5: do i=1,lorbit
ap5(i,i)=dsqrt(dpot_sq(i))
end do lap5
!$OMP END SECTIONS
!$OMP END PARALLEL
!= = = = = = = = = = = = = = = = = = = = = =
!!!$OMP PARALLEL DO schedule(static,1)
do i=1,nrsite
call matmult(ap5,i,srl)
end do
!!!$OMP END PARALLEL DO
!=============================================
!$OMP PARALLEL
!$OMP SECTIONS PRIVATE(l,i)
!AP6
!$OMP SECTION
lap6: do l=1,lorbit
ap6(l)=cpot(l)+dpot_sq(l)*srl(l,l,0)-enuc(l)
end do lap6
!= = = = = = = = = = = = = = = = = = = = = = =
!AP7
!$OMP SECTION
lap7: do l=1,lorbit
sum7=0.0d0
sum7= ((agc(1,l)-enu_ag(1,l))/(agd(1,l)*agd(1,l)))*x + &
((agc(2,l)-enu_ag(2,l))/(agd(2,l)*agd(2,l)))*y
sum7=(sum7+srl(l,l,0))*dpot_sq(l)
ap7(l)=sum7
enddo lap7
!= = = = = = = = = = = = = = = = = = = = = = =
!AP9
!$OMP SECTION
lap9: do l=1,lorbit
sum9=0.0d0
sum9=(agc(1,l)-enu_ag(1,l))/(agd(1,l)*agd(1,l))
sum9=sum9-(agc(2,l)-enu_ag(2,l))/(agd(2,l)*agd(2,l))
sum9=sum9*(y-x)*dpot_sq(l)
ap9(l)=sum9
end do lap9
!= = = = = = = = = = = = = = = = = = = = = = =
!AP8
!$OMP SECTION
lap8: do l=1,lorbit
sum8=0.0d0
sum8=(agc(1,l)-enu_ag(1,l))/(agd(1,l)*agd(1,l))
sum8=sum8-(agc(2,l)-enu_ag(2,l))/(agd(2,l)*agd(2,l))
sum8=sum8*dsqrt(x*y)*dpot_sq(l)
ap8(l)=sum8
enddo lap8
!= = = = = = = = = = = = = = = = = = = = = = =
!AP10
!$OMP SECTION
lap10: do l=1,lorbit
ap10(l)=opc(l)
enddo lap10
!= = = = = = = = = = = = = = = = = = = = = = =
!AP11
!$OMP SECTION
lap11: do l=1,lorbit
ap11(l)=x*(agd(1,l)*ag_op(1,l)*agd(1,l))+ &
y*(agd(2,l)*ag_op(2,l)*agd(2,l))
ap11(l)=ap11(l)*(1.0d0/dpot_sq(l))
enddo lap11
!= = = = = = = = = = = = = = = = = = = = = = =
!AP13
!$OMP SECTION
lap13: do l=1,lorbit
sum13=0.0d0
sum13=agd(1,l)*ag_op(1,l)*agd(1,l)- &
agd(2,l)*ag_op(2,l)*agd(2,l)
sum13=sum13*(y-x)*(1.0d0/dpot_sq(l))
ap13(l)=sum13
end do lap13
!= = = = = = = = = = = = = = = = = = = = = = =
!AP12
!$OMP SECTION
lap12: do l=1,lorbit
ap12(l)=agd(1,l)*ag_op(1,l)*agd(1,l)- &
agd(2,l)*ag_op(2,l)*agd(2,l)
ap12(l)=ap12(l)*dsqrt(x*y)*(1.0d0/dpot_sq(l))
enddo lap12
!$OMP END SECTIONS
!$OMP END PARALLEL
!= = = = = = = = = = = = = = = = = = = = = = =
!$omp parallel
call DoParallel()
!$omp end parallel
call tdos(spec,ityp,nsp,dos, &
sorb,porb,dorb,osd1,osd2)
!***********************************************!
! Writing DOS to the files
!***********************************************!
! DOS of atom A
if(ityp==1.and.nsp==1)then !UP spin for A
open(1,file='DOS_A_UP',status='replace')
do i=1,nfn+1
write(1,*)temp(i),dos(i)
dos_aa(i)=dos(i)
enddo
close(1)
endif
if(ityp==1.and.nsp==2)then !DOWN spin for A
open(1,file='DOS_A_DN',status='replace')
do i=1,nfn+1
write(1,*)temp(i),dos(i)
dos_adn(i)=dos(i)
enddo
close(1)
endif
!DOS of atom B
if(ityp==2.and.nsp==1)then !UP spin for B
open(1,file='DOS_B_UP',status='replace')
do i=1,nfn+1
write(1,*)temp(i),dos(i)
dos_b(i)=dos(i)
enddo
close(1)
endif
if(ityp==2.and.nsp==2)then !DOWN spin for B
open(1,file='DOS_B_DN',status='replace')
do i=1,nfn+1
write(1,*)temp(i),dos(i)
dos_bdn(i)=dos(i)
enddo
close(1)
endif
end do ltype !itereation for atom type ends here
end do lspin !iteration for up/dn spin ends here
call MPI_BARRIER(MPI_COMM_WORLD,ierr)
call MPI_FINALIZE(ierr)
call ldos(dos_aa,dos_b,dos_adn,dos_bdn,temp,T_dos)
call fermi(temp,T_dos,ef)
call band(temp,T_dos,ef,e_band)
call pardos(temp,sorb,porb,dorb,ef,nit,e_band, &
qtot_a,qtot_b,qtot_a1,qtot_b1)
! open(1,file='../LMTO_A/ATOM/CHECK',status='new')
! write(1,*)ityp
! close(1)
!
! open(2,file='../LMTO_B/ATOM/CHECK',status='new')
! write(2,*)ityp
! close(2)
if(nit/=0)then
xcha=dabs(qtot_a-qold_a)
xchb=dabs(qtot_b-qold_b)
if((xcha<0.0001d0).and.(xchb<0.0001d0))then
write(*,*) "CHARGE CONVERGENCE DONE"
write(9,*) "CHARGE CONVERGENCE DONE"
flush(9)
endif
endif
qold_a=qtot_a
qold_b=qtot_b
!-------------------------------------!
!WAITING FOR lmto TO CREATE NEW FILES !
!-------------------------------------!
if (nit==nscf)go to 3000
call lm_end(oidxdn,opnu,opold,oqnu,oqold, &
clabl_1,lmx,w_oidn,w_opp,w_oqnu,w_oqold, &
ialpha,ifmt3d,itrans,jbasdn,ldn,lmaxw, &
ltmax, mdn,mmixat,mmixpq, nbas,d_nclass, ndimin,ngen,nit, &
nitat,niter,nkdmx,nkxyz,d_nl,nopts,norder,npts,nrxc,nrxyz,d_nsp,&
as, beta,dele,deltr, efermi,kap2,kfit,emax,emaxc,emin, &
eminc,eps,facvol,gamma,ommax1,ommax2,range,rmaxes, &
rmaxs,rmaxs2,rmines,rms2,rmsdel,tolef,toleto,tolews,vmtz,wc, &
width,sw13)
!30 inquire(file='CHECK_A',exist=lexist_A)
! if(.not. lexist_A) then
! goto 30
! else
! open(1,file='CHECK_A')
! close(1,status='delete')
! endif
!
!40 inquire(file='CHECK_B',exist=lexist_B)
! if(.not. lexist_B) then
! goto 40
! else
! open(1,file='CHECK_B')
! close(1,status='delete')
! endif
!
write(9,'("Done!")')
end do lscf !SCF loop ends here
3000 continue !Just one more line
deallocate(map)
call cpu_time(t2)
write(*,'(1x,"ELAPSED CPU CLOCK TIME =",1x,f10.4,1x,"s")') t2-t1
!$ wend = OMP_get_wtime()
!$ wtime = wend - wstart
write(*,'("elapsed wall total time =",1x,f10.4,1x,"s")') wtime
write(9,'(1x,"ELAPSED CPU CLOCK TIME =",1x,f10.4,1x,"s")') t2-t1
write(9,'("elapsed wall total time =",1x,f10.4,1x,"s")') wtime
close(9)
close(19)
end program main
!===============================================!
! The parallel part of the code !
!===============================================!
subroutine DoParallel()
use parameters !Declaring the parameters
use shared !Shared OpenMP variables
use mhop !Recursion
use cgreen
implicit none
integer :: il,ienrp,istart,iend,iie
real(8)::e
real(8),dimension(maxrec,ienum+1)::xa,ya
real(8),dimension(maxrec+1,nfn+1):: xxa,yya
real(8),dimension(lorbit) :: p2,p3,p4
!$omp master
write(*,'(1x,"Starting orbital loop",1x,$)')
if(verbose==1) write(9,'(1x,"Starting orbital loop",1x,$)')
!$omp end master
!$omp barrier
!$omp do
orbital: do il=1,lorbit-2,2
write(*,'(i2,$)') il
if (verbose==1) write(9,'(i2,$)') il
xa=0.00;ya=0.00
ienrp=0
e=emin-de
istart=1
iend=lorbit
! do iend=9,63,lorbit
111 continue
iie = iend-istart+1
p2(1:iie) = ap2(istart:iend)
p3(1:iie) = ap3(istart:iend)
p4(1:iie) = ap4(istart:iend)
ienrp=ienrp+1
call hop(il,e,ienrp,map,srl,ap1, &
ap6,ap7,ap8,ap9,ap10,ap11, &
ap12,ap13,p2,p3,p4,xa,ya)
istart=iend+1
iend=iend+lorbit
write(*,'(".",$)')
if (verbose==1) then
write(9,'(".",$)')
flush(9)
endif
if (iend<=npn) goto 111
call fit(xa,ya,seed,xxa,yya,temp)
call spectral(il,xxa,yya,temp,spec)
end do orbital
!$omp end do
!$omp master
write(*,'("done")')
!$omp end master
end subroutine DoParallel