Sample Code & Cluster User's Guide
Below is the code for a Sample OpenMP Program.
Sample OpenMP Program
program stress_test
!=======================================================
! Simple contrived program to see how systems perform
! repeated OpenMP multi-threaded tests.
!
! This test repeatedly "smooths" or "filters" a 3-d
! (gridded) array, starting either from a random or
! deterministic data array.
!
! This makes a good system "stress test", since the
! arrays can be as larger or small as required, and
! the job can be run for as long or as short as
! required (by varying the number of filtering passes).
! The 7-point filter around the centre-point in a 3-d
! lattice structure poses a performance challenge for
! scalar processors!
!
! This version does direct "brute force" treatment of
! domain faces, edges and corners.
!
! The program reads 8 input variables, from 6 lines
! of standard input, or redirected from a file.
! See "program parameters" below.
!=======================================================
implicit none
integer nx,ny,nz, i,j,k, iseed, icount, maxcount
integer iofreq
!
real xran1, twopi,xdist,ydist,zdist
real, allocatable :: arr_in(:,:,:), arr_out(:,:,:)
real valmax, wght, rwght3,rwght4,rwght5,rwght6
!
!
!==========================================================
! Some program parameters:
! (These may be read from a file, redirected from stdin).
!==========================================================
read(*,*) maxcount ! Max. number of filter iterations (problem length)
read(*,*) nx,ny,nz ! 3-d grid dimensions (determines problem size)
read(*,*) iofreq ! No. of iterations between writing output
read(*,*) wght ! Filter weights (coefficients)
read(*,*) valmax ! Max. value of 3-d field (for scaling only)
read(*,*) iseed ! Zero for deterministic array; otherwise random seed
!
! Some suggested values:
! 100 ! maxcount, number of filter iterations
! 400, 400, 800 ! nx,ny,nz values, to use 1 GB memory altogether.
! 10 ! iofreq, i.e, output every 10 iterations
! 0.05 ! wght, for relatively "light" filtering
! 100. ! valmax, just for order-1 output values
! 0 ! iseed, zero for deterministic array, best for
checking correctness
!
rwght3 = 1.0 - 3.0*wght ! Weight factors derived from fundamental weight
rwght4 = 1.0 - 4.0*wght
rwght5 = 1.0 - 5.0*wght
rwght6 = 1.0 - 6.0*wght
!
allocate (arr_in(nx,ny,nz))
allocate (arr_out(nx,ny,nz))
!
print *, 'Maxcount: ',maxcount
print *, 'nx,ny,nz: ',nx,ny,nz
print *, 'Output interval: ',iofreq
print *, 'Weight (wght): ',wght
print *, 'Value range: ',valmax
print *, 'Random seed: ',iseed
if(iseed.eq.0) then
print *, 'Using deterministic initialization'
else
print *, 'Using random initialization'
endif
print *, ' '
!
!=====================================================
! Initialize the main data array,
! either deterministically, or with random numbers:
!=====================================================
if(iseed.eq.0) then
twopi = 8.0*atan(1.0)
do k=1,nz
zdist=twopi*float(k-1)/float(nz-1)
do j=1,ny
ydist=twopi*float(j-1)/float(ny-1)
do i=1,nx
xdist=twopi*float(i-1)/float(nx-1)
arr_in(i,j,k) = valmax*Cos(7.*xdist)*cos(13.*ydist)*
& cos(11.*zdist)
enddo
enddo
enddo
!
else
!
do k=1,nz
do j=1,ny
do i=1,nx
arr_in(i,j,k) = valmax*(-1. + 2.0*xran1(iseed))
enddo
enddo
enddo
endif
!
write(*,*) 'Initial random values are:'
write(*,100) (arr_in(2,2,k),k=2,nz-1,nz/7)
open (unit=12,file='stresstest.dat',form='formatted')
write(12,*) 'The Parameters of this run are:'
write(12,*) 'Maxcount: ',maxcount
write(12,*) 'nx,ny,nz: ',nx,ny,nz
write(12,*) 'Output interval: ',iofreq
write(12,*) 'Weight (wght): ',wght
write(12,*) 'Value range: ',valmax
write(12,*) 'Random seed: ',iseed
if(iseed.eq.0) then
write(12,*) 'Using deterministic initialization'
else
write(12,*) 'Using random initialization'
endif
write(12,*) ' '
write(12,*) 'Initial random values are (i,j,k=1,10):'
! write (12,100) (arr_in(i,j,k),i=1,10),j=1,10),k=1,10)
write (12,100) (arr_in(10,10,k),k=1,10)
write (12,*) ' '
!
! Start of loop over smoothing iterations:
do icount=0,maxcount
!
!
!
!=====================================================
! --- Main smoothing loop:
!=====================================================
! Main body of data:
!$OMP PARALLEL DO PRIVATE(i,j,k)
do k=2,nz-1
do j=2,ny-1
do i=2,nx-1
arr_out(i,j,k) = rwght6*arr_in(i,j,k) + wght*(
& arr_in(i-1,j,k) + arr_in(i+1,j,k) +
& arr_in(i,j-1,k) + arr_in(i,j+1,k) +
& arr_in(i,j,k-1) + arr_in(i,j,k+1) )
enddo
enddo
enddo
!$OMP END PARALLEL DO
!
! Now for the 6 faces:
!$OMP PARALLEL DO PRIVATE(i,j)
do j=2,ny-1
do i=2,nx-1
arr_out(i,j,1) = rwght5*arr_in(i,j,1) + wght*(
& arr_in(i-1,j,1) + arr_in(i+1,j,1) +
& arr_in(i,j-1,1) + arr_in(i,j+1,1) +
& arr_in(i,j,2) )
arr_out(i,j,nz) = rwght5*arr_in(i,j,nz) + wght*(
& arr_in(i-1,j,nz) + arr_in(i+1,j,nz) +
& arr_in(i,j-1,nz) + arr_in(i,j+1,nz) +
& arr_in(i,j,nz-1) )
enddo
enddo
!$OMP END PARALLEL DO
!
!$OMP PARALLEL DO PRIVATE(i,k)
do k=2,nz-1
do i=2,nx-1
arr_out(i,1,k) = rwght5*arr_in(i,1,k) + wght*(
& arr_in(i-1,1,k) + arr_in(i+1,1,k) +
& arr_in(i,1,k-1) + arr_in(i,1,k+1) +
& arr_in(i,2,k) )
arr_out(i,ny,k) = rwght5*arr_in(i,ny,k) + wght*(
& arr_in(i-1,ny,k) + arr_in(i+1,ny,k) +
& arr_in(i,ny,k-1) + arr_in(i,ny,k+1) +
& arr_in(i,ny-1,k) )
enddo
enddo
!$OMP END PARALLEL DO
!
!$OMP PARALLEL DO PRIVATE(j,k)
do k=2,nz-1
do j=2,ny-1
arr_out(1,j,k) = rwght5*arr_in(1,j,k) + wght*(
& arr_in(1,j-1,k) + arr_in(1,j+1,k) +
& arr_in(1,j,k-1) + arr_in(1,j,k+1) +
& arr_in(2,j,k) )
arr_out(nx,j,k) = rwght5*arr_in(nx,j,k) + wght*(
& arr_in(nx,j-1,k) + arr_in(nx,j+1,k) +
& arr_in(nx,j,k-1) + arr_in(nx,j,k+1) +
& arr_in(nx-1,j,k) )
enddo
enddo
!$OMP END PARALLEL DO
!
! - Now 12 edges:
!$OMP PARALLEL DO PRIVATE(i)
do i=2,nx-1
arr_out(i,1,1) = rwght4*arr_in(i,1,1) + wght*(
& arr_in(i-1,1,1) + arr_in(i+1,1,1) +
& arr_in(i,1,2) + arr_in(i,2,1) )
arr_out(i,ny,1) = rwght4*arr_in(nx,ny,1) + wght*(
& arr_in(i-1,ny,1) + arr_in(i+1,ny,1) +
& arr_in(i,ny-1,1) + arr_in(i,ny,2) )
arr_out(i,1,nz) = rwght4*arr_in(i,1,nz) + wght*(
& arr_in(i-1,1,nz) + arr_in(i+1,1,nz) +
& arr_in(i,2,nz) + arr_in(i,1,nz-1) )
arr_out(i,ny,nz) = rwght4*arr_in(i,ny,nz) + wght*(
& arr_in(i-1,ny,nz) + arr_in(i+1,ny,nz) +
& arr_in(i,ny-1,nz) + arr_in(i,ny,nz-1) )
enddo
!$OMP END PARALLEL DO
!
!$OMP PARALLEL DO PRIVATE(j)
do j=2,ny-1
arr_out(1,j,1) = rwght4*arr_in(1,j,1) + wght*(
& arr_in(1,j-1,1) + arr_in(1,j+1,1) +
& arr_in(1,j,2) + arr_in(2,j,1) )
arr_out(nx,j,1) = rwght4*arr_in(nx,j,1) + wght*(
& arr_in(nx,j-1,1) + arr_in(nx,j+1,1) +
& arr_in(nx-1,j,1) + arr_in(nx,j,2) )
arr_out(1,j,nz) = rwght4*arr_in(1,j,nz) + wght*(
& arr_in(1,j-1,nz) + arr_in(1,j+1,nz) +
& arr_in(2,j,nz) + arr_in(1,j,nz-1) )
arr_out(nx,j,nz) = rwght4*arr_in(nx,j,nz) + wght*(
& arr_in(nx,j-1,nz) + arr_in(nx,j+1,nz) +
& arr_in(nx-1,j,nz) + arr_in(nx,j,nz-1) )
enddo
!$OMP END PARALLEL DO
!
!$OMP PARALLEL DO PRIVATE(k)
do k=2,nz-1
arr_out(1,1,k) = rwght4*arr_in(1,1,k) + wght*(
& arr_in(1,1,k-1) + arr_in(1,1,k+1) +
& arr_in(1,2,k) + arr_in(2,1,k) )
arr_out(nx,1,k) = rwght4*arr_in(nx,1,k) + wght*(
& arr_in(nx,1,k-1) + arr_in(nx,1,k+1) +
& arr_in(nx-1,1,k) + arr_in(nx,2,k) )
arr_out(1,ny,k) = rwght4*arr_in(1,ny,k) + wght*(
& arr_in(1,ny,k-1) + arr_in(1,ny,k+1) +
& arr_in(2,ny,k) + arr_in(1,ny-1,k) )
arr_out(nx,ny,k) = rwght4*arr_in(nx,ny,k) + wght*(
& arr_in(nx,ny,k-1) + arr_in(nx,ny,k+1) +
& arr_in(nx-1,ny,k) + arr_in(nx,ny-1,k) )
enddo
!$OMP END PARALLEL DO
!
! Now 8 corner points:
arr_out(1,1,1) = rwght3*arr_in(1,1,1) + wght*(
& arr_in(2,1,1)+arr_in(1,2,1)+arr_in(1,1,2))
arr_out(nx,1,1) = rwght3*arr_in(nx,1,1) + wght*(
& arr_in(nx-1,1,1)+arr_in(nx,2,1)+arr_in(nx,1,2))
arr_out(1,ny,1) = rwght3*arr_in(1,ny,1) + wght*(
& arr_in(2,ny,1)+arr_in(1,ny-1,1)+arr_in(1,ny,2))
arr_out(1,1,nz) = rwght3*arr_in(1,1,nz) + wght*(
& arr_in(2,1,nz)+arr_in(1,2,nz)+arr_in(1,1,nz-1))
arr_out(nx,ny,1) = rwght3*arr_in(nx,ny,1) + wght*(
& arr_in(nx-1,ny,1)+arr_in(nx,ny-1,1)+arr_in(nx,ny,2))
arr_out(1,ny,nz) = rwght3*arr_in(1,ny,nz) + wght*(
& arr_in(2,ny,nz)+arr_in(1,ny-1,nz)+arr_in(1,ny,nz-1))
arr_out(nx,1,nz) = rwght3*arr_in(nx,1,nz) + wght*(
& arr_in(nx-1,1,nz)+arr_in(nx,2,nz)+arr_in(nx,1,nz-1))
arr_out(nx,ny,nz)=rwght3*arr_in(nx,ny,nz) + wght*(
& arr_in(nx-1,ny,nz)+arr_in(nx,ny-1,nz)+arr_in(nx,ny,nz-1))
!
!
!
!--- Need to update arr_in for next iteration:
! Main body of data:
!$OMP PARALLEL DO PRIVATE(i,j,k)
do k=2,nz-1
do j=2,ny-1
do i=2,nx-1
arr_in(i,j,k) = rwght6*arr_out(i,j,k) + wght*(
& arr_out(i-1,j,k) + arr_out(i+1,j,k) +
& arr_out(i,j-1,k) + arr_out(i,j+1,k) +
& arr_out(i,j,k-1) + arr_out(i,j,k+1) )
enddo
enddo
enddo
!$OMP END PARALLEL DO
!
! Next the 6 faces
!$OMP PARALLEL DO PRIVATE(i,j)
do j=2,ny-1
do i=2,nx-1
arr_in(i,j,1) = rwght5*arr_out(i,j,1) + wght*(
& arr_out(i-1,j,1) + arr_out(i+1,j,1) +
& arr_out(i,j-1,1) + arr_out(i,j+1,1) +
& arr_out(i,j,2) )
arr_in(i,j,nz) = rwght5*arr_out(i,j,nz) + wght*(
& arr_out(i-1,j,nz) + arr_out(i+1,j,nz) +
& arr_out(i,j-1,nz) + arr_out(i,j+1,nz) +
& arr_out(i,j,nz-1) )
enddo
enddo
!$OMP END PARALLEL DO
!
!$OMP PARALLEL DO PRIVATE(i,k)
do k=2,nz-1
do i=2,nx-1
arr_in(i,1,k) = rwght5*arr_out(i,1,k) + wght*(
& arr_out(i-1,1,k) + arr_out(i+1,1,k) +
& arr_out(i,1,k-1) + arr_in(i,1,k+1) +
& arr_out(i,2,k) )
arr_in(i,ny,k) = rwght5*arr_out(i,ny,k) + wght*(
& arr_out(i-1,ny,k) + arr_out(i+1,ny,k) +
& arr_out(i,ny,k-1) + arr_in(i,ny,k+1) +
& arr_out(i,ny-1,k) )
enddo
enddo
!$OMP END PARALLEL DO
!
!$OMP PARALLEL DO PRIVATE(k,j)
do k=2,nz-1
do j=2,ny-1
arr_in(1,j,k) = rwght5*arr_out(1,j,k) + wght*(
& arr_out(1,j-1,k) + arr_out(1,j+1,k) +
& arr_out(1,j,k-1) + arr_in(1,j,k+1) +
& arr_out(2,j,k) )
arr_in(nx,j,k) = rwght5*arr_out(nx,j,k) + wght*(
& arr_out(nx,j-1,k) + arr_out(nx,j+1,k) +
& arr_out(nx,j,k-1) + arr_in(nx,j,k+1) +
& arr_out(nx-1,j,k) )
enddo
enddo
!$OMP END PARALLEL DO
!
! - Now 12 edges:
!$OMP PARALLEL DO PRIVATE(i)
do i=2,nx-1
arr_in(i,1,1) = rwght4*arr_out(i,1,1) + wght*(
& arr_out(i-1,1,1) + arr_out(i+1,1,1) +
& arr_out(i,1,2) + arr_out(i,2,1) )
arr_in(i,ny,1) = rwght4*arr_out(nx,ny,1) + wght*(
& arr_out(i-1,ny,1) + arr_out(i+1,ny,1) +
& arr_out(i,ny-1,1) + arr_out(i,ny,2) )
arr_in(i,1,nz) = rwght4*arr_out(i,1,nz) + wght*(
& arr_out(i-1,1,nz) + arr_out(i+1,1,nz) +
& arr_out(i,2,nz) + arr_out(i,1,nz-1) )
arr_in(i,ny,nz) = rwght4*arr_out(i,ny,nz) + wght*(
& arr_out(i-1,ny,nz) + arr_out(i+1,ny,nz) +
& arr_out(i,ny-1,nz) + arr_out(i,ny,nz-1) )
enddo
!$OMP END PARALLEL DO
!
!$OMP PARALLEL DO PRIVATE(j)
do j=2,ny-1
arr_in(1,j,1) = rwght4*arr_out(1,j,1) + wght*(
& arr_out(1,j-1,1) + arr_out(1,j+1,1) +
& arr_out(1,j,2) + arr_out(2,j,1) )
arr_in(nx,j,1) = rwght4*arr_out(nx,j,1) + wght*(
& arr_out(nx,j-1,1) + arr_out(nx,j+1,1) +
& arr_out(nx-1,j,1) + arr_out(nx,j,2) )
arr_in(1,j,nz) = rwght4*arr_out(1,j,nz) + wght*(
& arr_out(1,j-1,nz) + arr_out(1,j+1,nz) +
& arr_out(2,j,nz) + arr_out(1,j,nz-1) )
arr_in(nx,j,nz) = rwght4*arr_out(nx,j,nz) + wght*(
& arr_out(nx,j-1,nz) + arr_out(nx,j+1,nz) +
& arr_out(nx-1,j,nz) + arr_out(nx,j,nz-1) )
enddo
!$OMP END PARALLEL DO
!
!$OMP PARALLEL DO PRIVATE(k)
do k=2,nz-1
arr_in(1,1,k) = rwght4*arr_out(1,1,k) + wght*(
& arr_out(1,1,k-1) + arr_out(1,1,k+1) +
& arr_out(1,2,k) + arr_out(2,1,k) )
arr_in(nx,1,k) = rwght4*arr_out(nx,1,k) + wght*(
& arr_out(nx,1,k-1) + arr_out(nx,1,k+1) +
& arr_out(nx-1,1,k) + arr_out(nx,2,k) )
arr_in(1,ny,k) = rwght4*arr_out(1,ny,k) + wght*(
& arr_out(1,ny,k-1) + arr_out(1,ny,k+1) +
& arr_out(2,ny,k) + arr_out(1,ny-1,k) )
arr_in(nx,ny,k) = rwght4*arr_out(nx,ny,k) + wght*(
& arr_out(nx,ny,k-1) + arr_out(nx,ny,k+1) +
& arr_out(nx-1,ny,k) + arr_out(nx,ny-1,k) )
enddo
!$OMP END PARALLEL DO
!
! Now 8 corner points:
arr_in(1,1,1) = rwght3*arr_out(1,1,1) + wght*(
& arr_out(2,1,1)+arr_out(1,2,1)+arr_out(1,1,2))
arr_in(nx,1,1) = rwght3*arr_out(nx,1,1) + wght*(
& arr_out(nx-1,1,1)+arr_out(nx,2,1)+arr_out(nx,1,2))
arr_in(1,ny,1) = rwght3*arr_out(1,ny,1) + wght*(
& arr_out(2,ny,1)+arr_out(1,ny-1,1)+arr_out(1,ny,2))
arr_in(1,1,nz) = rwght3*arr_out(1,1,nz) + wght*(
& arr_out(2,1,nz)+arr_out(1,2,nz)+arr_out(1,1,nz-1))
arr_in(nx,ny,1) = rwght3*arr_out(nx,ny,1) + wght*(
& arr_out(nx-1,ny,1)+arr_out(nx,ny-1,1)+arr_out(nx,ny,2))
arr_in(1,ny,nz) = rwght3*arr_out(1,ny,nz) + wght*(
& arr_out(2,ny,nz)+arr_out(1,ny-1,nz)+arr_out(1,ny,nz-1))
arr_in(nx,1,nz) = rwght3*arr_out(nx,1,nz) + wght*(
& arr_out(nx-1,1,nz)+arr_out(nx,2,nz)+arr_out(nx,1,nz-1))
arr_in(nx,ny,nz)=rwght3*arr_out(nx,ny,nz) + wght*(
& arr_out(nx-1,ny,nz)+arr_out(nx,ny-1,nz)+arr_out(nx,ny,nz-1))
!
!
!
!
if(icount.eq.0) write(*,*) 'Successively smoothed values are:'
if(icount.lt.10 .or. mod(icount,iofreq).eq.0)
& write(*,100) (arr_out(2,2,k),k=2,nz-1,nz/7)
!
enddo ! (End of main smoothing loop over icount)
!
999 write(12,*) 'Final (smoothed?) values are (i,j,k=1,10):'
! write (12,100) (((arr_out(i,j,k),i=1,10),j=1,10),k=1,10)
write (12,100) (arr_out(10,10,k),k=1,10)
!
stop 'Normal end, max smoothing iterations completed.'
100 format(6e12.4)
end
function xran1(idum)
c--------------------------------------------------------------------
c Routine from Numerical Recipes to return a uniform random
c deviate between 0.0 and 1.0. Set idum to any negative number
c to initialize or reinitialize the sequence.
c--------------------------------------------------------------------
parameter (nn=97)
parameter (m1=259200, ia1=7141, ic1=54773, rm1=1./m1)
parameter (m2=134456, ia2=8121, ic2=28411, rm2=1./m2)
parameter (m3=243000, ia3=4561, ic3=51349)
real xran1, r(nn)
save r, ix1,ix2,ix3
c
data iff /0/
if (idum.lt.0 .or. iff.eq.0) then
iff = 1
ix1 = mod(ic1-idum,m1) ! seed the first routine
ix1 = mod(ia1*ix1+ic1,m1)
ix2 = mod(ix1,m2) ! and use it to seed the second
ix1 = mod(ia1*ix1+ic1,m1)
ix3 = mod(ix1,m3) ! and to seed the third
c
do 11,j=1,nn ! fill the table with sequential
ix1 = mod(ia1*ix1+ic1,m1) ! random deviates generated by the
ix2 = mod(ia2*ix2+ic2,m2) ! first two routines
r(j) = (float(ix1)+float(ix2)*rm2)*rm1
11 continue
c
idum = 1
endif
c
ix1 = mod(ia1*ix1+ic1,m1)
ix2 = mod(ia2*ix2+ic2,m2)
ix3 = mod(ia3*ix3+ic3,m3)
c
j = 1+(nn*ix3)/m3 ! use 3rd sequence for no. between 1 and 97
c
if(j.gt.nn.or.j.lt.1) pause
xran1 = r(j) ! return that table entry
r(j) = (float(ix1)+float(ix2)*rm2)*rm1 ! and refill it
return
end
function xran2(idum)
integer idum,ia,im,iq,ir,ntab,ndiv
real xran2, am, eps, rnmx
parameter (ia=16807,im=2147483647,am=1./im,iq=127773,ir=2836,
& ntab=32,ndiv=1+(im-1)/ntab,eps=1.1e-9,rnmx=1.-eps)
integer j,k,iv(ntab),iy
save iv,iy
!
data iv/ntab*0/, iy/0/
!
if (idum.le.0.or.iy.eq.0) then
idum =max(-idum,1)
do j=ntab+8,1,-1
k=idum/iq
idum=ia*(idum-k+iq)-ir*k
if(idum.lt.0) idum=idum+im
if(j.le.ntab) iv(j)=idum
enddo
iy=iv(1)
endif
!
k=idum/iq
idum=ia*(idum-k*iq)-ir*k
if (idum.lt.0) idum = idum + im
j=1+iy/ndiv
iy=iv(j)
iv(j)=idum
xran2=min(am*iy,rnmx)
return
end