c***********************************************************************
c                  B P   A M E R I C A
c          PROPRIETARY - TO BE MAINTAINED IN CONFIDENCE 
c                        COPYRIGHTED 2008
c***********************************************************************
c
c     This MPI and OpenMP template is based on trace by trace processing.
c     It should be simple to modify for record by record processing.
c     It assumes that there is no communication needed between MPI nodes.
c
c     The standard DDS Makefile to compile this code looks like this:
c
c     Name     := mpi_template
c     F77Srcs  := mpi_template.F
c     Libs     := masterslave
c     MP       := TRUE
c     MPIX     := TRUE
c
c     include ${DDSROOT}/etc/MakeVariables
c     include ${DDSROOT}/etc/MakeRules
c
c
c     Inside bp you may checkout a copy of the template from the subversion repository
c     using the command: "devcode export mpi_template new_prog_name"
c
c     Author: Richard.Clarke@bp.com
c
      program mpi_template
      implicit none
c
c     INCLUDE THE DDS API (Application Program Interface)
c
#include <fdds.h>
c
c     INCLUDE THE HEADER FILE FOR THE MPI CONVENIENCE ROUTINES
c
#include <pfddx.h>
c
c     define the number of compulsory headers
c
#define NUMCOTAGS 1
c
c     define the number of optional headers
c
#define NUMOPTAGS 5
#define NUMTAGS (NUMCOTAGS + NUMOPTAGS)
c
c     Here are variables used by the template.
c     Often you may not need to change these.
c
      integer in_bin,out_bin,buflen,ninbuf,ntrace
      integer node,np,nproc
      integer istart,iend,ier
      integer rank,size(RANK_MAX)
      integer ss(RANK_MAX),ee(RANK_MAX),ii(RANK_MAX)
      integer numcotags,numoptags,numtags
      integer itags(NUMTAGS),otags(NUMTAGS)
      integer cumsize(RANK_MAX)
      real    origin(RANK_MAX),delta(RANK_MAX)
      character*(DEFNNAME_MAX) hw(NUMTAGS)
c
      real    buf(1),hvals(1),samples(1)
      pointer(ptr_buf,buf)
      pointer(ptr_hvals,hvals)
      pointer(ptr_samples,samples)
c
c     Here are variables specific to the algorithm
c
      real     fmax,f1,f2,f3,f4
      logical  ormsby,verbose
      external online_help
c
c     check if the user requested MPI or OpenMP, open the printfile.
c     or, just print the on-line help.
c
      ier=fddx_initmpix(nproc,np,node,'ddsfilt',online_help)
c
c     initialize the arrays of compulsory and optional headers
c
      numcotags = 1
      ier = fdds_sprintf(hw(1),'Samples\0')
c
      numoptags = 5
      ier = fdds_sprintf(hw(numcotags+1),'StaCor\0')
      ier = fdds_sprintf(hw(numcotags+2),'Horz01\0')
      ier = fdds_sprintf(hw(numcotags+3),'Horz02\0')
      ier = fdds_sprintf(hw(numcotags+4),'Horz03\0')
      ier = fdds_sprintf(hw(numcotags+5),'Horz04\0')
      numtags = numcotags + numoptags
c
c     open the input file and get tags to compulsory and optional headers
c
      in_bin = fddx_inp('in','stdin:', 'ddsfilt',node, buflen,
     $     numcotags, numoptags, hw, itags)
c
      ier = fddx_checkforerrors(np,node)
c
c     find the dimensions of the input volume
c
      ier = fddx_getdim(in_bin,rank,origin,delta,size,cumsize)
c
c     if we have a regular file, check the number of traces
c     we will need to pay attention to piped input later.
c
      if ( cumsize(1) > 0 ) then
         ntrace = cumsize(rank)
      else
         ntrace = -1 * cumsize(1)
      endif
      ier=fdds_prtmsg('The input contains %d traces.\n\0',ntrace)
c
c     check if the user asked for specific trace,record ranges
c
      ier = fddx_userrange( node, rank, size, ss, ee, ii )
c
c     allocate memory
c
      ninbuf = 10
      ier = fdds_scanf('ninbuf', '%d\0', ninbuf)
      ptr_buf     = fdds_malloc8( dble(buflen) *ninbuf*sizeof_real)
      ptr_hvals   = fdds_malloc8( dble(numtags)*ninbuf*sizeof_real)
      ptr_samples = fdds_malloc8( dble(size(1))*ninbuf*sizeof_real)
c
      fmax = 0.0
      call ReadParams(node,ormsby,fmax,f1,f2,f3,f4,verbose)
c
      ier = fddx_checkforerrors(np,node)
c
c     Copy the input dictionary and make any changes
c     This is not really necessary in this example, but serves as a reference. 
c
      ier = fddx_dict( in_bin, 'scan' )
      ier = fdds_dict( 'tmp_mpi:', 'print' )
      ier = fdds_history()
c
c     If you change the axis labels, then you need to rescan the dictionary.
c
      ier = fdds_printf('axis','t px py nbc\n\0')
      ier = fdds_dict( 'tmp_mpi:', 'scan' )
      ier = fdds_dict( 'tmp_mpi:', 'print' )
c
      ier = fdds_printf('size.axis(1)','%d\n\0',size(1))
c
c     Open the output file for writing in parallel.
c     All of the MPI nodes will write to the same file.
c     We allow for a different set of headers than were used
c     on the input, but typically you can use the same.
c
      out_bin=fddx_outp('out','stdout:','ddsfilt',in_bin,node,np,
     $     'tmp_mpi:',numcotags,numoptags,hw,otags)
c
      ier = fddx_checkforerrors(np,node)
c
c     decide which traces are to be processed by which MPI nodes
c
      ier = fddx_jobrange(ntrace,np,node,istart,iend)
c
c     Here is the routine that processes a chunk of data
c
      call ProcessChunk(
     $     in_bin,out_bin,istart,iend,
     $     buflen,ninbuf,size(1),buf,
     $     rank,cumsize,ss,ee,ii,
     $     itags(1),otags(1),samples,
     $     numtags-1,itags(2),otags(2),hvals)
c
      ier = fddx_stop(np,node)
      end
c
c***********************************************************************
c
c     Routine  to process a chunk of data
c
c***********************************************************************
c
      subroutine ProcessChunk(
     $     in_bin,out_bin,istart,iend,
     $     buflen,ninbuf,nsamp,buf,
     $     rank,cumsize,ss,ee,ii,
     $     it_tag,ot_tag,samples,
     $     numtags,itags,otags,hvals)
c
      implicit none
c
#include 
#include 
c
      integer in_bin,out_bin,ier
      integer istart,iend
      integer buflen,ninbuf,nsamp,ntrace,nread
      integer rank,cumsize(RANK_MAX)
      integer ss(RANK_MAX),ee(RANK_MAX),ii(RANK_MAX)
      integer it_tag,ot_tag
      integer numtags,itags(numtags),otags(numtags)
      integer jj,kk,ll,itrace
      real    buf(buflen,ninbuf)
      real    samples(nsamp,ninbuf)
      real    hvals(numtags,ninbuf)
c
c     seek to the start of the input and output files
c     for the traces to be processed by this chunk.
c
      if ( fdds_isreg(in_bin) .gt. 0 ) then
         ier = fdds_lseek(in_bin, 0,istart,SEEK_SET)
      endif
      if ( fdds_isreg(out_bin) .gt. 0 ) then
         ier = fdds_lseek(out_bin,0,istart,SEEK_SET)
      endif
      
      itrace = istart
      ntrace = ninbuf
      do while ( itrace .le. iend )

c     watch out for the end of the chunk

         if( 1 + iend - itrace .lt. ninbuf ) then
            ntrace = 1 + iend - itrace
         endif
      
c     read traces into the input buffer.
c     we need to handle things gracefully if we run out of traces,
c     so we check the number traces actually read and only
c     process that number.

         nread=fddx_readtrace(in_bin,buflen,nsamp,ntrace,buf,
     $        it_tag,samples,numtags,itags,hvals)



c     =============================================         
c     do some stuff !
c
c     This is the part of the code that is usually modified,
c     as well as adding additional parameters to the subroutine.
c
c     OpenMP:
c     Note that almost all dds functions are NOT thread safe.
c     This means that you cannot use them inside an OpenMP loop.
c     Even simple functions such as prtmsg can lead to errors.
c     It is usually best to not have any dds calls inside the
c     openMP loops, but for debugging purposes, for example,
c     you can "protect"  code using the CRITICAL directive. 
c     This prevents different threads from executing the protected
c     code at the same time.     



c     Here is an example of a loop parallelized with openMP.


C$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(ll,kk)

         do ll = 1,nread

c     Here is an example of the OpenMP CRITICAL command.


C$OMP CRITICAL
c            ier=fdds_prtmsg('ll=%d nread=%d\n\0',ll,nread)
C$OMP END CRITICAL



c     fddx_trace_in_range is thread safe and so we CAN use it.
c     It checks if the current trace is within the user specified
c     range of traces to process.

            if(1.eq.fddx_trace_in_range
     $           (itrace+ll-1,rank,cumsize,ss,ee,ii))then
               do kk = 1,nsamp
                  samples(kk,ll) = itrace
               end do
            endif
         end do

c     =============================================

c     update samples and headers. and write the traces to file.

         ier=fddx_writetrace(out_bin,buflen,nsamp,nread,buf,
     $        ot_tag,samples,numtags,otags,hvals)

         itrace = itrace + ninbuf
         
      end do
      

      return
      end

     

c-----------------------------------------------------------------------
c     read the user parameters
c-----------------------------------------------------------------------

      subroutine ReadParams(node,ormsby,fmax,f1,f2,f3,f4,verbose)
      implicit none
#include 

      integer node,ier
      real    fmax,f1,f2,f3,f4
      logical ormsby,verbose

      ier=fdds_dict('par:','scan')


      ormsby = (fdds_switch('ormsby',1).eq.1)      

      f1 = 0.0
      f2 = 0.0
      f3 = fmax
      f4 = fmax
      ier = fdds_scanf('f1', '%f\0', f1)
      ier = fdds_scanf('f2', '%f\0', f2)
      ier = fdds_scanf('f3', '%f\0', f3)
      ier = fdds_scanf('f4', '%f\0', f4)
      
      if ( ormsby .eq. .true. ) then
         if ( f1 .gt. f2 .or. f2.gt.f3 .or. f3.gt.f4 ) then
            ier=fdds_prterr('Problem with f1=%g f2=%g f3=%g f4=%g\n\0',
     $           f1,f2,f3,f4)
         endif
      endif


      verbose = (fdds_switch('verbose',0).eq.1)




c-----------------------------------------------------------------------
c     print parameters
c-----------------------------------------------------------------------

      if ( node .eq. 0 ) then

         ier=fdds_prtmsg('\n*** PARAMETERS ***\n\n\0')
         if (ormsby) then
            ier=fdds_prtmsg('Using ormsby filter.\n\0')
            ier=fdds_prtmsg('f1=%g f2=%g f3=%g f4=%g\n\0',f1,f2,f3,f4)
         else
            ier=fdds_prtmsg('Not using ormsby filter.\n\0')
         endif

         ier=fdds_prtmsg('\n\0')
         if (verbose) then
            ier=fdds_prtmsg('Writing more verbose printout.\n\0')
         else
            ier=fdds_prtmsg('Writing less verbose printout.\n\0')
         endif

      endif

      return
      end



c     Print out the on-line help and stop.

      subroutine online_help()
      implicit none

      write(0,*) 'Read the man page!'
      stop 0


      return
      end