!===============================================================================
! Copyright 2021-2022 Intel Corporation.
!
! This software and the related documents are Intel copyrighted  materials,  and
! your use of  them is  governed by the  express license  under which  they were
! provided to you (License).  Unless the License provides otherwise, you may not
! use, modify, copy, publish, distribute,  disclose or transmit this software or
! the related documents without Intel's prior written permission.
!
! This software and the related documents  are provided as  is,  with no express
! or implied  warranties,  other  than those  that are  expressly stated  in the
! License.
!===============================================================================

!
!  Content:
!      Intel(R) oneAPI Math Kernel Library (oneMKL)
!      FORTRAN OpenMP offload example for DGEMM_BATCH_STRIDED
!*******************************************************************************

include "mkl_omp_offload.f90"
include "common_blas.f90"

program dgemm_batch_strided_example
#if defined(MKL_ILP64)
use onemkl_blas_omp_offload_ilp64
#else
use onemkl_blas_omp_offload_lp64
#endif
use common_blas
use, intrinsic :: ISO_C_BINDING

character*1 :: ta = 'N', tb = 'N'
double precision :: alpha = 1.0, beta = 1.0
integer :: passed
integer :: m = 3, n = 4, k = 5
integer :: stridea, strideb, stridec, batch_size = 10
double precision,allocatable :: a(:,:), b(:,:)
double precision,allocatable :: c(:,:), c_ref(:,:)

integer :: lda, cola, ldb, colb

integer :: ldc, colc

if (ta.eq.'N') then
   lda = m
  cola = k
else
   lda = k
  cola = m
end if

if (tb.eq.'N') then
    ldb = k
   colb = n
else
    ldb = n
   colb = k
end if

 ldc = m
colc = n

stridea = lda * cola
strideb = ldb * colb
stridec = ldc * colc

allocate(a(stridea,batch_size))
allocate(b(strideb,batch_size))
allocate(c(stridec,batch_size))
allocate(c_ref(stridec,batch_size))

if ((.not.allocated(a)) .or. (.not.allocated(b)) .or. (.not.allocated(c)) .or. (.not.allocated(c_ref))) then
  print *, "Cannot allocate matrices"
  goto 998
end if

call dinit_matrix('N', stridea, batch_size, stridea, a)
call dinit_matrix('N', strideb, batch_size, strideb, b)
call dinit_matrix('N', stridec, batch_size, stridec, c)
call dcopy_matrix(stridec, batch_size, stridec, c, c_ref)

call dgemm_batch_strided(ta, tb, m, n, k, alpha, a, lda, stridea, b, ldb, strideb, beta, c_ref, ldc, stridec, batch_size)

!$omp target data map(to:a,b) map(tofrom:c)
#if defined(ONEMKL_USE_OPENMP_VERSION) && (ONEMKL_USE_OPENMP_VERSION >= 202011)
!$omp dispatch
#else
!$omp target variant dispatch device(0) use_device_ptr(a,b,c)
#endif
call dgemm_batch_strided(ta, tb, m, n, k, alpha, a, lda, stridea, b, ldb, strideb, beta, c, ldc, stridec, batch_size)
#if !defined(ONEMKL_USE_OPENMP_VERSION) || (ONEMKL_USE_OPENMP_VERSION < 202011)
!$omp end target variant dispatch
#endif
!$omp end target data

passed = dcheck_matrix(stridec, batch_size, stridec, c, c_ref)

deallocate(a);
deallocate(b);
deallocate(c);
deallocate(c_ref);

if (passed.ne.0) then
  goto 999
else
  print *, "PASSED"
end if

stop
998 print *, 'Error: cannot allocate memory'
999 stop 1
end program
