/*******************************************************************************
* Copyright 2014-2020 Intel Corporation.
*
* This software and the related documents are Intel copyrighted  materials,  and
* your use of  them is  governed by the  express license  under which  they were
* provided to you (License).  Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute,  disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents  are provided as  is,  with no express
* or implied  warranties,  other  than those  that are  expressly stated  in the
* License.
*******************************************************************************/

//@HEADER
// ***************************************************
//
// HPCG: High Performance Conjugate Gradient Benchmark
//
// Contact:
// Michael A. Heroux ( maherou@sandia.gov)
// Jack Dongarra     (dongarra@eecs.utk.edu)
// Piotr Luszczek    (luszczek@eecs.utk.edu)
//
// ***************************************************
//@HEADER

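/*!
 @file UsmUtil.hpp

 Declarations of USM allocation wrappers, DPC++ versions of CopyVector and
 ZeroVector, and helpers that round a row count up to a multiple of the work
 group size.
 */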

#ifndef USMUTIL_HPP
#define USMUTIL_HPP

#include "Vector.hpp"

// Wrappers for USM (unified shared memory) allocation.
// Each wrapper takes the requested size in bytes and a SYCL queue, and returns
// an untyped pointer.
// NOTE(review): the definitions are not part of this header. Judging by the
// names, the three variants presumably correspond to host, shared and device
// USM allocations made against main_queue -- confirm against the
// implementation, including how allocation failure is reported and which call
// is expected to release the memory.
void * sparse_malloc_host(size_t bytes, sycl::queue & main_queue);
void * sparse_malloc_shared(size_t bytes, sycl::queue & main_queue);
void * sparse_malloc_device(size_t bytes, sycl::queue & main_queue);

// DPC++ version of CopyVector.
//   v          - source vector (read-only reference)
//   w          - destination vector
//   main_queue - SYCL queue passed in by the caller
//   deps       - optional list of SYCL events; defaults to an empty list
// Returns a sycl::event.
// NOTE(review): presumably the copy is submitted to main_queue, ordered after
// the events in deps, and the returned event tracks its completion -- confirm
// against the definition, which is not visible in this header.
sycl::event CopyVector(const Vector & v, Vector & w, sycl::queue & main_queue,
                       const std::vector<sycl::event> & deps = {});

// DPC++ version of ZeroVector.
//   v          - vector to operate on
//   main_queue - SYCL queue passed in by the caller
//   deps       - optional list of SYCL events; defaults to an empty list
// Returns a sycl::event.
// NOTE(review): v is taken by const reference although the name implies its
// values are overwritten; presumably the write goes through a data pointer
// held inside Vector -- confirm against the definition.
// NOTE(review): presumably the work is submitted to main_queue, ordered after
// the events in deps, and the returned event tracks its completion -- confirm.
sycl::event ZeroVector(const Vector & v, sycl::queue & main_queue,
                       const std::vector<sycl::event> & deps = {});

/*
  Given the local number of rows on this processor (nrow) and the work group
  size, this sets totalSize to the smallest multiple of the work group size
  that is greater than or equal to nrow.

    nrow               - local number of rows
    localWorkGroupSize - work group size to round up to
    totalSize          - (output) nrow rounded up; equal to nrow when nrow is
                         already a multiple of localWorkGroupSize

  A non-positive localWorkGroupSize cannot be rounded to (and would make the
  remainder computation divide by zero), so totalSize is set to nrow unchanged
  in that case. A non-positive nrow is likewise returned unchanged.
*/
inline void get_dims_for_kernel(local_int_t  nrow, local_int_t  localWorkGroupSize, local_int_t & totalSize) {
    if (localWorkGroupSize <= 0) {
        totalSize = nrow;
        return;
    }
    const local_int_t rest = nrow % localWorkGroupSize;
    // Add the padding (localWorkGroupSize - rest) as a single term: summing
    // nrow + localWorkGroupSize first can overflow even when the result fits.
    totalSize = (rest > 0) ? (nrow + (localWorkGroupSize - rest)) : nrow;
}

/*
  Overload for a global row count: sets totalSize to the smallest multiple of
  the work group size that is greater than or equal to nrow.

    nrow               - number of rows, as a global index type
    localWorkGroupSize - work group size to round up to
    totalSize          - (output) nrow rounded up; equal to nrow when nrow is
                         already a multiple of localWorkGroupSize

  A non-positive localWorkGroupSize cannot be rounded to (and would make the
  remainder computation divide by zero), so totalSize is set to nrow unchanged
  in that case. A non-positive nrow is likewise returned unchanged.
*/
inline void get_dims_for_kernel(global_int_t  nrow, local_int_t  localWorkGroupSize, global_int_t & totalSize) {
    if (localWorkGroupSize <= 0) {
        totalSize = nrow;
        return;
    }
    // The remainder is kept in global_int_t, so no narrowing cast is needed.
    const global_int_t rest = nrow % localWorkGroupSize;
    // Add the padding (localWorkGroupSize - rest) as a single term: summing
    // nrow + localWorkGroupSize first can overflow even when the result fits.
    totalSize = (rest > 0) ? (nrow + (localWorkGroupSize - rest)) : nrow;
}

#endif
