#!/bin/sh
# autopkgtest check for viennacl
# (C) 2014 Anton Gladky

# Abort on the first failing command so that a broken build or a crashing
# demo makes the whole test fail.
set -e

# Scratch directory that receives the generated sources and the demo binary.
WORKDIR=$(mktemp -d)
# Single quotes defer the expansion until the trap fires; the inner double
# quotes keep the path intact even if it contains whitespace or glob characters.
trap 'rm -rf "$WORKDIR"' 0 INT QUIT ABRT PIPE TERM
cd "$WORKDIR"
# Quoted delimiter: the C++ text that follows is written out verbatim, with no
# parameter, command, or backslash expansion performed by the shell.
cat <<'EOF' > benchmark-utils.hpp
#ifndef _BENCHMARK_UTILS_HPP_
#define _BENCHMARK_UTILS_HPP_

/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

#include <iostream>

/**
 * Prints a throughput figure, labelled "GFLOPs: ", to std::cout.
 *
 * The value printed is num_ops divided by (exec_time * 1e9), so exec_time is
 * interpreted as seconds for the label to be accurate.
 *
 * Declared inline because the definition lives in a header: without it, every
 * translation unit including this file would emit its own external definition
 * and the link would fail with a multiple-definition error.
 *
 * @param num_ops    Number of operations that were executed.
 * @param exec_time  Time the operations took (seconds, see above).
 */
inline void printOps(double num_ops, double exec_time)
{
  std::cout << "GFLOPs: " << num_ops / (1000000 * exec_time * 1000) << std::endl;
}




#ifdef _WIN32

// WIN32_LEAN_AND_MEAN is the macro <windows.h> actually checks in order to
// skip its rarely used sub-headers.
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
// <windows.h> defines min/max as macros, which clash with std::min/std::max.
#undef min
#undef max

/**
 * Stopwatch based on the Windows high-resolution performance counter.
 *
 * Call start() to set the reference point, then get() to read the number of
 * seconds elapsed since then. get() does not reset the reference point, so it
 * may be called repeatedly after a single start().
 */
class Timer
{
public:

  /** Queries the counter frequency once and zeroes the reference point. */
  Timer()
  {
    QueryPerformanceFrequency(&freq);
    // Give start_time a defined value so that get() before start() does not
    // read an indeterminate counter value.
    start_time.QuadPart = 0;
  }

  /** Stores the current counter value as the reference point. */
  void start()
  {
    QueryPerformanceCounter(&start_time);
  }

  /** Returns the seconds elapsed since the reference point. */
  double get() const
  {
    LARGE_INTEGER end_time;
    QueryPerformanceCounter(&end_time);
    // Subtract the 64-bit tick counts first; converting each to double before
    // subtracting would discard low-order ticks for large counter values.
    return static_cast<double>(end_time.QuadPart - start_time.QuadPart) / static_cast<double>(freq.QuadPart);
  }


private:
  LARGE_INTEGER freq;        // counter ticks per second
  LARGE_INTEGER start_time;  // counter value captured by start()
};

#else

#include <sys/time.h>

/**
 * Wall-clock stopwatch built on gettimeofday().
 *
 * Call start() to set the reference point, then get() to read the number of
 * seconds elapsed since then. get() does not reset the reference point, so it
 * may be called repeatedly after a single start(). If start() was never
 * called, the reference point is 0 and get() returns the current
 * gettimeofday() time in seconds.
 */
class Timer
{
public:

  Timer() : ts(0)
  {}

  /** Stores the current time as the reference point. */
  void start()
  {
    ts = now_usec();
  }

  /** Returns the seconds elapsed since the reference point. */
  double get() const
  {
    return (now_usec() - ts) / 1000000.0;
  }

private:
  /**
   * Returns the current gettimeofday() reading in microseconds.
   *
   * Both fields are converted to double before scaling: multiplying tv_sec by
   * 1000000 in its own integer type overflows where that type is 32 bits wide.
   */
  static double now_usec()
  {
    struct timeval tval;
    gettimeofday(&tval, NULL);
    return static_cast<double>(tval.tv_sec) * 1000000.0 + static_cast<double>(tval.tv_usec);
  }

  double ts;  // reference point in microseconds, set by start()
};


#endif

#endif

EOF

# Quoted delimiter: the C++ source below is copied verbatim, so the shell never
# expands anything that happens to look like a parameter or command substitution.
cat <<'EOF' > demo.cpp
/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/*
*
*   Benchmark:   Performance of viennacl::copy(), viennacl::fast_copy(), and viennacl::async_copy()
*
*/


#include "viennacl/scalar.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/linalg/inner_prod.hpp"
#include "viennacl/linalg/norm_2.hpp"

#include <cstdlib>
#include <iostream>
#include <vector>
#include "benchmark-utils.hpp"

using std::cout;
using std::cin;
using std::endl;


// Number of elements in each host and device vector created by run_benchmark().
#define BENCHMARK_VECTOR_SIZE   10000000
// Not referenced by the benchmark code in this file.
#define BENCHMARK_RUNS          10


/**
 * Prints the timing summary for a single transfer measurement.
 *
 * @param title               Text shown between the asterisks of the heading.
 * @param time_to_return      Timer reading taken right after the copy call returned.
 * @param time_to_completion  Timer reading taken after viennacl::backend::finish().
 * @param bytes_transferred   Payload size used for the bandwidth estimate.
 */
static void report_copy_timing(const char * title,
                               double time_to_return,
                               double time_to_completion,
                               std::size_t bytes_transferred)
{
  std::cout << " *** " << title << " ***" << std::endl;
  std::cout << "  - Time to function return: " << time_to_return << std::endl;
  std::cout << "  - Time to completion: " << time_to_completion << std::endl;
  std::cout << "  - Estimated effective bandwidth: " << bytes_transferred / time_to_completion / 1e9 << " GB/sec" << std::endl;
}

/**
 * Times viennacl::copy(), viennacl::fast_copy() and viennacl::async_copy() in
 * both directions between a std::vector and a viennacl::vector and prints the
 * results to std::cout.
 *
 * Each measurement takes two timer readings: one right after the copy call
 * returns and one after viennacl::backend::finish(). The bandwidth estimate is
 * based on the second reading.
 *
 * @tparam ScalarType  Element type of the vectors being transferred.
 */
template<typename ScalarType>
void run_benchmark()
{

  Timer timer;
  double exec_time_return = 0;
  double exec_time_complete = 0;

  // Payload of one transfer, used for every bandwidth estimate below.
  const std::size_t transfer_bytes = BENCHMARK_VECTOR_SIZE * sizeof(ScalarType);

  std::vector<ScalarType> std_vec1(BENCHMARK_VECTOR_SIZE);
  std::vector<ScalarType> std_vec2(BENCHMARK_VECTOR_SIZE);
  viennacl::vector<ScalarType> vcl_vec1(BENCHMARK_VECTOR_SIZE);
  viennacl::vector<ScalarType> vcl_vec2(BENCHMARK_VECTOR_SIZE);


  ///////////// Vector operations /////////////////

  // Fill the host vectors with slowly varying values.
  std_vec1[0] = 1.0;
  std_vec2[0] = 1.0;
  for (int i=1; i<BENCHMARK_VECTOR_SIZE; ++i)
  {
    std_vec1[i] = std_vec1[i-1] * ScalarType(1.000001);
    std_vec2[i] = std_vec1[i-1] * ScalarType(0.999999);
  }

  // warmup: run every copy flavour once before anything is timed
  viennacl::copy(std_vec1, vcl_vec1);
  viennacl::fast_copy(std_vec2, vcl_vec2);
  viennacl::async_copy(std_vec2, vcl_vec1);
  viennacl::backend::finish();

  //
  // Benchmark copy operation:
  //
  timer.start();
  viennacl::copy(std_vec1, vcl_vec1);
  exec_time_return = timer.get();
  viennacl::backend::finish();
  exec_time_complete = timer.get();
  report_copy_timing("viennacl::copy(), host to device", exec_time_return, exec_time_complete, transfer_bytes);

  timer.start();
  viennacl::copy(vcl_vec1, std_vec1);
  exec_time_return = timer.get();
  viennacl::backend::finish();
  exec_time_complete = timer.get();
  report_copy_timing("viennacl::copy(), device to host", exec_time_return, exec_time_complete, transfer_bytes);


  //
  // Benchmark fast_copy operation:
  //
  timer.start();
  viennacl::fast_copy(std_vec1, vcl_vec1);
  exec_time_return = timer.get();
  viennacl::backend::finish();
  exec_time_complete = timer.get();
  report_copy_timing("viennacl::fast_copy(), host to device", exec_time_return, exec_time_complete, transfer_bytes);

  timer.start();
  viennacl::fast_copy(vcl_vec1, std_vec1);
  exec_time_return = timer.get();
  viennacl::backend::finish();
  exec_time_complete = timer.get();
  report_copy_timing("viennacl::fast_copy(), device to host", exec_time_return, exec_time_complete, transfer_bytes);

  //
  // Benchmark async_copy operation:
  //
  timer.start();
  // Host vector is the source here, matching the "host to device" heading.
  viennacl::async_copy(std_vec1, vcl_vec1);
  exec_time_return = timer.get();
  viennacl::backend::finish();
  exec_time_complete = timer.get();
  report_copy_timing("viennacl::async_copy(), host to device", exec_time_return, exec_time_complete, transfer_bytes);

  timer.start();
  viennacl::async_copy(vcl_vec1, std_vec1);
  exec_time_return = timer.get();
  viennacl::backend::finish();
  exec_time_complete = timer.get();
  report_copy_timing("viennacl::async_copy(), device to host", exec_time_return, exec_time_complete, transfer_bytes);

}

/**
 * Entry point of the copy benchmark.
 *
 * Prints a device-info banner (with the OpenCL device description when built
 * with VIENNACL_WITH_OPENCL), warns when NDEBUG is not defined, then runs the
 * benchmark for float and for double. In OpenCL builds the double run only
 * happens if the current device reports double support.
 */
int main()
{
  using std::cout;
  using std::endl;

  const char * const rule     = "----------------------------------------------";
  const char * const sub_rule = "   -------------------------------";

  cout << endl
       << rule << endl
       << "               Device Info" << endl
       << rule << endl;
#ifdef VIENNACL_WITH_OPENCL
  cout << viennacl::ocl::current_device().info() << endl;
#endif


#ifndef NDEBUG
  cout << endl
       << " ******************************************************************" << endl
       << " **** WARNING: This is not a release build." << endl
       << " ****          Performance numbers are therefore lower than normal. " << endl
       << " ******************************************************************" << endl
       << endl;
#endif


  cout << endl
       << rule << endl
       << rule << endl
       << "## Benchmark :: Vector" << endl
       << rule << endl
       << endl
       << sub_rule << endl
       << "   # benchmarking single-precision" << endl
       << sub_rule << endl;
  run_benchmark<float>();

  // Without OpenCL the braces below form a plain, unconditional block.
#ifdef VIENNACL_WITH_OPENCL
  if( viennacl::ocl::current_device().double_support() )
#endif
  {
    cout << endl
         << sub_rule << endl
         << "   # benchmarking double-precision" << endl
         << sub_rule << endl;
    run_benchmark<double>();
  }

  return EXIT_SUCCESS;
}

EOF

# Compile the generated benchmark; with "set -e" a compiler failure stops here.
g++ demo.cpp -o demo
echo "build: OK"
# Confirm that an executable was produced, then run it.
test -x demo
./demo
echo "run: OK"
