numactl --interleave=all ./testing_cgeqrf -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.6.1  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
ndevices 3
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_cgeqrf [options] [-h|--help]

ngpu 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   |R - Q^H*A|   |I - Q^H*Q|
===============================================================================
  100   100     ---   (  ---  )      2.67 (   0.00)       ---
 1000  1000     ---   (  ---  )    240.12 (   0.02)       ---
   10    10     ---   (  ---  )      0.13 (   0.00)       ---
   20    20     ---   (  ---  )      0.80 (   0.00)       ---
   30    30     ---   (  ---  )      2.05 (   0.00)       ---
   40    40     ---   (  ---  )      3.58 (   0.00)       ---
   50    50     ---   (  ---  )      5.09 (   0.00)       ---
   60    60     ---   (  ---  )      6.83 (   0.00)       ---
   70    70     ---   (  ---  )      2.17 (   0.00)       ---
   80    80     ---   (  ---  )      3.26 (   0.00)       ---
   90    90     ---   (  ---  )      3.77 (   0.00)       ---
  100   100     ---   (  ---  )      5.51 (   0.00)       ---
  200   200     ---   (  ---  )     17.49 (   0.00)       ---
  300   300     ---   (  ---  )     40.92 (   0.00)       ---
  400   400     ---   (  ---  )     63.73 (   0.01)       ---
  500   500     ---   (  ---  )     93.16 (   0.01)       ---
  600   600     ---   (  ---  )    120.87 (   0.01)       ---
  700   700     ---   (  ---  )    153.14 (   0.01)       ---
  800   800     ---   (  ---  )    183.92 (   0.01)       ---
  900   900     ---   (  ---  )    214.53 (   0.02)       ---
 1000  1000     ---   (  ---  )    250.42 (   0.02)       ---
 2000  2000     ---   (  ---  )    626.11 (   0.07)       ---
 3000  3000     ---   (  ---  )   1017.00 (   0.14)       ---
 4000  4000     ---   (  ---  )   1393.34 (   0.25)       ---
 5000  5000     ---   (  ---  )   1484.88 (   0.45)       ---
 6000  6000     ---   (  ---  )   1779.15 (   0.65)       ---
 7000  7000     ---   (  ---  )   1943.20 (   0.94)       ---
 8000  8000     ---   (  ---  )   2076.82 (   1.32)       ---
 9000  9000     ---   (  ---  )   2162.67 (   1.80)       ---
10000 10000     ---   (  ---  )   2229.67 (   2.39)       ---
12000 12000     ---   (  ---  )   2325.79 (   3.96)       ---
14000 14000     ---   (  ---  )   2379.66 (   6.15)       ---
16000 16000     ---   (  ---  )   2403.02 (   9.09)       ---
18000 18000     ---   (  ---  )   2416.62 (  12.87)       ---
20000 20000     ---   (  ---  )   2456.26 (  17.37)       ---

numactl --interleave=all ./testing_cgeqrf_gpu -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.6.1  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
ndevices 3
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_cgeqrf_gpu [options] [-h|--help]

version 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)    |b - A*x|
================================================================
  100   100     ---   (  ---  )      3.22 (   0.00)       ---
 1000  1000     ---   (  ---  )    225.51 (   0.02)       ---
   10    10     ---   (  ---  )      0.01 (   0.00)       ---
   20    20     ---   (  ---  )      0.05 (   0.00)       ---
   30    30     ---   (  ---  )      0.17 (   0.00)       ---
   40    40     ---   (  ---  )      0.34 (   0.00)       ---
   50    50     ---   (  ---  )      0.63 (   0.00)       ---
   60    60     ---   (  ---  )      1.05 (   0.00)       ---
   70    70     ---   (  ---  )      1.20 (   0.00)       ---
   80    80     ---   (  ---  )      1.74 (   0.00)       ---
   90    90     ---   (  ---  )      2.59 (   0.00)       ---
  100   100     ---   (  ---  )      7.08 (   0.00)       ---
  200   200     ---   (  ---  )     14.62 (   0.00)       ---
  300   300     ---   (  ---  )     32.56 (   0.00)       ---
  400   400     ---   (  ---  )     50.97 (   0.01)       ---
  500   500     ---   (  ---  )     81.01 (   0.01)       ---
  600   600     ---   (  ---  )    109.60 (   0.01)       ---
  700   700     ---   (  ---  )    137.69 (   0.01)       ---
  800   800     ---   (  ---  )    169.58 (   0.02)       ---
  900   900     ---   (  ---  )    200.46 (   0.02)       ---
 1000  1000     ---   (  ---  )    231.75 (   0.02)       ---
 2000  2000     ---   (  ---  )    607.28 (   0.07)       ---
 3000  3000     ---   (  ---  )   1004.62 (   0.14)       ---
 4000  4000     ---   (  ---  )   1374.30 (   0.25)       ---
 5000  5000     ---   (  ---  )   1466.80 (   0.45)       ---
 6000  6000     ---   (  ---  )   1722.78 (   0.67)       ---
 7000  7000     ---   (  ---  )   1939.11 (   0.94)       ---
 8000  8000     ---   (  ---  )   2077.29 (   1.31)       ---
 9000  9000     ---   (  ---  )   2153.94 (   1.81)       ---
10000 10000     ---   (  ---  )   2107.55 (   2.53)       ---
12000 12000     ---   (  ---  )   2238.10 (   4.12)       ---
14000 14000     ---   (  ---  )   2354.83 (   6.22)       ---
16000 16000     ---   (  ---  )   2369.16 (   9.22)       ---
18000 18000     ---   (  ---  )   2396.27 (  12.98)       ---
20000 20000     ---   (  ---  )   2451.14 (  17.41)       ---
