# Rewrites the variable whose *name* is passed in `var` as a Python boolean
# literal: `True` when the variable currently evaluates as true under CMake's
# if() rules, `False` otherwise.
#
# This is a macro, so the assignment happens directly in the caller's scope.
#
# Example:
#   set(MY_FLAG ON)
#   pythonize_bool(MY_FLAG)  # MY_FLAG now holds "True"
macro(pythonize_bool var)
  if (NOT ${var})
    set(${var} False)
  else()
    set(${var} True)
  endif()
endmacro()

# Locate the CUDA Toolkit; REQUIRED makes configuration fail if it is missing.
find_package(CUDAToolkit REQUIRED)
# Capture the include directories exported by the CUDA::cudart imported target.
# NOTE(review): INTERFACE_INCLUDE_DIRECTORIES is a ;-separated list and may hold
# more than one directory -- confirm every consumer of CUDA_INCLUDE_DIR copes.
get_target_property(CUDA_INCLUDE_DIR CUDA::cudart INTERFACE_INCLUDE_DIRECTORIES)

set(LIBCUDACXX_HIGHEST_COMPUTE_ARCH 90)

# Walk LIBCUDACXX_COMPUTE_ARCHS and accumulate two space-prefixed summaries:
#   _compute_message                - " sm_<arch>" per entry, for the log line
#   LIBCUDACXX_COMPUTE_ARCHS_STRING - " <arch>" per entry
# NOTE(review): LIBCUDACXX_COMPUTE_ARCHS_STRING is assigned again from
# CMAKE_CUDA_ARCHITECTURES later in this file, so the value built here looks
# unused -- confirm before relying on it.
foreach (COMPUTE_ARCH ${LIBCUDACXX_COMPUTE_ARCHS})
  string(APPEND _compute_message " sm_${COMPUTE_ARCH}")
  string(APPEND LIBCUDACXX_COMPUTE_ARCHS_STRING " ${COMPUTE_ARCH}")
endforeach()

message(STATUS "Enabled CUDA architectures:${_compute_message}")

# Chooses how the tests are compiled:
#   ON  - compile through the nvrtcc utility (runtime compilation).
#   OFF - compile with the regular CUDA compiler (CMAKE_CUDA_COMPILER).
# Both branches set LIBCUDACXX_CUDA_COMPILER, LIBCUDACXX_FORCE_INCLUDE and the
# Python-style boolean LIBCUDACXX_CUDA_TEST_WITH_NVRTC.
option(LIBCUDACXX_TEST_WITH_NVRTC
  "Test libcu++ with runtime compilation instead of offline compilation. Only runs device side tests."
  OFF)

if (LIBCUDACXX_TEST_WITH_NVRTC)
  # TODO: Use project properties to get path to binary.
  # Should also set up dependency on the project when NVRTC is enabled
  set(LIBCUDACXX_CUDA_COMPILER "${CMAKE_BINARY_DIR}/libcudacxx/test/utils/nvidia/nvrtc/nvrtcc")
  set(LIBCUDACXX_CUDA_COMPILER_ARG1 "")
  set(LIBCUDACXX_CUDA_TEST_WITH_NVRTC "True")
  set(LIBCUDACXX_FORCE_INCLUDE "-include '${libcudacxx_SOURCE_DIR}/test/support/nvrtc_limit_macros.h'")
  # CUDA_INCLUDE_DIR is read from a target's INTERFACE_INCLUDE_DIRECTORIES and
  # can therefore be a ;-separated list. Emit one quoted -I flag per directory
  # instead of gluing the whole list into a single (broken) flag.
  foreach (_libcudacxx_cuda_include_dir IN LISTS CUDA_INCLUDE_DIR)
    string(APPEND LIBCUDACXX_TEST_COMPILER_FLAGS " -I'${_libcudacxx_cuda_include_dir}'")
  endforeach()
  # Use the NVRTCC utility to run the built test outputs
  set(LIBCUDACXX_EXECUTOR "PrefixExecutor(['${LIBCUDACXX_CUDA_COMPILER}'], LocalExecutor())")
else() # NOT LIBCUDACXX_TEST_WITH_NVRTC
  set(LIBCUDACXX_FORCE_INCLUDE "-include ${libcudacxx_SOURCE_DIR}/test/libcudacxx/force_include.h")
  set(LIBCUDACXX_CUDA_COMPILER "${CMAKE_CUDA_COMPILER}")
  set(LIBCUDACXX_CUDA_TEST_WITH_NVRTC "False")
endif()

# Turn on -Wall/-Wextra (forwarded via --compiler-options) unless building with
# MSVC or with Clang as the CUDA compiler.
# The variable is referenced by name rather than as ${...}: an empty or unset
# CMAKE_CUDA_COMPILER_ID would otherwise leave STREQUAL without a left operand
# (a configure error), and a non-empty value could be dereferenced a second time.
if (NOT MSVC AND NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
  set(LIBCUDACXX_WARNING_LEVEL
    "--compiler-options=-Wall \
    --compiler-options=-Wextra")
endif()

# sccache cannot handle the -Fd option generating pdb files, so request the
# "Embedded" debug information format for MSVC builds instead.
# NOTE(review): CMAKE_MSVC_DEBUG_INFORMATION_FORMAT is only honored when policy
# CMP0141 is NEW (CMake >= 3.25) -- confirm the project enables it.
if (MSVC)
  set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT Embedded)
endif()

# Intel OneAPI compiler has fast math enabled by default which breaks almost all floating point tests
# The compiler id is referenced by variable name (not ${...}) so that an empty
# or unset CMAKE_CXX_COMPILER_ID cannot produce a malformed if() expression.
if (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM" OR CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
  set(LIBCUDACXX_TEST_COMPILER_FLAGS
    "${LIBCUDACXX_TEST_COMPILER_FLAGS} \
    --compiler-options=-fno-fast-math")
endif()

# Extra compile and link flags needed when Clang is the CUDA compiler.
# The compiler id is referenced by variable name (not ${...}) so that an empty
# or unset CMAKE_CUDA_COMPILER_ID cannot produce a malformed if() expression.
if (CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
  string(APPEND LIBCUDACXX_TEST_COMPILER_FLAGS
     " -Xclang -fcuda-allow-variadic-functions"
     " -Xclang -Wno-unused-parameter"
     " -Wno-unknown-cuda-version")

  # CUDAToolkit_LIBRARY_DIR is provided by the find_package(CUDAToolkit REQUIRED)
  # call at the top of this file; a second lookup here is unnecessary.
  string(APPEND LIBCUDACXX_TEST_LINKER_FLAGS
    " -L${CUDAToolkit_LIBRARY_DIR} -lcuda -lcudart")
endif()

# With the NVIDIA CUDA compiler, append the force-include and warning-level
# flags to the test compile flags.
# The compiler id is referenced by variable name (not ${...}) so that an empty
# or unset CMAKE_CUDA_COMPILER_ID cannot produce a malformed if() expression.
if (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
  set(LIBCUDACXX_TEST_COMPILER_FLAGS
    "${LIBCUDACXX_TEST_COMPILER_FLAGS} \
    ${LIBCUDACXX_FORCE_INCLUDE} \
    ${LIBCUDACXX_WARNING_LEVEL}")
endif()

# With the NVHPC compiler, pass -stdpar to both the compile and the link step.
# The compiler id is referenced by variable name (not ${...}) so that an empty
# or unset CMAKE_CUDA_COMPILER_ID cannot produce a malformed if() expression.
if (CMAKE_CUDA_COMPILER_ID STREQUAL "NVHPC")
  set(LIBCUDACXX_TEST_COMPILER_FLAGS
    "${LIBCUDACXX_TEST_COMPILER_FLAGS} \
    -stdpar")
  set(LIBCUDACXX_TEST_LINKER_FLAGS
    "${LIBCUDACXX_TEST_LINKER_FLAGS} \
    -stdpar")
endif()

# With Clang as the CUDA compiler, append the force-include, the libcudacxx
# include directory and the warning-level flags to the test compile flags.
# The compiler id is referenced by variable name (not ${...}) so that an empty
# or unset CMAKE_CUDA_COMPILER_ID cannot produce a malformed if() expression.
if (CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
  set(LIBCUDACXX_TEST_COMPILER_FLAGS
    "${LIBCUDACXX_TEST_COMPILER_FLAGS} \
    ${LIBCUDACXX_FORCE_INCLUDE} \
    -I${libcudacxx_SOURCE_DIR}/include \
    ${LIBCUDACXX_WARNING_LEVEL}")
endif()

# The architecture string used from here on mirrors CMAKE_CUDA_ARCHITECTURES.
set(LIBCUDACXX_COMPUTE_ARCHS_STRING "${CMAKE_CUDA_ARCHITECTURES}")

# Load the AddLLVM CMake module.
include(AddLLVM)

set(LIBCUDACXX_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}")

# User-overridable cache entries.
# NOTE(review): the NVRTC branch earlier in this file sets a *normal* variable
# named LIBCUDACXX_EXECUTOR; whether it survives the cache set() below depends
# on policy CMP0126 -- confirm the intended value wins.
set(LIBCUDACXX_TARGET_INFO
  "libcudacxx.test.target_info.LocalTI"
  CACHE STRING "TargetInfo to use when setting up test environment.")
set(LIBCUDACXX_EXECUTOR
  "None"
  CACHE STRING "Executor to use when running tests.")
set(LIBCUDACXX_TEST_TIMEOUT
  "200"
  CACHE STRING "Enable test timeouts (Default = 200, Off = 0)")

# Banner text made available to the generated lit site configuration.
set(AUTO_GEN_COMMENT "## Autogenerated by libcudacxx configuration.\n# Do not edit!")

# Generate lit.site.cfg in the current binary directory from the template that
# sits next to this file.
# NOTE(review): configure_lit_site_cfg and add_lit_testsuite are presumably
# provided by the AddLLVM module included above -- confirm.
set(lit_site_cfg_path "${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg")
configure_lit_site_cfg("${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in" "${lit_site_cfg_path}")

# Register the `check-cudacxx` lit test suite for the current binary directory.
add_lit_testsuite(
  check-cudacxx
  "Running libcu++ tests"
  "${CMAKE_CURRENT_BINARY_DIR}")

# Locate the `lit` executable; REQUIRED makes configuration fail if it is absent.
find_program(libcudacxx_LIT lit REQUIRED)

# Extra arguments forwarded to every lit invocation configured in this file.
set(libcudacxx_LIT_FLAGS
  ""
  CACHE STRING "Semi-colon separated list of flags passed to the invocation of lit.")
message(STATUS "libcudacxx_LIT_FLAGS: ${libcudacxx_LIT_FLAGS}")

# The precompile target is only defined for offline (non-NVRTC) compilation.
if (NOT LIBCUDACXX_TEST_WITH_NVRTC)
  # Build but don't run the tests. Used by CI to pre-seed sccache for the test machines.
  # Only executed if explicitly requested.
  #
  # Runs lit with LIBCUDACXX_SITE_CONFIG pointing at the generated site config
  # and overrides the executor with NoopExecutor().
  # libcudacxx_LIT_FLAGS is expanded unquoted on purpose so that each
  # ;-separated entry becomes its own command-line argument.
  # NOTE(review): the command is declared without VERBATIM, so the escaping of
  # the quoted -Dexecutor argument is generator/platform dependent -- confirm
  # lit receives the intended value before changing the quoting.
  add_custom_target(libcudacxx.test.lit.precompile
    DEPENDS libcudacxx.test.public_headers libcudacxx.test.internal_headers libcudacxx.test.public_headers_host_only
    COMMAND "${CMAKE_COMMAND}" -E env "LIBCUDACXX_SITE_CONFIG=${lit_site_cfg_path}"
    "${libcudacxx_LIT}" -vv --no-progress-bar ${libcudacxx_LIT_FLAGS} "-Dexecutor=\"NoopExecutor()\"" "${libcudacxx_SOURCE_DIR}/test/libcudacxx"
  )

endif()

# Number of parallel lit jobs. Restricted to avoid oversubscribing the GPU:
set(libcudacxx_LIT_PARALLEL_LEVEL
  8
  CACHE STRING "Parallelism used to run libcudacxx's lit test suite.")

# CTest entry that runs the whole lit suite: lit is launched through
# `cmake -E env` so that LIBCUDACXX_SITE_CONFIG points at the generated site
# configuration. libcudacxx_LIT_FLAGS is expanded unquoted so each ;-separated
# entry becomes a separate argument.
add_test(
  NAME libcudacxx.test.lit
  COMMAND
    "${CMAKE_COMMAND}" -E env "LIBCUDACXX_SITE_CONFIG=${lit_site_cfg_path}"
    "${libcudacxx_LIT}" -vv --no-progress-bar ${libcudacxx_LIT_FLAGS}
    -j "${libcudacxx_LIT_PARALLEL_LEVEL}"
    "${libcudacxx_SOURCE_DIR}/test/libcudacxx")

# One-hour timeout; RUN_SERIAL keeps CTest from running other tests alongside.
set_tests_properties(libcudacxx.test.lit PROPERTIES TIMEOUT 3600 RUN_SERIAL TRUE)
