5#include <thrust/device_vector.h>
6#include <thrust/universal_vector.h>
// Enqueues a prefetch of the buffer owned by `vec` towards device `device_id`
// on `stream`.
//
// The request is only enqueued: this function does not synchronize `stream`.
// When `_WIN32` or `__WSL2__` is defined the body is compiled out and the
// function does nothing.
//
// Template parameters:
//   T          element type of the vector.
// Parameters:
//   vec        vector whose storage is passed to cudaMemPrefetchAsync.
//   device_id  destination device forwarded to cudaMemPrefetchAsync.
//   stream     stream the prefetch is enqueued on.
template <typename T>
void prefetch_vector_on_device_async(const thrust::universal_vector<T> &vec,
                                     int device_id, cudaStream_t stream) {
#if !defined(_WIN32) && !defined(__WSL2__)
  const size_t num_bytes = vec.size() * sizeof(T);
  // Nothing to migrate for an empty vector: skip the call rather than hand
  // the runtime a zero-length range, which could leave an error behind for a
  // later cudaGetLastError() check to pick up.
  if (num_bytes == 0) {
    return;
  }
  // The returned status is discarded; a failed prefetch is not reported to
  // the caller.
  (void)cudaMemPrefetchAsync(vec.data().get(), num_bytes, device_id, stream);
#endif
}
// Enqueues a prefetch of the buffer owned by a `managed_vector` towards device
// `device_id` on `stream`.
//
// The request is only enqueued: this function does not synchronize `stream`.
// When `_WIN32` or `__WSL2__` is defined the body is compiled out and the
// function does nothing.
//
// Template parameters:
//   T          element type of the vector.
// Parameters:
//   vec        vector whose storage is passed to cudaMemPrefetchAsync.
//   device_id  destination device forwarded to cudaMemPrefetchAsync.
//   stream     stream the prefetch is enqueued on.
template <typename T>
void prefetch_vector_on_device_async(const managed_vector<T> &vec,
                                     int device_id, cudaStream_t stream) {
#if !defined(_WIN32) && !defined(__WSL2__)
  const size_t num_bytes = vec.size() * sizeof(T);
  // Nothing to migrate for an empty vector: skip the call rather than hand
  // the runtime a zero-length range, which could leave an error behind for a
  // later cudaGetLastError() check to pick up.
  if (num_bytes == 0) {
    return;
  }
  // The returned status is discarded; a failed prefetch is not reported to
  // the caller.
  (void)cudaMemPrefetchAsync(vec.data().get(), num_bytes, device_id, stream);
#endif
}
// Prefetches the buffer owned by `vec` towards the host (cudaCpuDeviceId) on
// `stream`, then blocks until `stream` has finished.
//
// When `_WIN32` or `__WSL2__` is defined the body is compiled out and the
// function does nothing (in particular, it does not synchronize).
//
// Template parameters:
//   T       element type of the vector.
// Parameters:
//   vec     vector whose storage is passed to cudaMemPrefetchAsync.
//   stream  stream the prefetch is enqueued on and that is synchronized.
template <typename T>
void prefetch_vector_on_host(const thrust::universal_vector<T> &vec,
                             cudaStream_t stream) {
#if !defined(_WIN32) && !defined(__WSL2__)
  const size_t num_bytes = vec.size() * sizeof(T);
  // Only issue the prefetch when there is something to migrate; a zero-length
  // range is not handed to the runtime.
  if (num_bytes != 0) {
    // The returned status is discarded; a failed prefetch is not reported.
    (void)cudaMemPrefetchAsync(vec.data().get(), num_bytes, cudaCpuDeviceId,
                               stream);
  }
  // Synchronize unconditionally so that callers get the same stream barrier
  // for empty and non-empty vectors.
  (void)cudaStreamSynchronize(stream);
#endif
}
// Prefetches the buffer owned by a `managed_vector` towards the host
// (cudaCpuDeviceId) on `stream`, then blocks until `stream` has finished.
//
// When `_WIN32` or `__WSL2__` is defined the body is compiled out and the
// function does nothing (in particular, it does not synchronize).
//
// Template parameters:
//   T       element type of the vector.
// Parameters:
//   vec     vector whose storage is passed to cudaMemPrefetchAsync.
//   stream  stream the prefetch is enqueued on and that is synchronized.
template <typename T>
void prefetch_vector_on_host(const managed_vector<T> &vec,
                             cudaStream_t stream) {
#if !defined(_WIN32) && !defined(__WSL2__)
  const size_t num_bytes = vec.size() * sizeof(T);
  // Only issue the prefetch when there is something to migrate; a zero-length
  // range is not handed to the runtime.
  if (num_bytes != 0) {
    // The returned status is discarded; a failed prefetch is not reported.
    (void)cudaMemPrefetchAsync(vec.data().get(), num_bytes, cudaCpuDeviceId,
                               stream);
  }
  // Synchronize unconditionally so that callers get the same stream barrier
  // for empty and non-empty vectors.
  (void)cudaStreamSynchronize(stream);
#endif
}
// Debug helper: copies the contents of a thrust::device_vector into a host
// std::vector and writes every element to std::cout followed by one space,
// then terminates the line with std::endl.
//
// Template parameters:
//   T    element type; must be streamable to std::cout.
// Parameters:
//   vec  device vector to print.
template <typename T>
void print_device_vector(const thrust::device_vector<T> &vec) {
  // Stage the elements on the host so they can be streamed.
  std::vector<T> staging(vec.size());
  thrust::copy(vec.begin(), vec.end(), staging.begin());

  for (auto it = staging.cbegin(); it != staging.cend(); ++it) {
    std::cout << *it << " ";
  }
  std::cout << std::endl;
}
// Debug helper: copies `size` elements starting at `vec` to the host with a
// device-to-host cudaMemcpy and writes every element to std::cout followed by
// one space, then terminates the line with std::endl.
//
// If the copy fails, the CUDA error string is written to std::cerr and nothing
// is printed to std::cout, so stale host values are never shown as if they
// were device data.
//
// Template parameters:
//   T     element type; must be streamable to std::cout.
// Parameters:
//   vec   source pointer handed to cudaMemcpy with cudaMemcpyDeviceToHost.
//   size  number of elements (not bytes) to copy and print.
template <typename T>
void print_device_vector(const T *vec, size_t size) {
  std::vector<T> host_vec(size);

  // With zero elements there is nothing to transfer; only the newline below
  // is emitted.
  if (size != 0) {
    const cudaError_t status = cudaMemcpy(host_vec.data(), vec,
                                          size * sizeof(T),
                                          cudaMemcpyDeviceToHost);
    if (status != cudaSuccess) {
      std::cerr << "print_device_vector: cudaMemcpy failed: "
                << cudaGetErrorString(status) << std::endl;
      return;
    }
  }

  for (const auto &val : host_vec) {
    std::cout << val << " ";
  }
  std::cout << std::endl;
}
81 std::vector<H> handles;
88 if (handles.empty()) {
91 H handle = handles.back();
// Stores `handle` at the end of `handles`.
// NOTE(review): presumably this makes the handle available for reuse by the
// acquiring method that reads `handles.back()` — confirm against that method.
void release(H handle) {
  handles.push_back(handle);
}
108size_t mix(
size_t x) {
110 x *= 0xe9846af9b1a615d;
112 x *= 0xe9846af9b1a615d;
// Folds the std::hash of `v` into `seed`.
//
// The new seed is mix(seed + 0x9e3779b9 + std::hash<T>{}(v)); `seed` is
// updated in place.
//
// Template parameters:
//   T     value type; std::hash<T> must be available.
// Parameters:
//   seed  running hash value, read and overwritten.
//   v     value whose hash is combined into `seed`.
template <typename T>
void hash_combine(size_t &seed, T const &v) {
  const size_t value_hash = std::hash<T>{}(v);
  const size_t combined = seed + 0x9e3779b9 + value_hash;
  seed = mix(combined);
}