10 : num_streams(num_streams), cleanup_streams(
true) {
11 streams =
new cudaStream_t[num_streams];
12 for (
size_t i = 0; i < num_streams; ++i) {
13 cudaStreamCreateWithFlags(&streams[i], cudaStreamNonBlocking);
18 : streams(p_streams), num_streams(n), cleanup_streams(
false) {}
21 if (!cleanup_streams) {
24 for (
size_t i = 0; i < num_streams; ++i) {
25 cudaStreamDestroy(streams[i]);
// Returns a reference to one of the stored streams, chosen as
// index modulo num_streams, so any index value wraps around the array.
// NOTE(review): no guard is visible here for num_streams == 0, which would
// make the modulo undefined — confirm callers never build an empty set.
30 cudaStream_t &select(
size_t index) {
return streams[index % num_streams]; }
33 for (
size_t i = 0; i < num_streams; ++i) {
34 cudaStreamSynchronize(streams[i]);
// Waits on the first n entries of the streams array, in index order.
// n is clamped to num_streams, so a larger request waits on every stream.
38 void sync_n(
size_t n) {
39 n = std::min(n, num_streams);
40 for (
size_t i = 0; i < n; ++i) {
// NOTE(review): the cudaError_t returned by cudaStreamSynchronize is
// discarded, so a failed or faulted stream is not reported from here.
41 cudaStreamSynchronize(streams[i]);
// Array of num_streams stream handles: either allocated with new[] by the
// constructor that creates the streams, or a caller-supplied pointer stored
// as-is by the constructor that sets cleanup_streams to false.
45 cudaStream_t *streams;