5#include <thrust/device_vector.h>
6#include <thrust/execution_policy.h>
7#include <thrust/iterator/counting_iterator.h>
8#include <thrust/scan.h>
9#include <thrust/sort.h>
10#include <unordered_map>
16inline void build_block_csc_indices(
17 size_t num_block_cols,
18 const std::unordered_map<BlockCoordinates, size_t> &block_indices,
19 const std::vector<BlockCoordinates> &block_coords,
20 thrust::device_vector<size_t> &d_col_pointers,
21 thrust::device_vector<size_t> &d_row_indices,
22 thrust::device_vector<size_t> &d_offsets) {
23 std::vector<BlockCoordinates> sorted = block_coords;
24 std::sort(sorted.begin(), sorted.end(),
25 [](
const BlockCoordinates &a,
const BlockCoordinates &b) {
32 thrust::host_vector<size_t> h_col_pointers(num_block_cols + 1);
33 thrust::host_vector<size_t> h_row_indices(sorted.size());
34 thrust::host_vector<size_t> h_offsets(sorted.size());
35 thrust::fill(thrust::host, h_col_pointers.begin(), h_col_pointers.end(), 0);
37 for (
size_t i = 0; i < sorted.size(); i++) {
38 const auto &coord = sorted[i];
39 h_col_pointers[coord.col]++;
40 h_row_indices[i] = coord.row;
41 h_offsets[i] = block_indices.at(coord);
44 d_col_pointers = h_col_pointers;
45 d_row_indices = h_row_indices;
46 d_offsets = h_offsets;
48 thrust::exclusive_scan(thrust::device, d_col_pointers.begin(),
49 d_col_pointers.end(), d_col_pointers.begin());
53build_scalar_to_block_map(
const thrust::device_vector<size_t> &d_scalar_offsets,
54 size_t num_block_cols,
55 thrust::device_vector<size_t> &scalar_to_block_map) {
56 const size_t scalar_dim = d_scalar_offsets[num_block_cols];
57 scalar_to_block_map.resize(scalar_dim);
59 auto s2b_map = scalar_to_block_map.data().get();
60 const auto offsets = d_scalar_offsets.data().get();
62 thrust::for_each(thrust::device, thrust::make_counting_iterator<size_t>(0),
63 thrust::make_counting_iterator<size_t>(num_block_cols),
64 [=] __device__(
size_t block_col) {
65 const size_t scalar_col = offsets[block_col];
66 const size_t dim = offsets[block_col + 1] - scalar_col;
67 for (
size_t i = 0; i < dim; i++) {
68 s2b_map[scalar_col + i] = block_col;
73template <
typename S,
typename I,
typename Matrix>
74void build_scalar_csc_structure(
75 size_t scalar_dim,
const thrust::device_vector<size_t> &d_col_pointers,
76 const thrust::device_vector<size_t> &d_row_indices,
77 const thrust::device_vector<size_t> &d_scalar_offsets,
78 const thrust::device_vector<size_t> &scalar_to_block_map, Matrix &matrix) {
79 matrix.d_pointers.resize(scalar_dim + 1);
80 thrust::fill(thrust::device, matrix.d_pointers.begin(),
81 matrix.d_pointers.end(), 0);
83 const auto p_col = d_col_pointers.data().get();
84 const auto r_idx = d_row_indices.data().get();
85 const auto scalar_offsets = d_scalar_offsets.data().get();
86 const auto s2b_map = scalar_to_block_map.data().get();
87 auto scalar_ptrs = matrix.d_pointers.data().get();
89 thrust::for_each(thrust::device, thrust::make_counting_iterator<size_t>(0),
90 thrust::make_counting_iterator<size_t>(scalar_dim),
91 [=] __device__(
const size_t scalar_col) {
92 const size_t block_col = s2b_map[scalar_col];
93 const size_t start = p_col[block_col];
94 const size_t end = p_col[block_col + 1];
95 size_t num_values = 0;
97 for (
size_t b = start; b < end; b++) {
98 const size_t block_row = r_idx[b];
99 const size_t nrows = scalar_offsets[block_row + 1] -
100 scalar_offsets[block_row];
101 const size_t scalar_row = scalar_offsets[block_row];
103 for (
size_t r = 0; r < nrows; r++) {
104 if (scalar_row + r <= scalar_col) {
111 scalar_ptrs[scalar_col] = num_values;
114 thrust::exclusive_scan(thrust::device, matrix.d_pointers.begin(),
115 matrix.d_pointers.end(), matrix.d_pointers.begin());
117 const size_t nnz = matrix.d_pointers[scalar_dim];
118 matrix.d_indices.resize(nnz);
119 matrix.d_values.resize(nnz);
121 auto row_indices = matrix.d_indices.data().get();
123 thrust::for_each(thrust::device, thrust::make_counting_iterator<size_t>(0),
124 thrust::make_counting_iterator<size_t>(scalar_dim),
125 [=] __device__(
const size_t scalar_col) {
126 const size_t block_col = s2b_map[scalar_col];
127 const size_t start = p_col[block_col];
128 const size_t end = p_col[block_col + 1];
129 size_t write_idx = scalar_ptrs[scalar_col];
131 for (
size_t b = start; b < end; b++) {
132 const size_t block_row = r_idx[b];
133 const size_t nrows = scalar_offsets[block_row + 1] -
134 scalar_offsets[block_row];
135 const size_t scalar_row = scalar_offsets[block_row];
137 for (
size_t r = 0; r < nrows; r++) {
138 if (scalar_row + r <= scalar_col) {
139 row_indices[write_idx++] =
140 static_cast<I
>(scalar_row + r);
149template <
typename S,
typename I,
typename Matrix>
150void update_scalar_csc_values(
151 size_t scalar_dim,
const thrust::device_vector<S> &block_values,
152 const thrust::device_vector<size_t> &d_col_pointers,
153 const thrust::device_vector<size_t> &d_row_indices,
154 const thrust::device_vector<size_t> &d_offsets,
155 const thrust::device_vector<size_t> &d_scalar_offsets,
156 const thrust::device_vector<size_t> &scalar_to_block_map, Matrix &matrix) {
157 const auto values = block_values.data().get();
158 const auto p_col = d_col_pointers.data().get();
159 const auto r_idx = d_row_indices.data().get();
160 const auto block_offsets = d_offsets.data().get();
161 const auto scalar_offsets = d_scalar_offsets.data().get();
162 const auto s2b_map = scalar_to_block_map.data().get();
164 const auto scalar_ptrs = matrix.d_pointers.data().get();
165 auto out_values = matrix.d_values.data().get();
168 thrust::device, thrust::make_counting_iterator<size_t>(0),
169 thrust::make_counting_iterator<size_t>(scalar_dim),
170 [=] __device__(
const size_t scalar_col) {
171 const size_t block_col = s2b_map[scalar_col];
172 const size_t start = p_col[block_col];
173 const size_t end = p_col[block_col + 1];
174 size_t write_idx = scalar_ptrs[scalar_col];
176 for (
size_t b = start; b < end; b++) {
177 const size_t block_row = r_idx[b];
179 scalar_offsets[block_row + 1] - scalar_offsets[block_row];
180 const size_t scalar_row = scalar_offsets[block_row];
181 const size_t col_in_block = scalar_col - scalar_offsets[block_col];
183 const auto block = values + block_offsets[b] + col_in_block * nrows;
184 for (
size_t r = 0; r < nrows; r++) {
185 if (scalar_row + r <= scalar_col) {
186 out_values[write_idx++] = block[r];
The top-level namespace for Graphite.
Definition eigen_solver.cpp:4