44template <
typename T,
typename S>
class Hessian {
47 std::vector<BlockCoordinates> get_block_coordinates(Graph<T, S> *graph) {
52 thrust::device_vector<BlockCoordinates> block_coords;
53 auto &f_desc = graph->get_factor_descriptors();
54 for (
auto &f : f_desc) {
55 f->get_hessian_block_coordinates(block_coords);
59 thrust::device, block_coords.begin(), block_coords.end(),
70 auto end_it = thrust::unique(
71 thrust::device, block_coords.begin(), block_coords.end(),
73 return (a.row == b.row) && (a.col == b.col);
77 block_coords.resize(end_it - block_coords.begin());
80 std::vector<BlockCoordinates> host_block_coords(block_coords.size());
81 thrust::copy(block_coords.begin(), block_coords.end(),
82 host_block_coords.begin());
83 return host_block_coords;
91 void build_indices(Graph<T, S> *graph,
92 const std::vector<BlockCoordinates> &block_coords) {
93 const auto num_columns = graph->get_num_block_columns();
94 thrust::host_vector<size_t> col_pointers(num_columns + 1);
95 thrust::host_vector<size_t> row_indices(block_coords.size());
96 thrust::host_vector<size_t> offsets(block_coords.size());
97 thrust::fill(thrust::host, col_pointers.begin(), col_pointers.end(), 0);
99 for (
size_t i = 0; i < block_coords.size(); i++) {
100 const auto &coord = block_coords[i];
102 col_pointers[coord.col]++;
103 row_indices[i] = coord.row;
104 offsets[i] = block_indices.at(coord);
108 d_col_pointers = col_pointers;
109 d_row_indices = row_indices;
111 d_hessian_offsets = graph->get_offset_vector();
113 thrust::exclusive_scan(thrust::device, d_col_pointers.begin(),
114 d_col_pointers.end(), d_col_pointers.begin());
118 scalar_to_block_map.resize(graph->get_hessian_dimension());
119 auto s2b_map = scalar_to_block_map.data().get();
120 const auto bc_offset = d_hessian_offsets.data().get();
122 thrust::device, thrust::make_counting_iterator<size_t>(0),
123 thrust::make_counting_iterator<size_t>(graph->get_num_block_columns()),
124 [=] __device__(
size_t block_column) {
125 const size_t hessian_col = bc_offset[block_column];
126 const auto dim = bc_offset[block_column + 1] - hessian_col;
128 for (
size_t i = 0; i < dim; i++) {
129 s2b_map[hessian_col + i] = block_column;
134 void backup_diagonal(Graph<T, S> *graph,
StreamPool &streams) {
135 d_prev_diagonal.resize(graph->get_hessian_dimension());
137 auto diag = d_prev_diagonal.data().get();
138 const auto h_offsets = d_hessian_offsets.data().get();
139 const auto p_col = d_col_pointers.data().get();
140 const auto r_idx = d_row_indices.data().get();
141 const auto block_locations = d_offsets.data().get();
142 const auto h = d_hessian.data().get();
145 thrust::device, thrust::make_counting_iterator<size_t>(0),
146 thrust::make_counting_iterator<size_t>(graph->get_num_block_columns()),
147 [=] __device__(
size_t block_col) {
148 const size_t hessian_col = h_offsets[block_col];
149 const size_t dim = h_offsets[block_col + 1] - hessian_col;
153 const auto end = p_col[block_col + 1];
155 const auto b = end - 1;
158 if (r_idx[b] == block_col) {
160 const auto block = h + block_locations[b];
161 for (
size_t i = 0; i < dim; i++) {
162 diag[hessian_col + i] = block[i * dim + i];
168 void apply_damping(Graph<T, S> *graph, T damping_factor,
171 auto diag = d_prev_diagonal.data().get();
172 const auto h_offsets = d_hessian_offsets.data().get();
173 const auto p_col = d_col_pointers.data().get();
174 const auto r_idx = d_row_indices.data().get();
175 auto block_locations = d_offsets.data().get();
176 auto h = d_hessian.data().get();
179 thrust::device, thrust::make_counting_iterator<size_t>(0),
180 thrust::make_counting_iterator<size_t>(graph->get_num_block_columns()),
181 [=] __device__(
size_t block_col) {
182 const size_t hessian_col = h_offsets[block_col];
183 const size_t dim = h_offsets[block_col + 1] - hessian_col;
187 const auto end = p_col[block_col + 1];
188 const auto b = end - 1;
191 if (r_idx[b] == block_col) {
193 auto block = h + block_locations[b];
194 for (
size_t i = 0; i < dim; i++) {
195 const auto h_diag = diag[hessian_col + i];
197 (S)((
double)h_diag * (1.0 + (
double)damping_factor));
203 void setup_hessian_computation(Graph<T, S> *graph,
StreamPool &streams) {
210 const auto &factors = graph->get_factor_descriptors();
211 size_t mul_count = 0;
212 for (
auto &f : factors) {
213 const size_t num_vertices = f->get_num_descriptors();
214 for (
size_t i = 0; i < num_vertices; i++) {
215 for (
size_t j = i; j < num_vertices; j++) {
216 mul_count += f->active_count();
221 h_block_offsets.resize(mul_count);
225 for (
auto &f : factors) {
226 mul_count += f->setup_hessian_computation(
227 block_indices, d_hessian, h_block_offsets.data() + mul_count,
232 d_block_offsets = h_block_offsets;
235 void execute_hessian_computation(Graph<T, S> *graph,
StreamPool &streams) {
236 thrust::fill(thrust::device, d_hessian.begin(), d_hessian.end(),
237 static_cast<S
>(0.0));
238 size_t mul_count = 0;
239 const auto &factors = graph->get_factor_descriptors();
240 for (
auto &f : factors) {
241 mul_count += f->execute_hessian_computation(
242 block_indices, d_hessian, d_block_offsets.data().get() + mul_count,
248 std::unordered_map<BlockCoordinates, size_t> block_indices;
249 thrust::device_vector<S> d_hessian;
250 thrust::device_vector<size_t> d_col_pointers;
251 thrust::device_vector<size_t> d_row_indices;
252 thrust::device_vector<size_t> d_offsets;
253 thrust::device_vector<S> d_prev_diagonal;
254 thrust::device_vector<size_t> d_hessian_offsets;
255 thrust::device_vector<size_t> scalar_to_block_map;
258 thrust::host_vector<size_t> h_block_offsets;
259 thrust::device_vector<size_t> d_block_offsets;
264 void build_structure(Graph<T, S> *graph,
StreamPool &streams) {
271 const auto block_coords = get_block_coordinates(graph);
277 size_t num_values = 0;
278 block_indices.clear();
279 for (
const auto &coord : block_coords) {
280 block_indices[coord] = num_values;
281 num_values += graph->get_variable_dimension(coord.row) *
282 graph->get_variable_dimension(coord.col);
284 d_hessian.resize(num_values);
290 build_indices(graph, block_coords);
293 setup_hessian_computation(graph, streams);
296 void update_values(Graph<T, S> *graph,
StreamPool &streams) {
301 execute_hessian_computation(graph, streams);
312 backup_diagonal(graph, streams);
315 template <
typename I>
316 void build_csc_structure(Graph<T, S> *graph, CSCMatrix<S, I> &matrix) {
318 const auto hessian_dim = graph->get_hessian_dimension();
319 matrix.d_pointers.resize(hessian_dim + 1);
320 thrust::fill(thrust::device, matrix.d_pointers.begin(),
321 matrix.d_pointers.end(), 0);
328 const auto h = d_hessian.data().get();
329 const auto p_col = d_col_pointers.data().get();
330 const auto r_idx = d_row_indices.data().get();
331 const auto block_locations = d_offsets.data().get();
332 const auto hessian_offsets =
333 d_hessian_offsets.data().get();
334 const auto s2b_map = scalar_to_block_map.data().get();
336 auto scalar_ptrs = matrix.d_pointers.data().get();
338 thrust::for_each(thrust::device, thrust::make_counting_iterator<size_t>(0),
339 thrust::make_counting_iterator<size_t>(hessian_dim),
340 [=] __device__(
const size_t hessian_col) {
341 const auto block_col = s2b_map[hessian_col];
343 const auto start = p_col[block_col];
344 const auto end = p_col[block_col + 1];
348 for (
size_t b = start; b < end; b++) {
352 const auto block_row = r_idx[b];
353 const auto nrows = hessian_offsets[block_row + 1] -
354 hessian_offsets[block_row];
356 auto scalar_row = hessian_offsets[block_row];
358 for (
size_t r = 0; r < nrows; r++) {
359 if (scalar_row + r <= hessian_col) {
366 scalar_ptrs[hessian_col] = num_values;
370 thrust::exclusive_scan(thrust::device, matrix.d_pointers.begin(),
371 matrix.d_pointers.end(), matrix.d_pointers.begin());
376 matrix.d_pointers[hessian_dim];
378 matrix.d_indices.resize(nnz);
379 matrix.d_values.resize(nnz);
381 auto p_row_indices = matrix.d_indices.data().get();
384 thrust::for_each(thrust::device, thrust::make_counting_iterator<size_t>(0),
385 thrust::make_counting_iterator<size_t>(hessian_dim),
386 [=] __device__(
const size_t hessian_col) {
387 const auto block_col = s2b_map[hessian_col];
390 const auto start = p_col[block_col];
391 const auto end = p_col[block_col + 1];
392 size_t write_idx = scalar_ptrs[hessian_col];
394 for (
size_t b = start; b < end; b++) {
397 const auto block_row = r_idx[b];
398 const auto nrows = hessian_offsets[block_row + 1] -
399 hessian_offsets[block_row];
401 auto scalar_row = hessian_offsets[block_row];
403 for (
size_t r = 0; r < nrows; r++) {
404 if (scalar_row + r <= hessian_col) {
406 p_row_indices[write_idx++] = scalar_row + r;
415 template <
typename I>
416 void update_csc_values(Graph<T, S> *graph, CSCMatrix<S, I> &matrix) {
418 const auto hessian_dim = graph->get_hessian_dimension();
419 const auto h = d_hessian.data().get();
420 const auto p_col = d_col_pointers.data().get();
421 const auto r_idx = d_row_indices.data().get();
422 const auto block_locations = d_offsets.data().get();
423 const auto hessian_offsets =
424 d_hessian_offsets.data().get();
425 const auto s2b_map = scalar_to_block_map.data().get();
427 auto scalar_row_ptrs = matrix.d_pointers.data().get();
429 auto p_values = matrix.d_values.data().get();
432 thrust::device, thrust::make_counting_iterator<size_t>(0),
433 thrust::make_counting_iterator<size_t>(hessian_dim),
434 [=] __device__(
const size_t hessian_col) {
435 const auto block_col = s2b_map[hessian_col];
438 const auto start = p_col[block_col];
439 const auto end = p_col[block_col + 1];
440 size_t write_idx = scalar_row_ptrs[hessian_col];
442 for (
size_t b = start; b < end; b++) {
445 const auto block_row = r_idx[b];
447 hessian_offsets[block_row + 1] - hessian_offsets[block_row];
449 auto scalar_row = hessian_offsets[block_row];
451 const auto col_in_block = hessian_col - hessian_offsets[block_col];
453 const auto block = h + block_locations[b] + col_in_block * nrows;
455 for (
size_t r = 0; r < nrows; r++) {
456 if (scalar_row + r <= hessian_col) {
458 p_values[write_idx++] = block[r];