Graphite  0.5.0
GPU-accelerated graph optimization framework
Loading...
Searching...
No Matches
hessian.hpp
Go to the documentation of this file.
1
2#pragma once
3#include <graphite/block.hpp>
4#include <graphite/common.hpp>
6#include <graphite/stream.hpp>
7#include <graphite/utils.hpp>
8#include <thrust/sort.h>
9#include <thrust/unique.h>
10#include <unordered_map>
11#include <utility>
12
13namespace graphite {
14
15template <typename T, typename I> class CSCMatrix {
16public:
17 thrust::device_vector<I> d_pointers;
18 thrust::device_vector<I> d_indices;
19 thrust::device_vector<T> d_values;
20};
21
22template <typename T> class HessianBlocks {
23public:
24 std::pair<size_t, size_t> dimensions;
25 size_t num_blocks;
26 thrust::device_vector<T> data;
27 thrust::device_vector<BlockCoordinates> block_coordinates;
28
29 void resize(size_t num_blocks, size_t rows, size_t cols) {
30 dimensions = {rows, cols};
31 this->num_blocks = num_blocks;
32 data.resize(rows * cols * num_blocks);
33 block_coordinates.resize(num_blocks);
34 }
35
36 void fill(const T &value) {
37 thrust::fill(thrust::device, data.begin(), data.end(), value);
38 }
39
40 void zero() {
41 thrust::fill(thrust::device, data.begin(), data.end(), static_cast<T>(0));
42 }
43};
44
45template <typename T, typename S> class Hessian {
46public:
47 // Returns coordinates of upper triangular filled-in Hessian blocks
48 std::vector<BlockCoordinates> get_block_coordinates(Graph<T, S> *graph) {
49 // For a constraint to contribute to the Hessian,
50 // the constraint must be active, and both variables must be active
51 // (non-fixed), and the block must reside in the upper triangular part of
52 // the Hessian (i.e., row index <= column index)
53 thrust::device_vector<BlockCoordinates> block_coords;
54 auto &f_desc = graph->get_factor_descriptors();
55 for (auto &f : f_desc) {
56 f->get_hessian_block_coordinates(block_coords);
57 }
58
59 thrust::sort(
60 thrust::device, block_coords.begin(), block_coords.end(),
61 [] __device__(const BlockCoordinates &a, const BlockCoordinates &b) {
62 // sort so that we get blocks in column-major order (e.g consecutive
63 // indices in same column)
64 if (a.col == b.col) {
65 return a.row < b.row;
66 }
67 return a.col < b.col;
68 });
69
70 // Remove duplicates
71 auto end_it = thrust::unique(
72 thrust::device, block_coords.begin(), block_coords.end(),
73 [] __device__(const BlockCoordinates &a, const BlockCoordinates &b) {
74 return (a.row == b.row) && (a.col == b.col);
75 });
76
77 // Resize the vector to remove duplicates
78 block_coords.resize(end_it - block_coords.begin());
79
80 // Copy to host and convert to std::vector
81 std::vector<BlockCoordinates> host_block_coords(block_coords.size());
82 thrust::copy(block_coords.begin(), block_coords.end(),
83 host_block_coords.begin());
84 return host_block_coords;
85 }
86
87 // Block coords must be unique and in column-major order
88 // Build block-csc style indices on the host that we can access on GPU for
89 // quickly constructing a scalar CSC-style Hessian matrix (upper triangle
90 // only)
91
92 void build_indices(Graph<T, S> *graph,
93 const std::vector<BlockCoordinates> &block_coords) {
94 const auto num_columns = graph->get_num_block_columns();
95 csc::build_block_csc_indices(num_columns, block_indices, block_coords,
96 d_col_pointers, d_row_indices, d_offsets);
97 d_hessian_offsets = graph->get_offset_vector();
98 csc::build_scalar_to_block_map(d_hessian_offsets, num_columns,
99 scalar_to_block_map);
100 }
101
102 void backup_diagonal(Graph<T, S> *graph, StreamPool &streams) {
103 d_prev_diagonal.resize(graph->get_hessian_dimension());
104
105 auto diag = d_prev_diagonal.data().get();
106 const auto h_offsets = d_hessian_offsets.data().get();
107 const auto p_col = d_col_pointers.data().get();
108 const auto r_idx = d_row_indices.data().get();
109 const auto block_locations = d_offsets.data().get();
110 const auto h = d_hessian.data().get();
111
112 thrust::for_each(
113 thrust::device, thrust::make_counting_iterator<size_t>(0),
114 thrust::make_counting_iterator<size_t>(graph->get_num_block_columns()),
115 [=] __device__(size_t block_col) {
116 const size_t hessian_col = h_offsets[block_col];
117 const size_t dim = h_offsets[block_col + 1] - hessian_col;
118
119 // find diagonal block in column where row == col
120 // const auto start = p_col[block_col];
121 const auto end = p_col[block_col + 1];
122
123 const auto b = end - 1; // assume there is always a diagonal block and
124 // it is always last
125
126 if (r_idx[b] == block_col) {
127 // found diagonal block, copy elements
128 const auto block = h + block_locations[b];
129 for (size_t i = 0; i < dim; i++) {
130 diag[hessian_col + i] = block[i * dim + i];
131 }
132 }
133 });
134 }
135
136 void apply_damping(Graph<T, S> *graph, T damping_factor,
137 const bool use_identity, StreamPool &streams) {
138 // Assume diagonal was already backed up when recomputing Hessian values
139 auto diag = d_prev_diagonal.data().get();
140 const auto h_offsets = d_hessian_offsets.data().get();
141 const auto p_col = d_col_pointers.data().get();
142 const auto r_idx = d_row_indices.data().get();
143 auto block_locations = d_offsets.data().get();
144 auto h = d_hessian.data().get();
145
146 thrust::for_each(
147 thrust::device, thrust::make_counting_iterator<size_t>(0),
148 thrust::make_counting_iterator<size_t>(graph->get_num_block_columns()),
149 [=] __device__(size_t block_col) {
150 const size_t hessian_col = h_offsets[block_col];
151 const size_t dim = h_offsets[block_col + 1] - hessian_col;
152
153 // find diagonal block in column where row == col
154 // const auto start = p_col[block_col];
155 const auto end = p_col[block_col + 1];
156 const auto b = end - 1; // assume there is always a diagonal block and
157 // it is always last
158
159 if (r_idx[b] == block_col) {
160 // found diagonal block, backup then apply damping
161 auto block = h + block_locations[b];
162 for (size_t i = 0; i < dim; i++) {
163 const auto h_diag = diag[hessian_col + i];
164 if (use_identity) {
165 block[i * dim + i] =
166 (S)((double)h_diag + (double)damping_factor);
167 } else {
168 block[i * dim + i] =
169 (S)((double)h_diag +
170 damping_factor *
171 std::clamp((double)h_diag, 1.0e-6, 1.0e32));
172 }
173 }
174 }
175 });
176 }
177
178 void setup_hessian_computation(Graph<T, S> *graph, StreamPool &streams) {
179 // Build offset buffer for multiplying Jacobian blocks.
180 // Each offset corresponds to the start index of an output Hessian block.\
181
182 // Generate the offsets for each product, for each factor
183
184 // First we need to compute the upper bound for the size of h_block_offsets
185 const auto &factors = graph->get_factor_descriptors();
186 size_t mul_count = 0;
187 for (auto &f : factors) {
188 const size_t num_vertices = f->get_num_descriptors();
189 for (size_t i = 0; i < num_vertices; i++) {
190 for (size_t j = i; j < num_vertices; j++) {
191 mul_count += f->active_count();
192 }
193 }
194 }
195
196 h_block_offsets.resize(mul_count);
197
198 // Now compute the offsets
199 mul_count = 0;
200 for (auto &f : factors) {
201 mul_count += f->setup_hessian_computation(
202 block_indices, d_hessian, h_block_offsets.data() + mul_count,
203 streams);
204 }
205
206 // Final copy
207 d_block_offsets = h_block_offsets;
208 }
209
210 void execute_hessian_computation(Graph<T, S> *graph, StreamPool &streams) {
211 thrust::fill(thrust::device, d_hessian.begin(), d_hessian.end(),
212 static_cast<S>(0.0));
213 size_t mul_count = 0;
214 const auto &factors = graph->get_factor_descriptors();
215 for (auto &f : factors) {
216 mul_count += f->execute_hessian_computation(
217 block_indices, d_hessian, d_block_offsets.data().get() + mul_count,
218 streams);
219 }
220 }
221
222 // Data
223 std::unordered_map<BlockCoordinates, size_t> block_indices;
224 thrust::device_vector<S> d_hessian;
225 thrust::device_vector<size_t> d_col_pointers;
226 thrust::device_vector<size_t> d_row_indices;
227 thrust::device_vector<size_t> d_offsets;
228 thrust::device_vector<S> d_prev_diagonal;
229 thrust::device_vector<size_t> d_hessian_offsets;
230 thrust::device_vector<size_t> scalar_to_block_map;
231
232 // For calculating Hessian blocks
233 thrust::host_vector<size_t> h_block_offsets;
234 thrust::device_vector<size_t> d_block_offsets;
235
236public:
237 Hessian() = default;
238
239 const thrust::device_vector<size_t> &get_block_col_pointers() const {
240 return d_col_pointers;
241 }
242
243 const thrust::device_vector<size_t> &get_block_row_indices() const {
244 return d_row_indices;
245 }
246
247 const thrust::device_vector<size_t> &get_block_value_offsets() const {
248 return d_offsets;
249 }
250
251 const thrust::device_vector<S> &get_values() const { return d_hessian; }
252
253 S *get_values_ptr() { return d_hessian.data().get(); }
254
255 const S *get_values_ptr() const { return d_hessian.data().get(); }
256
257 void build_structure(Graph<T, S> *graph, StreamPool &streams) {
258 // Implementation for building the Hessian matrix
259
260 // Assume we don't have a GPU hash map.
261 // First we need to count how many Hessian blocks we have
262 // We'll create a set of Hessian block coordinates by iterating over
263 // descriptors Ignore blocks not in the upper triangular part
264 const auto block_coords = get_block_coordinates(graph);
265
266 // Then we need to allocate memory for each block
267 // We can iterate the set and figure out the total memory,
268 // then allocate a big chunk and assign pointers accordingly
269 // TODO: Maybe we can use an GPU exclusive scan instead?
270 size_t num_values = 0;
271 block_indices.clear();
272 block_indices.reserve(block_coords.size());
273 for (const auto &coord : block_coords) {
274 block_indices[coord] = num_values;
275 num_values += graph->get_variable_dimension(coord.row) *
276 graph->get_variable_dimension(coord.col);
277 }
278 d_hessian.resize(num_values);
279
280 // We need to end up with a block CSC-style representation
281 // where we can iterate down the blocks in each block columnn
282 // and retrieve the data pointer for each block for the purpose of
283 // constructing a scalar CSC-style representation.
284 build_indices(graph, block_coords);
285
286 // Setup data for computing Hessian as product of Jacobians
287 setup_hessian_computation(graph, streams);
288 }
289
290 void update_values(Graph<T, S> *graph, StreamPool &streams) {
291 // thrust::fill(thrust::device, d_hessian.begin(), d_hessian.end(),
292 // static_cast<S>(0.0));
293
294 // setup_hessian_computation(graph, streams);
295 execute_hessian_computation(graph, streams);
296
297 // auto &f_desc = graph->get_factor_descriptors();
298 // for (auto &f : f_desc) {
299 // h_block_offsets.clear();
300 // d_block_offsets.clear();
301 // f->compute_hessian_blocks(block_indices, d_hessian, h_block_offsets,
302 // d_block_offsets, streams);
303 // }
304
305 // Back up diagonal for damping purposes
306 backup_diagonal(graph, streams);
307 }
308
309 template <typename I>
310 void build_csc_structure(Graph<T, S> *graph, CSCMatrix<S, I> &matrix) {
311 const auto hessian_dim = graph->get_hessian_dimension();
312 csc::build_scalar_csc_structure<S, I>(hessian_dim, d_col_pointers,
313 d_row_indices, d_hessian_offsets,
314 scalar_to_block_map, matrix);
315 }
316
317 template <typename I>
318 void update_csc_values(Graph<T, S> *graph, CSCMatrix<S, I> &matrix) {
319 const auto hessian_dim = graph->get_hessian_dimension();
320 csc::update_scalar_csc_values<S, I>(
321 hessian_dim, d_hessian, d_col_pointers, d_row_indices, d_offsets,
322 d_hessian_offsets, scalar_to_block_map, matrix);
323 }
324};
325
326} // namespace graphite
Definition block.hpp:6
Definition hessian.hpp:15
Definition hessian.hpp:22
Definition hessian.hpp:45
Definition stream.hpp:7
The top-level namespace for Graphite.
Definition eigen_solver.cpp:4