Graphite  0.5.0
GPU-accelerated graph optimization framework
Loading...
Searching...
No Matches
graph.hpp
Go to the documentation of this file.
1
2#pragma once
3#include <graphite/factor.hpp>
4#include <graphite/stream.hpp>
5#include <graphite/vertex.hpp>
6#include <limits>
7#include <thrust/copy.h>
8#include <thrust/execution_policy.h>
9#include <thrust/host_vector.h>
10#include <thrust/sort.h>
11
12namespace graphite {
13
15 size_t global_id;
16 size_t descriptor_index;
17 size_t local_index;
18 bool eliminated;
19};
20
30template <typename T, typename S> class Graph {
31
32private:
33 std::vector<BaseVertexDescriptor<T, S> *> vertex_descriptors;
34 std::vector<BaseFactorDescriptor<T, S> *> factor_descriptors;
35 thrust::host_vector<GlobalToLocalEntry> global_to_local_combined;
36 thrust::device_vector<GlobalToLocalEntry> d_global_to_local_combined;
37 thrust::device_vector<T> b;
38 thrust::device_vector<T> jacobian_scales;
39 size_t hessian_column;
40 std::vector<size_t> hessian_offsets;
41 bool scale_jacobians;
42 size_t elimination_block;
43
44public:
45 Graph() : scale_jacobians(true) {}
46
47 size_t get_hessian_dimension() const { return hessian_column; }
48 size_t get_variable_dimension(const size_t block_index) const {
49 return hessian_offsets[block_index + 1] - hessian_offsets[block_index];
50 }
51 size_t get_num_block_columns() const { return hessian_offsets.size() - 1; }
52
53 const std::vector<size_t> &get_offset_vector() const {
54 return hessian_offsets;
55 }
56
57 thrust::device_vector<T> &get_b() { return b; }
58
59 std::vector<BaseVertexDescriptor<T, S> *> &get_vertex_descriptors() {
60 return vertex_descriptors;
61 }
62
63 std::vector<BaseFactorDescriptor<T, S> *> &get_factor_descriptors() {
64 return factor_descriptors;
65 }
66
67 thrust::device_vector<T> &get_jacobian_scales() { return jacobian_scales; }
68
69 template <typename V> void add_vertex_descriptor(V *descriptor) {
70 vertex_descriptors.push_back(descriptor);
71 }
72
73 template <typename F> void add_factor_descriptor(F *factor) {
74 factor_descriptors.push_back(factor);
75 }
76
77 template <typename D> void add_descriptor(D *descriptor) {
78 if constexpr (std::is_base_of<BaseVertexDescriptor<T, S>, D>::value) {
79 add_vertex_descriptor(descriptor);
80 } else if constexpr (std::is_base_of<BaseFactorDescriptor<T, S>,
81 D>::value) {
82 add_factor_descriptor(descriptor);
83 } else {
84 static_assert(std::is_base_of<BaseVertexDescriptor<T, S>, D>::value ||
85 std::is_base_of<BaseFactorDescriptor<T, S>, D>::value,
86 "You tried to add something strange to the graph.");
87 }
88 }
89
90 size_t get_elimination_block_column() const { return elimination_block; }
91
92 bool initialize_optimization(const uint8_t level) {
93
94 // For each vertex descriptor, take global to local id mapping and transform
95 // it into a Hessian column to local id mapping.
96
97 global_to_local_combined.clear();
98 d_global_to_local_combined.clear();
99
100 for (size_t i = 0; i < vertex_descriptors.size(); ++i) {
101 const auto &map = vertex_descriptors[i]->get_global_map();
102 const bool eliminated = vertex_descriptors[i]->get_eliminate();
103 for (const auto &entry : map) {
104 global_to_local_combined.push_back(
105 GlobalToLocalEntry{entry.first, i, entry.second, eliminated});
106 }
107 }
108
109 // Sort the combined list by global ID on the GPU.
110 d_global_to_local_combined = global_to_local_combined;
111
112 thrust::sort(thrust::device, d_global_to_local_combined.begin(),
113 d_global_to_local_combined.end(),
114 [] __host__ __device__(const GlobalToLocalEntry &a,
115 const GlobalToLocalEntry &b) {
116 if (a.eliminated != b.eliminated) {
117 return !a.eliminated;
118 }
119 return a.global_id < b.global_id;
120 });
121
122 global_to_local_combined = d_global_to_local_combined;
123
124 // Initialize device ids and copy over factor and current vertex state
125 for (auto &desc : factor_descriptors) {
126 desc->initialize_device_ids(level);
127 }
128 deactivate_unused_vertices(level);
129
130 // Assign Hessian columns to local indices
131 hessian_column = 0;
132 size_t hessian_block_index = 0;
133 hessian_offsets.clear();
134 elimination_block = std::numeric_limits<size_t>::max();
135 for (const auto &entry : global_to_local_combined) {
136 if (vertex_descriptors[entry.descriptor_index]->is_active(
137 entry.global_id)) {
138 if (vertex_descriptors[entry.descriptor_index]->get_eliminate()) {
139 elimination_block = std::min(elimination_block, hessian_block_index);
140 }
141 vertex_descriptors[entry.descriptor_index]->set_hessian_column(
142 entry.global_id, hessian_column, hessian_block_index);
143 hessian_offsets.push_back(hessian_column);
144 hessian_column +=
145 vertex_descriptors[entry.descriptor_index]->dimension();
146 hessian_block_index++;
147 }
148 }
149 hessian_offsets.push_back(hessian_column);
150
151 // Copy vertex values to device
152 for (auto &desc : vertex_descriptors) {
153 desc->to_device();
154 }
155
156 // Copy factors to device
157 for (auto &desc : factor_descriptors) {
158 desc->to_device();
159 }
160
161 // Initialize Jacobian storage
162 for (auto &f : factor_descriptors) {
163 f->initialize_jacobian_storage();
164 }
165
166 return true;
167 }
168
169 // Deactivates vertices of inactive factors
170 // Expects that vertices and factor states are finalized
171 void deactivate_unused_vertices(const uint8_t level) {
172
173 // Check for empty descriptors
174 for (size_t i = 0; i < vertex_descriptors.size(); ++i) {
175 if (vertex_descriptors[i]->count() == 0) {
176 std::cerr << "Error: Vertex descriptor " << i << " has no entries."
177 << std::endl;
178 }
179 }
180
181 for (size_t i = 0; i < factor_descriptors.size(); ++i) {
182 if (factor_descriptors[i]->active_count() == 0) {
183 std::cerr << "Error: Factor descriptor " << i << " has no entries."
184 << std::endl;
185 }
186 }
187
188 // For each vertex descriptor, set the state MSB to 0
189 for (auto &desc : vertex_descriptors) {
190 thrust::transform(
191 thrust::cuda::par_nosync.on(0), desc->get_active_state(),
192 desc->get_active_state() + desc->count(), desc->get_active_state(),
193 [] __device__(uint8_t state) { return state & 0x7F; });
194 }
195 // For each factor descriptor
196 // Go through each vertex descriptor and set the state MSB to 1 if the
197 // constraint is active
198 for (auto &desc : factor_descriptors) {
199 desc->flag_active_vertices_async(level); // stream 0
200 }
201 // For each vertex descriptor, MSB of the active state is XOR'd with 1
202 // (0->1, 1->0)
203 for (auto &desc : vertex_descriptors) {
204 thrust::transform(
205 thrust::cuda::par_nosync.on(0), desc->get_active_state(),
206 desc->get_active_state() + desc->count(), desc->get_active_state(),
207 [] __device__(uint8_t state) { return state ^ 0x80; });
208 }
209 cudaStreamSynchronize(0);
210 }
211
212 bool build_structure() {
213 // Allocate storage for solver vectors
214 const auto size_x = get_hessian_dimension();
215 b.resize(size_x);
216 jacobian_scales.resize(size_x);
217
218 return true;
219 }
220
221 void compute_error() {
222 for (auto &factor : factor_descriptors) {
223 factor->compute_error(); // TODO: Make non-autodiff version
224 }
225 cudaStreamSynchronize(0);
226 }
227
228 T chi2() {
229 T chi2 = static_cast<T>(0);
230 for (auto &factor : factor_descriptors) {
231 chi2 += factor->chi2();
232 }
233 return chi2;
234 }
235
236 void linearize(StreamPool &streams) {
237
238 for (auto &factor : factor_descriptors) {
239 // compute error
240 if (factor->use_autodiff() && (factor->store_jacobians() ||
241 !factor->supports_dynamic_jacobians())) {
242 factor->compute_error_autodiff(streams); // synchronous
243 } else {
244 factor->compute_error(); // synchronous
245 factor->compute_jacobians(streams); // synchronous
246 }
247 }
248 cudaStreamSynchronize(0);
249
250 // Compute chi2
251 chi2();
252
253 // Compute Jacobian scale
254 if (scale_jacobians) {
255 thrust::fill(jacobian_scales.begin(), jacobian_scales.end(), 0);
256 for (auto &factor : factor_descriptors) {
257 factor->compute_hessian_scalar_diagonal_async(
258 jacobian_scales.data().get(), nullptr);
259 }
260 cudaStreamSynchronize(0);
261
262 thrust::transform(
263 thrust::device, jacobian_scales.begin(), jacobian_scales.end(),
264 jacobian_scales.begin(), [] __device__(T value) {
265 // TODO: Make this configurable
266 const double denom = std::numeric_limits<double>::epsilon() +
267 sqrt(static_cast<double>(value));
268 // const double denom = 1.0 + sqrt(static_cast<double>(value));
269 return static_cast<T>(1.0 / denom);
270 });
271 } else {
272 thrust::fill(jacobian_scales.begin(), jacobian_scales.end(), 1.0);
273 }
274
275 // Scale Jacobians
276 if (scale_jacobians) {
277 for (auto &factor : factor_descriptors) {
278 factor->scale_jacobians_async(jacobian_scales.data().get());
279 }
280 cudaStreamSynchronize(0);
281 }
282
283 // Calculate b=J^T * r
284 thrust::fill(thrust::cuda::par_nosync.on(0), b.begin(), b.end(), 0);
285 for (auto &fd : factor_descriptors) {
286 fd->compute_b_async(b.data().get(), jacobian_scales.data().get());
287 }
288
289 cudaStreamSynchronize(0);
290 }
291
292 void apply_update(const T *delta_x, StreamPool &streams) {
293 size_t i = 0;
294 for (auto &desc : vertex_descriptors) {
295 desc->apply_update_async(delta_x, jacobian_scales.data().get(),
296 streams.select(i));
297 i++;
298 }
299 streams.sync_n(i);
300 }
301
302 void backup_parameters() {
303
304 for (const auto &desc : vertex_descriptors) {
305 desc->backup_parameters_async();
306 }
307
308 cudaStreamSynchronize(0);
309 }
310
311 void revert_parameters() {
312
313 for (auto &desc : vertex_descriptors) {
314 desc->restore_parameters_async();
315 }
316
317 cudaStreamSynchronize(0);
318 }
319
320 void clear() {
321 vertex_descriptors.clear();
322 factor_descriptors.clear();
323 global_to_local_combined.clear();
324 d_global_to_local_combined.clear();
325 b.clear();
326 jacobian_scales.clear();
327 hessian_column = 0;
328 hessian_offsets.clear();
329 }
330
331 void scale_system(const bool enable_scaling) {
332 scale_jacobians = enable_scaling;
333 }
334};
335
343} // namespace graphite
Graph class which stores references to vertex and factor descriptors, and provides methods for optimi...
Definition graph.hpp:30
Definition stream.hpp:7
The top-level namespace for Graphite.
Definition eigen_solver.cpp:4
Definition graph.hpp:14