20template <
typename T,
typename S>
class Graph {
23 std::vector<BaseVertexDescriptor<T, S> *> vertex_descriptors;
24 std::vector<BaseFactorDescriptor<T, S> *> factor_descriptors;
25 thrust::device_vector<T> b;
26 thrust::device_vector<T> jacobian_scales;
27 size_t hessian_column;
28 std::vector<size_t> hessian_offsets;
32 Graph() : scale_jacobians(
true) {}
34 size_t get_hessian_dimension()
const {
return hessian_column; }
35 size_t get_variable_dimension(
const size_t block_index)
const {
36 return hessian_offsets[block_index + 1] - hessian_offsets[block_index];
38 size_t get_num_block_columns()
const {
return hessian_offsets.size() - 1; }
40 const std::vector<size_t> &get_offset_vector()
const {
41 return hessian_offsets;
44 thrust::device_vector<T> &get_b() {
return b; }
46 std::vector<BaseVertexDescriptor<T, S> *> &get_vertex_descriptors() {
47 return vertex_descriptors;
50 std::vector<BaseFactorDescriptor<T, S> *> &get_factor_descriptors() {
51 return factor_descriptors;
54 thrust::device_vector<T> &get_jacobian_scales() {
return jacobian_scales; }
56 template <
typename V>
void add_vertex_descriptor(V *descriptor) {
57 vertex_descriptors.push_back(descriptor);
60 template <
typename F>
void add_factor_descriptor(F *factor) {
61 factor_descriptors.push_back(factor);
64 template <
typename D>
void add_descriptor(D *descriptor) {
65 if constexpr (std::is_base_of<BaseVertexDescriptor<T, S>, D>::value) {
66 add_vertex_descriptor(descriptor);
67 }
else if constexpr (std::is_base_of<BaseFactorDescriptor<T, S>,
69 add_factor_descriptor(descriptor);
71 static_assert(std::is_base_of<BaseVertexDescriptor<T, S>, D>::value ||
72 std::is_base_of<BaseFactorDescriptor<T, S>, D>::value,
73 "You tried to add something strange to the graph.");
77 bool initialize_optimization(
const uint8_t level) {
82 std::vector<std::pair<size_t, std::pair<size_t, size_t>>>
83 global_to_local_combined;
85 for (
size_t i = 0; i < vertex_descriptors.size(); ++i) {
86 const auto &map = vertex_descriptors[i]->get_global_map();
87 for (
const auto &entry : map) {
88 global_to_local_combined.push_back({entry.first, {i, entry.second}});
93 std::sort(global_to_local_combined.begin(), global_to_local_combined.end(),
94 [](
const auto &a,
const auto &b) { return a.first < b.first; });
97 for (
auto &desc : factor_descriptors) {
98 desc->initialize_device_ids(level);
100 deactivate_unused_vertices(level);
104 size_t hessian_block_index = 0;
105 hessian_offsets.clear();
106 for (
const auto &entry : global_to_local_combined) {
107 if (vertex_descriptors[entry.second.first]->is_active(entry.first)) {
108 vertex_descriptors[entry.second.first]->set_hessian_column(
109 entry.first, hessian_column, hessian_block_index);
110 hessian_offsets.push_back(hessian_column);
111 hessian_column += vertex_descriptors[entry.second.first]->dimension();
112 hessian_block_index++;
115 hessian_offsets.push_back(hessian_column);
118 for (
auto &desc : vertex_descriptors) {
123 for (
auto &desc : factor_descriptors) {
128 for (
auto &f : factor_descriptors) {
129 f->initialize_jacobian_storage();
137 void deactivate_unused_vertices(
const uint8_t level) {
140 for (
size_t i = 0; i < vertex_descriptors.size(); ++i) {
141 if (vertex_descriptors[i]->count() == 0) {
142 std::cerr <<
"Error: Vertex descriptor " << i <<
" has no entries."
147 for (
size_t i = 0; i < factor_descriptors.size(); ++i) {
148 if (factor_descriptors[i]->active_count() == 0) {
149 std::cerr <<
"Error: Factor descriptor " << i <<
" has no entries."
155 for (
auto &desc : vertex_descriptors) {
157 thrust::cuda::par_nosync.on(0), desc->get_active_state(),
158 desc->get_active_state() + desc->count(), desc->get_active_state(),
159 [] __device__(uint8_t state) {
return state & 0x7F; });
164 for (
auto &desc : factor_descriptors) {
165 desc->flag_active_vertices_async(level);
169 for (
auto &desc : vertex_descriptors) {
171 thrust::cuda::par_nosync.on(0), desc->get_active_state(),
172 desc->get_active_state() + desc->count(), desc->get_active_state(),
173 [] __device__(uint8_t state) {
return state ^ 0x80; });
175 cudaStreamSynchronize(0);
178 bool build_structure() {
180 const auto size_x = get_hessian_dimension();
182 jacobian_scales.resize(size_x);
187 void compute_error() {
188 for (
auto &factor : factor_descriptors) {
189 factor->compute_error();
191 cudaStreamSynchronize(0);
195 T chi2 =
static_cast<T
>(0);
196 for (
auto &factor : factor_descriptors) {
197 chi2 += factor->chi2();
204 for (
auto &factor : factor_descriptors) {
206 if (factor->use_autodiff() && (factor->store_jacobians() ||
207 !factor->supports_dynamic_jacobians())) {
208 factor->compute_error_autodiff(streams);
210 factor->compute_error();
211 factor->compute_jacobians(streams);
214 cudaStreamSynchronize(0);
220 if (scale_jacobians) {
221 thrust::fill(jacobian_scales.begin(), jacobian_scales.end(), 0);
222 for (
auto &factor : factor_descriptors) {
223 factor->compute_hessian_scalar_diagonal_async(
224 jacobian_scales.data().get(),
nullptr);
226 cudaStreamSynchronize(0);
229 thrust::device, jacobian_scales.begin(), jacobian_scales.end(),
230 jacobian_scales.begin(), [] __device__(T value) {
231 const double denom = std::numeric_limits<double>::epsilon() +
232 sqrt(
static_cast<double>(value));
235 return static_cast<T
>(1.0 / denom);
238 thrust::fill(jacobian_scales.begin(), jacobian_scales.end(), 1.0);
242 if (scale_jacobians) {
243 for (
auto &factor : factor_descriptors) {
244 factor->scale_jacobians_async(jacobian_scales.data().get());
246 cudaStreamSynchronize(0);
250 thrust::fill(thrust::cuda::par_nosync.on(0), b.begin(), b.end(), 0);
251 for (
auto &fd : factor_descriptors) {
252 fd->compute_b_async(b.data().get(), jacobian_scales.data().get());
255 cudaStreamSynchronize(0);
258 void apply_update(
const T *delta_x,
StreamPool &streams) {
260 for (
auto &desc : vertex_descriptors) {
261 desc->apply_update_async(delta_x, jacobian_scales.data().get(),
268 void backup_parameters() {
270 for (
const auto &desc : vertex_descriptors) {
271 desc->backup_parameters_async();
274 cudaStreamSynchronize(0);
277 void revert_parameters() {
279 for (
auto &desc : vertex_descriptors) {
280 desc->restore_parameters_async();
283 cudaStreamSynchronize(0);
287 vertex_descriptors.clear();
288 factor_descriptors.clear();
290 jacobian_scales.clear();
292 hessian_offsets.clear();
295 void scale_system(
const bool enable_scaling) {
296 scale_jacobians = enable_scaling;