Graphite  0.5.0
GPU-accelerated graph optimization framework
Loading...
Searching...
No Matches
factor.hpp
Go to the documentation of this file.
1
2#pragma once
3#include <graphite/active.hpp>
4#include <graphite/block.hpp>
5#include <graphite/common.hpp>
7#include <graphite/loss.hpp>
14#include <graphite/utils.hpp>
15#include <graphite/vector.hpp>
16#include <graphite/vertex.hpp>
17#include <thrust/execution_policy.h>
18#include <thrust/inner_product.h>
19#include <thrust/universal_vector.h>
20
21namespace graphite {
22
26template <typename S> class JacobianStorage {
27public:
28 std::pair<size_t, size_t> dimensions;
29
30 thrust::device_vector<S> data;
31};
32
36template <typename T, typename S> class BaseFactorDescriptor {
37public:
38 using InvP = std::conditional_t<is_low_precision<S>::value, T, S>;
39
40 virtual ~BaseFactorDescriptor(){};
41
42 virtual bool use_autodiff() = 0;
43 virtual void compute_error() = 0;
44 virtual void compute_error_autodiff(StreamPool &streams) = 0;
45 virtual void compute_b_async(T *b, const T *jacobian_scales) = 0;
46 virtual void compute_Jv(T *out, T *in, const T *jacobian_scales,
47 StreamPool &streams) = 0;
48 virtual void compute_Jtv(T *out, T *in, const T *jacobian_scales,
49 StreamPool &streams) = 0;
50
51 virtual void flag_active_vertices_async(const uint8_t level) = 0;
52
53 virtual void compute_jacobians(StreamPool &streams) = 0;
54 virtual void compute_hessian_block_diagonal_async(
55 std::unordered_map<BaseVertexDescriptor<T, S> *,
56 thrust::device_vector<InvP>> &block_diagonals,
57 const T *jacobian_scales, cudaStream_t stream) = 0;
58
59 virtual void
60 compute_hessian_scalar_diagonal_async(T *diagonal,
61 const T *jacobian_scales) = 0;
62
63 virtual JacobianStorage<S> *get_jacobians() = 0;
64 virtual void initialize_jacobian_storage() = 0;
65
66 virtual size_t active_count() const = 0;
67 virtual size_t get_num_descriptors() const = 0;
68
69 virtual size_t get_residual_size() const = 0;
70 virtual void scale_jacobians_async(T *jacobian_scales) = 0;
71
72 virtual void initialize_device_ids(const uint8_t optimization_level) = 0;
73 virtual void to_device() = 0;
74
75 virtual T chi2() = 0;
76
77 virtual void set_jacobian_storage(const bool store) = 0;
78 virtual bool store_jacobians() = 0;
79 virtual bool supports_dynamic_jacobians() = 0;
80
81 virtual void get_hessian_block_coordinates(
82 thrust::device_vector<BlockCoordinates> &block_coords) = 0;
83
84 virtual size_t setup_hessian_computation(
85 std::unordered_map<BlockCoordinates, size_t> &block_indices,
86 thrust::device_vector<S> &d_hessian, size_t *h_block_offsets,
87 StreamPool &streams) = 0;
88
89 virtual size_t execute_hessian_computation(
90 std::unordered_map<BlockCoordinates, size_t> &block_indices,
91 thrust::device_vector<S> &d_hessian, size_t *d_block_offsets,
92 StreamPool &streams) = 0;
93};
94
95template <typename T> struct get_vertex_type {
96 using type = typename T::VertexType;
97};
98
99template <typename T> struct get_vertex_pointer_type {
100 using type = typename T::VertexType *;
101};
102
103template <typename T> struct get_vertex_pointer_pointer_type {
104 using type = typename T::VertexType **;
105};
106
107template <typename Tuple, template <typename> class MetaFunc>
109
110// Partial specialization for std::tuple
111template <typename... Ts, template <typename> class MetaFunc>
112struct transform_tuple<std::tuple<Ts...>, MetaFunc> {
113 using type = std::tuple<typename MetaFunc<Ts>::type...>;
114};
115
120template <typename T, typename S, typename FTraits>
122
123private:
124 std::vector<size_t> global_ids;
125 std::unordered_map<size_t, size_t> global_to_local_map;
126 std::vector<size_t> local_to_global_map;
127
128 HandleManager<size_t> hm;
129
130 bool _store_jacobians;
131 size_t _active_count;
132
133public:
134 using InvP = std::conditional_t<is_low_precision<S>::value, T, S>;
135
136 using Traits = FTraits;
137
138 static constexpr size_t N =
139 std::tuple_size<typename Traits::VertexDescriptors>::value;
140 static constexpr size_t error_dim = Traits::dimension;
141
142 std::array<BaseVertexDescriptor<T, S> *, N> vertex_descriptors;
143
144 using VertexDescriptorTuple = typename Traits::VertexDescriptors;
145 using VertexTypesTuple =
147 using VertexPointerTuple =
148 typename transform_tuple<VertexDescriptorTuple,
150 using VertexPointerPointerTuple =
151 typename transform_tuple<VertexDescriptorTuple,
153
154 using ObservationType = typename Traits::Observation;
155 using ConstraintDataType = typename Traits::Data;
156 using LossType = typename Traits::Loss;
157
158 thrust::host_vector<size_t> host_ids;
159 thrust::device_vector<size_t> device_ids;
160 managed_vector<ObservationType> device_obs;
161 thrust::device_vector<T> residuals;
162 managed_vector<S> precision_matrices;
163 managed_vector<ConstraintDataType> data;
164
165 managed_vector<T> chi2_vec;
166 thrust::device_vector<S> chi2_derivative;
167 managed_vector<LossType> loss;
168
169 thrust::host_vector<uint8_t> active;
170 thrust::device_vector<uint8_t> device_active;
171 thrust::device_vector<size_t> active_indices;
172
173 std::array<JacobianStorage<S>, N> jacobians;
174 std::array<S, Traits::dimension * Traits::dimension> default_precision_matrix;
175
181 template <typename... VertexDescPtrs,
182 typename = std::enable_if_t<sizeof...(VertexDescPtrs) == N>>
183 FactorDescriptor(VertexDescPtrs... vertex_descriptors)
184 : _store_jacobians(true), _active_count(0) {
185 link_factors({vertex_descriptors...});
186
187 default_precision_matrix = get_default_precision_matrix();
188 }
189
193 void compute_error() override { ops::compute_error<T>(this); }
194
199 void compute_error_autodiff(StreamPool &streams) override {
200 ops::compute_error_autodiff<T, S>(this, streams);
201 }
202
206 void compute_b_async(T *b, const T *jacobian_scales) override {
207 ops::compute_b_async<T, S>(this, b, jacobian_scales);
208 }
209
213 void compute_Jv(T *out, T *in, const T *jacobian_scales,
214 StreamPool &streams) override {
215 ops::compute_Jv<T, S>(this, out, in, jacobian_scales, streams);
216 }
217
221 void compute_Jtv(T *out, T *in, const T *jacobian_scales,
222 StreamPool &streams) override {
223 ops::compute_Jtv<T, S>(this, out, in, jacobian_scales, streams);
224 }
225
226 void flag_active_vertices_async(const uint8_t level) override {
227 ops::flag_active_vertices(this, level);
228 }
229
233 void compute_jacobians(StreamPool &streams) override {
234 if constexpr (std::is_same_v<typename Traits::Differentiation,
236 ops::compute_jacobians<T, S>(this, streams);
237 }
238 }
239
244 std::unordered_map<BaseVertexDescriptor<T, S> *,
245 thrust::device_vector<InvP>> &block_diagonals,
246 const T *jacobian_scales, cudaStream_t stream) override {
247
248 std::array<InvP *, N> diagonal_blocks;
249 for (size_t i = 0; i < N; i++) {
250 diagonal_blocks[i] = block_diagonals[vertex_descriptors[i]].data().get();
251 }
252
253 ops::compute_block_diagonal<T, S>(this, diagonal_blocks, jacobian_scales,
254 stream);
255 }
256
260 void
262 const T *jacobian_scales) override {
263 ops::compute_hessian_scalar_diagonal<T, S>(this, diagonal, jacobian_scales);
264 }
265
269 static constexpr size_t get_num_vertices() { return N; }
270
275 size_t get_num_descriptors() const override { return N; }
276
280 JacobianStorage<S> *get_jacobians() override { return jacobians.data(); }
281
288 void reserve(size_t size) {
289 global_ids.reserve(N * size);
290 host_ids.reserve(N * size);
291 device_ids.reserve(N * size);
292 device_obs.reserve(size);
293 global_to_local_map.reserve(size);
294 local_to_global_map.reserve(size);
295 precision_matrices.reserve(size * error_dim * error_dim);
296 data.reserve(size);
297 loss.reserve(size);
298 chi2_vec.reserve(size);
299 residuals.reserve(size * error_dim);
300 active.reserve(size);
301 }
302
308 void remove_factor(const size_t id) {
309 if (global_to_local_map.find(id) == global_to_local_map.end()) {
310 std::cerr << "Factor with id " << id << " not found." << std::endl;
311 return;
312 }
313
314 auto local_id = global_to_local_map[id];
315 auto last_local_id = internal_count() - 1;
316
317 // copy constraint
318 for (size_t i = 0; i < N; i++) {
319 global_ids[local_id * N + i] = global_ids[last_local_id * N + i];
320 }
321 global_ids.resize(global_ids.size() - N);
322
323 // observation
324 device_obs[local_id] = device_obs[last_local_id];
325 device_obs.pop_back();
326
327 // precision matrix
328 constexpr size_t precision_matrix_size = error_dim * error_dim;
329 for (size_t i = 0; i < precision_matrix_size; i++) {
330 precision_matrices[local_id * precision_matrix_size + i] =
331 precision_matrices[last_local_id * precision_matrix_size + i];
332 }
333
334 precision_matrices.resize(precision_matrices.size() -
335 precision_matrix_size);
336
337 // Constraint, loss and chi2
338 data[local_id] = data[last_local_id];
339 data.pop_back();
340
341 loss[local_id] = loss[last_local_id];
342 loss.pop_back();
343
344 chi2_vec[local_id] = chi2_vec[last_local_id];
345 chi2_vec.pop_back();
346
347 active[local_id] = active[last_local_id];
348 active.pop_back();
349
350 // Next, fix ids
351 const auto last_global_id = local_to_global_map[last_local_id];
352 global_to_local_map[last_global_id] = local_id;
353 local_to_global_map[local_id] = last_global_id;
354
355 // Remove unused entries
356 global_to_local_map.erase(id);
357 local_to_global_map.pop_back();
358
359 hm.release(id);
360 }
361
373 size_t
374 add_factor(const std::array<size_t, N> &ids,
375 const ObservationType &obs = ObservationType(),
376 const S *precision_matrix = nullptr,
377 const ConstraintDataType &constraint_data = ConstraintDataType(),
378 const LossType &loss_func = LossType()) {
379
380 const auto id = hm.get();
381 const auto local_id = internal_count();
382
383 global_to_local_map.insert({id, local_id});
384 local_to_global_map.push_back(id);
385
386 global_ids.insert(global_ids.end(), ids.begin(), ids.end());
387 device_obs.push_back(obs);
388
389 constexpr size_t precision_matrix_size = error_dim * error_dim;
390 if (precision_matrix) {
391 precision_matrices.resize(precision_matrices.size() +
392 precision_matrix_size);
393 for (size_t i = 0; i < precision_matrix_size; i++) {
394 precision_matrices[local_id * precision_matrix_size + i] =
395 precision_matrix[i];
396 }
397 } else {
398 // constexpr auto pmat = get_default_precision_matrix();
399 precision_matrices.resize(precision_matrices.size() +
400 precision_matrix_size);
401
402 for (size_t i = 0; i < precision_matrix_size; i++) {
403 precision_matrices[local_id * precision_matrix_size + i] =
404 default_precision_matrix[i];
405 }
406 }
407
408 data.push_back(constraint_data);
409 loss.push_back(loss_func);
410 active.push_back(0);
411 return id; // only global within this class (it's just an external id)
412 }
413
419 void set_active(size_t id, const uint8_t active_value) {
420 if (global_to_local_map.find(id) == global_to_local_map.end()) {
421 std::cerr << "Factor with id " << id << " not found." << std::endl;
422 return;
423 }
424
425 constexpr uint8_t NOT_MSB = 0x7F; // 01111111
426
427 auto local_id = global_to_local_map[id];
428 active[local_id] =
429 NOT_MSB & active_value; // we reserve the MSB for later use
430 }
431
436 thrust::fill(thrust::device, active.begin(), active.end(), 0x1);
437 }
438
443 size_t internal_count() const { return global_ids.size() / N; }
444
449 size_t active_count() const override { return _active_count; }
450
455 void initialize_device_ids(const uint8_t optimization_level) override {
456 // Map constraint ids to local ids
457 host_ids.resize(global_ids.size());
458 for (size_t i = 0; i < global_ids.size(); i++) {
459 host_ids[i] =
460 vertex_descriptors[i % N]->get_global_map().at(global_ids[i]);
461 }
462 device_ids = host_ids;
463
464 device_active = active;
465 _active_count = build_active_indices(device_active, active_indices,
466 internal_count(), optimization_level);
467 }
468
472 void to_device() override {
473 chi2_vec.resize(internal_count());
474 chi2_derivative.resize(internal_count());
475
476 // Resize and reset residuals
477 residuals.resize(error_dim * internal_count());
478 thrust::fill(residuals.begin(), residuals.end(), 0);
479 }
480
487 const std::array<BaseVertexDescriptor<T, S> *, N> &vertex_descriptors) {
488 this->vertex_descriptors = vertex_descriptors;
489 }
490
491 template <std::size_t... I>
492 VertexPointerPointerTuple get_vertices_impl(std::index_sequence<I...>) {
493 return std::make_tuple(
494 (static_cast<typename std::tuple_element<I, VertexDescriptorTuple>::type
495 *>(vertex_descriptors[I])
496 ->vertices())...);
497 }
498
504 VertexPointerPointerTuple get_vertices() {
505 return get_vertices_impl(std::make_index_sequence<N>{});
506 }
507
508 template <std::size_t... I>
509 static constexpr std::array<size_t, N>
510 get_vertex_sizes_impl(std::index_sequence<I...>) {
511 return std::array<size_t, N>{
512 std::tuple_element_t<I, VertexDescriptorTuple>::dim...};
513 }
514
519 static constexpr std::array<size_t, N> get_vertex_sizes() {
520 return get_vertex_sizes_impl(std::make_index_sequence<N>{});
521 }
522
527 for (size_t i = 0; i < N; i++) {
528 if (store_jacobians() || !std::is_same_v<typename Traits::Differentiation,
530 jacobians[i].dimensions = {error_dim,
531 vertex_descriptors[i]->dimension()};
532 jacobians[i].data.resize(
533 error_dim * vertex_descriptors[i]->dimension() * internal_count());
534 }
535 }
536 }
537
543 virtual size_t get_residual_size() const override {
544 return error_dim * internal_count();
545 }
546
551 virtual T chi2() override {
552 ops::compute_chi2_async<T, S>(this); // runs on stream 0
553 // TODO: Make this consider kernels and active edges
554 return thrust::reduce(thrust::cuda::par.on(0), chi2_vec.begin(),
555 chi2_vec.end(), static_cast<T>(0.0),
556 thrust::plus<T>()); // want to sync here on stream 0
557 }
558
564 T chi2(const size_t id) const {
565 auto it = global_to_local_map.find(id);
566 if (it == global_to_local_map.end()) {
567 std::cerr << "Factor with id " << id << " not found." << std::endl;
568 return static_cast<T>(0.0);
569 }
570 return chi2_vec[it->second];
571 }
572
580 ConstraintDataType *get_constraint_data(const size_t id) {
581 auto it = global_to_local_map.find(id);
582 if (it == global_to_local_map.end()) {
583 std::cerr << "Factor with id " << id << " not found." << std::endl;
584 return nullptr;
585 }
586 return &data[it->second];
587 }
588
595 std::array<size_t, N> get_vertex_ids(const size_t id) const {
596 auto it = global_to_local_map.find(id);
597 if (it == global_to_local_map.end()) {
598 std::cerr << "Factor with id " << id << " not found." << std::endl;
599 return {};
600 }
601 const auto local_id = it->second;
602 std::array<size_t, N> ids;
603 for (size_t i = 0; i < N; i++) {
604 ids[i] = global_ids[local_id * N + i];
605 }
606 return ids;
607 }
608
613 virtual void scale_jacobians_async(T *jacobian_scales) override {
614 ops::scale_jacobians<T, S>(this, jacobian_scales);
615 }
616
622 virtual bool use_autodiff() override {
623 return use_autodiff_impl<typename Traits::Differentiation>();
624 }
625
632 virtual void set_jacobian_storage(const bool store) {
633 _store_jacobians = store;
634 }
635
640 virtual bool store_jacobians() override { return _store_jacobians; }
641
647 virtual bool supports_dynamic_jacobians() override {
648 return std::is_same_v<typename Traits::Differentiation,
650 }
651
658 thrust::device_vector<BlockCoordinates> &block_coords) override {
659 const size_t num_vertices = get_num_vertices();
660 const auto &active_factors = active_indices;
661 const auto ids = device_ids.data().get();
662 for (size_t i = 0; i < num_vertices; i++) {
663 const auto vi_active = vertex_descriptors[i]->get_active_state();
664 const auto vi_block_ids = vertex_descriptors[i]->get_block_ids();
665 for (size_t j = i; j < num_vertices; j++) {
666 const auto vj_active = vertex_descriptors[j]->get_active_state();
667 const auto vj_block_ids = vertex_descriptors[j]->get_block_ids();
668 // Iterate over active factors and generate block coordinates
669 auto num_coords = block_coords.size();
670 block_coords.resize(block_coords.size() + active_factors.size());
671 auto end = thrust::transform_if(
672 thrust::device, active_factors.begin(), active_factors.end(),
673 block_coords.begin() + num_coords,
674 [=] __device__(size_t factor_idx) {
675 const size_t vi_id = ids[factor_idx * num_vertices + i];
676 const size_t vj_id = ids[factor_idx * num_vertices + j];
677
678 const auto block_i = vi_block_ids[vi_id];
679 const auto block_j = vj_block_ids[vj_id];
680 if (block_i > block_j) {
681 return BlockCoordinates{block_j, block_i};
682 }
683 return BlockCoordinates{block_i, block_j};
684 },
685 [=] __device__(const size_t &factor_idx) {
686 const auto vi_id = ids[factor_idx * num_vertices + i];
687 const auto vj_id = ids[factor_idx * num_vertices + j];
688 return (is_vertex_active(vi_active, vi_id) &&
689 is_vertex_active(vj_active, vj_id));
690 });
691 block_coords.resize(end - block_coords.begin());
692 }
693 }
694 }
695
703 std::unordered_map<BlockCoordinates, size_t> &block_indices,
704 thrust::device_vector<S> &d_hessian, size_t *h_block_offsets,
705 StreamPool &streams) override {
706
707 const size_t num_vertices = get_num_vertices();
708 const auto &d_active_factors = active_indices;
709 thrust::host_vector<size_t> h_active_factors = active_indices;
710 const auto d_ids = device_ids.data().get();
711 const auto h_ids = &host_ids[0];
712
713 size_t mul_count = 0;
714 // Determine max number of multiplications based on active factors
715 for (size_t i = 0; i < num_vertices; i++) {
716 for (size_t j = i; j < num_vertices; j++) {
717 mul_count += d_active_factors.size();
718 }
719 }
720
721 size_t write_idx = 0;
722
723 // Then actually do the multiplications
724 for (size_t i = 0; i < num_vertices; i++) {
725 const auto vi_active = vertex_descriptors[i]->get_active_state();
726 const auto vi_block_ids = vertex_descriptors[i]->get_block_ids();
727 for (size_t j = i; j < num_vertices; j++) {
728 const auto vj_active = vertex_descriptors[j]->get_active_state();
729 const auto vj_block_ids = vertex_descriptors[j]->get_block_ids();
730 // Iterate over active factors and generate block coordinates
731 for (const auto &factor_idx : h_active_factors) {
732 // TODO: Build this in the GPU using a GPU hash map
733 const auto vi_id = h_ids[factor_idx * num_vertices + i];
734 const auto vj_id = h_ids[factor_idx * num_vertices + j];
735
736 if (is_vertex_active(vi_active, vi_id) &&
737 is_vertex_active(vj_active, vj_id)) {
738 const auto block_i = vi_block_ids[vi_id];
739 const auto block_j = vj_block_ids[vj_id];
740 BlockCoordinates coordinates{block_i, block_j};
741 if (block_i > block_j) {
742 coordinates = BlockCoordinates{block_j, block_i};
743 }
744
745 auto it = block_indices.find(coordinates);
746 if (it != block_indices.end()) {
747 const size_t block_offset = it->second;
748 h_block_offsets[write_idx++] = block_offset;
749 } else {
750 // TODO: this should actually be an error, but also impossible
751 h_block_offsets[write_idx++] = 0;
752 std::cerr << "Error: Hessian block coordinate not found!"
753 << std::endl;
754 }
755 } else {
756 h_block_offsets[write_idx++] = 0;
757 }
758 }
759 }
760 }
761
762 return mul_count;
763 }
764
772 std::unordered_map<BlockCoordinates, size_t> &block_indices,
773 thrust::device_vector<S> &d_hessian, size_t *d_block_offsets,
774 StreamPool &streams) override {
775 const size_t num_vertices = get_num_vertices();
776 const auto &d_active_factors = active_indices;
777 thrust::host_vector<size_t> h_active_factors = active_indices;
778 const auto d_ids = device_ids.data().get();
779
780 size_t mul_count = 0;
781 // Determine max number of multiplications based on active factors
782 for (size_t i = 0; i < num_vertices; i++) {
783 for (size_t j = i; j < num_vertices; j++) {
784 mul_count += d_active_factors.size();
785 }
786 }
787
788 size_t write_idx = 0;
789
790 size_t stream_idx = 0;
791 // Then actually do the multiplications
792 for (size_t i = 0; i < num_vertices; i++) {
793 const auto vi_active = vertex_descriptors[i]->get_active_state();
794 const auto vi_block_ids = vertex_descriptors[i]->get_block_ids();
795 for (size_t j = i; j < num_vertices; j++) {
796 const auto vj_active = vertex_descriptors[j]->get_active_state();
797 // Iterate over active factors and generate block coordinates
798 const auto start_idx = write_idx;
799 write_idx += d_active_factors.size();
800 const auto stream = streams.select(stream_idx++);
801
802 const auto dim_i = vertex_descriptors[i]->dimension();
803 const auto dim_j = vertex_descriptors[j]->dimension();
804 const size_t block_dim = dim_i * dim_j;
805 const size_t num_threads = d_active_factors.size() * block_dim;
806 const size_t threads_per_block = 256;
807 const size_t num_blocks =
808 (num_threads + threads_per_block - 1) / threads_per_block;
809
810 ops::compute_hessian_block_kernel<T, S, N, error_dim>
811 <<<num_blocks, threads_per_block, 0, stream>>>(
812 i, j, dim_i, dim_j, d_active_factors.data().get(),
813 d_active_factors.size(), d_ids, d_block_offsets + start_idx,
814 vi_active, vj_active, vertex_descriptors[i]->get_hessian_ids(),
815 vertex_descriptors[j]->get_hessian_ids(),
816 jacobians[i].data.data().get(), jacobians[j].data.data().get(),
817 precision_matrices.data().get(), chi2_derivative.data().get(),
818 d_hessian.data().get());
819 }
820 }
821
822 streams.sync_all();
823
824 return mul_count;
825 }
826
830 void clear() {
831 global_ids.clear();
832 global_to_local_map.clear();
833 local_to_global_map.clear();
834
835 hm.clear();
836
837 _active_count = 0;
838
839 host_ids.clear();
840 device_ids.clear();
841 device_obs.clear();
842 residuals.clear();
843 precision_matrices.clear();
844 data.clear();
845
846 chi2_vec.clear();
847 chi2_derivative.clear();
848 loss.clear();
849
850 active.clear();
851 device_active.clear();
852 active_indices.clear();
853
854 for (size_t i = 0; i < N; i++) {
855 jacobians[i].data.clear();
856 }
857 }
858
859private:
864 constexpr static std::array<S, error_dim * error_dim>
866 constexpr size_t E = error_dim;
867 return []() constexpr {
868 std::array<S, E *E> pmat = {};
869 for (size_t i = 0; i < E; i++) {
870 pmat[i * E + i] = static_cast<S>(1.0);
871 }
872 return pmat;
873 }
874 ();
875 }
876};
877
878} // namespace graphite
Base class for factor descriptors.
Definition factor.hpp:36
Definition block.hpp:6
Represents a group of factors which will be processed together on the GPU.
Definition factor.hpp:121
static constexpr size_t get_num_vertices()
Returns the number of vertices connected to each factor.
Definition factor.hpp:269
virtual bool store_jacobians() override
Returns whether the Jacobians are stored.
Definition factor.hpp:640
void to_device() override
Prepares descriptor for GPU processing.
Definition factor.hpp:472
virtual bool supports_dynamic_jacobians() override
Determines whether dynamic Jacobian computation is supported (only supported for manual differentiati...
Definition factor.hpp:647
void compute_hessian_block_diagonal_async(std::unordered_map< BaseVertexDescriptor< T, S > *, thrust::device_vector< InvP > > &block_diagonals, const T *jacobian_scales, cudaStream_t stream) override
Computes the block diagonal of the Hessian matrix asynchronously.
Definition factor.hpp:243
size_t add_factor(const std::array< size_t, N > &ids, const ObservationType &obs=ObservationType(), const S *precision_matrix=nullptr, const ConstraintDataType &constraint_data=ConstraintDataType(), const LossType &loss_func=LossType())
Adds a factor to the descriptor.
Definition factor.hpp:374
void compute_hessian_scalar_diagonal_async(T *diagonal, const T *jacobian_scales) override
Computes the scalar diagonal of the Hessian matrix asynchronously.
Definition factor.hpp:261
void compute_jacobians(StreamPool &streams) override
Computes the Jacobians using manual differentiation.
Definition factor.hpp:233
virtual bool use_autodiff() override
Determines whether to use automatic differentiation based on the traits of the factor.
Definition factor.hpp:622
virtual size_t setup_hessian_computation(std::unordered_map< BlockCoordinates, size_t > &block_indices, thrust::device_vector< S > &d_hessian, size_t *h_block_offsets, StreamPool &streams) override
Sets up data needed to compute the upper triangle of the Hessian matrix on the GPU.
Definition factor.hpp:702
static constexpr std::array< size_t, N > get_vertex_sizes()
Returns the sizes (dimensions) of the vertices.
Definition factor.hpp:519
void initialize_jacobian_storage() override
Allocates memory for the Jacobians.
Definition factor.hpp:526
size_t internal_count() const
Returns the number of all active and inactive factors.
Definition factor.hpp:443
void compute_Jv(T *out, T *in, const T *jacobian_scales, StreamPool &streams) override
Computes the product of the Jacobian matrix and a vector.
Definition factor.hpp:213
virtual size_t get_residual_size() const override
Returns the size of the vector for all active and inactive residuals.
Definition factor.hpp:543
size_t get_num_descriptors() const override
Returns the number of vertex descriptors (same as the number of vertices for each factor).
Definition factor.hpp:275
virtual void get_hessian_block_coordinates(thrust::device_vector< BlockCoordinates > &block_coords) override
Determines which Hessian blocks are filled in (upper triangle).
Definition factor.hpp:657
virtual void scale_jacobians_async(T *jacobian_scales) override
Scales the Jacobians asynchronously.
Definition factor.hpp:613
void set_active(size_t id, const uint8_t active_value)
Sets the active state of a factor.
Definition factor.hpp:419
FactorDescriptor(VertexDescPtrs... vertex_descriptors)
Constructs a FactorDescriptor with the given vertex descriptors.
Definition factor.hpp:183
void compute_error_autodiff(StreamPool &streams) override
Simultaneously computes the residuals and corresponding Jacobians using automatic differentiation.
Definition factor.hpp:199
void clear()
Clears all data associated with this factor descriptor.
Definition factor.hpp:830
void compute_error() override
Computes the residuals across all factors in the descriptor.
Definition factor.hpp:193
virtual size_t execute_hessian_computation(std::unordered_map< BlockCoordinates, size_t > &block_indices, thrust::device_vector< S > &d_hessian, size_t *d_block_offsets, StreamPool &streams) override
Executes the Hessian block computations for this descriptor using the data from setup_hessian_computa...
Definition factor.hpp:771
std::array< size_t, N > get_vertex_ids(const size_t id) const
Returns the vertex ids connected to a specific factor.
Definition factor.hpp:595
VertexPointerPointerTuple get_vertices()
Returns a tuple of vertex pointers.
Definition factor.hpp:504
T chi2(const size_t id) const
Returns the chi-squared value for a specific factor.
Definition factor.hpp:564
static constexpr std::array< S, error_dim *error_dim > get_default_precision_matrix()
Gets the default precision matrix (identity matrix).
Definition factor.hpp:865
void remove_factor(const size_t id)
Removes a factor by its id.
Definition factor.hpp:308
void link_factors(const std::array< BaseVertexDescriptor< T, S > *, N > &vertex_descriptors)
Links the factor to the given vertex descriptors. This is already called during construction.
Definition factor.hpp:486
ConstraintDataType * get_constraint_data(const size_t id)
Returns the constraint data for a specific factor.
Definition factor.hpp:580
void compute_Jtv(T *out, T *in, const T *jacobian_scales, StreamPool &streams) override
Computes the product of the transposed Jacobian matrix and a vector.
Definition factor.hpp:221
void reserve(size_t size)
Reserves memory for the factor. Generally, you should always reserve the memory you need before addin...
Definition factor.hpp:288
size_t active_count() const override
Returns the number of active factors.
Definition factor.hpp:449
virtual void set_jacobian_storage(const bool store)
Sets whether to store the Jacobians. If false, Jacobians will be computed dynamically (on-the-fly),...
Definition factor.hpp:632
JacobianStorage< S > * get_jacobians() override
Returns the Jacobian storage for the factor.
Definition factor.hpp:280
void compute_b_async(T *b, const T *jacobian_scales) override
Computes the gradient vector b asynchronously.
Definition factor.hpp:206
virtual T chi2() override
Computes the chi-squared value for all active and inactive factors.
Definition factor.hpp:551
void initialize_device_ids(const uint8_t optimization_level) override
Initializes the device ids and a list of active factors.
Definition factor.hpp:455
void reset_active()
Resets the active state of all factors to 0x1.
Definition factor.hpp:435
Storage class for Jacobians of a factor.
Definition factor.hpp:26
Definition stream.hpp:7
The top-level namespace for Graphite.
Definition eigen_solver.cpp:4
Definition differentiation.hpp:8
Definition factor.hpp:99
Definition factor.hpp:95
Definition factor.hpp:108