Graphite
Loading...
Searching...
No Matches
factor.hpp
Go to the documentation of this file.
1
2#pragma once
3#include <graphite/active.hpp>
4#include <graphite/block.hpp>
5#include <graphite/common.hpp>
7#include <graphite/loss.hpp>
14#include <graphite/utils.hpp>
15#include <graphite/vector.hpp>
16#include <graphite/vertex.hpp>
17#include <thrust/execution_policy.h>
18#include <thrust/inner_product.h>
19#include <thrust/universal_vector.h>
20
21namespace graphite {
22
// Storage for the Jacobian blocks of one vertex slot of a factor type.
// Holds one dense (rows x cols) block per factor, packed contiguously in
// device memory; `dimensions` records the per-factor block shape.
26template <typename S> class JacobianStorage {
27public:
28 std::pair<size_t, size_t> dimensions; // (rows, cols) of each per-factor block
29
30 thrust::device_vector<S> data; // packed blocks, length rows*cols*num_factors
31};
32
// Abstract, type-erased interface for a group of factors of one concrete
// type. T is the solve/accumulation precision, S the storage precision.
// The solver drives groups of heterogeneous factors through this interface.
36template <typename T, typename S> class BaseFactorDescriptor {
37public:
38 using InvP = std::conditional_t<is_low_precision<S>::value, T, S>; // inversion precision: fall back to T when S is low precision
39
40 virtual ~BaseFactorDescriptor(){};
41
42 virtual bool use_autodiff() = 0; // true if Jacobians come from automatic differentiation
43 virtual void compute_error() = 0; // residuals for all factors
44 virtual void compute_error_autodiff(StreamPool &streams) = 0; // residuals + Jacobians in one pass
45 virtual void compute_b_async(T *b, const T *jacobian_scales) = 0; // gradient vector contribution
46 virtual void compute_Jv(T *out, T *in, const T *jacobian_scales,
47 StreamPool &streams) = 0; // out = J * in
48 virtual void compute_Jtv(T *out, T *in, const T *jacobian_scales,
49 StreamPool &streams) = 0; // out = J^T * in
50
51 virtual void flag_active_vertices_async(const uint8_t level) = 0;
52
53 virtual void compute_jacobians(StreamPool &streams) = 0;
54 virtual void compute_hessian_block_diagonal_async(
55 std::unordered_map<BaseVertexDescriptor<T, S> *,
56 thrust::device_vector<InvP>> &block_diagonals,
57 const T *jacobian_scales, cudaStream_t stream) = 0;
58
59 virtual void
60 compute_hessian_scalar_diagonal_async(T *diagonal,
61 const T *jacobian_scales) = 0;
62
63 virtual JacobianStorage<S> *get_jacobians() = 0;
64 virtual void initialize_jacobian_storage() = 0;
65
66 virtual size_t active_count() const = 0;
67 virtual size_t get_num_descriptors() const = 0;
68
69 virtual size_t get_residual_size() const = 0;
70 virtual void scale_jacobians_async(T *jacobian_scales) = 0;
71
72 virtual void initialize_device_ids(const uint8_t optimization_level) = 0;
73 virtual void to_device() = 0;
74
75 virtual T chi2() = 0; // total chi-squared over this factor group
76
77 virtual void set_jacobian_storage(const bool store) = 0; // false => Jacobians computed on the fly
78 virtual bool store_jacobians() = 0;
79 virtual bool supports_dynamic_jacobians() = 0;
80
81 virtual void get_hessian_block_coordinates(
82 thrust::device_vector<BlockCoordinates> &block_coords) = 0;
83
84 virtual size_t setup_hessian_computation(
85 std::unordered_map<BlockCoordinates, size_t> &block_indices,
86 thrust::device_vector<S> &d_hessian, size_t *h_block_offsets,
87 StreamPool &streams) = 0;
88
89 virtual size_t execute_hessian_computation(
90 std::unordered_map<BlockCoordinates, size_t> &block_indices,
91 thrust::device_vector<S> &d_hessian, size_t *d_block_offsets,
92 StreamPool &streams) = 0;
93};
94
// Metafunction: maps a vertex-descriptor type to its vertex value type.
95template <typename T> struct get_vertex_type {
96 using type = typename T::VertexType;
97};
98
// Metafunction: maps a vertex-descriptor type to a pointer to its vertex type.
99template <typename T> struct get_vertex_pointer_type {
100 using type = typename T::VertexType *;
101};
102
// Metafunction: maps a vertex-descriptor type to a pointer-to-pointer to its
// vertex type (array-of-vertex-pointers element type).
103template <typename T> struct get_vertex_pointer_pointer_type {
104 using type = typename T::VertexType **;
105};
106
107template <typename Tuple, template <typename> class MetaFunc>
109
110// Partial specialization for std::tuple
// Partial specialization for std::tuple: applies MetaFunc element-wise,
// producing std::tuple<MetaFunc<Ts>::type...>.
111template <typename... Ts, template <typename> class MetaFunc>
112struct transform_tuple<std::tuple<Ts...>, MetaFunc> {
113 using type = std::tuple<typename MetaFunc<Ts>::type...>;
114};
115
// Convenience alias for transform_tuple<...>::type.
116template <typename Tuple, template <typename> class MetaFunc>
117using transform_tuple_t = typename transform_tuple<Tuple, MetaFunc>::type;
118
123template <typename T, typename S, typename FTraits>
125
126private:
127 std::vector<size_t> global_ids;
128 std::unordered_map<size_t, size_t> global_to_local_map;
129 std::vector<size_t> local_to_global_map;
130
131 HandleManager<size_t> hm;
132
133 bool _store_jacobians;
134 size_t _active_count;
135
136public:
137 using InvP = std::conditional_t<is_low_precision<S>::value, T, S>;
138
139 using Traits = FTraits;
140
141 static constexpr size_t N =
142 std::tuple_size<typename Traits::VertexDescriptors>::value;
143 static constexpr size_t error_dim = Traits::dimension;
144
145 std::array<BaseVertexDescriptor<T, S> *, N> vertex_descriptors;
146
147 using VertexDescriptorTuple = typename Traits::VertexDescriptors;
148 using VertexTypesTuple =
149 transform_tuple_t<VertexDescriptorTuple, get_vertex_type>;
150 using VertexPointerTuple =
151 transform_tuple_t<VertexDescriptorTuple, get_vertex_pointer_type>;
152 using VertexPointerPointerTuple =
153 transform_tuple_t<VertexDescriptorTuple, get_vertex_pointer_pointer_type>;
154
155 using ObservationType = typename Traits::Observation;
156 using ConstraintDataType = typename Traits::Data;
157 using LossType = typename Traits::Loss;
158
159 thrust::host_vector<size_t> host_ids;
160 thrust::device_vector<size_t> device_ids;
161 managed_vector<ObservationType> device_obs;
162 thrust::device_vector<T> residuals;
163 managed_vector<S> precision_matrices;
164 managed_vector<ConstraintDataType> data;
165
166 managed_vector<T> chi2_vec;
167 thrust::device_vector<S> chi2_derivative;
168 managed_vector<LossType> loss;
169
170 thrust::host_vector<uint8_t> active;
171 thrust::device_vector<uint8_t> device_active;
172 thrust::device_vector<size_t> active_indices;
173
174 std::array<JacobianStorage<S>, N> jacobians;
175 std::array<S, Traits::dimension * Traits::dimension> default_precision_matrix;
176
// Constructs the descriptor from exactly N vertex-descriptor pointers (one
// per vertex slot of the factor). Jacobian storage defaults to enabled and
// the active count starts at zero; the default (identity) precision matrix
// is cached for factors added without an explicit one.
182 template <typename... VertexDescPtrs,
183 typename = std::enable_if_t<sizeof...(VertexDescPtrs) == N>>
184 FactorDescriptor(VertexDescPtrs... vertex_descriptors)
185 : _store_jacobians(true), _active_count(0) {
186 link_factors({vertex_descriptors...});
187
188 default_precision_matrix = get_default_precision_matrix();
189 }
190
// Computes the residuals for all factors; forwards to ops::compute_error.
194 void compute_error() override { ops::compute_error<T>(this); }
195
// Computes residuals and Jacobians together via automatic differentiation,
// distributing work across the given stream pool.
200 void compute_error_autodiff(StreamPool &streams) override {
201 ops::compute_error_autodiff<T, S>(this, streams);
202 }
203
// Asynchronously accumulates this group's contribution to the gradient
// vector b, applying the given Jacobian scaling.
207 void compute_b_async(T *b, const T *jacobian_scales) override {
208 ops::compute_b_async<T, S>(this, b, jacobian_scales);
209 }
210
// Computes out = J * in (Jacobian-vector product) with scaling, using the
// stream pool; forwards to ops::compute_Jv.
214 void compute_Jv(T *out, T *in, const T *jacobian_scales,
215 StreamPool &streams) override {
216 ops::compute_Jv<T, S>(this, out, in, jacobian_scales, streams);
217 }
218
// Computes out = J^T * in (transposed-Jacobian-vector product) with scaling;
// forwards to ops::compute_Jtv.
222 void compute_Jtv(T *out, T *in, const T *jacobian_scales,
223 StreamPool &streams) override {
224 ops::compute_Jtv<T, S>(this, out, in, jacobian_scales, streams);
225 }
226
// Marks vertices touched by this group's factors as active for the given
// optimization level; forwards to ops::flag_active_vertices.
227 void flag_active_vertices_async(const uint8_t level) override {
228 ops::flag_active_vertices(this, level);
229 }
230
234 void compute_jacobians(StreamPool &streams) override {
235 if constexpr (std::is_same_v<typename Traits::Differentiation,
237 ops::compute_jacobians<T, S>(this, streams);
238 }
239 }
240
245 std::unordered_map<BaseVertexDescriptor<T, S> *,
246 thrust::device_vector<InvP>> &block_diagonals,
247 const T *jacobian_scales, cudaStream_t stream) override {
248
249 std::array<InvP *, N> diagonal_blocks;
250 for (size_t i = 0; i < N; i++) {
251 diagonal_blocks[i] = block_diagonals[vertex_descriptors[i]].data().get();
252 }
253
254 ops::compute_block_diagonal<T, S>(this, diagonal_blocks, jacobian_scales,
255 stream);
256 }
257
261 void
263 const T *jacobian_scales) override {
264 ops::compute_hessian_scalar_diagonal<T, S>(this, diagonal, jacobian_scales);
265 }
266
// Number of vertices connected to each factor of this type (compile-time N).
270 static constexpr size_t get_num_vertices() { return N; }
271
// Number of vertex descriptors — same as the per-factor vertex count N.
276 size_t get_num_descriptors() const override { return N; }
277
// Raw pointer to the N-element array of per-vertex Jacobian storages.
281 JacobianStorage<S> *get_jacobians() override { return jacobians.data(); }
282
// Pre-allocates capacity for `size` factors across every per-factor
// container (id maps, observations, precision matrices, residuals, etc.).
// Call before bulk add_factor() to avoid repeated reallocations; per-vertex
// containers reserve N slots per factor.
289 void reserve(size_t size) {
290 global_ids.reserve(N * size); // N vertex ids per factor
291 host_ids.reserve(N * size);
292 device_ids.reserve(N * size);
293 device_obs.reserve(size);
294 global_to_local_map.reserve(size);
295 local_to_global_map.reserve(size);
296 precision_matrices.reserve(size * error_dim * error_dim); // one dense error_dim^2 block per factor
297 data.reserve(size);
298 loss.reserve(size);
299 chi2_vec.reserve(size);
300 residuals.reserve(size * error_dim);
301 active.reserve(size);
302 }
303
// Removes the factor with external id `id` in O(1) by swap-with-last:
// the last factor's data is copied into the removed slot in every parallel
// container, the containers shrink by one, and the id maps are patched so
// the moved factor keeps its external id. Unknown ids are reported to
// stderr and ignored. Removing the last factor makes the copies benign
// self-assignments. The released id is returned to the handle manager for
// reuse. NOTE(review): device-side state (device_ids, active_indices) is
// not updated here — presumably initialize_device_ids() must be re-run
// before the next solve; confirm against callers.
309 void remove_factor(const size_t id) {
310 if (global_to_local_map.find(id) == global_to_local_map.end()) {
311 std::cerr << "Factor with id " << id << " not found." << std::endl;
312 return;
313 }
314
315 auto local_id = global_to_local_map[id];
316 auto last_local_id = internal_count() - 1;
317
318 // copy constraint
319 for (size_t i = 0; i < N; i++) {
320 global_ids[local_id * N + i] = global_ids[last_local_id * N + i];
321 }
322 global_ids.resize(global_ids.size() - N);
323
324 // observation
325 device_obs[local_id] = device_obs[last_local_id];
326 device_obs.pop_back();
327
328 // precision matrix
329 constexpr size_t precision_matrix_size = error_dim * error_dim;
330 for (size_t i = 0; i < precision_matrix_size; i++) {
331 precision_matrices[local_id * precision_matrix_size + i] =
332 precision_matrices[last_local_id * precision_matrix_size + i];
333 }
334
335 precision_matrices.resize(precision_matrices.size() -
336 precision_matrix_size);
337
338 // Constraint, loss and chi2
339 data[local_id] = data[last_local_id];
340 data.pop_back();
341
342 loss[local_id] = loss[last_local_id];
343 loss.pop_back();
344
345 chi2_vec[local_id] = chi2_vec[last_local_id];
346 chi2_vec.pop_back();
347
348 active[local_id] = active[last_local_id];
349 active.pop_back();
350
351 // Next, fix ids
352 const auto last_global_id = local_to_global_map[last_local_id];
353 global_to_local_map[last_global_id] = local_id;
354 local_to_global_map[local_id] = last_global_id;
355
356 // Remove unused entries
357 global_to_local_map.erase(id);
358 local_to_global_map.pop_back();
359
360 hm.release(id); // external id can be handed out again
361 }
362
// Appends one factor connecting the N vertices in `ids` and returns its
// external (handle-manager) id. Optional arguments: observation, a row-major
// error_dim x error_dim precision matrix (identity when nullptr), per-factor
// constraint data, and a loss function. The new factor is appended to every
// parallel container at local index internal_count(); it starts with
// active == 0, i.e. inactive until set_active()/reset_active() is called.
374 size_t
375 add_factor(const std::array<size_t, N> &ids,
376 const ObservationType &obs = ObservationType(),
377 const S *precision_matrix = nullptr,
378 const ConstraintDataType &constraint_data = ConstraintDataType(),
379 const LossType &loss_func = LossType()) {
380
381 const auto id = hm.get(); // external id, stable across removals
382 const auto local_id = internal_count(); // append position
383
384 global_to_local_map.insert({id, local_id});
385 local_to_global_map.push_back(id);
386
387 global_ids.insert(global_ids.end(), ids.begin(), ids.end());
388 device_obs.push_back(obs);
389
390 constexpr size_t precision_matrix_size = error_dim * error_dim;
391 if (precision_matrix) {
392 precision_matrices.resize(precision_matrices.size() +
393 precision_matrix_size);
394 for (size_t i = 0; i < precision_matrix_size; i++) {
395 precision_matrices[local_id * precision_matrix_size + i] =
396 precision_matrix[i];
397 }
398 } else {
399 // constexpr auto pmat = get_default_precision_matrix();
400 precision_matrices.resize(precision_matrices.size() +
401 precision_matrix_size);
402
403 // fall back to the cached identity precision matrix
404 for (size_t i = 0; i < precision_matrix_size; i++) {
405 precision_matrices[local_id * precision_matrix_size + i] =
406 default_precision_matrix[i];
407 }
408 }
409
410 data.push_back(constraint_data);
411 loss.push_back(loss_func);
412 active.push_back(0); // new factors start inactive
413 return id; // only global within this class (it's just an external id)
414 }
414
// Sets the activity value of the factor with external id `id`. Only the low
// 7 bits of `active_value` are stored — the MSB is reserved for internal
// use. Unknown ids are reported to stderr and ignored.
420 void set_active(size_t id, const uint8_t active_value) {
421 if (global_to_local_map.find(id) == global_to_local_map.end()) {
422 std::cerr << "Factor with id " << id << " not found." << std::endl;
423 return;
424 }
425
426 constexpr uint8_t NOT_MSB = 0x7F; // 01111111
427
428 auto local_id = global_to_local_map[id];
429 active[local_id] =
430 NOT_MSB & active_value; // we reserve the MSB for later use
431 }
432
437 thrust::fill(thrust::device, active.begin(), active.end(), 0x1);
438 }
439
// Total number of factors stored (active and inactive): N vertex ids are
// kept per factor, hence the division.
444 size_t internal_count() const { return global_ids.size() / N; }
445
// Number of active factors, as cached by initialize_device_ids().
450 size_t active_count() const override { return _active_count; }
451
// Translates each factor's global vertex ids into the owning vertex
// descriptor's local ids (host side), uploads them to the device, mirrors
// the host activity flags, and rebuilds the active-factor index list for
// the given optimization level, caching the resulting active count.
// Throws (via unordered_map::at) if a referenced vertex id is unknown to
// its descriptor.
456 void initialize_device_ids(const uint8_t optimization_level) override {
457 // Map constraint ids to local ids
458 host_ids.resize(global_ids.size());
459 for (size_t i = 0; i < global_ids.size(); i++) {
460 host_ids[i] =
461 vertex_descriptors[i % N]->get_global_map().at(global_ids[i]); // i % N selects the vertex slot
462 }
463 device_ids = host_ids; // host -> device copy
464
465 device_active = active;
466 _active_count = build_active_indices(device_active, active_indices,
467 internal_count(), optimization_level);
468 }
469
// Prepares per-factor device buffers for a solve: sizes the chi2 and
// chi2-derivative vectors to the current factor count and zero-fills the
// residual vector (error_dim entries per factor).
473 void to_device() override {
474 chi2_vec.resize(internal_count());
475 chi2_derivative.resize(internal_count());
476
477 // Resize and reset residuals
478 residuals.resize(error_dim * internal_count());
479 thrust::fill(residuals.begin(), residuals.end(), 0);
480 }
481
488 const std::array<BaseVertexDescriptor<T, S> *, N> &vertex_descriptors) {
489 this->vertex_descriptors = vertex_descriptors;
490 }
491
// Implementation helper for get_vertices(): for each vertex slot I,
// downcasts the stored base descriptor pointer to its concrete descriptor
// type (known from Traits) and collects the ->vertices() pointers into a
// tuple. The cast is safe by construction: slot I always holds the
// descriptor type recorded in VertexDescriptorTuple.
492 template <std::size_t... I>
493 VertexPointerPointerTuple get_vertices_impl(std::index_sequence<I...>) {
494 return std::make_tuple(
495 (static_cast<typename std::tuple_element<I, VertexDescriptorTuple>::type
496 *>(vertex_descriptors[I])
497 ->vertices())...);
498 }
499
// Returns a tuple with one vertex-array pointer per vertex slot (0..N-1).
505 VertexPointerPointerTuple get_vertices() {
506 return get_vertices_impl(std::make_index_sequence<N>{});
507 }
508
// Implementation helper for get_vertex_sizes(): reads the compile-time
// `dim` of each vertex descriptor type into an array.
509 template <std::size_t... I>
510 static constexpr std::array<size_t, N>
511 get_vertex_sizes_impl(std::index_sequence<I...>) {
512 return std::array<size_t, N>{
513 std::tuple_element_t<I, VertexDescriptorTuple>::dim...};
514 }
515
// Compile-time array of the N vertex dimensions for this factor type.
520 static constexpr std::array<size_t, N> get_vertex_sizes() {
521 return get_vertex_sizes_impl(std::make_index_sequence<N>{});
522 }
523
528 for (size_t i = 0; i < N; i++) {
529 if (store_jacobians() || !std::is_same_v<typename Traits::Differentiation,
531 jacobians[i].dimensions = {error_dim,
532 vertex_descriptors[i]->dimension()};
533 jacobians[i].data.resize(
534 error_dim * vertex_descriptors[i]->dimension() * internal_count());
535 }
536 }
537 }
538
// Length of the full residual vector: error_dim entries per stored factor
// (active and inactive alike).
544 virtual size_t get_residual_size() const override {
545 return error_dim * internal_count();
546 }
547
// Total chi-squared over all stored factors: launches the per-factor chi2
// kernel asynchronously on stream 0, then reduces chi2_vec on the same
// stream — the reduction's synchronization also fences the kernel.
552 virtual T chi2() override {
553 ops::compute_chi2_async<T, S>(this); // runs on stream 0
554 // TODO: Make this consider kernels and active edges
555 return thrust::reduce(thrust::cuda::par.on(0), chi2_vec.begin(),
556 chi2_vec.end(), static_cast<T>(0.0),
557 thrust::plus<T>()); // want to sync here on stream 0
558 }
559
// Chi-squared of a single factor, looked up by external id. Returns 0 (and
// logs to stderr) for unknown ids. Reads the value computed by the last
// chi2()/compute_chi2 pass — it is not recomputed here.
565 T chi2(const size_t id) const {
566 auto it = global_to_local_map.find(id);
567 if (it == global_to_local_map.end()) {
568 std::cerr << "Factor with id " << id << " not found." << std::endl;
569 return static_cast<T>(0.0);
570 }
571 return chi2_vec[it->second];
572 }
573
// Mutable pointer to the constraint data of the factor with external id
// `id`, or nullptr (with a stderr message) if the id is unknown. The
// pointer is invalidated by add_factor/remove_factor/clear.
581 ConstraintDataType *get_constraint_data(const size_t id) {
582 auto it = global_to_local_map.find(id);
583 if (it == global_to_local_map.end()) {
584 std::cerr << "Factor with id " << id << " not found." << std::endl;
585 return nullptr;
586 }
587 return &data[it->second];
588 }
589
// The N global vertex ids connected to the factor with external id `id`.
// Returns a value-initialized (all-zero) array, plus a stderr message, if
// the id is unknown.
596 std::array<size_t, N> get_vertex_ids(const size_t id) const {
597 auto it = global_to_local_map.find(id);
598 if (it == global_to_local_map.end()) {
599 std::cerr << "Factor with id " << id << " not found." << std::endl;
600 return {};
601 }
602 const auto local_id = it->second;
603 std::array<size_t, N> ids;
604 for (size_t i = 0; i < N; i++) {
605 ids[i] = global_ids[local_id * N + i]; // ids are stored N-per-factor, interleaved
606 }
607 return ids;
608 }
609
// Asynchronously applies per-column scales to the stored Jacobians;
// forwards to ops::scale_jacobians.
614 virtual void scale_jacobians_async(T *jacobian_scales) override {
615 ops::scale_jacobians<T, S>(this, jacobian_scales);
616 }
617
// Whether this factor type differentiates automatically, decided at compile
// time from Traits::Differentiation.
623 virtual bool use_autodiff() override {
624 return use_autodiff_impl<typename Traits::Differentiation>();
625 }
626
// Enables/disables persistent Jacobian storage; when disabled, Jacobians
// are produced on the fly where supported.
633 virtual void set_jacobian_storage(const bool store) {
634 _store_jacobians = store;
635 }
636
// Whether Jacobians are persisted (see set_jacobian_storage).
641 virtual bool store_jacobians() override { return _store_jacobians; }
642
648 virtual bool supports_dynamic_jacobians() override {
649 return std::is_same_v<typename Traits::Differentiation,
651 }
652
659 thrust::device_vector<BlockCoordinates> &block_coords) override {
660 const size_t num_vertices = get_num_vertices();
661 const auto &active_factors = active_indices;
662 const auto ids = device_ids.data().get();
663 for (size_t i = 0; i < num_vertices; i++) {
664 const auto vi_active = vertex_descriptors[i]->get_active_state();
665 const auto vi_block_ids = vertex_descriptors[i]->get_block_ids();
666 for (size_t j = i; j < num_vertices; j++) {
667 const auto vj_active = vertex_descriptors[j]->get_active_state();
668 const auto vj_block_ids = vertex_descriptors[j]->get_block_ids();
669 // Iterate over active factors and generate block coordinates
670 auto num_coords = block_coords.size();
671 block_coords.resize(block_coords.size() + active_factors.size());
672 auto end = thrust::transform_if(
673 thrust::device, active_factors.begin(), active_factors.end(),
674 block_coords.begin() + num_coords,
675 [=] __device__(size_t factor_idx) {
676 const size_t vi_id = ids[factor_idx * num_vertices + i];
677 const size_t vj_id = ids[factor_idx * num_vertices + j];
678
679 const auto block_i = vi_block_ids[vi_id];
680 const auto block_j = vj_block_ids[vj_id];
681 if (block_i > block_j) {
682 return BlockCoordinates{block_j, block_i};
683 }
684 return BlockCoordinates{block_i, block_j};
685 },
686 [=] __device__(const size_t &factor_idx) {
687 const auto vi_id = ids[factor_idx * num_vertices + i];
688 const auto vj_id = ids[factor_idx * num_vertices + j];
689 return (is_vertex_active(vi_active, vi_id) &&
690 is_vertex_active(vj_active, vj_id));
691 });
692 block_coords.resize(end - block_coords.begin());
693 }
694 }
695 }
696
704 std::unordered_map<BlockCoordinates, size_t> &block_indices,
705 thrust::device_vector<S> &d_hessian, size_t *h_block_offsets,
706 StreamPool &streams) override {
707
708 const size_t num_vertices = get_num_vertices();
709 const auto &d_active_factors = active_indices;
710 thrust::host_vector<size_t> h_active_factors = active_indices;
711 const auto d_ids = device_ids.data().get();
712 const auto h_ids = &host_ids[0];
713
714 size_t mul_count = 0;
715 // Determine max number of multiplications based on active factors
716 for (size_t i = 0; i < num_vertices; i++) {
717 for (size_t j = i; j < num_vertices; j++) {
718 mul_count += d_active_factors.size();
719 }
720 }
721
722 size_t write_idx = 0;
723
724 // Then actually do the multiplications
725 for (size_t i = 0; i < num_vertices; i++) {
726 const auto vi_active = vertex_descriptors[i]->get_active_state();
727 const auto vi_block_ids = vertex_descriptors[i]->get_block_ids();
728 for (size_t j = i; j < num_vertices; j++) {
729 const auto vj_active = vertex_descriptors[j]->get_active_state();
730 const auto vj_block_ids = vertex_descriptors[j]->get_block_ids();
731 // Iterate over active factors and generate block coordinates
732 for (const auto &factor_idx : h_active_factors) {
733 // TODO: Build this in the GPU using a GPU hash map
734 const auto vi_id = h_ids[factor_idx * num_vertices + i];
735 const auto vj_id = h_ids[factor_idx * num_vertices + j];
736
737 if (is_vertex_active(vi_active, vi_id) &&
738 is_vertex_active(vj_active, vj_id)) {
739 const auto block_i = vi_block_ids[vi_id];
740 const auto block_j = vj_block_ids[vj_id];
741 BlockCoordinates coordinates{block_i, block_j};
742 if (block_i > block_j) {
743 coordinates = BlockCoordinates{block_j, block_i};
744 }
745
746 auto it = block_indices.find(coordinates);
747 if (it != block_indices.end()) {
748 const size_t block_offset = it->second;
749 h_block_offsets[write_idx++] = block_offset;
750 } else {
751 // TODO: this should actually be an error, but also impossible
752 h_block_offsets[write_idx++] = 0;
753 std::cerr << "Error: Hessian block coordinate not found!"
754 << std::endl;
755 }
756 } else {
757 h_block_offsets[write_idx++] = 0;
758 }
759 }
760 }
761 }
762
763 return mul_count;
764 }
765
773 std::unordered_map<BlockCoordinates, size_t> &block_indices,
774 thrust::device_vector<S> &d_hessian, size_t *d_block_offsets,
775 StreamPool &streams) override {
776 const size_t num_vertices = get_num_vertices();
777 const auto &d_active_factors = active_indices;
778 thrust::host_vector<size_t> h_active_factors = active_indices;
779 const auto d_ids = device_ids.data().get();
780
781 size_t mul_count = 0;
782 // Determine max number of multiplications based on active factors
783 for (size_t i = 0; i < num_vertices; i++) {
784 for (size_t j = i; j < num_vertices; j++) {
785 mul_count += d_active_factors.size();
786 }
787 }
788
789 size_t write_idx = 0;
790
791 size_t stream_idx = 0;
792 // Then actually do the multiplications
793 for (size_t i = 0; i < num_vertices; i++) {
794 const auto vi_active = vertex_descriptors[i]->get_active_state();
795 const auto vi_block_ids = vertex_descriptors[i]->get_block_ids();
796 for (size_t j = i; j < num_vertices; j++) {
797 const auto vj_active = vertex_descriptors[j]->get_active_state();
798 // Iterate over active factors and generate block coordinates
799 const auto start_idx = write_idx;
800 write_idx += d_active_factors.size();
801 const auto stream = streams.select(stream_idx++);
802
803 const auto dim_i = vertex_descriptors[i]->dimension();
804 const auto dim_j = vertex_descriptors[j]->dimension();
805 const size_t block_dim = dim_i * dim_j;
806 const size_t num_threads = d_active_factors.size() * block_dim;
807 const size_t threads_per_block = 256;
808 const size_t num_blocks =
809 (num_threads + threads_per_block - 1) / threads_per_block;
810
811 ops::compute_hessian_block_kernel<T, S, N, error_dim>
812 <<<num_blocks, threads_per_block, 0, stream>>>(
813 i, j, dim_i, dim_j, d_active_factors.data().get(),
814 d_active_factors.size(), d_ids, d_block_offsets + start_idx,
815 vi_active, vj_active, vertex_descriptors[i]->get_hessian_ids(),
816 vertex_descriptors[j]->get_hessian_ids(),
817 jacobians[i].data.data().get(), jacobians[j].data.data().get(),
818 precision_matrices.data().get(), chi2_derivative.data().get(),
819 d_hessian.data().get());
820 }
821 }
822
823 streams.sync_all();
824
825 return mul_count;
826 }
827
// Empties every per-factor container (host and device), releases all
// handle-manager ids, and zeroes the cached active count. Capacity is left
// to the containers' clear() semantics. NOTE(review): _store_jacobians and
// the vertex_descriptors links are deliberately(?) retained — the
// descriptor stays usable for a fresh add_factor sequence; confirm intent.
831 void clear() {
832 global_ids.clear();
833 global_to_local_map.clear();
834 local_to_global_map.clear();
835
836 hm.clear();
837
838 _active_count = 0;
839
840 host_ids.clear();
841 device_ids.clear();
842 device_obs.clear();
843 residuals.clear();
844 precision_matrices.clear();
845 data.clear();
846
847 chi2_vec.clear();
848 chi2_derivative.clear();
849 loss.clear();
850
851 active.clear();
852 device_active.clear();
853 active_indices.clear();
854
855 for (size_t i = 0; i < N; i++) {
856 jacobians[i].data.clear();
857 }
858 }
859
860private:
865 constexpr static std::array<S, error_dim * error_dim>
867 constexpr size_t E = error_dim;
868 return []() constexpr {
869 std::array<S, E *E> pmat = {};
870 for (size_t i = 0; i < E; i++) {
871 pmat[i * E + i] = static_cast<S>(1.0);
872 }
873 return pmat;
874 }
875 ();
876 }
877};
878
879} // namespace graphite
Base class for factor descriptors.
Definition factor.hpp:36
Definition block.hpp:6
Represents a group of factors which will be processed together on the GPU.
Definition factor.hpp:124
static constexpr size_t get_num_vertices()
Returns the number of vertices connected to each factor.
Definition factor.hpp:270
virtual bool store_jacobians() override
Returns whether the Jacobians are stored.
Definition factor.hpp:641
void to_device() override
Prepares descriptor for GPU processing.
Definition factor.hpp:473
virtual bool supports_dynamic_jacobians() override
Determines whether dynamic Jacobian computation is supported (only supported for manual differentiati...
Definition factor.hpp:648
void compute_hessian_block_diagonal_async(std::unordered_map< BaseVertexDescriptor< T, S > *, thrust::device_vector< InvP > > &block_diagonals, const T *jacobian_scales, cudaStream_t stream) override
Computes the block diagonal of the Hessian matrix asynchronously.
Definition factor.hpp:244
size_t add_factor(const std::array< size_t, N > &ids, const ObservationType &obs=ObservationType(), const S *precision_matrix=nullptr, const ConstraintDataType &constraint_data=ConstraintDataType(), const LossType &loss_func=LossType())
Adds a factor to the descriptor.
Definition factor.hpp:375
void compute_hessian_scalar_diagonal_async(T *diagonal, const T *jacobian_scales) override
Computes the scalar diagonal of the Hessian matrix asynchronously.
Definition factor.hpp:262
void compute_jacobians(StreamPool &streams) override
Computes the Jacobians using manual differentiation.
Definition factor.hpp:234
virtual bool use_autodiff() override
Determines whether to use automatic differentiation based on the traits of the factor.
Definition factor.hpp:623
virtual size_t setup_hessian_computation(std::unordered_map< BlockCoordinates, size_t > &block_indices, thrust::device_vector< S > &d_hessian, size_t *h_block_offsets, StreamPool &streams) override
Sets up data needed to compute the upper triangle of the Hessian matrix on the GPU.
Definition factor.hpp:703
static constexpr std::array< size_t, N > get_vertex_sizes()
Returns the sizes (dimensions) of the vertices.
Definition factor.hpp:520
void initialize_jacobian_storage() override
Allocates memory for the Jacobians.
Definition factor.hpp:527
size_t internal_count() const
Returns the number of all active and inactive factors.
Definition factor.hpp:444
void compute_Jv(T *out, T *in, const T *jacobian_scales, StreamPool &streams) override
Computes the product of the Jacobian matrix and a vector.
Definition factor.hpp:214
virtual size_t get_residual_size() const override
Returns the size of the vector for all active and inactive residuals.
Definition factor.hpp:544
size_t get_num_descriptors() const override
Returns the number of vertex descriptors (same as the number of vertices for each factor).
Definition factor.hpp:276
virtual void get_hessian_block_coordinates(thrust::device_vector< BlockCoordinates > &block_coords) override
Determines which Hessian blocks are filled in (upper triangle).
Definition factor.hpp:658
virtual void scale_jacobians_async(T *jacobian_scales) override
Scales the Jacobians asynchronously.
Definition factor.hpp:614
void set_active(size_t id, const uint8_t active_value)
Sets the active state of a factor.
Definition factor.hpp:420
FactorDescriptor(VertexDescPtrs... vertex_descriptors)
Constructs a FactorDescriptor with the given vertex descriptors.
Definition factor.hpp:184
void compute_error_autodiff(StreamPool &streams) override
Simultaneously computes the residuals and corresponding Jacobians using automatic differentiation.
Definition factor.hpp:200
void clear()
Clears all data associated with this factor descriptor.
Definition factor.hpp:831
void compute_error() override
Computes the residuals across all factors in the descriptor.
Definition factor.hpp:194
virtual size_t execute_hessian_computation(std::unordered_map< BlockCoordinates, size_t > &block_indices, thrust::device_vector< S > &d_hessian, size_t *d_block_offsets, StreamPool &streams) override
Executes the Hessian block computations for this descriptor using the data from setup_hessian_computa...
Definition factor.hpp:772
std::array< size_t, N > get_vertex_ids(const size_t id) const
Returns the vertex ids connected to a specific factor.
Definition factor.hpp:596
VertexPointerPointerTuple get_vertices()
Returns a tuple of vertex pointers.
Definition factor.hpp:505
T chi2(const size_t id) const
Returns the chi-squared value for a specific factor.
Definition factor.hpp:565
static constexpr std::array< S, error_dim *error_dim > get_default_precision_matrix()
Gets the default precision matrix (identity matrix).
Definition factor.hpp:866
void remove_factor(const size_t id)
Removes a factor by its id.
Definition factor.hpp:309
void link_factors(const std::array< BaseVertexDescriptor< T, S > *, N > &vertex_descriptors)
Links the factor to the given vertex descriptors. This is already called during construction.
Definition factor.hpp:487
ConstraintDataType * get_constraint_data(const size_t id)
Returns the constraint data for a specific factor.
Definition factor.hpp:581
void compute_Jtv(T *out, T *in, const T *jacobian_scales, StreamPool &streams) override
Computes the product of the transposed Jacobian matrix and a vector.
Definition factor.hpp:222
void reserve(size_t size)
Reserves memory for the factor. Generally, you should always reserve the memory you need before addin...
Definition factor.hpp:289
size_t active_count() const override
Returns the number of active factors.
Definition factor.hpp:450
virtual void set_jacobian_storage(const bool store)
Sets whether to store the Jacobians. If false, Jacobians will be computed dynamically (on-the-fly),...
Definition factor.hpp:633
JacobianStorage< S > * get_jacobians() override
Returns the Jacobian storage for the factor.
Definition factor.hpp:281
void compute_b_async(T *b, const T *jacobian_scales) override
Computes the gradient vector b asynchronously.
Definition factor.hpp:207
virtual T chi2() override
Computes the chi-squared value for all active and inactive factors.
Definition factor.hpp:552
void initialize_device_ids(const uint8_t optimization_level) override
Initializes the device ids and a list of active factors.
Definition factor.hpp:456
void reset_active()
Resets the active state of all factors to 0x1.
Definition factor.hpp:436
Storage class for Jacobians of a factor.
Definition factor.hpp:26
Definition stream.hpp:7
Definition differentiation.hpp:8
Definition factor.hpp:99
Definition factor.hpp:95
Definition factor.hpp:108