17#include <thrust/execution_policy.h>
18#include <thrust/inner_product.h>
19#include <thrust/universal_vector.h>
28 std::pair<size_t, size_t> dimensions;
30 thrust::device_vector<S> data;
38 using InvP = std::conditional_t<is_low_precision<S>::value, T, S>;
42 virtual bool use_autodiff() = 0;
43 virtual void compute_error() = 0;
44 virtual void compute_error_autodiff(
StreamPool &streams) = 0;
45 virtual void compute_b_async(T *b,
const T *jacobian_scales) = 0;
46 virtual void compute_Jv(T *out, T *in,
const T *jacobian_scales,
48 virtual void compute_Jtv(T *out, T *in,
const T *jacobian_scales,
51 virtual void flag_active_vertices_async(
const uint8_t level) = 0;
53 virtual void compute_jacobians(
StreamPool &streams) = 0;
54 virtual void compute_hessian_block_diagonal_async(
55 std::unordered_map<BaseVertexDescriptor<T, S> *,
56 thrust::device_vector<InvP>> &block_diagonals,
57 const T *jacobian_scales, cudaStream_t stream) = 0;
60 compute_hessian_scalar_diagonal_async(T *diagonal,
61 const T *jacobian_scales) = 0;
63 virtual JacobianStorage<S> *get_jacobians() = 0;
64 virtual void initialize_jacobian_storage() = 0;
66 virtual size_t active_count()
const = 0;
67 virtual size_t get_num_descriptors()
const = 0;
69 virtual size_t get_residual_size()
const = 0;
70 virtual void scale_jacobians_async(T *jacobian_scales) = 0;
72 virtual void initialize_device_ids(
const uint8_t optimization_level) = 0;
73 virtual void to_device() = 0;
77 virtual void set_jacobian_storage(
const bool store) = 0;
78 virtual bool store_jacobians() = 0;
79 virtual bool supports_dynamic_jacobians() = 0;
81 virtual void get_hessian_block_coordinates(
82 thrust::device_vector<BlockCoordinates> &block_coords) = 0;
84 virtual size_t setup_hessian_computation(
85 std::unordered_map<BlockCoordinates, size_t> &block_indices,
86 thrust::device_vector<S> &d_hessian,
size_t *h_block_offsets,
89 virtual size_t execute_hessian_computation(
90 std::unordered_map<BlockCoordinates, size_t> &block_indices,
91 thrust::device_vector<S> &d_hessian,
size_t *d_block_offsets,
96 using type =
typename T::VertexType;
100 using type =
typename T::VertexType *;
104 using type =
typename T::VertexType **;
107template <
typename Tuple,
template <
typename>
class MetaFunc>
111template <
typename... Ts,
template <
typename>
class MetaFunc>
113 using type = std::tuple<typename MetaFunc<Ts>::type...>;
116template <
typename Tuple,
template <
typename>
class MetaFunc>
123template <
typename T,
typename S,
typename FTraits>
127 std::vector<size_t> global_ids;
128 std::unordered_map<size_t, size_t> global_to_local_map;
129 std::vector<size_t> local_to_global_map;
131 HandleManager<size_t> hm;
133 bool _store_jacobians;
134 size_t _active_count;
137 using InvP = std::conditional_t<is_low_precision<S>::value, T, S>;
139 using Traits = FTraits;
141 static constexpr size_t N =
142 std::tuple_size<typename Traits::VertexDescriptors>::value;
143 static constexpr size_t error_dim = Traits::dimension;
145 std::array<BaseVertexDescriptor<T, S> *, N> vertex_descriptors;
147 using VertexDescriptorTuple =
typename Traits::VertexDescriptors;
148 using VertexTypesTuple =
149 transform_tuple_t<VertexDescriptorTuple, get_vertex_type>;
150 using VertexPointerTuple =
151 transform_tuple_t<VertexDescriptorTuple, get_vertex_pointer_type>;
152 using VertexPointerPointerTuple =
153 transform_tuple_t<VertexDescriptorTuple, get_vertex_pointer_pointer_type>;
155 using ObservationType =
typename Traits::Observation;
156 using ConstraintDataType =
typename Traits::Data;
157 using LossType =
typename Traits::Loss;
159 thrust::host_vector<size_t> host_ids;
160 thrust::device_vector<size_t> device_ids;
161 managed_vector<ObservationType> device_obs;
162 thrust::device_vector<T> residuals;
163 managed_vector<S> precision_matrices;
164 managed_vector<ConstraintDataType> data;
166 managed_vector<T> chi2_vec;
167 thrust::device_vector<S> chi2_derivative;
168 managed_vector<LossType> loss;
170 thrust::host_vector<uint8_t> active;
171 thrust::device_vector<uint8_t> device_active;
172 thrust::device_vector<size_t> active_indices;
174 std::array<JacobianStorage<S>, N> jacobians;
175 std::array<S, Traits::dimension * Traits::dimension> default_precision_matrix;
182 template <
typename... VertexDescPtrs,
183 typename = std::enable_if_t<
sizeof...(VertexDescPtrs) == N>>
185 : _store_jacobians(
true), _active_count(0) {
201 ops::compute_error_autodiff<T, S>(
this, streams);
208 ops::compute_b_async<T, S>(
this, b, jacobian_scales);
216 ops::compute_Jv<T, S>(
this, out, in, jacobian_scales, streams);
224 ops::compute_Jtv<T, S>(
this, out, in, jacobian_scales, streams);
227 void flag_active_vertices_async(
const uint8_t level)
override {
228 ops::flag_active_vertices(
this, level);
235 if constexpr (std::is_same_v<
typename Traits::Differentiation,
237 ops::compute_jacobians<T, S>(
this, streams);
245 std::unordered_map<BaseVertexDescriptor<T, S> *,
246 thrust::device_vector<InvP>> &block_diagonals,
247 const T *jacobian_scales, cudaStream_t stream)
override {
249 std::array<InvP *, N> diagonal_blocks;
250 for (
size_t i = 0; i < N; i++) {
251 diagonal_blocks[i] = block_diagonals[vertex_descriptors[i]].data().get();
254 ops::compute_block_diagonal<T, S>(
this, diagonal_blocks, jacobian_scales,
263 const T *jacobian_scales)
override {
264 ops::compute_hessian_scalar_diagonal<T, S>(
this, diagonal, jacobian_scales);
290 global_ids.reserve(N * size);
291 host_ids.reserve(N * size);
292 device_ids.reserve(N * size);
293 device_obs.reserve(size);
294 global_to_local_map.reserve(size);
295 local_to_global_map.reserve(size);
296 precision_matrices.reserve(size * error_dim * error_dim);
299 chi2_vec.reserve(size);
300 residuals.reserve(size * error_dim);
301 active.reserve(size);
310 if (global_to_local_map.find(
id) == global_to_local_map.end()) {
311 std::cerr <<
"Factor with id " <<
id <<
" not found." << std::endl;
315 auto local_id = global_to_local_map[id];
319 for (
size_t i = 0; i < N; i++) {
320 global_ids[local_id * N + i] = global_ids[last_local_id * N + i];
322 global_ids.resize(global_ids.size() - N);
325 device_obs[local_id] = device_obs[last_local_id];
326 device_obs.pop_back();
329 constexpr size_t precision_matrix_size = error_dim * error_dim;
330 for (
size_t i = 0; i < precision_matrix_size; i++) {
331 precision_matrices[local_id * precision_matrix_size + i] =
332 precision_matrices[last_local_id * precision_matrix_size + i];
335 precision_matrices.resize(precision_matrices.size() -
336 precision_matrix_size);
339 data[local_id] = data[last_local_id];
342 loss[local_id] = loss[last_local_id];
345 chi2_vec[local_id] = chi2_vec[last_local_id];
348 active[local_id] = active[last_local_id];
352 const auto last_global_id = local_to_global_map[last_local_id];
353 global_to_local_map[last_global_id] = local_id;
354 local_to_global_map[local_id] = last_global_id;
357 global_to_local_map.erase(
id);
358 local_to_global_map.pop_back();
376 const ObservationType &obs = ObservationType(),
377 const S *precision_matrix =
nullptr,
378 const ConstraintDataType &constraint_data = ConstraintDataType(),
379 const LossType &loss_func = LossType()) {
381 const auto id = hm.get();
384 global_to_local_map.insert({id, local_id});
385 local_to_global_map.push_back(
id);
387 global_ids.insert(global_ids.end(), ids.begin(), ids.end());
388 device_obs.push_back(obs);
390 constexpr size_t precision_matrix_size = error_dim * error_dim;
391 if (precision_matrix) {
392 precision_matrices.resize(precision_matrices.size() +
393 precision_matrix_size);
394 for (
size_t i = 0; i < precision_matrix_size; i++) {
395 precision_matrices[local_id * precision_matrix_size + i] =
400 precision_matrices.resize(precision_matrices.size() +
401 precision_matrix_size);
403 for (
size_t i = 0; i < precision_matrix_size; i++) {
404 precision_matrices[local_id * precision_matrix_size + i] =
405 default_precision_matrix[i];
409 data.push_back(constraint_data);
410 loss.push_back(loss_func);
421 if (global_to_local_map.find(
id) == global_to_local_map.end()) {
422 std::cerr <<
"Factor with id " <<
id <<
" not found." << std::endl;
426 constexpr uint8_t NOT_MSB = 0x7F;
428 auto local_id = global_to_local_map[id];
430 NOT_MSB & active_value;
437 thrust::fill(thrust::device, active.begin(), active.end(), 0x1);
458 host_ids.resize(global_ids.size());
459 for (
size_t i = 0; i < global_ids.size(); i++) {
461 vertex_descriptors[i % N]->get_global_map().at(global_ids[i]);
463 device_ids = host_ids;
465 device_active = active;
466 _active_count = build_active_indices(device_active, active_indices,
479 thrust::fill(residuals.begin(), residuals.end(), 0);
488 const std::array<BaseVertexDescriptor<T, S> *, N> &vertex_descriptors) {
489 this->vertex_descriptors = vertex_descriptors;
492 template <std::size_t... I>
493 VertexPointerPointerTuple get_vertices_impl(std::index_sequence<I...>) {
494 return std::make_tuple(
495 (
static_cast<typename std::tuple_element<I, VertexDescriptorTuple>::type
496 *
>(vertex_descriptors[I])
506 return get_vertices_impl(std::make_index_sequence<N>{});
509 template <std::size_t... I>
510 static constexpr std::array<size_t, N>
511 get_vertex_sizes_impl(std::index_sequence<I...>) {
512 return std::array<size_t, N>{
513 std::tuple_element_t<I, VertexDescriptorTuple>::dim...};
521 return get_vertex_sizes_impl(std::make_index_sequence<N>{});
528 for (
size_t i = 0; i < N; i++) {
529 if (
store_jacobians() || !std::is_same_v<
typename Traits::Differentiation,
531 jacobians[i].dimensions = {error_dim,
532 vertex_descriptors[i]->dimension()};
533 jacobians[i].data.resize(
534 error_dim * vertex_descriptors[i]->dimension() *
internal_count());
553 ops::compute_chi2_async<T, S>(
this);
555 return thrust::reduce(thrust::cuda::par.on(0), chi2_vec.begin(),
556 chi2_vec.end(),
static_cast<T
>(0.0),
565 T
chi2(
const size_t id)
const {
566 auto it = global_to_local_map.find(
id);
567 if (it == global_to_local_map.end()) {
568 std::cerr <<
"Factor with id " <<
id <<
" not found." << std::endl;
569 return static_cast<T
>(0.0);
571 return chi2_vec[it->second];
582 auto it = global_to_local_map.find(
id);
583 if (it == global_to_local_map.end()) {
584 std::cerr <<
"Factor with id " <<
id <<
" not found." << std::endl;
587 return &data[it->second];
597 auto it = global_to_local_map.find(
id);
598 if (it == global_to_local_map.end()) {
599 std::cerr <<
"Factor with id " <<
id <<
" not found." << std::endl;
602 const auto local_id = it->second;
603 std::array<size_t, N> ids;
604 for (
size_t i = 0; i < N; i++) {
605 ids[i] = global_ids[local_id * N + i];
615 ops::scale_jacobians<T, S>(
this, jacobian_scales);
624 return use_autodiff_impl<typename Traits::Differentiation>();
634 _store_jacobians = store;
649 return std::is_same_v<
typename Traits::Differentiation,
659 thrust::device_vector<BlockCoordinates> &block_coords)
override {
661 const auto &active_factors = active_indices;
662 const auto ids = device_ids.data().get();
663 for (
size_t i = 0; i < num_vertices; i++) {
664 const auto vi_active = vertex_descriptors[i]->get_active_state();
665 const auto vi_block_ids = vertex_descriptors[i]->get_block_ids();
666 for (
size_t j = i; j < num_vertices; j++) {
667 const auto vj_active = vertex_descriptors[j]->get_active_state();
668 const auto vj_block_ids = vertex_descriptors[j]->get_block_ids();
670 auto num_coords = block_coords.size();
671 block_coords.resize(block_coords.size() + active_factors.size());
672 auto end = thrust::transform_if(
673 thrust::device, active_factors.begin(), active_factors.end(),
674 block_coords.begin() + num_coords,
675 [=] __device__(
size_t factor_idx) {
676 const size_t vi_id = ids[factor_idx * num_vertices + i];
677 const size_t vj_id = ids[factor_idx * num_vertices + j];
679 const auto block_i = vi_block_ids[vi_id];
680 const auto block_j = vj_block_ids[vj_id];
681 if (block_i > block_j) {
686 [=] __device__(
const size_t &factor_idx) {
687 const auto vi_id = ids[factor_idx * num_vertices + i];
688 const auto vj_id = ids[factor_idx * num_vertices + j];
689 return (is_vertex_active(vi_active, vi_id) &&
690 is_vertex_active(vj_active, vj_id));
692 block_coords.resize(end - block_coords.begin());
704 std::unordered_map<BlockCoordinates, size_t> &block_indices,
705 thrust::device_vector<S> &d_hessian,
size_t *h_block_offsets,
709 const auto &d_active_factors = active_indices;
710 thrust::host_vector<size_t> h_active_factors = active_indices;
711 const auto d_ids = device_ids.data().get();
712 const auto h_ids = &host_ids[0];
714 size_t mul_count = 0;
716 for (
size_t i = 0; i < num_vertices; i++) {
717 for (
size_t j = i; j < num_vertices; j++) {
718 mul_count += d_active_factors.size();
722 size_t write_idx = 0;
725 for (
size_t i = 0; i < num_vertices; i++) {
726 const auto vi_active = vertex_descriptors[i]->get_active_state();
727 const auto vi_block_ids = vertex_descriptors[i]->get_block_ids();
728 for (
size_t j = i; j < num_vertices; j++) {
729 const auto vj_active = vertex_descriptors[j]->get_active_state();
730 const auto vj_block_ids = vertex_descriptors[j]->get_block_ids();
732 for (
const auto &factor_idx : h_active_factors) {
734 const auto vi_id = h_ids[factor_idx * num_vertices + i];
735 const auto vj_id = h_ids[factor_idx * num_vertices + j];
737 if (is_vertex_active(vi_active, vi_id) &&
738 is_vertex_active(vj_active, vj_id)) {
739 const auto block_i = vi_block_ids[vi_id];
740 const auto block_j = vj_block_ids[vj_id];
742 if (block_i > block_j) {
746 auto it = block_indices.find(coordinates);
747 if (it != block_indices.end()) {
748 const size_t block_offset = it->second;
749 h_block_offsets[write_idx++] = block_offset;
752 h_block_offsets[write_idx++] = 0;
753 std::cerr <<
"Error: Hessian block coordinate not found!"
757 h_block_offsets[write_idx++] = 0;
773 std::unordered_map<BlockCoordinates, size_t> &block_indices,
774 thrust::device_vector<S> &d_hessian,
size_t *d_block_offsets,
777 const auto &d_active_factors = active_indices;
778 thrust::host_vector<size_t> h_active_factors = active_indices;
779 const auto d_ids = device_ids.data().get();
781 size_t mul_count = 0;
783 for (
size_t i = 0; i < num_vertices; i++) {
784 for (
size_t j = i; j < num_vertices; j++) {
785 mul_count += d_active_factors.size();
789 size_t write_idx = 0;
791 size_t stream_idx = 0;
793 for (
size_t i = 0; i < num_vertices; i++) {
794 const auto vi_active = vertex_descriptors[i]->get_active_state();
795 const auto vi_block_ids = vertex_descriptors[i]->get_block_ids();
796 for (
size_t j = i; j < num_vertices; j++) {
797 const auto vj_active = vertex_descriptors[j]->get_active_state();
799 const auto start_idx = write_idx;
800 write_idx += d_active_factors.size();
801 const auto stream = streams.select(stream_idx++);
803 const auto dim_i = vertex_descriptors[i]->dimension();
804 const auto dim_j = vertex_descriptors[j]->dimension();
805 const size_t block_dim = dim_i * dim_j;
806 const size_t num_threads = d_active_factors.size() * block_dim;
807 const size_t threads_per_block = 256;
808 const size_t num_blocks =
809 (num_threads + threads_per_block - 1) / threads_per_block;
811 ops::compute_hessian_block_kernel<T, S, N, error_dim>
812 <<<num_blocks, threads_per_block, 0, stream>>>(
813 i, j, dim_i, dim_j, d_active_factors.data().get(),
814 d_active_factors.size(), d_ids, d_block_offsets + start_idx,
815 vi_active, vj_active, vertex_descriptors[i]->get_hessian_ids(),
816 vertex_descriptors[j]->get_hessian_ids(),
817 jacobians[i].data.data().get(), jacobians[j].data.data().get(),
818 precision_matrices.data().get(), chi2_derivative.data().get(),
819 d_hessian.data().get());
833 global_to_local_map.clear();
834 local_to_global_map.clear();
844 precision_matrices.clear();
848 chi2_derivative.clear();
852 device_active.clear();
853 active_indices.clear();
855 for (
size_t i = 0; i < N; i++) {
856 jacobians[i].data.clear();
865 constexpr static std::array<S, error_dim * error_dim>
867 constexpr size_t E = error_dim;
868 return []()
constexpr {
869 std::array<S, E *E> pmat = {};
870 for (
size_t i = 0; i < E; i++) {
871 pmat[i * E + i] =
static_cast<S
>(1.0);
Base class for factor descriptors.
Definition factor.hpp:36
Represents a group of factors which will be processed together on the GPU.
Definition factor.hpp:124
static constexpr size_t get_num_vertices()
Returns the number of vertices connected to each factor.
Definition factor.hpp:270
virtual bool store_jacobians() override
Returns whether the Jacobians are stored.
Definition factor.hpp:641
void to_device() override
Prepares descriptor for GPU processing.
Definition factor.hpp:473
virtual bool supports_dynamic_jacobians() override
Determines whether dynamic Jacobian computation is supported (only supported for manual differentiati...
Definition factor.hpp:648
void compute_hessian_block_diagonal_async(std::unordered_map< BaseVertexDescriptor< T, S > *, thrust::device_vector< InvP > > &block_diagonals, const T *jacobian_scales, cudaStream_t stream) override
Computes the block diagonal of the Hessian matrix asynchronously.
Definition factor.hpp:244
size_t add_factor(const std::array< size_t, N > &ids, const ObservationType &obs=ObservationType(), const S *precision_matrix=nullptr, const ConstraintDataType &constraint_data=ConstraintDataType(), const LossType &loss_func=LossType())
Adds a factor to the descriptor.
Definition factor.hpp:375
void compute_hessian_scalar_diagonal_async(T *diagonal, const T *jacobian_scales) override
Computes the scalar diagonal of the Hessian matrix asynchronously.
Definition factor.hpp:262
void compute_jacobians(StreamPool &streams) override
Computes the Jacobians using manual differentiation.
Definition factor.hpp:234
virtual bool use_autodiff() override
Determines whether to use automatic differentiation based on the traits of the factor.
Definition factor.hpp:623
virtual size_t setup_hessian_computation(std::unordered_map< BlockCoordinates, size_t > &block_indices, thrust::device_vector< S > &d_hessian, size_t *h_block_offsets, StreamPool &streams) override
Sets up data needed to compute the upper triangle of the Hessian matrix on the GPU.
Definition factor.hpp:703
static constexpr std::array< size_t, N > get_vertex_sizes()
Returns the sizes (dimensions) of the vertices.
Definition factor.hpp:520
void initialize_jacobian_storage() override
Allocates memory for the Jacobians.
Definition factor.hpp:527
size_t internal_count() const
Returns the number of all active and inactive factors.
Definition factor.hpp:444
void compute_Jv(T *out, T *in, const T *jacobian_scales, StreamPool &streams) override
Computes the product of the Jacobian matrix and a vector.
Definition factor.hpp:214
virtual size_t get_residual_size() const override
Returns the size of the vector for all active and inactive residuals.
Definition factor.hpp:544
size_t get_num_descriptors() const override
Returns the number of vertex descriptors (same as the number of vertices for each factor).
Definition factor.hpp:276
virtual void get_hessian_block_coordinates(thrust::device_vector< BlockCoordinates > &block_coords) override
Determines which Hessian blocks are filled in (upper triangle).
Definition factor.hpp:658
virtual void scale_jacobians_async(T *jacobian_scales) override
Scales the Jacobians asynchronously.
Definition factor.hpp:614
void set_active(size_t id, const uint8_t active_value)
Sets the active state of a factor.
Definition factor.hpp:420
FactorDescriptor(VertexDescPtrs... vertex_descriptors)
Constructs a FactorDescriptor with the given vertex descriptors.
Definition factor.hpp:184
void compute_error_autodiff(StreamPool &streams) override
Simultaneously computes the residuals and corresponding Jacobians using automatic differentiation.
Definition factor.hpp:200
void clear()
Clears all data associated with this factor descriptor.
Definition factor.hpp:831
void compute_error() override
Computes the residuals across all factors in the descriptor.
Definition factor.hpp:194
virtual size_t execute_hessian_computation(std::unordered_map< BlockCoordinates, size_t > &block_indices, thrust::device_vector< S > &d_hessian, size_t *d_block_offsets, StreamPool &streams) override
Executes the Hessian block computations for this descriptor using the data from setup_hessian_computa...
Definition factor.hpp:772
std::array< size_t, N > get_vertex_ids(const size_t id) const
Returns the vertex ids connected to a specific factor.
Definition factor.hpp:596
VertexPointerPointerTuple get_vertices()
Returns a tuple of vertex pointers.
Definition factor.hpp:505
T chi2(const size_t id) const
Returns the chi-squared value for a specific factor.
Definition factor.hpp:565
static constexpr std::array< S, error_dim *error_dim > get_default_precision_matrix()
Gets the default precision matrix (identity matrix).
Definition factor.hpp:866
void remove_factor(const size_t id)
Removes a factor by its id.
Definition factor.hpp:309
void link_factors(const std::array< BaseVertexDescriptor< T, S > *, N > &vertex_descriptors)
Links the factor to the given vertex descriptors. This is already called during construction.
Definition factor.hpp:487
ConstraintDataType * get_constraint_data(const size_t id)
Returns the constraint data for a specific factor.
Definition factor.hpp:581
void compute_Jtv(T *out, T *in, const T *jacobian_scales, StreamPool &streams) override
Computes the product of the transposed Jacobian matrix and a vector.
Definition factor.hpp:222
void reserve(size_t size)
Reserves memory for the factor. Generally, you should always reserve the memory you need before addin...
Definition factor.hpp:289
size_t active_count() const override
Returns the number of active factors.
Definition factor.hpp:450
virtual void set_jacobian_storage(const bool store)
Sets whether to store the Jacobians. If false, Jacobians will be computed dynamically (on-the-fly),...
Definition factor.hpp:633
JacobianStorage< S > * get_jacobians() override
Returns the Jacobian storage for the factor.
Definition factor.hpp:281
void compute_b_async(T *b, const T *jacobian_scales) override
Computes the gradient vector b asynchronously.
Definition factor.hpp:207
virtual T chi2() override
Computes the chi-squared value for all active and inactive factors.
Definition factor.hpp:552
void initialize_device_ids(const uint8_t optimization_level) override
Initializes the device ids and a list of active factors.
Definition factor.hpp:456
void reset_active()
Resets the active state of all factors to 0x1.
Definition factor.hpp:436
Storage class for Jacobians of a factor.
Definition factor.hpp:26
Definition differentiation.hpp:8
Definition factor.hpp:103