17#include <thrust/execution_policy.h>
18#include <thrust/inner_product.h>
19#include <thrust/universal_vector.h>
28 std::pair<size_t, size_t> dimensions;
30 thrust::device_vector<S> data;
38 using InvP = std::conditional_t<is_low_precision<S>::value, T, S>;
42 virtual bool use_autodiff() = 0;
43 virtual void compute_error() = 0;
44 virtual void compute_error_autodiff(
StreamPool &streams) = 0;
45 virtual void compute_b_async(T *b,
const T *jacobian_scales) = 0;
46 virtual void compute_Jv(T *out, T *in,
const T *jacobian_scales,
48 virtual void compute_Jtv(T *out, T *in,
const T *jacobian_scales,
51 virtual void flag_active_vertices_async(
const uint8_t level) = 0;
53 virtual void compute_jacobians(
StreamPool &streams) = 0;
54 virtual void compute_hessian_block_diagonal_async(
55 std::unordered_map<BaseVertexDescriptor<T, S> *,
56 thrust::device_vector<InvP>> &block_diagonals,
57 const T *jacobian_scales, cudaStream_t stream) = 0;
60 compute_hessian_scalar_diagonal_async(T *diagonal,
61 const T *jacobian_scales) = 0;
63 virtual JacobianStorage<S> *get_jacobians() = 0;
64 virtual void initialize_jacobian_storage() = 0;
66 virtual size_t active_count()
const = 0;
67 virtual size_t get_num_descriptors()
const = 0;
69 virtual size_t get_residual_size()
const = 0;
70 virtual void scale_jacobians_async(T *jacobian_scales) = 0;
72 virtual void initialize_device_ids(
const uint8_t optimization_level) = 0;
73 virtual void to_device() = 0;
77 virtual void set_jacobian_storage(
const bool store) = 0;
78 virtual bool store_jacobians() = 0;
79 virtual bool supports_dynamic_jacobians() = 0;
81 virtual void get_hessian_block_coordinates(
82 thrust::device_vector<BlockCoordinates> &block_coords) = 0;
84 virtual size_t setup_hessian_computation(
85 std::unordered_map<BlockCoordinates, size_t> &block_indices,
86 thrust::device_vector<S> &d_hessian,
size_t *h_block_offsets,
89 virtual size_t execute_hessian_computation(
90 std::unordered_map<BlockCoordinates, size_t> &block_indices,
91 thrust::device_vector<S> &d_hessian,
size_t *d_block_offsets,
96 using type =
typename T::VertexType;
100 using type =
typename T::VertexType *;
104 using type =
typename T::VertexType **;
107template <
typename Tuple,
template <
typename>
class MetaFunc>
111template <
typename... Ts,
template <
typename>
class MetaFunc>
113 using type = std::tuple<typename MetaFunc<Ts>::type...>;
120template <
typename T,
typename S,
typename FTraits>
124 std::vector<size_t> global_ids;
125 std::unordered_map<size_t, size_t> global_to_local_map;
126 std::vector<size_t> local_to_global_map;
128 HandleManager<size_t> hm;
130 bool _store_jacobians;
131 size_t _active_count;
134 using InvP = std::conditional_t<is_low_precision<S>::value, T, S>;
136 using Traits = FTraits;
138 static constexpr size_t N =
139 std::tuple_size<typename Traits::VertexDescriptors>::value;
140 static constexpr size_t error_dim = Traits::dimension;
142 std::array<BaseVertexDescriptor<T, S> *, N> vertex_descriptors;
144 using VertexDescriptorTuple =
typename Traits::VertexDescriptors;
145 using VertexTypesTuple =
147 using VertexPointerTuple =
150 using VertexPointerPointerTuple =
154 using ObservationType =
typename Traits::Observation;
155 using ConstraintDataType =
typename Traits::Data;
156 using LossType =
typename Traits::Loss;
158 thrust::host_vector<size_t> host_ids;
159 thrust::device_vector<size_t> device_ids;
160 managed_vector<ObservationType> device_obs;
161 thrust::device_vector<T> residuals;
162 managed_vector<S> precision_matrices;
163 managed_vector<ConstraintDataType> data;
165 managed_vector<T> chi2_vec;
166 thrust::device_vector<S> chi2_derivative;
167 managed_vector<LossType> loss;
169 thrust::host_vector<uint8_t> active;
170 thrust::device_vector<uint8_t> device_active;
171 thrust::device_vector<size_t> active_indices;
173 std::array<JacobianStorage<S>, N> jacobians;
174 std::array<S, Traits::dimension * Traits::dimension> default_precision_matrix;
181 template <
typename... VertexDescPtrs,
182 typename = std::enable_if_t<
sizeof...(VertexDescPtrs) == N>>
184 : _store_jacobians(
true), _active_count(0) {
200 ops::compute_error_autodiff<T, S>(
this, streams);
207 ops::compute_b_async<T, S>(
this, b, jacobian_scales);
215 ops::compute_Jv<T, S>(
this, out, in, jacobian_scales, streams);
223 ops::compute_Jtv<T, S>(
this, out, in, jacobian_scales, streams);
226 void flag_active_vertices_async(
const uint8_t level)
override {
227 ops::flag_active_vertices(
this, level);
234 if constexpr (std::is_same_v<
typename Traits::Differentiation,
236 ops::compute_jacobians<T, S>(
this, streams);
244 std::unordered_map<BaseVertexDescriptor<T, S> *,
245 thrust::device_vector<InvP>> &block_diagonals,
246 const T *jacobian_scales, cudaStream_t stream)
override {
248 std::array<InvP *, N> diagonal_blocks;
249 for (
size_t i = 0; i < N; i++) {
250 diagonal_blocks[i] = block_diagonals[vertex_descriptors[i]].data().get();
253 ops::compute_block_diagonal<T, S>(
this, diagonal_blocks, jacobian_scales,
262 const T *jacobian_scales)
override {
263 ops::compute_hessian_scalar_diagonal<T, S>(
this, diagonal, jacobian_scales);
289 global_ids.reserve(N * size);
290 host_ids.reserve(N * size);
291 device_ids.reserve(N * size);
292 device_obs.reserve(size);
293 global_to_local_map.reserve(size);
294 local_to_global_map.reserve(size);
295 precision_matrices.reserve(size * error_dim * error_dim);
298 chi2_vec.reserve(size);
299 residuals.reserve(size * error_dim);
300 active.reserve(size);
309 if (global_to_local_map.find(
id) == global_to_local_map.end()) {
310 std::cerr <<
"Factor with id " <<
id <<
" not found." << std::endl;
314 auto local_id = global_to_local_map[id];
318 for (
size_t i = 0; i < N; i++) {
319 global_ids[local_id * N + i] = global_ids[last_local_id * N + i];
321 global_ids.resize(global_ids.size() - N);
324 device_obs[local_id] = device_obs[last_local_id];
325 device_obs.pop_back();
328 constexpr size_t precision_matrix_size = error_dim * error_dim;
329 for (
size_t i = 0; i < precision_matrix_size; i++) {
330 precision_matrices[local_id * precision_matrix_size + i] =
331 precision_matrices[last_local_id * precision_matrix_size + i];
334 precision_matrices.resize(precision_matrices.size() -
335 precision_matrix_size);
338 data[local_id] = data[last_local_id];
341 loss[local_id] = loss[last_local_id];
344 chi2_vec[local_id] = chi2_vec[last_local_id];
347 active[local_id] = active[last_local_id];
351 const auto last_global_id = local_to_global_map[last_local_id];
352 global_to_local_map[last_global_id] = local_id;
353 local_to_global_map[local_id] = last_global_id;
356 global_to_local_map.erase(
id);
357 local_to_global_map.pop_back();
375 const ObservationType &obs = ObservationType(),
376 const S *precision_matrix =
nullptr,
377 const ConstraintDataType &constraint_data = ConstraintDataType(),
378 const LossType &loss_func = LossType()) {
380 const auto id = hm.get();
383 global_to_local_map.insert({id, local_id});
384 local_to_global_map.push_back(
id);
386 global_ids.insert(global_ids.end(), ids.begin(), ids.end());
387 device_obs.push_back(obs);
389 constexpr size_t precision_matrix_size = error_dim * error_dim;
390 if (precision_matrix) {
391 precision_matrices.resize(precision_matrices.size() +
392 precision_matrix_size);
393 for (
size_t i = 0; i < precision_matrix_size; i++) {
394 precision_matrices[local_id * precision_matrix_size + i] =
399 precision_matrices.resize(precision_matrices.size() +
400 precision_matrix_size);
402 for (
size_t i = 0; i < precision_matrix_size; i++) {
403 precision_matrices[local_id * precision_matrix_size + i] =
404 default_precision_matrix[i];
408 data.push_back(constraint_data);
409 loss.push_back(loss_func);
420 if (global_to_local_map.find(
id) == global_to_local_map.end()) {
421 std::cerr <<
"Factor with id " <<
id <<
" not found." << std::endl;
425 constexpr uint8_t NOT_MSB = 0x7F;
427 auto local_id = global_to_local_map[id];
429 NOT_MSB & active_value;
436 thrust::fill(thrust::device, active.begin(), active.end(), 0x1);
457 host_ids.resize(global_ids.size());
458 for (
size_t i = 0; i < global_ids.size(); i++) {
460 vertex_descriptors[i % N]->get_global_map().at(global_ids[i]);
462 device_ids = host_ids;
464 device_active = active;
465 _active_count = build_active_indices(device_active, active_indices,
478 thrust::fill(residuals.begin(), residuals.end(), 0);
487 const std::array<BaseVertexDescriptor<T, S> *, N> &vertex_descriptors) {
488 this->vertex_descriptors = vertex_descriptors;
491 template <std::size_t... I>
492 VertexPointerPointerTuple get_vertices_impl(std::index_sequence<I...>) {
493 return std::make_tuple(
494 (
static_cast<typename std::tuple_element<I, VertexDescriptorTuple>::type
495 *
>(vertex_descriptors[I])
505 return get_vertices_impl(std::make_index_sequence<N>{});
508 template <std::size_t... I>
509 static constexpr std::array<size_t, N>
510 get_vertex_sizes_impl(std::index_sequence<I...>) {
511 return std::array<size_t, N>{
512 std::tuple_element_t<I, VertexDescriptorTuple>::dim...};
520 return get_vertex_sizes_impl(std::make_index_sequence<N>{});
527 for (
size_t i = 0; i < N; i++) {
528 if (
store_jacobians() || !std::is_same_v<
typename Traits::Differentiation,
530 jacobians[i].dimensions = {error_dim,
531 vertex_descriptors[i]->dimension()};
532 jacobians[i].data.resize(
533 error_dim * vertex_descriptors[i]->dimension() *
internal_count());
552 ops::compute_chi2_async<T, S>(
this);
554 return thrust::reduce(thrust::cuda::par.on(0), chi2_vec.begin(),
555 chi2_vec.end(),
static_cast<T
>(0.0),
564 T
chi2(
const size_t id)
const {
565 auto it = global_to_local_map.find(
id);
566 if (it == global_to_local_map.end()) {
567 std::cerr <<
"Factor with id " <<
id <<
" not found." << std::endl;
568 return static_cast<T
>(0.0);
570 return chi2_vec[it->second];
581 auto it = global_to_local_map.find(
id);
582 if (it == global_to_local_map.end()) {
583 std::cerr <<
"Factor with id " <<
id <<
" not found." << std::endl;
586 return &data[it->second];
596 auto it = global_to_local_map.find(
id);
597 if (it == global_to_local_map.end()) {
598 std::cerr <<
"Factor with id " <<
id <<
" not found." << std::endl;
601 const auto local_id = it->second;
602 std::array<size_t, N> ids;
603 for (
size_t i = 0; i < N; i++) {
604 ids[i] = global_ids[local_id * N + i];
614 ops::scale_jacobians<T, S>(
this, jacobian_scales);
623 return use_autodiff_impl<typename Traits::Differentiation>();
633 _store_jacobians = store;
648 return std::is_same_v<
typename Traits::Differentiation,
658 thrust::device_vector<BlockCoordinates> &block_coords)
override {
660 const auto &active_factors = active_indices;
661 const auto ids = device_ids.data().get();
662 for (
size_t i = 0; i < num_vertices; i++) {
663 const auto vi_active = vertex_descriptors[i]->get_active_state();
664 const auto vi_block_ids = vertex_descriptors[i]->get_block_ids();
665 for (
size_t j = i; j < num_vertices; j++) {
666 const auto vj_active = vertex_descriptors[j]->get_active_state();
667 const auto vj_block_ids = vertex_descriptors[j]->get_block_ids();
669 auto num_coords = block_coords.size();
670 block_coords.resize(block_coords.size() + active_factors.size());
671 auto end = thrust::transform_if(
672 thrust::device, active_factors.begin(), active_factors.end(),
673 block_coords.begin() + num_coords,
674 [=] __device__(
size_t factor_idx) {
675 const size_t vi_id = ids[factor_idx * num_vertices + i];
676 const size_t vj_id = ids[factor_idx * num_vertices + j];
678 const auto block_i = vi_block_ids[vi_id];
679 const auto block_j = vj_block_ids[vj_id];
680 if (block_i > block_j) {
685 [=] __device__(
const size_t &factor_idx) {
686 const auto vi_id = ids[factor_idx * num_vertices + i];
687 const auto vj_id = ids[factor_idx * num_vertices + j];
688 return (is_vertex_active(vi_active, vi_id) &&
689 is_vertex_active(vj_active, vj_id));
691 block_coords.resize(end - block_coords.begin());
703 std::unordered_map<BlockCoordinates, size_t> &block_indices,
704 thrust::device_vector<S> &d_hessian,
size_t *h_block_offsets,
708 const auto &d_active_factors = active_indices;
709 thrust::host_vector<size_t> h_active_factors = active_indices;
710 const auto d_ids = device_ids.data().get();
711 const auto h_ids = &host_ids[0];
713 size_t mul_count = 0;
715 for (
size_t i = 0; i < num_vertices; i++) {
716 for (
size_t j = i; j < num_vertices; j++) {
717 mul_count += d_active_factors.size();
721 size_t write_idx = 0;
724 for (
size_t i = 0; i < num_vertices; i++) {
725 const auto vi_active = vertex_descriptors[i]->get_active_state();
726 const auto vi_block_ids = vertex_descriptors[i]->get_block_ids();
727 for (
size_t j = i; j < num_vertices; j++) {
728 const auto vj_active = vertex_descriptors[j]->get_active_state();
729 const auto vj_block_ids = vertex_descriptors[j]->get_block_ids();
731 for (
const auto &factor_idx : h_active_factors) {
733 const auto vi_id = h_ids[factor_idx * num_vertices + i];
734 const auto vj_id = h_ids[factor_idx * num_vertices + j];
736 if (is_vertex_active(vi_active, vi_id) &&
737 is_vertex_active(vj_active, vj_id)) {
738 const auto block_i = vi_block_ids[vi_id];
739 const auto block_j = vj_block_ids[vj_id];
741 if (block_i > block_j) {
745 auto it = block_indices.find(coordinates);
746 if (it != block_indices.end()) {
747 const size_t block_offset = it->second;
748 h_block_offsets[write_idx++] = block_offset;
751 h_block_offsets[write_idx++] = 0;
752 std::cerr <<
"Error: Hessian block coordinate not found!"
756 h_block_offsets[write_idx++] = 0;
772 std::unordered_map<BlockCoordinates, size_t> &block_indices,
773 thrust::device_vector<S> &d_hessian,
size_t *d_block_offsets,
776 const auto &d_active_factors = active_indices;
777 thrust::host_vector<size_t> h_active_factors = active_indices;
778 const auto d_ids = device_ids.data().get();
780 size_t mul_count = 0;
782 for (
size_t i = 0; i < num_vertices; i++) {
783 for (
size_t j = i; j < num_vertices; j++) {
784 mul_count += d_active_factors.size();
788 size_t write_idx = 0;
790 size_t stream_idx = 0;
792 for (
size_t i = 0; i < num_vertices; i++) {
793 const auto vi_active = vertex_descriptors[i]->get_active_state();
794 const auto vi_block_ids = vertex_descriptors[i]->get_block_ids();
795 for (
size_t j = i; j < num_vertices; j++) {
796 const auto vj_active = vertex_descriptors[j]->get_active_state();
798 const auto start_idx = write_idx;
799 write_idx += d_active_factors.size();
800 const auto stream = streams.select(stream_idx++);
802 const auto dim_i = vertex_descriptors[i]->dimension();
803 const auto dim_j = vertex_descriptors[j]->dimension();
804 const size_t block_dim = dim_i * dim_j;
805 const size_t num_threads = d_active_factors.size() * block_dim;
806 const size_t threads_per_block = 256;
807 const size_t num_blocks =
808 (num_threads + threads_per_block - 1) / threads_per_block;
810 ops::compute_hessian_block_kernel<T, S, N, error_dim>
811 <<<num_blocks, threads_per_block, 0, stream>>>(
812 i, j, dim_i, dim_j, d_active_factors.data().get(),
813 d_active_factors.size(), d_ids, d_block_offsets + start_idx,
814 vi_active, vj_active, vertex_descriptors[i]->get_hessian_ids(),
815 vertex_descriptors[j]->get_hessian_ids(),
816 jacobians[i].data.data().get(), jacobians[j].data.data().get(),
817 precision_matrices.data().get(), chi2_derivative.data().get(),
818 d_hessian.data().get());
832 global_to_local_map.clear();
833 local_to_global_map.clear();
843 precision_matrices.clear();
847 chi2_derivative.clear();
851 device_active.clear();
852 active_indices.clear();
854 for (
size_t i = 0; i < N; i++) {
855 jacobians[i].data.clear();
864 constexpr static std::array<S, error_dim * error_dim>
866 constexpr size_t E = error_dim;
867 return []()
constexpr {
868 std::array<S, E *E> pmat = {};
869 for (
size_t i = 0; i < E; i++) {
870 pmat[i * E + i] =
static_cast<S
>(1.0);
Base class for factor descriptors.
Definition factor.hpp:36
Represents a group of factors which will be processed together on the GPU.
Definition factor.hpp:121
static constexpr size_t get_num_vertices()
Returns the number of vertices connected to each factor.
Definition factor.hpp:269
virtual bool store_jacobians() override
Returns whether the Jacobians are stored.
Definition factor.hpp:640
void to_device() override
Prepares descriptor for GPU processing.
Definition factor.hpp:472
virtual bool supports_dynamic_jacobians() override
Determines whether dynamic Jacobian computation is supported (only supported for manual differentiati...
Definition factor.hpp:647
void compute_hessian_block_diagonal_async(std::unordered_map< BaseVertexDescriptor< T, S > *, thrust::device_vector< InvP > > &block_diagonals, const T *jacobian_scales, cudaStream_t stream) override
Computes the block diagonal of the Hessian matrix asynchronously.
Definition factor.hpp:243
size_t add_factor(const std::array< size_t, N > &ids, const ObservationType &obs=ObservationType(), const S *precision_matrix=nullptr, const ConstraintDataType &constraint_data=ConstraintDataType(), const LossType &loss_func=LossType())
Adds a factor to the descriptor.
Definition factor.hpp:374
void compute_hessian_scalar_diagonal_async(T *diagonal, const T *jacobian_scales) override
Computes the scalar diagonal of the Hessian matrix asynchronously.
Definition factor.hpp:261
void compute_jacobians(StreamPool &streams) override
Computes the Jacobians using manual differentiation.
Definition factor.hpp:233
virtual bool use_autodiff() override
Determines whether to use automatic differentiation based on the traits of the factor.
Definition factor.hpp:622
virtual size_t setup_hessian_computation(std::unordered_map< BlockCoordinates, size_t > &block_indices, thrust::device_vector< S > &d_hessian, size_t *h_block_offsets, StreamPool &streams) override
Sets up data needed to compute the upper triangle of the Hessian matrix on the GPU.
Definition factor.hpp:702
static constexpr std::array< size_t, N > get_vertex_sizes()
Returns the sizes (dimensions) of the vertices.
Definition factor.hpp:519
void initialize_jacobian_storage() override
Allocates memory for the Jacobians.
Definition factor.hpp:526
size_t internal_count() const
Returns the number of all active and inactive factors.
Definition factor.hpp:443
void compute_Jv(T *out, T *in, const T *jacobian_scales, StreamPool &streams) override
Computes the product of the Jacobian matrix and a vector.
Definition factor.hpp:213
virtual size_t get_residual_size() const override
Returns the size of the vector for all active and inactive residuals.
Definition factor.hpp:543
size_t get_num_descriptors() const override
Returns the number of vertex descriptors (same as the number of vertices for each factor).
Definition factor.hpp:275
virtual void get_hessian_block_coordinates(thrust::device_vector< BlockCoordinates > &block_coords) override
Determines which Hessian blocks are filled in (upper triangle).
Definition factor.hpp:657
virtual void scale_jacobians_async(T *jacobian_scales) override
Scales the Jacobians asynchronously.
Definition factor.hpp:613
void set_active(size_t id, const uint8_t active_value)
Sets the active state of a factor.
Definition factor.hpp:419
FactorDescriptor(VertexDescPtrs... vertex_descriptors)
Constructs a FactorDescriptor with the given vertex descriptors.
Definition factor.hpp:183
void compute_error_autodiff(StreamPool &streams) override
Simultaneously computes the residuals and corresponding Jacobians using automatic differentiation.
Definition factor.hpp:199
void clear()
Clears all data associated with this factor descriptor.
Definition factor.hpp:830
void compute_error() override
Computes the residuals across all factors in the descriptor.
Definition factor.hpp:193
virtual size_t execute_hessian_computation(std::unordered_map< BlockCoordinates, size_t > &block_indices, thrust::device_vector< S > &d_hessian, size_t *d_block_offsets, StreamPool &streams) override
Executes the Hessian block computations for this descriptor using the data from setup_hessian_computa...
Definition factor.hpp:771
std::array< size_t, N > get_vertex_ids(const size_t id) const
Returns the vertex ids connected to a specific factor.
Definition factor.hpp:595
VertexPointerPointerTuple get_vertices()
Returns a tuple of vertex pointers.
Definition factor.hpp:504
T chi2(const size_t id) const
Returns the chi-squared value for a specific factor.
Definition factor.hpp:564
static constexpr std::array< S, error_dim *error_dim > get_default_precision_matrix()
Gets the default precision matrix (identity matrix).
Definition factor.hpp:865
void remove_factor(const size_t id)
Removes a factor by its id.
Definition factor.hpp:308
void link_factors(const std::array< BaseVertexDescriptor< T, S > *, N > &vertex_descriptors)
Links the factor to the given vertex descriptors. This is already called during construction.
Definition factor.hpp:486
ConstraintDataType * get_constraint_data(const size_t id)
Returns the constraint data for a specific factor.
Definition factor.hpp:580
void compute_Jtv(T *out, T *in, const T *jacobian_scales, StreamPool &streams) override
Computes the product of the transposed Jacobian matrix and a vector.
Definition factor.hpp:221
void reserve(size_t size)
Reserves memory for the factor. Generally, you should always reserve the memory you need before addin...
Definition factor.hpp:288
size_t active_count() const override
Returns the number of active factors.
Definition factor.hpp:449
virtual void set_jacobian_storage(const bool store)
Sets whether to store the Jacobians. If false, Jacobians will be computed dynamically (on-the-fly),...
Definition factor.hpp:632
JacobianStorage< S > * get_jacobians() override
Returns the Jacobian storage for the factor.
Definition factor.hpp:280
void compute_b_async(T *b, const T *jacobian_scales) override
Computes the gradient vector b asynchronously.
Definition factor.hpp:206
virtual T chi2() override
Computes the chi-squared value for all active and inactive factors.
Definition factor.hpp:551
void initialize_device_ids(const uint8_t optimization_level) override
Initializes the device ids and a list of active factors.
Definition factor.hpp:455
void reset_active()
Resets the active state of all factors to 0x1.
Definition factor.hpp:435
Storage class for Jacobians of a factor.
Definition factor.hpp:26
The top-level namespace for Graphite.
Definition eigen_solver.cpp:4
Definition differentiation.hpp:8
Definition factor.hpp:103