27 SchurComplement<T, S> schur;
28 SchurPreconditioner<T, S> *preconditioner;
30 thrust::device_vector<T> r;
31 thrust::device_vector<T> p;
32 thrust::device_vector<T> z;
33 thrust::device_vector<T> Ap;
34 thrust::device_vector<T> x_backup;
43 SchurPreconditioner<T, S> *preconditioner)
44 : schur(H), pose_dim(0), max_iter(max_iter), tol(tol),
45 rejection_ratio(rejection_ratio), preconditioner(preconditioner) {}
49 virtual void update_structure(Graph<T, S> *graph,
51 H.build_structure(graph, streams);
52 schur.build_structure(graph, streams);
53 schur.setup_schur_vector_multiply(graph, streams);
55 const auto &offsets = graph->get_offset_vector();
56 pose_dim = offsets[schur.lowest_eliminated_block_col];
58 preconditioner->update_structure(graph, &schur, streams);
64 x_backup.resize(pose_dim);
67 virtual void update_values(Graph<T, S> *graph,
StreamPool &streams)
override {
68 H.update_values(graph, streams);
71 virtual void set_damping_factor(Graph<T, S> *graph, T damping_factor,
72 const bool use_identity,
74 H.apply_damping(graph, damping_factor, use_identity, streams);
75 preconditioner->set_damping_factor(graph, &schur, damping_factor,
76 use_identity, streams);
79 virtual bool solve(Graph<T, S> *graph, T *x,
StreamPool &streams)
override {
82 schur.update_values(graph, streams);
83 preconditioner->update_values(graph, &schur, streams);
87 const cudaStream_t stream = streams.select(0);
88 const auto dim_h = graph->get_hessian_dimension();
90 thrust::fill(thrust::cuda::par_nosync.on(stream), x, x + dim_h,
93 thrust::copy(thrust::cuda::par_nosync.on(stream),
94 schur.get_b_Schur().begin(), schur.get_b_Schur().end(),
97 cudaStreamSynchronize(stream);
99 preconditioner->apply(graph, &schur, z.data().get(), r.data().get(),
101 thrust::copy(thrust::cuda::par_nosync.on(stream), z.begin(), z.end(),
104 T rz = thrust::inner_product(thrust::cuda::par.on(stream), r.begin(),
105 r.end(), z.begin(),
static_cast<T
>(0));
106 T rz_0 = std::numeric_limits<T>::infinity();
108 for (
size_t k = 0; k < max_iter; ++k) {
109 if (rz ==
static_cast<T
>(0)) {
114 schur.execute_schur_vector_multiply(graph, streams, Ap.data().get(),
118 thrust::inner_product(thrust::cuda::par.on(stream), p.begin(),
119 p.end(), Ap.begin(),
static_cast<T
>(0));
120 if (denom ==
static_cast<T
>(0) || std::isnan(denom)) {
125 const T alpha = rz / denom;
128 thrust::copy(thrust::cuda::par_nosync.on(stream), x, x + pose_dim,
131 ops::axpy_async(stream, pose_dim, x, alpha, p.data().get(), x);
133 ops::axpy_async(stream, pose_dim, r.data().get(), -alpha, Ap.data().get(),
136 cudaStreamSynchronize(stream);
138 preconditioner->apply(graph, &schur, z.data().get(), r.data().get(),
142 thrust::inner_product(thrust::cuda::par.on(stream), r.begin(),
143 r.end(), z.begin(),
static_cast<T
>(0));
144 if (std::abs(rz_new) > rejection_ratio * rz_0 || std::isnan(rz_new)) {
145 thrust::copy(thrust::cuda::par.on(stream), x_backup.begin(),
149 rz_0 = std::min(rz_0, std::abs(rz_new));
152 const T beta = rz_new / rz;
156 ops::axpy_async(stream, pose_dim, p.data().get(), beta, p.data().get(),
158 cudaStreamSynchronize(stream);
160 if (std::abs(
static_cast<T
>(rz_new)) < tol) {
165 cudaStreamSynchronize(stream);
166 schur.compute_landmark_update(graph, streams, x + pose_dim, x);