Graphite  0.5.0
GPU-accelerated graph optimization framework
Loading...
Searching...
No Matches
pcg_schur.hpp
Go to the documentation of this file.
1
2#pragma once
3
4#include <cublas_v2.h>
9#include <graphite/schur.hpp>
11#include <ratio>
12#include <thrust/device_vector.h>
13#include <thrust/execution_policy.h>
14#include <thrust/fill.h>
15#include <thrust/host_vector.h>
16#include <thrust/inner_product.h>
17
18#include <algorithm>
19#include <cmath>
20#include <limits>
21
22namespace graphite {
24template <typename T, typename S> class PCGSchurSolver : public Solver<T, S> {
25private:
26 Hessian<T, S> H;
27 SchurComplement<T, S> schur;
28 SchurPreconditioner<T, S> *preconditioner;
29
30 thrust::device_vector<T> r;
31 thrust::device_vector<T> p;
32 thrust::device_vector<T> z;
33 thrust::device_vector<T> Ap;
34 thrust::device_vector<T> x_backup;
35
36 size_t pose_dim;
37 size_t max_iter;
38 T tol;
39 T rejection_ratio;
40
41public:
42 PCGSchurSolver(size_t max_iter, T tol, T rejection_ratio,
43 SchurPreconditioner<T, S> *preconditioner)
44 : schur(H), pose_dim(0), max_iter(max_iter), tol(tol),
45 rejection_ratio(rejection_ratio), preconditioner(preconditioner) {}
46
47 ~PCGSchurSolver() = default;
48
49 virtual void update_structure(Graph<T, S> *graph,
50 StreamPool &streams) override {
51 H.build_structure(graph, streams);
52 schur.build_structure(graph, streams);
53 schur.setup_schur_vector_multiply(graph, streams);
54
55 const auto &offsets = graph->get_offset_vector();
56 pose_dim = offsets[schur.lowest_eliminated_block_col];
57
58 preconditioner->update_structure(graph, &schur, streams);
59
60 r.resize(pose_dim);
61 p.resize(pose_dim);
62 z.resize(pose_dim);
63 Ap.resize(pose_dim);
64 x_backup.resize(pose_dim);
65 }
66
67 virtual void update_values(Graph<T, S> *graph, StreamPool &streams) override {
68 H.update_values(graph, streams);
69 }
70
71 virtual void set_damping_factor(Graph<T, S> *graph, T damping_factor,
72 const bool use_identity,
73 StreamPool &streams) override {
74 H.apply_damping(graph, damping_factor, use_identity, streams);
75 preconditioner->set_damping_factor(graph, &schur, damping_factor,
76 use_identity, streams);
77 }
78
79 virtual bool solve(Graph<T, S> *graph, T *x, StreamPool &streams) override {
80 // We need to defer all recomputation (update_values) until this point to
81 // avoid excessive recomputation (e.g. when modifying the diagonal)
82 schur.update_values(graph, streams);
83 preconditioner->update_values(graph, &schur, streams);
84
85 // Now initialize vectors for PCG loop
86
87 const cudaStream_t stream = streams.select(0);
88 const auto dim_h = graph->get_hessian_dimension();
89
90 thrust::fill(thrust::cuda::par_nosync.on(stream), x, x + dim_h,
91 static_cast<T>(0));
92
93 thrust::copy(thrust::cuda::par_nosync.on(stream),
94 schur.get_b_Schur().begin(), schur.get_b_Schur().end(),
95 r.begin());
96
97 cudaStreamSynchronize(stream);
98
99 preconditioner->apply(graph, &schur, z.data().get(), r.data().get(),
100 streams);
101 thrust::copy(thrust::cuda::par_nosync.on(stream), z.begin(), z.end(),
102 p.begin());
103
104 T rz = thrust::inner_product(thrust::cuda::par.on(stream), r.begin(),
105 r.end(), z.begin(), static_cast<T>(0));
106 T rz_0 = std::numeric_limits<T>::infinity();
107
108 for (size_t k = 0; k < max_iter; ++k) {
109 if (rz == static_cast<T>(0)) {
110 break;
111 }
112
113 // 3. Compute Schur*p
114 schur.execute_schur_vector_multiply(graph, streams, Ap.data().get(),
115 p.data().get());
116
117 const T denom =
118 thrust::inner_product(thrust::cuda::par.on(stream), p.begin(),
119 p.end(), Ap.begin(), static_cast<T>(0));
120 if (denom == static_cast<T>(0) || std::isnan(denom)) {
121 break;
122 }
123
124 // 4. Compute alpha = dot(r, z) / dot(p, v2)
125 const T alpha = rz / denom;
126
127 // 5. x += alpha * p
128 thrust::copy(thrust::cuda::par_nosync.on(stream), x, x + pose_dim,
129 x_backup.begin());
130
131 ops::axpy_async(stream, pose_dim, x, alpha, p.data().get(), x);
132 // 6. r -= alpha * v2
133 ops::axpy_async(stream, pose_dim, r.data().get(), -alpha, Ap.data().get(),
134 r.data().get());
135
136 cudaStreamSynchronize(stream);
137 // Apply preconditioner again
138 preconditioner->apply(graph, &schur, z.data().get(), r.data().get(),
139 streams);
140
141 const T rz_new =
142 thrust::inner_product(thrust::cuda::par.on(stream), r.begin(),
143 r.end(), z.begin(), static_cast<T>(0));
144 if (std::abs(rz_new) > rejection_ratio * rz_0 || std::isnan(rz_new)) {
145 thrust::copy(thrust::cuda::par.on(stream), x_backup.begin(),
146 x_backup.end(), x);
147 break;
148 }
149 rz_0 = std::min(rz_0, std::abs(rz_new));
150
151 // 8. Compute beta
152 const T beta = rz_new / rz;
153 rz = rz_new;
154
155 // 9. Update p
156 ops::axpy_async(stream, pose_dim, p.data().get(), beta, p.data().get(),
157 z.data().get());
158 cudaStreamSynchronize(stream);
159
160 if (std::abs(static_cast<T>(rz_new)) < tol) {
161 break;
162 }
163 }
164
165 cudaStreamSynchronize(stream);
166 schur.compute_landmark_update(graph, streams, x + pose_dim, x);
167 return true;
168 }
169};
170
171} // namespace graphite
PCG solver using the Schur complement.
Definition pcg_schur.hpp:24
Linear solver interface. Implement this for your own linear solvers.
Definition solver.hpp:12
Definition stream.hpp:7
The top-level namespace for Graphite.
Definition eigen_solver.cpp:4