Graphite
Loading...
Searching...
No Matches
hessian.hpp
Go to the documentation of this file.
1
2#pragma once
4
5namespace graphite {
6
7namespace ops {
8
// Accumulates one off-diagonal Hessian block H(vi, vj) = w * J_i^T * P * J_j
// over all active factors, where w = chi2_derivative[factor] (e.g. a robust
// kernel weight). One thread produces one scalar element of one factor's
// block, so the launch must cover num_active_factors * dim_i * dim_j threads.
//
//   vi, vj              vertex slots of the factor (indices into its id list)
//   dim_i, dim_j        tangent dimensions of the two vertices
//   active_factors      compacted list of active factor indices
//   ids                 per-factor vertex ids, N entries per factor
//   block_offsets       per-active-factor element offset of the output block
//                       inside `hessian`
//   vi_active/vj_active per-vertex active flags for the two vertex types
//   hessian_offset_i/j  per-vertex offsets in the assembled Hessian; used only
//                       to decide whether this block is stored transposed
//   jacobian_i/j        Jacobian blocks with E-length columns (column k of a
//                       block starts at k * E)
//   precision           per-factor E x E precision matrices, p[i * E + j]
//   chi2_derivative     per-factor scalar weight applied to the block
//   hessian             output buffer; blocks are column major and accumulated
//                       atomically because several factors may share a block
template <typename T, typename S, size_t N, size_t E>
__global__ void compute_hessian_block_kernel(
    const size_t vi, const size_t vj, size_t dim_i, size_t dim_j,
    const size_t *active_factors, const size_t num_active_factors,
    const size_t *ids, const size_t *block_offsets, const uint8_t *vi_active,
    const uint8_t *vj_active, const size_t *hessian_offset_i,
    const size_t *hessian_offset_j, const S *jacobian_i, const S *jacobian_j,
    const S *precision, const S *chi2_derivative, S *hessian) {
  // TODO: simplify and optimize this kernel
  const auto idx = get_thread_id();

  // Each active factor owns a contiguous run of dim_i * dim_j threads.
  const auto block_id = idx / (dim_i * dim_j);

  if (block_id >= num_active_factors) {
    return;
  }

  const auto factor_idx = active_factors[block_id];

  const size_t vi_id = ids[factor_idx * N + vi];
  const size_t vj_id = ids[factor_idx * N + vj];

  // The block contributes only if both incident vertices are active.
  if (is_vertex_active(vi_active, vi_id) &&
      is_vertex_active(vj_active, vj_id)) {

    const size_t block_size = dim_i * dim_j;
    const size_t offset = idx % block_size;
    // Hessian block may be rectangular
    // output blocks are all column major

    // If vi comes after vj in the assembled Hessian, store H^T instead by
    // swapping the two Jacobians (presumably so only one triangle of the
    // Hessian is assembled — TODO confirm against the assembly code).
    const bool transposed = hessian_offset_i[vi_id] > hessian_offset_j[vj_id];

    auto ji = factor_idx * E * dim_i + jacobian_i;
    auto jj = factor_idx * E * dim_j + jacobian_j;
    const auto precision_offset = factor_idx * E * E;
    const auto p = precision + precision_offset;

    if (transposed) {
      cuda::std::swap(ji, jj);
      cuda::std::swap(dim_i, dim_j);
    }

    // Element coordinates inside the (possibly swapped) column-major block.
    const size_t row = offset % dim_i;
    const size_t col = offset / dim_i;

    // Each thread computes one element of the Hessian block
    using highp = T;
    highp value = 0;

    // computes J_i^T * P * J_j

    // Jacobian columns are E long, so column k starts at k * E.
    const auto J = jj + col * E;
    const auto Jt = ji + row * E;
#pragma unroll
    for (int i = 0; i < E; i++) { // p row
      highp pj = 0;
#pragma unroll
      for (int j = 0; j < E; j++) { // p col
        pj += (highp)p[i * E + j] * (highp)J[j];
      }
      value += (highp)Jt[i] * pj;
    }

    value *= (highp)chi2_derivative[factor_idx];

    // Multiple factors may write the same block element, hence atomicAdd.
    auto block = hessian + (block_offsets[block_id] + (row + col * dim_i));
    S lp_value = static_cast<S>(value);
    atomicAdd(block, lp_value);
  }
}
79
// Levenberg-Marquardt damping: for every active vertex, augments the diagonal
// of its D x D (column-major) Hessian block in place:
//   H[i][i] = d_i + mu * clamp(d_i, 1e-6, 1e32)
// where d_i is the undamped diagonal read from `scalar_diagonal` (presumably
// kept separately so re-damping with a new mu does not compound — confirm at
// callers). One thread handles one vertex; launch with num_threads = vertex
// count. Arithmetic is done in double so a low-precision S does not lose the
// small damping term.
template <typename S, int D>
__global__ void augment_hessian_diagonal_kernel(S *diagonal_blocks,
                                                S *scalar_diagonal, const S mu,
                                                const uint8_t *active_state,
                                                const size_t num_threads) {
  const size_t idx = get_thread_id();

  if (idx >= num_threads) {
    return;
  }

  constexpr auto block_size = D * D;

  const auto vertex_id = idx;
  if (!is_vertex_active(active_state, vertex_id)) {
    return;
  }

  S *block = diagonal_blocks + vertex_id * block_size;
  for (int i = 0; i < D; i++) {
    const double diag = static_cast<double>(scalar_diagonal[vertex_id * D + i]);
    // Clamp with device-native fmin/fmax instead of std::clamp: the host
    // <algorithm> overloads are not guaranteed to be callable in device code.
    const double clamped = fmin(fmax(diag, 1.0e-6), 1.0e32);
    const double new_diag = diag + static_cast<double>(mu) * clamped;
    block[i * D + i] = static_cast<S>(new_diag);
  }
}
110
// Host wrapper: launches one thread per vertex of `v` on `stream` to apply
// LM damping to the stored block-diagonal, reading the undamped diagonal
// values from `scalar_diagonal`.
template <typename T, typename S, typename V>
void augment_block_diagonal(V *v, InvP<T, S> *block_diagonal,
                            InvP<T, S> *scalar_diagonal, T mu,
                            cudaStream_t stream) {
  // One thread per vertex.
  const size_t total_threads = v->count();
  constexpr size_t block_dim = 256;
  const size_t grid_dim = (total_threads + block_dim - 1) / block_dim;

  augment_hessian_diagonal_kernel<InvP<T, S>, V::dim>
      <<<grid_dim, block_dim, 0, stream>>>(block_diagonal, scalar_diagonal,
                                           static_cast<InvP<T, S>>(mu),
                                           v->get_active_state(),
                                           total_threads);
}
125
// Applies a block-Jacobi preconditioner: z = M * r, where M is stored as one
// D x D column-major block per vertex in `block_diagonal`.
// NOTE(review): the kernel multiplies by the stored block — it does not
// solve — so `block_diagonal` is presumably already inverted; confirm at the
// call sites. One thread computes one row of one vertex's D-length output
// segment, so num_threads = num_vertices * D.
template <typename T, typename S, int D>
__global__ void apply_block_jacobi_kernel(T *z, const T *r, S *block_diagonal,
                                          const size_t *hessian_ids,
                                          const uint8_t *active_state,
                                          const size_t num_threads) {
  const size_t idx = get_thread_id();
  const auto local_vertex_id = idx / D;

  // Bounds check first: the short-circuit keeps is_vertex_active from being
  // queried with an out-of-range vertex id.
  if (idx >= num_threads || !is_vertex_active(active_state, local_vertex_id)) {
    return;
  }

  constexpr auto block_size = D * D;

  S *block = block_diagonal + local_vertex_id * block_size;
  // Start of this vertex's segment in the assembled vectors r and z.
  const auto hessian_offset = hessian_ids[local_vertex_id];
  const auto offset = idx % D;
  const auto row = offset;

  // value = (block * r_segment)[row]; block is column major.
  T value = 0;
#pragma unroll
  for (size_t i = 0; i < D; i++) {
    value += (T)block[row + i * D] * r[hessian_offset + i];
  }
  z[hessian_offset + row] = value;
}
152
// Host wrapper around apply_block_jacobi_kernel: launches one thread per
// scalar parameter of vertex set `v` on `stream` to compute z = M * r.
template <typename T, typename S, typename V>
void apply_block_jacobi(V *v, T *z, const T *r, InvP<T, S> *block_diagonal,
                        cudaStream_t stream) {
  // One thread per scalar parameter: vertex count times per-vertex dimension.
  const size_t total_threads = v->count() * v->dimension();
  constexpr size_t block_dim = 256;
  const size_t grid_dim = (total_threads + block_dim - 1) / block_dim;

  apply_block_jacobi_kernel<T, InvP<T, S>, V::dim>
      <<<grid_dim, block_dim, 0, stream>>>(z, r, block_diagonal,
                                           v->get_hessian_ids(),
                                           v->get_active_state(),
                                           total_threads);
}
167
// Accumulates the diagonal Hessian block of vertex slot I for each active
// factor, using stored Jacobians:
//   H_block(local_id) += chi2' * J^T * P * J
// One thread computes one element of one D x D (column-major) block, so
// num_threads = num_active_factors * D * D.
//
//   diagonal_blocks  one D x D block per vertex, accumulated atomically
//   jacs             per-factor Jacobian blocks with E-length columns
//   active_ids       compacted list of active factor indices
//   ids              per-factor vertex ids, N per factor; slot I is used
//   active_state     per-vertex active flags
//   pmat             per-factor E x E precision matrices
//   chi2_derivative  per-factor scalar weight (e.g. robust kernel derivative)
template <typename highp, typename InvP, typename T, size_t I, size_t N,
          size_t E, size_t D>
__global__ void compute_hessian_diagonal_kernel(
    InvP *diagonal_blocks, const T *jacs, const size_t *active_ids,
    const size_t *ids, const uint8_t *active_state, const T *pmat,
    const T *chi2_derivative, const size_t num_threads) {
  const size_t idx = get_thread_id();

  if (idx >= num_threads) {
    return;
  }

  constexpr size_t jacobian_size = D * E;
  constexpr size_t block_size = D * D;

  // Jacobians are stored as E x d column major (d = vertex size), so column k
  // of a factor's block starts at jacobian_offset + k * E.
  const size_t factor_id = active_ids[idx / block_size];
  // N is the number of vertices involved in the factor; I selects the slot
  // this instantiation handles.
  const size_t local_id = ids[factor_id * N + I];
  if (!is_vertex_active(active_state, local_id)) {
    return;
  }
  const auto jacobian_offset = factor_id * jacobian_size;

  constexpr auto precision_matrix_size = E * E;
  const auto precision_offset = factor_id * precision_matrix_size;

  // Element (row, col) of the D x D block, column major.
  const size_t offset = idx % block_size;
  const size_t row = offset % D;
  const size_t col = offset / D;

  // value = Jt_row^T * P * J_col, accumulated in the high-precision type.
  const T *Jt = jacs + jacobian_offset + row * E;
  const T *J = jacs + jacobian_offset + col * E;

  const T *precision_matrix = pmat + precision_offset;

  highp value = 0;
#pragma unroll
  for (int i = 0; i < E; i++) { // pmat row
    highp pj = 0;
#pragma unroll
    for (int j = 0; j < E; j++) { // pmat col
      pj += (highp)precision_matrix[i * E + j] * (highp)J[j];
    }
    value += (highp)Jt[i] * pj;
  }

  value *= (highp)chi2_derivative[factor_id];

  // Several factors can touch the same vertex block, hence the atomic.
  InvP *block = diagonal_blocks + (local_id * block_size + row + col * D);
  InvP lp_value = static_cast<InvP>(value);
  atomicAdd(block, lp_value);
}
268
// Diagonal-block accumulation for analytical factors whose Jacobians are not
// stored: the Jacobian block for vertex slot I is recomputed in-kernel via
// compute_Jblock, then H_block += chi2' * (S J)^T * P * (S J) is accumulated,
// where S is the per-parameter jacobian_scales applied to both sides.
// One thread per element of one D x D block per active factor.
// NOTE(review): unlike the scalar-diagonal variant there is no unscaled path
// here — jacobian_scales is always dereferenced, so it must be non-null;
// confirm at call sites.
template <typename highp, typename InvP, typename T, size_t I, size_t N,
          size_t E, size_t D, typename VT, typename F>
__global__ void compute_hessian_diagonal_dynamic_kernel(
    InvP *diagonal_blocks, const size_t *active_ids, const size_t *ids,
    const size_t *hessian_ids, const VT args,
    const typename F::ObservationType *obs, const highp *jacobian_scales,
    const typename F::ConstraintDataType *constraint_data,
    const uint8_t *active_state, const T *pmat, const T *chi2_derivative,
    const size_t num_threads) {
  const size_t idx = get_thread_id();

  if (idx >= num_threads) {
    return;
  }

  constexpr size_t jacobian_size = D * E;
  constexpr size_t block_size = D * D;

  // Stored as E x d col major, but we need to transpose it to d x E, where d is
  // the vertex size
  const size_t factor_id = active_ids[idx / block_size];
  const size_t local_id =
      ids[factor_id * N +
          I]; // N is the number of vertices involved in the factor
  if (!is_vertex_active(active_state, local_id)) {
    return;
  }

  constexpr auto precision_matrix_size = E * E;
  const auto precision_offset = factor_id * precision_matrix_size;

  // Identify H block row and column (column major)

  const size_t offset = idx % block_size;
  const size_t row = offset % D;
  const size_t col = offset / D;

  // Per-thread recomputation of the full Jacobian block into local storage.
  highp jacobian[jacobian_size];

  compute_Jblock<highp, I, N, typename F::ObservationType, E, F, VT>(
      jacobian, factor_id, local_id, obs, constraint_data, ids, hessian_ids,
      args, std::make_index_sequence<N>{});

  const auto hessian_offset = hessian_ids[local_id];

  // Columns are E long: Jt is the column for `row`, J the column for `col`.
  const highp *Jt = jacobian + row * E;
  const highp *J = jacobian + col * E;

  const T *precision_matrix = pmat + precision_offset;

  // Per-parameter scale factors for this vertex's segment of the Hessian.
  const highp *jscale = jacobian_scales + hessian_offset;

  highp value = 0;
#pragma unroll
  for (int i = 0; i < E; i++) { // pmat row
    highp pj = 0;
#pragma unroll
    for (int j = 0; j < E; j++) { // pmat col
      // jscale[col] is invariant over this inner loop.
      pj += (highp)precision_matrix[i * E + j] * (highp)J[j] * jscale[col];
    }
    value += (highp)Jt[i] * jscale[row] * pj;
  }

  value *= (highp)chi2_derivative[factor_id];

  // Several factors touching the same vertex accumulate atomically.
  InvP *block = diagonal_blocks + (local_id * block_size + row + col * D);

  InvP lp_value = static_cast<InvP>(value);
  atomicAdd(block, lp_value);
}
339
// For each vertex slot Is of factor type F (fold over Is...), launches the
// kernel that accumulates that slot's contribution to the Hessian
// block-diagonal on `stream`, one thread per element of each D x D block.
// Uses the stored-Jacobian kernel when Jacobians are materialized (or the
// factor is not analytical); otherwise recomputes Jacobians on the fly.
template <typename T, typename S, typename F, std::size_t... Is>
void launch_kernel_block_diagonal(
    F *f, std::array<InvP<T, S> *, F::get_num_vertices()> &diagonal_blocks,
    std::array<const size_t *, F::get_num_vertices()> &hessian_ids,
    std::array<S *, F::get_num_vertices()> &jacs, const T *jacobian_scales,
    const size_t num_factors, cudaStream_t stream, std::index_sequence<Is...>) {
  (([&] {
     constexpr size_t num_vertices = F::get_num_vertices();
     constexpr size_t dimension = F::get_vertex_sizes()[Is];
     // One thread per element of one D x D block per active factor.
     const size_t num_threads = num_factors * dimension * dimension;
     size_t threads_per_block = 256;
     size_t num_blocks =
         (num_threads + threads_per_block - 1) / threads_per_block;

     if (f->store_jacobians() || !is_analytical<F>()) {
       // Jacobians are available in memory; read them directly.
       compute_hessian_diagonal_kernel<T, InvP<T, S>, S, Is, num_vertices,
                                       F::error_dim, dimension>
           <<<num_blocks, threads_per_block, 0, stream>>>(
               diagonal_blocks[Is], jacs[Is], f->active_indices.data().get(),
               f->device_ids.data().get(),
               f->vertex_descriptors[Is]->get_active_state(),
               f->precision_matrices.data().get(),
               f->chi2_derivative.data().get(), num_threads);
     } else if constexpr (is_analytical<F>()) {
       // No stored Jacobians: evaluate them analytically inside the kernel.
       // (if constexpr keeps this instantiation out of non-analytical F.)
       compute_hessian_diagonal_dynamic_kernel<
           T, InvP<T, S>, S, Is, num_vertices, F::error_dim, dimension,
           typename F::VertexPointerPointerTuple, F>
           <<<num_blocks, threads_per_block, 0, stream>>>(
               diagonal_blocks[Is], f->active_indices.data().get(),
               f->device_ids.data().get(), hessian_ids[Is], f->get_vertices(),
               f->device_obs.data().get(), jacobian_scales,
               f->data.data().get(),
               f->vertex_descriptors[Is]->get_active_state(),
               f->precision_matrices.data().get(),
               f->chi2_derivative.data().get(), num_threads);
     }
   }()),
   ...);
}
390
// Host entry point: accumulates the block-diagonal of the Hessian for every
// vertex slot of factor type F into `diagonal_blocks`, launching on `stream`.
// `jacobian_scales` is forwarded to the dynamic (recompute-Jacobian) path.
template <typename T, typename S, typename F>
void compute_block_diagonal(
    F *f, std::array<InvP<T, S> *, F::get_num_vertices()> &diagonal_blocks,
    const T *jacobian_scales, cudaStream_t stream) {

  constexpr auto num_vertices = F::get_num_vertices();

  // Gather per-vertex device pointers; at this point all necessary data is
  // already resident on the GPU. (Removed previously unused locals `verts`
  // and `vertex_sizes`.)
  std::array<S *, num_vertices> jacs;
  std::array<const size_t *, num_vertices> hessian_ids;
  for (size_t i = 0; i < num_vertices; i++) {
    jacs[i] = f->jacobians[i].data.data().get();
    hessian_ids[i] = f->vertex_descriptors[i]->get_hessian_ids();
  }

  const auto num_factors = f->active_count();

  launch_kernel_block_diagonal<T, S>(f, diagonal_blocks, hessian_ids, jacs,
                                     jacobian_scales, num_factors, stream,
                                     std::make_index_sequence<num_vertices>{});
}
416
// Accumulates only the diagonal entries (row == col) of each vertex's Hessian
// block into the flat vector `diagonal`, using stored Jacobians:
//   diagonal[hessian_ids[v] + r] += chi2' * J_r^T * P * J_r
// One thread per diagonal entry per active factor, so
// num_threads = num_active_factors * D.
template <typename highp, typename T, size_t I, size_t N, size_t E, size_t D>
__global__ void compute_hessian_scalar_diagonal_kernel(
    highp *diagonal, const T *jacs, const size_t *active_ids, const size_t *ids,
    const size_t *hessian_ids, const uint8_t *active_state, const T *pmat,
    const T *chi2_derivative, const size_t num_threads) {
  const size_t idx = get_thread_id();

  if (idx >= num_threads) {
    return;
  }

  constexpr size_t jacobian_size = D * E;

  // Stored as E x d col major, but we need to transpose it to d x E, where d is
  // the vertex size
  const size_t factor_id = active_ids[idx / D];
  const size_t local_id =
      ids[factor_id * N +
          I]; // N is the number of vertices involved in the factor
  if (!is_vertex_active(active_state, local_id)) {
    return;
  }
  const auto jacobian_offset = factor_id * jacobian_size;

  constexpr auto precision_matrix_size = E * E;
  const auto precision_offset = factor_id * precision_matrix_size;

  // Identify H block row and column (column major)
  // Only diagonal entries are computed, so col == row and Jt/J alias the same
  // Jacobian column below.
  const size_t row = idx % D;
  const size_t col = row;

  // left[i]*pmat[i*E+j]*right[i] = h value
  // where i goes from 0 to E
  const T *Jt = jacs + jacobian_offset + row * E;
  const T *J = jacs + jacobian_offset + col * E;

  const T *precision_matrix = pmat + precision_offset;

  highp value = 0;
#pragma unroll
  for (int i = 0; i < E; i++) { // pmat row
    highp pj = 0;
#pragma unroll
    for (int j = 0; j < E; j++) { // pmat col
      pj += (highp)precision_matrix[i * E + j] * (highp)J[j];
    }
    value += (highp)Jt[i] * pj;
  }

  value *= (highp)chi2_derivative[factor_id];

  // T* block = diagonal_blocks+(local_id*block_size + row + col*D);
  // Scatter into the flat diagonal at this vertex's offset; atomic because
  // several factors can share the vertex.
  const size_t hessian_offset = hessian_ids[local_id];
  highp *location = diagonal + hessian_offset + row;
  // T lp_value = static_cast<T>(value);
  atomicAdd(location, value);
}
474
// Scalar-diagonal accumulation for analytical factors whose Jacobians are not
// stored: the Jacobian block for vertex slot I is recomputed in-kernel via
// compute_Jblock, then diagonal[hessian_ids[v] + r] += chi2' * J_r^T * P * J_r
// (with optional per-parameter scaling when use_scales is true).
// One thread per diagonal entry per active factor.
// NOTE(review): the `jacs` parameter is never read here — apparently kept so
// the signature mirrors compute_hessian_scalar_diagonal_kernel; confirm before
// removing.
template <typename highp, typename T, size_t I, size_t N, size_t E, size_t D,
          typename VT, typename F, bool use_scales>
__global__ void compute_hessian_scalar_diagonal_dynamic_kernel(
    highp *diagonal, const T *jacs, const size_t *active_ids, const size_t *ids,
    const size_t *hessian_ids, const VT args,
    const typename F::ObservationType *obs, const highp *jacobian_scales,
    const typename F::ConstraintDataType *constraint_data,
    const uint8_t *active_state, const T *pmat, const T *chi2_derivative,
    const size_t num_threads) {
  const size_t idx = get_thread_id();

  if (idx >= num_threads) {
    return;
  }

  constexpr size_t jacobian_size = D * E;

  // Stored as E x d col major, but we need to transpose it to d x E, where d is
  // the vertex size
  const size_t factor_id = active_ids[idx / D];
  const size_t local_id =
      ids[factor_id * N +
          I]; // N is the number of vertices involved in the factor
  if (!is_vertex_active(active_state, local_id)) {
    return;
  }

  constexpr auto precision_matrix_size = E * E;
  const auto precision_offset = factor_id * precision_matrix_size;

  // Identify H block row and column (column major)
  // Only diagonal entries are computed, so col == row.
  const size_t row = idx % D;
  const size_t col = row;

  // using G = std::conditional_t<is_low_precision<T>::value, highp, T>;
  // G jacobian[jacobian_size];
  // Per-thread recomputation of the full Jacobian block into local storage.
  highp jacobian[jacobian_size];

  compute_Jblock<highp, I, N, typename F::ObservationType, E, F, VT>(
      jacobian, factor_id, local_id, obs, constraint_data, ids, hessian_ids,
      args, std::make_index_sequence<N>{});
  const auto hessian_offset = hessian_ids[local_id];
  // Columns are E long; with col == row these two pointers alias.
  const highp *Jt = jacobian + row * E;
  const highp *J = jacobian + col * E;

  const T *precision_matrix = pmat + precision_offset;
  highp value = 0;
  if constexpr (use_scales) {
    // Scaled path: jacobian_scales must be non-null here.
    const highp *jscale = jacobian_scales + hessian_offset;

#pragma unroll
    for (int i = 0; i < E; i++) { // pmat row
      highp pj = 0;
#pragma unroll
      for (int j = 0; j < E; j++) { // pmat col
        pj += (highp)precision_matrix[i * E + j] * (highp)J[j] * jscale[col];
      }
      value += (highp)Jt[i] * jscale[row] * pj;
    }
  } else {

#pragma unroll
    for (int i = 0; i < E; i++) { // pmat row
      highp pj = 0;
#pragma unroll
      for (int j = 0; j < E; j++) { // pmat col
        pj += (highp)precision_matrix[i * E + j] * (highp)J[j];
      }
      value += (highp)Jt[i] * pj;
    }
  }

  value *= (highp)chi2_derivative[factor_id];

  // T* block = diagonal_blocks+(local_id*block_size + row + col*D);
  // Scatter atomically into the flat diagonal at this vertex's offset.
  highp *location = diagonal + hessian_offset + row;
  // T lp_value = static_cast<T>(value);
  atomicAdd(location, value);
}
554
// For each vertex slot Is of factor type F (fold over Is...), launches the
// kernel that accumulates that slot's contribution to the scalar Hessian
// diagonal, one thread per diagonal entry per factor.
// `stream` was added (defaulted to the legacy default stream, so existing
// callers are unaffected) for consistency with launch_kernel_block_diagonal;
// pass an explicit stream to overlap with other work.
template <typename T, typename S, typename F, std::size_t... Is>
void launch_kernel_hessian_scalar_diagonal(
    F *f, T *diagonal,
    std::array<const size_t *, F::get_num_vertices()> &hessian_ids,
    std::array<S *, F::get_num_vertices()> &jacs, const T *jacobian_scales,
    const size_t num_factors, std::index_sequence<Is...>,
    cudaStream_t stream = nullptr) {
  (([&] {
     constexpr size_t num_vertices = F::get_num_vertices();
     constexpr size_t dimension = F::get_vertex_sizes()[Is];
     const size_t num_threads = num_factors * dimension;

     size_t threads_per_block = 256;
     size_t num_blocks =
         (num_threads + threads_per_block - 1) / threads_per_block;

     if (f->store_jacobians() || !is_analytical<F>()) {
       // Jacobians are materialized; read them from memory.
       compute_hessian_scalar_diagonal_kernel<T, S, Is, num_vertices,
                                              F::error_dim, dimension>
           <<<num_blocks, threads_per_block, 0, stream>>>(
               diagonal, jacs[Is], f->active_indices.data().get(),
               f->device_ids.data().get(), hessian_ids[Is],
               f->vertex_descriptors[Is]->get_active_state(),
               f->precision_matrices.data().get(),
               f->chi2_derivative.data().get(), num_threads);
     } else if constexpr (is_analytical<F>()) {
       // Jacobians are recomputed in-kernel; choose the instantiation that
       // applies jacobian_scales only when scales were actually provided.
       if (jacobian_scales == nullptr) {
         compute_hessian_scalar_diagonal_dynamic_kernel<
             T, S, Is, num_vertices, F::error_dim, dimension,
             typename F::VertexPointerPointerTuple, F, false>
             <<<num_blocks, threads_per_block, 0, stream>>>(
                 diagonal, jacs[Is], f->active_indices.data().get(),
                 f->device_ids.data().get(), hessian_ids[Is],
                 f->get_vertices(), f->device_obs.data().get(), nullptr,
                 f->data.data().get(),
                 f->vertex_descriptors[Is]->get_active_state(),
                 f->precision_matrices.data().get(),
                 f->chi2_derivative.data().get(), num_threads);
       } else {
         compute_hessian_scalar_diagonal_dynamic_kernel<
             T, S, Is, num_vertices, F::error_dim, dimension,
             typename F::VertexPointerPointerTuple, F, true>
             <<<num_blocks, threads_per_block, 0, stream>>>(
                 diagonal, jacs[Is], f->active_indices.data().get(),
                 f->device_ids.data().get(), hessian_ids[Is],
                 f->get_vertices(), f->device_obs.data().get(),
                 jacobian_scales, f->data.data().get(),
                 f->vertex_descriptors[Is]->get_active_state(),
                 f->precision_matrices.data().get(),
                 f->chi2_derivative.data().get(), num_threads);
       }
     }
   }()),
   ...);
}
611
// Host entry point: accumulates the scalar Hessian diagonal for every vertex
// slot of factor type F into `diagonal`. `jacobian_scales` may be nullptr
// (no per-parameter scaling). Launches on the default stream.
template <typename T, typename S, typename F>
void compute_hessian_scalar_diagonal(F *f, T *diagonal,
                                     const T *jacobian_scales) {

  constexpr auto num_vertices = F::get_num_vertices();

  // Gather per-vertex device pointers; at this point all necessary data is
  // already resident on the GPU. (Removed previously unused locals `verts`
  // and `vertex_sizes`.)
  std::array<S *, num_vertices> jacs;
  std::array<const size_t *, num_vertices> hessian_ids;
  for (size_t i = 0; i < num_vertices; i++) {
    jacs[i] = f->jacobians[i].data.data().get();
    hessian_ids[i] = f->vertex_descriptors[i]->get_hessian_ids();
  }

  const auto num_factors = f->active_count();

  launch_kernel_hessian_scalar_diagonal<T, S>(
      f, diagonal, hessian_ids, jacs, jacobian_scales, num_factors,
      std::make_index_sequence<num_vertices>{});
}
636
637} // namespace ops
638
639} // namespace graphite