Operations for Graphite. More...
Classes | |
| struct | arg_helper |
| struct | BlockCopyOp |
| struct | first_arg |
| struct | first_arg< R(*)(First, Rest...)> |
| struct | fn_arity |
| struct | fn_arity< R(*)(Args...)> |
| struct | HplMatVecOp |
| struct | MulOp |
| Stores offsets for Hpl*Hll^(-1)*Hpl^T operation. More... | |
| struct | SchurMulTuple |
Functions | |
| template<size_t I, size_t N> | |
| __global__ void | flag_active_vertices_kernel (const size_t *ids, uint8_t *v_active, const uint8_t *f_active, const size_t factor_count, const uint8_t level) |
| template<typename F , std::size_t... Is> | |
| void | launch_kernel_flag_active (F *f, const uint8_t level, std::index_sequence< Is... >) |
| template<typename F > | |
| void | flag_active_vertices (F *f, const uint8_t level) |
| template<typename T , typename P , size_t E> | |
| __device__ T | compute_chi2 (const T *residuals, const P *pmat, const size_t factor_id) |
| template<typename T , typename S , size_t E, typename L > | |
| __global__ void | compute_chi2_kernel (T *chi2, S *chi2_derivative, const T *residuals, const size_t num_threads, const S *pmat, const L *loss) |
| template<typename T , typename S , typename F > | |
| void | compute_chi2_async (F *f) |
| template<typename F , typename D > | |
| __device__ constexpr bool | takes_vertices () |
| template<typename F , typename D , typename VertexPointers , typename ParameterBlocks , typename Observation , typename ConstraintData , typename ErrorVector , std::size_t... Is> | |
| __device__ void | call_error_fn (VertexPointers &vertices, ParameterBlocks &parameters, Observation &local_obs, ConstraintData &local_data, ErrorVector &local_error, std::index_sequence< Is... >) |
| template<typename T , typename S , size_t I, size_t N, typename M , size_t E, typename F , typename VT , std::size_t... Is> | |
| __global__ void | compute_error_kernel_autodiff (const M *obs, T *error, const typename F::ConstraintDataType *constraint_data, const size_t *active_ids, const size_t *ids, const size_t *hessian_ids, const size_t num_threads, VT args, S *jacs, const uint8_t *active_state, std::index_sequence< Is... >) |
| template<typename T , typename S , typename F , typename VT , std::size_t... Is> | |
| void | launch_kernel_autodiff (F *f, std::array< const size_t *, F::get_num_vertices()> &hessian_ids, VT &verts, std::array< S *, F::get_num_vertices()> &jacs, const size_t num_factors, StreamPool &streams, std::index_sequence< Is... >) |
| template<typename T , typename S , typename F > | |
| void | compute_error_autodiff (F *f, StreamPool &streams) |
| template<typename T , size_t N, typename M , size_t E, typename F , typename VT , std::size_t... Is> | |
| __global__ void | compute_error_kernel (const M *obs, T *error, const typename F::ConstraintDataType *constraint_data, const size_t *active_ids, const size_t *ids, const size_t num_threads, VT args, std::index_sequence< Is... >) |
| template<typename T , typename F > | |
| void | compute_error (F *f) |
| template<typename T , typename S , size_t N, size_t E> | |
| __global__ void | compute_hessian_block_kernel (const size_t vi, const size_t vj, size_t dim_i, size_t dim_j, const size_t *active_factors, const size_t num_active_factors, const size_t *ids, const size_t *block_offsets, const uint8_t *vi_active, const uint8_t *vj_active, const size_t *hessian_offset_i, const size_t *hessian_offset_j, const S *jacobian_i, const S *jacobian_j, const S *precision, const S *chi2_derivative, S *hessian) |
| template<typename S , int D> | |
| __global__ void | augment_hessian_diagonal_kernel (S *diagonal_blocks, S *scalar_diagonal, const S mu, const bool use_identity, const uint8_t *active_state, const size_t num_threads) |
| template<typename T , typename S , typename V > | |
| void | augment_block_diagonal (V *v, InvP< T, S > *block_diagonal, InvP< T, S > *scalar_diagonal, const T mu, const bool use_identity, cudaStream_t stream) |
| template<typename T , typename S , int D> | |
| __global__ void | apply_block_jacobi_kernel (T *z, const T *r, S *block_diagonal, const size_t *hessian_ids, const uint8_t *active_state, const size_t num_threads) |
| template<typename T , typename S , typename V > | |
| void | apply_block_jacobi (V *v, T *z, const T *r, InvP< T, S > *block_diagonal, cudaStream_t stream) |
| template<typename highp , typename InvP , typename T , size_t I, size_t N, size_t E, size_t D> | |
| __global__ void | compute_hessian_diagonal_kernel (InvP *diagonal_blocks, const T *jacs, const size_t *active_ids, const size_t *ids, const uint8_t *active_state, const T *pmat, const T *chi2_derivative, const size_t num_threads) |
| template<typename highp , typename InvP , typename T , size_t I, size_t N, size_t E, size_t D, typename VT , typename F > | |
| __global__ void | compute_hessian_diagonal_dynamic_kernel (InvP *diagonal_blocks, const size_t *active_ids, const size_t *ids, const size_t *hessian_ids, const VT args, const typename F::ObservationType *obs, const highp *jacobian_scales, const typename F::ConstraintDataType *constraint_data, const uint8_t *active_state, const T *pmat, const T *chi2_derivative, const size_t num_threads) |
| template<typename T , typename S , typename F , std::size_t... Is> | |
| void | launch_kernel_block_diagonal (F *f, std::array< InvP< T, S > *, F::get_num_vertices()> &diagonal_blocks, std::array< const size_t *, F::get_num_vertices()> &hessian_ids, std::array< S *, F::get_num_vertices()> &jacs, const T *jacobian_scales, const size_t num_factors, cudaStream_t stream, std::index_sequence< Is... >) |
| template<typename T , typename S , typename F > | |
| void | compute_block_diagonal (F *f, std::array< InvP< T, S > *, F::get_num_vertices()> &diagonal_blocks, const T *jacobian_scales, cudaStream_t stream) |
| template<typename highp , typename T , size_t I, size_t N, size_t E, size_t D> | |
| __global__ void | compute_hessian_scalar_diagonal_kernel (highp *diagonal, const T *jacs, const size_t *active_ids, const size_t *ids, const size_t *hessian_ids, const uint8_t *active_state, const T *pmat, const T *chi2_derivative, const size_t num_threads) |
| template<typename highp , typename T , size_t I, size_t N, size_t E, size_t D, typename VT , typename F , bool use_scales> | |
| __global__ void | compute_hessian_scalar_diagonal_dynamic_kernel (highp *diagonal, const T *jacs, const size_t *active_ids, const size_t *ids, const size_t *hessian_ids, const VT args, const typename F::ObservationType *obs, const highp *jacobian_scales, const typename F::ConstraintDataType *constraint_data, const uint8_t *active_state, const T *pmat, const T *chi2_derivative, const size_t num_threads) |
| template<typename T , typename S , typename F , std::size_t... Is> | |
| void | launch_kernel_hessian_scalar_diagonal (F *f, T *diagonal, std::array< const size_t *, F::get_num_vertices()> &hessian_ids, std::array< S *, F::get_num_vertices()> &jacs, const T *jacobian_scales, const size_t num_factors, std::index_sequence< Is... >) |
| template<typename T , typename S , typename F > | |
| void | compute_hessian_scalar_diagonal (F *f, T *diagonal, const T *jacobian_scales) |
| template<typename T , typename S , size_t I, size_t N, typename M , size_t E, typename F , typename VT , std::size_t... Is> | |
| __global__ void | compute_jacobian_kernel (const M *obs, T *error, S *jacs, const typename F::ConstraintDataType *constraint_data, const size_t *active_ids, const size_t *ids, const size_t num_threads, const VT args, const uint8_t *active_state, std::index_sequence< Is... >) |
| template<typename T , typename S , typename F , typename VT , std::size_t... Is> | |
| void | launch_kernel_jacobians (F *f, std::array< const size_t *, F::get_num_vertices()> &hessian_ids, VT &verts, std::array< S *, F::get_num_vertices()> &jacs, const size_t num_factors, StreamPool &streams, std::index_sequence< Is... >) |
| template<typename T , typename S , typename F > | |
| void | compute_jacobians (F *f, StreamPool &streams) |
| template<typename highp , typename T , size_t I, size_t N, size_t E, size_t D> | |
| __global__ void | scale_jacobians_kernel (T *jacs, const highp *jacobian_scales, const size_t *active_ids, const size_t *ids, const size_t *hessian_ids, const uint8_t *active_state, const size_t num_threads) |
| template<typename T , typename S , typename F , std::size_t... Is> | |
| void | launch_kernel_scale_jacobians (F *f, T *jacobian_scales, std::array< const size_t *, F::get_num_vertices()> &hessian_ids, std::array< S *, F::get_num_vertices()> &jacs, const size_t num_factors, std::index_sequence< Is... >) |
| template<typename T , typename S , typename F > | |
| void | scale_jacobians (F *f, T *jacobian_scales) |
| template<typename T , typename S , size_t I, size_t N, size_t E, typename F , std::size_t... Is> | |
| __global__ void | compute_b_kernel (T *b, const T *error, const size_t *active_ids, const size_t *ids, const size_t *hessian_ids, const size_t num_threads, const S *jacs, const uint8_t *active_state, const S *pmat, const S *loss_derivative, std::index_sequence< Is... >) |
| template<typename T , typename S , size_t I, size_t N, typename M , size_t E, typename F , typename VT , std::size_t... Is> | |
| __global__ void | compute_b_dynamic_kernel (T *b, const T *error, const size_t *active_ids, const size_t *ids, const size_t *hessian_ids, const size_t num_threads, const VT args, const M *obs, const T *jacobian_scales, const typename F::ConstraintDataType *constraint_data, const uint8_t *active_state, const S *pmat, const S *loss_derivative, std::index_sequence< Is... >) |
| template<typename T , typename S , typename F , std::size_t... Is> | |
| void | launch_kernel_compute_b (F *f, T *b, std::array< const size_t *, F::get_num_vertices()> &hessian_ids, std::array< S *, F::get_num_vertices()> &jacs, const T *jacobian_scales, const size_t num_factors, std::index_sequence< Is... >) |
| template<typename T , typename S , typename F > | |
| void | compute_b_async (F *f, T *b, const T *jacobian_scales) |
| template<typename T , typename G , size_t I, size_t N, typename M , size_t E, typename F , typename VT , std::size_t... Is> | |
| __device__ void | compute_Jcol_ad (Dual< T, G > *error, const size_t col, const size_t factor_id, const size_t vertex_id, const M *obs, const typename F::ConstraintDataType *constraint_data, size_t *ids, const size_t *hessian_ids, VT args, std::index_sequence< Is... >) |
| template<typename T , typename S , size_t I, size_t N, size_t E, size_t D, typename F , std::size_t... Is> | |
| __global__ void | compute_Jv_kernel (T *y, const T *x, const size_t *active_ids, const size_t *ids, const size_t *hessian_ids, const size_t num_threads, const S *jacs, const uint8_t *active_state, std::index_sequence< Is... >) |
| template<typename T , typename S , size_t I, size_t N, typename M , size_t E, typename F , typename VT , std::size_t... Is> | |
| __global__ void | compute_Jv_dynamic_manual2 (T *y, T *x, const M *obs, const T *jacobian_scales, const typename F::ConstraintDataType *constraint_data, const size_t *active_ids, const size_t *ids, const size_t *hessian_ids, const size_t num_factors, VT args, const uint8_t *active_state, std::index_sequence< Is... >) |
| template<typename T , typename S , typename F , std::size_t... Is> | |
| void | launch_kernel_compute_Jv (F *f, T *out, T *in, std::array< const size_t *, F::get_num_vertices()> &hessian_ids, std::array< S *, F::get_num_vertices()> &jacs, const T *jacobian_scales, const size_t num_factors, StreamPool &streams, std::index_sequence< Is... >) |
| template<typename T , typename S , typename F > | |
| void | compute_Jv (F *f, T *out, T *in, const T *jacobian_scales, StreamPool &streams) |
| template<typename T , typename S , size_t I, size_t N, size_t E, size_t D, typename F , std::size_t... Is> | |
| __global__ void | compute_JtPv_kernel (T *y, const T *x, const size_t *active_ids, const size_t *ids, const size_t *hessian_ids, const size_t num_threads, const S *jacs, const uint8_t *active_state, const S *pmat, const S *chi2_derivative, const std::index_sequence< Is... >) |
| template<typename T , typename S , size_t I, size_t N, typename M , size_t E, size_t D, typename F , typename VT , std::size_t... Is> | |
| __global__ void | compute_JtPv_dynamic_kernel (T *y, const T *x, const size_t *active_ids, const size_t *ids, const size_t *hessian_ids, const size_t num_threads, const VT args, const M *obs, const T *jacobian_scales, const typename F::ConstraintDataType *constraint_data, const uint8_t *active_state, const S *pmat, const S *chi2_derivative, const std::index_sequence< Is... >) |
| template<typename T , typename S , typename F , std::size_t... Is> | |
| void | launch_kernel_compute_JtPv (F *f, T *out, T *in, std::array< const size_t *, F::get_num_vertices()> &hessian_ids, std::array< S *, F::get_num_vertices()> &jacs, const T *jacobian_scales, const size_t num_factors, StreamPool &streams, std::index_sequence< Is... >) |
| template<typename T , typename S , typename F > | |
| void | compute_Jtv (F *f, T *out, T *in, const T *jacobian_scales, StreamPool &streams) |
| __global__ void | count_pose_rows_per_landmark_column_kernel (const size_t *col_pointers, const size_t *row_indices, size_t landmark_col_start, size_t num_block_columns, size_t *pose_counts) |
| __global__ void | fill_schur_structure_pairs_kernel (const size_t *col_pointers, const size_t *row_indices, const size_t landmark_col_start, const size_t num_block_columns, const size_t *pose_counts, const size_t *pair_offsets, BlockCoordinates *pairs_out) |
| __global__ void | fill_schur_mul_tuples_kernel (const size_t *col_pointers, const size_t *row_indices, const size_t *block_offsets, size_t landmark_col_start, size_t num_block_columns, const size_t *pose_counts, const size_t *pair_offsets, SchurMulTuple *tuples_out) |
| template<typename T , typename S > | |
| __global__ void | schur_block_product_kernel (const MulOp< S > *ops, const size_t num_ops, const size_t dim_a, const size_t dim_b, const size_t dim_c) |
| template<int DIM_B, typename T , typename S > | |
| __global__ void | schur_block_product_kernel_dim_b (const MulOp< S > *ops, const size_t num_ops, const size_t dim_a, const size_t dim_c) |
| template<typename highp , typename S , typename T > | |
| __global__ void | block_matvec_assign_batched_kernel (const S *values, const size_t *a_offsets, const T *x_base, T *y_base, const size_t *vec_offsets, size_t num_blocks, size_t dim) |
| template<typename T , typename S > | |
| __global__ void | block_matvec_add_batched_kernel (const S *values, const HplMatVecOp *ops, const size_t num_ops, const T *x_base, T *y_base, const size_t rows, const size_t cols) |
| template<typename T , typename S > | |
| __global__ void | block_matvec_transpose_add_batched_kernel (const S *values, const HplMatVecOp *ops, const size_t num_ops, const T *x_base, T *y_base, const size_t rows, const size_t cols) |
| template<typename Src , typename Dst = Src> | |
| __global__ void | block_copy_batched_kernel (const Src *src_values, Dst *dst_values, const BlockCopyOp *ops, const size_t num_ops, const size_t rows, const size_t cols) |
| template<typename VertexType , typename State , typename Traits , typename T > | |
| __global__ void | backup_state_kernel (VertexType **vertices, State *dst, const uint8_t *active_state, const size_t num_vertices) |
| template<typename VertexType , typename State , typename Traits , typename T > | |
| __global__ void | set_state_kernel (VertexType **vertices, const State *src, const uint8_t *active_state, const size_t num_vertices) |
| template<typename T , typename S , typename Descriptor , typename V > | |
| __global__ void | apply_update_kernel (V **vertices, const T *delta_x, const T *jacobian_scales, const size_t *hessian_ids, const uint8_t *active_state, const size_t num_threads) |
| template<typename T , typename S , typename V > | |
| void | apply_update (V *v, const T *delta_x, T *jacobian_scales, cudaStream_t stream) |
| template<typename T > | |
| __global__ void | axpy_kernel (size_t n, T *z, const T a, const T *x, T *y) |
| template<typename T > | |
| void | axpy_async (cudaStream_t stream, size_t n, T *z, const T a, const T *x, T *y) |
| template<typename T > | |
| __global__ void | damping_kernel (size_t n, T *z, const T damping_factor, const bool use_identity, const T *diag, const T *x) |
| template<typename T > | |
| void | damp_by_factor_async (cudaStream_t stream, size_t n, T *z, const T damping_factor, const bool use_identity, const T *diag, const T *x) |
| template<typename T > | |
| __global__ void | clamp_kernel (size_t n, T min_val, T max_val, T *x) |
| template<typename T > | |
| void | clamp_async (cudaStream_t stream, size_t n, T min_val, T max_val, T *x) |
| template<typename T > | |
| __global__ void | rescale_vec_kernel (size_t n, T *out, const T scale, const T *x) |
| template<typename T > | |
| void | rescale_vec_async (cudaStream_t stream, size_t n, T *out, const T scale, const T *x) |
| template<typename T > | |
| __global__ void | compute_adam_step (const size_t n, T *gradient, T *step, T *m, T *v, const T lr, const T beta1, const T beta2, const T epsilon, const size_t t) |
| template<typename T > | |
| void | compute_adam_step_async (cudaStream_t stream, const size_t n, T *gradient, T *step, T *m, T *v, const T lr, const T beta1, const T beta2, const T epsilon, const size_t t) |
Operations for Graphite.