PaStiX Handbook: build/kernels/pastix

Go to the documentation of this file.
 /**
  * @file pastix_slrcores.h
  *
  * PaStiX kernel header.
  *
  * @copyright 2016-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  * @version 6.3.2
  * @author Mathieu Faverge
  * @author Pierre Ramet
  * @author Xavier Lacoste
  * @author Esragul Korkmaz
  * @author Nolan Bredel
  * @date 2023-07-21
  * @generated from /builds/solverstack/pastix/kernels/pastix_zlrcores.h, normal z -> s, Wed Dec 13 12:09:04 2023
  *
  */
 #ifndef _pastix_slrcores_h_
 #define _pastix_slrcores_h_
  
 #include "pastix_lowrank.h"
  
 /**
  *
  * @addtogroup kernel_lr
  * @{
  *    This module contains all the low-rank kernels working on pastix_lr_t
  *    matrix representations.
  *
  *    @name PastixFloat low-rank kernels
  *    @{
  */
 /*
  * Basic management of a pastix_lrblock_t attached to an M-by-N block.
  * NOTE(review): the one-line roles below are read off the names and
  * signatures only; confirm the exact contracts against the definitions.
  */
 /* Presumably allocates the storage of A for a rank of at most rkmax. */
 void core_slralloc( pastix_int_t M, pastix_int_t N, pastix_int_t rkmax, pastix_lrblock_t *A );
 /* Presumably releases the storage owned by A. */
 void core_slrfree ( pastix_lrblock_t *A );
 /* Presumably resizes A to rank newrk / capacity newrkmax, bounded by rklimit;
  * the meaning of the int return value is not visible in this header. */
 int  core_slrsze  ( int copy, pastix_int_t M, pastix_int_t N, pastix_lrblock_t *A, pastix_int_t newrk, pastix_int_t newrkmax, pastix_int_t rklimit );
 /* Presumably expands the low-rank block Alr into the dense array A of leading
  * dimension lda; Alr is read-only (const). */
 int  core_slr2ge  ( pastix_trans_t trans, pastix_int_t M, pastix_int_t N, const pastix_lrblock_t *Alr, float *A, pastix_int_t lda );
  
 /*
  * Copy kernel between two low-rank blocks: A (M1-by-N1, read-only) is the
  * source and B (M2-by-N2) is the destination, with an (offx, offy) offset.
  * NOTE(review): presumably B receives alpha * op(A) placed at (offx, offy);
  * confirm against the definition.
  */
 void core_slrcpy  ( const pastix_lr_t *lowrank,
                     pastix_trans_t transA, float alpha,
                     pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A,
                     pastix_int_t M2, pastix_int_t N2,       pastix_lrblock_t *B,
                     pastix_int_t offx, pastix_int_t offy );
  
 /*
  * Concatenation kernels working on the two blocks A (read-only) and B, with
  * the result written through the caller-provided u1u2 / v1v2 array.
  * NOTE(review): presumably they build the stacked [u1, u2] and [v1, v2]
  * factors of A and B; the required size of the output arrays is not visible
  * in this header.
  */
 void core_slrconcatenate_u( float alpha,
                             pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A,
                             pastix_int_t M2,                        pastix_lrblock_t *B,
                             pastix_int_t offx,
                             float *u1u2 );
 void core_slrconcatenate_v( pastix_trans_t transA1, float alpha,
                             pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A,
                                              pastix_int_t N2,       pastix_lrblock_t *B,
                             pastix_int_t offy,
                             float *v1v2 );
  
 /*
  * Returns a float computed from the M-by-N low-rank block A (read-only) for
  * the norm type ntype.
  * NOTE(review): transV is a plain int here while other kernels use
  * pastix_trans_t for transposition flags; confirm the expected values.
  */
 float core_slrnrm( pastix_normtype_t ntype, int transV,
                     pastix_int_t M, pastix_int_t N,
                     const pastix_lrblock_t *A );
  
 /*
  * Serialization helpers for an M-by-N low-rank block.
  * NOTE(review): roles inferred from names and signatures; the meaning of the
  * returned pointers (presumably the position following the processed data)
  * and the ownership of the unpacked storage must be confirmed against the
  * definitions.
  */
 /* Returns a size_t associated with A; note that A is not const-qualified here. */
 size_t core_slrgetsize( pastix_int_t M, pastix_int_t N, pastix_lrblock_t *A );
 /* Takes A read-only and a writable buffer; returns a char pointer. */
 char *core_slrpack( pastix_int_t M, pastix_int_t N, const pastix_lrblock_t *A, char *buffer );
 /* Fills A from buffer; returns a char pointer. */
 char *core_slrunpack( pastix_int_t M, pastix_int_t N, pastix_lrblock_t *A, char *buffer );
 /* Variant reading from a const input and additionally updating *outptr. */
 const char * core_slrunpack2( pastix_int_t M, pastix_int_t N, pastix_lrblock_t *A, const char *input, char **outptr );
  
 /**
  *    @}
  * @}
  *
  * @addtogroup kernel_lr_lrmm
  * @{
  *    These are the kernels that compute the low-rank updates
  *
  *    @name PastixFloat LRMM low-rank kernels
  *    @{
  */
  
 /**
  * @brief Structure to store all the parameters of the core_slrmm family functions
  *
  * The workspace is described by three fields: work (base pointer), lwork
  * (total size) and lwused (size already handed out).
  */
 typedef struct core_slrmm_s {
     const pastix_lr_t      *lowrank;     /**< The lowrank structure                                                 */
     pastix_trans_t          transA;      /**< Specify op(A) and is equal to PastixNoTrans or PastixTrans            */
     pastix_trans_t          transB;      /**< Specify op(B) and is equal to PastixNoTrans or PastixTrans            */
     pastix_int_t            M;           /**< Number of rows     of the A matrix                                    */
     pastix_int_t            N;           /**< Number of columns  of the B matrix                                    */
     pastix_int_t            K;           /**< Number of columns  of the A matrix (= number of rows of the B matrix) */
     pastix_int_t            Cm;          /**< Number of rows     of the C matrix that receives the AB contribution  */
     pastix_int_t            Cn;          /**< Number of columns  of the C matrix that receives the AB contribution  */
     pastix_int_t            offx;        /**< Horizontal offsets of the AB product in the C matrix                  */
     pastix_int_t            offy;        /**< Vertical   offsets of the AB product in the C matrix                  */
     float      alpha;       /**< The alpha factor                                                      */
     const pastix_lrblock_t *A;           /**< The A matrix described in a low-rank structure                        */
     const pastix_lrblock_t *B;           /**< The B matrix described in a low-rank structure                        */
     float      beta;        /**< The beta factor                                                       */
     pastix_lrblock_t       *C;           /**< The C matrix described in a low-rank structure                        */
     float     *work;        /**< The pointer to an available workspace                                 */
     pastix_int_t            lwork;       /**< The size of the given workspace                                       */
     pastix_int_t            lwused;      /**< The size of the workspace that is already used                        */
     pastix_atomic_lock_t   *lock;        /**< The lock to protect the concurrent accesses on the C matrix           */
 } core_slrmm_t;
  
 /**
  * @brief Initialize all the parameters of the core_slrmm family functions to ease the access
  *
  * Expands to a sequence of local variable declarations, one per field of the
  * structure pointed to by _a_ and named after that field, each initialized
  * with the current field value. The lwused field is the only one that is not
  * pasted. Since the expansion is made of declarations (the last one already
  * ends with a semicolon), the macro must be used where declarations are
  * allowed and the pasted names must not already exist in that scope. The
  * locals are copies: assigning to them does not update the structure.
  *
  * @param[in] _a_ Pointer to the parameter structure (evaluated once per field).
  */
 #define PASTE_CORE_SLRMM_PARAMS(_a_)                   \
     const pastix_lr_t      *lowrank = (_a_)->lowrank;  \
     pastix_trans_t          transA  = (_a_)->transA;   \
     pastix_trans_t          transB  = (_a_)->transB;   \
     pastix_int_t            M       = (_a_)->M;        \
     pastix_int_t            N       = (_a_)->N;        \
     pastix_int_t            K       = (_a_)->K;        \
     pastix_int_t            Cm      = (_a_)->Cm;       \
     pastix_int_t            Cn      = (_a_)->Cn;       \
     pastix_int_t            offx    = (_a_)->offx;     \
     pastix_int_t            offy    = (_a_)->offy;     \
     float      alpha   = (_a_)->alpha;    \
     const pastix_lrblock_t *A       = (_a_)->A;        \
     const pastix_lrblock_t *B       = (_a_)->B;        \
     float      beta    = (_a_)->beta;     \
     pastix_lrblock_t       *C       = (_a_)->C;        \
     float     *work    = (_a_)->work;     \
     pastix_int_t            lwork   = (_a_)->lwork;    \
     pastix_atomic_lock_t   *lock    = (_a_)->lock;
  
 /**
  * @brief Void all the parameters of the core_slrmm family functions to silence warnings
  *
  * Casts to void each of the local names lowrank, transA, transB, M, N, K, Cm,
  * Cn, offx, offy, alpha, A, B, beta, C, work, lwork and lock, which must
  * therefore all be declared in the current scope. The expansion does not end
  * with a semicolon: the invocation must be followed by one.
  */
 #define PASTE_CORE_SLRMM_VOID                   \
     (void)lowrank;                              \
     (void)transA;                               \
     (void)transB;                               \
     (void)M;                                    \
     (void)N;                                    \
     (void)K;                                    \
     (void)Cm;                                   \
     (void)Cn;                                   \
     (void)offx;                                 \
     (void)offy;                                 \
     (void)alpha;                                \
     (void)A;                                    \
     (void)B;                                    \
     (void)beta;                                 \
     (void)C;                                    \
     (void)work;                                 \
     (void)lwork;                                \
     (void)lock
  
 /**
  * @brief Function to get a workspace pointer if space is available in the one provided
  * @param[inout] params  The parameters structure for core_slrmm family functions
  * @param[in]    newsize The required workspace size in number of elements
  * @return The pointer to the workspace if enough space available, NULL otherwise.
  */
 static inline float *
 core_slrmm_getws( core_slrmm_t *params,
                   ssize_t newsize )
 {
     float *work = NULL;
     if ( (params->lwused + newsize) <= params->lwork )
     {
         work = params->work + params->lwused;
         params->lwused += newsize;
     }
     /* else */
     /* { */
     /*     if ( (params->work == NULL) || (params->lwused == 0) ) */
     /*     { */
     /*         params->work = realloc( params->work, newsize * sizeof(float) ); */
     /*         params->lwork  = newsize; */
     /*         params->lwused = newsize; */
     /*         work = params->work; */
     /*     } */
     /* } */
     return work;
 }
  
 /**
  *    @}
  *    @name update_fr Functions to perform the update on a full-rank matrix
  *    @{
  */
 /*
  * One kernel per storage combination of the A and B operands; all take the
  * shared core_slrmm_t parameter structure and return a pastix_fixdbl_t.
  * NOTE(review): the names suggest "fr" = full-rank and "lr" = low-rank for A
  * then B, and the returned value is presumably a flop count; confirm against
  * the definitions.
  */
 pastix_fixdbl_t core_sfrfr2fr( core_slrmm_t *params );
 pastix_fixdbl_t core_sfrlr2fr( core_slrmm_t *params );
 pastix_fixdbl_t core_slrfr2fr( core_slrmm_t *params );
 pastix_fixdbl_t core_slrlr2fr( core_slrmm_t *params );
  
 /**
  *    @}
  *    @name update_lr Functions to prepare the AB product for an update on a low-rank matrix
  *    @{
  */
 /*
  * One kernel per storage combination of the A and B operands. Each takes the
  * shared parameter structure, writes through AB and *infomask, and returns a
  * pastix_fixdbl_t. Only the last argument differs (Kmax, Brkmin, Arkmin, or
  * none).
  * NOTE(review): AB presumably receives the low-rank form of the product and
  * infomask flags describing it (it is later passed by value to
  * core_slradd()); the returned value is presumably a flop count. Confirm
  * against the definitions.
  */
 pastix_fixdbl_t core_sfrfr2lr( core_slrmm_t     *params,
                                pastix_lrblock_t *AB,
                                int              *infomask,
                                pastix_int_t      Kmax );
 pastix_fixdbl_t core_sfrlr2lr( core_slrmm_t     *params,
                                pastix_lrblock_t *AB,
                                int              *infomask,
                                pastix_int_t      Brkmin );
 pastix_fixdbl_t core_slrfr2lr( core_slrmm_t     *params,
                                pastix_lrblock_t *AB,
                                int              *infomask,
                                pastix_int_t      Arkmin );
 pastix_fixdbl_t core_slrlr2lr( core_slrmm_t     *params,
                                pastix_lrblock_t *AB,
                                int              *infomask );
  
 /**
  *    @}
  *    @name add_lr Functions to add the AB contribution in a low-rank format to any C matrix
  *    @{
  */
 /*
  * Takes the shared parameter structure, a read-only low-rank block AB, a
  * transposition flag transV and an infomask passed by value.
  * NOTE(review): presumably adds the AB contribution to params->C; the
  * meaning of the infomask bits and of the returned pastix_fixdbl_t
  * (presumably a flop count) is not visible in this header.
  */
 pastix_fixdbl_t core_slradd( core_slrmm_t           *params,
                              const pastix_lrblock_t *AB,
                              pastix_trans_t          transV,
                              int                     infomask );
  
 /**
  *    @}
  */
 /*
  * Entry point of the family: takes only the shared parameter structure.
  * NOTE(review): presumably dispatches to the kernels declared above based on
  * the storage of A, B and C, and returns a flop count; confirm against the
  * definition.
  */
 pastix_fixdbl_t core_slrmm( core_slrmm_t *params );
  
 /**
  * @}
  *
  * @addtogroup kernel_lr_svd
  * @{
  *    This is the SVD implementation of the low-rank kernels based on the LAPACK
  *    GESVD function.
  *
  *    @name PastixFloat SVD low-rank kernels
  *    @{
  */
 /*
  * SVD variants of the two generic kernels. The dense input (Avoid) and the
  * scaling factor (alphaptr) are passed as const void pointers.
  * NOTE(review): they presumably point to float data in this precision, and
  * the returned pastix_fixdbl_t is presumably a flop count; confirm against
  * the definitions.
  */
 /* Takes an m-by-n dense array (leading dimension lda) and fills Alr. */
 pastix_fixdbl_t core_sge2lr_svd( int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit,
                                  pastix_int_t m, pastix_int_t n,
                                  const void *Avoid, pastix_int_t lda, pastix_lrblock_t *Alr );
 /* Takes A (M1-by-N1, read-only) and updates B (M2-by-N2) with an (offx, offy) offset. */
 pastix_fixdbl_t core_srradd_svd( const pastix_lr_t *lowrank, pastix_trans_t transA1, const void *alphaptr,
                                  pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A,
                                  pastix_int_t M2, pastix_int_t N2,       pastix_lrblock_t *B,
                                  pastix_int_t offx, pastix_int_t offy);
  
 /**
  *    @}
  * @}
  *
  * @addtogroup kernel_lr_rrqr
  * @{
  *    These are the rank-revealing QR implementations to generate the low-rank
  *    representations of a full rank matrix.
  *
  *    @name PastixFloat main template to convert a full rank matrix to low-rank
  *    @{
  */
  
 /**
  * @brief Function pointer type of the rank-revealing QR kernels that work
  *        with a pivot array (jpvt).
  *
  * This is the type of the rrqrfct argument of core_sge2lr_qrcp() and
  * core_srradd_qr(). The kernel receives a tolerance, a maximal rank, a refine
  * flag, a block size nb, the m-by-n array A with its leading dimension lda,
  * the jpvt and tau arrays, and two workspaces (work of size lwork, and rwork).
  *
  * NOTE(review): the meaning of the int return value (presumably the rank
  * found, or a negative value on failure) and the required array sizes are not
  * visible in this header; see the kernels implementing this prototype.
  */
 typedef int (*core_srrqr_cp_t)( float tol, pastix_int_t maxrank, int refine, pastix_int_t nb,
                                 pastix_int_t m, pastix_int_t n,
                                 float *A, pastix_int_t lda,
                                 pastix_int_t *jpvt, float *tau,
                                 float *work, pastix_int_t lwork,  float *rwork );
  
 /**
  * @brief Function pointer type of the rank-revealing QR kernels that work
  *        with a second array B instead of a pivot array.
  *
  * This is the type of the rrqrfct argument of core_sge2lr_qrrt(). The kernel
  * receives a tolerance, a maximal rank, a block size nb, the m-by-n array A
  * (leading dimension lda) with its tau array, a second array B (leading
  * dimension ldb) with its tau_b array, a workspace work of size lwork, and a
  * normA value.
  *
  * NOTE(review): the meaning of the int return value (presumably the rank
  * found, or a negative value on failure), the shape of B and whether normA is
  * the norm of A on entry are not visible in this header; see the kernels
  * implementing this prototype.
  */
 typedef int (*core_srrqr_rt_t)( float tol, pastix_int_t maxrank, pastix_int_t nb,
                                 pastix_int_t m, pastix_int_t n,
                                 float *A, pastix_int_t lda, float *tau,
                                 float *B, pastix_int_t ldb, float *tau_b,
                                 float *work, pastix_int_t lwork,  float normA );
  
 /**
  *    @}
  *    @name PastixFloat Rank Revealing QR kernels for low-rank
  *    @{
  */
 /*
  * Named variants of the two generic kernels (ge2lr and rradd), one pair per
  * rank-revealing QR flavour: pqrcp, rqrcp and tqrcp. The rqrrt flavour only
  * has a ge2lr kernel. All ge2lr variants share one signature and all rradd
  * variants share another.
  * NOTE(review): Avoid and alphaptr are const void pointers that presumably
  * point to float data in this precision, and the returned pastix_fixdbl_t is
  * presumably a flop count; confirm against the definitions.
  */
 pastix_fixdbl_t core_sge2lr_pqrcp( int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit,
                                    pastix_int_t m, pastix_int_t n,
                                    const void *Avoid, pastix_int_t lda, pastix_lrblock_t *Alr );
 pastix_fixdbl_t core_srradd_pqrcp( const pastix_lr_t *lowrank, pastix_trans_t transA1, const void *alphaptr,
                                    pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A,
                                    pastix_int_t M2, pastix_int_t N2,       pastix_lrblock_t *B,
                                    pastix_int_t offx, pastix_int_t offy );
  
 pastix_fixdbl_t core_sge2lr_rqrcp( int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit,
                                    pastix_int_t m, pastix_int_t n,
                                    const void *Avoid, pastix_int_t lda, pastix_lrblock_t *Alr );
 pastix_fixdbl_t core_srradd_rqrcp( const pastix_lr_t *lowrank, pastix_trans_t transA1, const void *alphaptr,
                                    pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A,
                                    pastix_int_t M2, pastix_int_t N2,       pastix_lrblock_t *B,
                                    pastix_int_t offx, pastix_int_t offy );
  
 pastix_fixdbl_t core_sge2lr_tqrcp( int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit,
                                    pastix_int_t m, pastix_int_t n,
                                    const void *Avoid, pastix_int_t lda, pastix_lrblock_t *Alr );
 pastix_fixdbl_t core_srradd_tqrcp( const pastix_lr_t *lowrank, pastix_trans_t transA1, const void *alphaptr,
                                    pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A,
                                    pastix_int_t M2, pastix_int_t N2,       pastix_lrblock_t *B,
                                    pastix_int_t offx, pastix_int_t offy );
  
 pastix_fixdbl_t core_sge2lr_rqrrt( int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit,
                                    pastix_int_t m, pastix_int_t n,
                                    const void *Avoid, pastix_int_t lda, pastix_lrblock_t *Alr );
  
  
 /*
  * Generic templates taking the rank-revealing QR kernel to use as their first
  * argument (rrqrfct). The remaining arguments match those of the named
  * variants declared in this header.
  * NOTE(review): the named variants presumably forward to these templates with
  * their own kernel; confirm against the definitions.
  */
 /* ge2lr template for kernels of type core_srrqr_cp_t. */
 pastix_fixdbl_t core_sge2lr_qrcp( core_srrqr_cp_t rrqrfct,
                                   int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit,
                                   pastix_int_t m, pastix_int_t n,
                                   const void *Avoid, pastix_int_t lda,
                                   pastix_lrblock_t *Alr );
 /* ge2lr template for kernels of type core_srrqr_rt_t. */
 pastix_fixdbl_t core_sge2lr_qrrt( core_srrqr_rt_t rrqrfct,
                                   int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit,
                                   pastix_int_t m, pastix_int_t n,
                                   const void *Avoid, pastix_int_t lda,
                                   pastix_lrblock_t *Alr);
  
 /* rradd template for kernels of type core_srrqr_cp_t. */
 pastix_fixdbl_t core_srradd_qr( core_srrqr_cp_t rrqrfct,
                                 const pastix_lr_t *lowrank, pastix_trans_t transA1, const void *alphaptr,
                                 pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A,
                                 pastix_int_t M2, pastix_int_t N2,       pastix_lrblock_t *B,
                                 pastix_int_t offx, pastix_int_t offy );
  
 /**
  *    @}
  * @}
  *
  * @addtogroup kernel_lr_debug
  * @{
  *    These are the debug routines for the low-rank kernels.
  *
  *    @name PastixFloat low-rank debug functions
  *    @{
  */
 /*
  * Debug helpers working on dense float arrays that are only read (const),
  * each described by its dimensions and leading dimension.
  * NOTE(review): roles inferred from the names; the output destination of the
  * print routine and the meaning of the int return values of the checks
  * (presumably 0 on success) must be confirmed against the definitions.
  */
 /* Presumably prints the singular values of the M-by-N array A. */
 void core_slrdbg_printsvd( pastix_int_t              M,
                            pastix_int_t              N,
                            const float *A,
                            pastix_int_t              lda );
  
 /* Presumably checks that the columns of the M-by-N array A are orthogonal. */
 int  core_slrdbg_check_orthogonality( pastix_int_t              M,
                                       pastix_int_t              N,
                                       const float *A,
                                       pastix_int_t              lda );
  
 /* Presumably checks that the columns of A (M-by-NA) are orthogonal to those of B (M-by-NB). */
 int  core_slrdbg_check_orthogonality_AB( pastix_int_t M, pastix_int_t NA, pastix_int_t NB,
                                          const float *A, pastix_int_t lda,
                                          const float *B, pastix_int_t ldb );
  
 /**
  *    @}
  * @}
  */
  
 #endif /* _pastix_slrcores_h_ */