23 #include "kernels_trace.h"
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 #define MAXSIZEOFBLOCKS 64
29 static double done = 1.0;
30 static double mdone = -1.0;
67 pastix_int_t *nbpivots,
77 for (k=0; k<n; k++, m--){
78 if ( fabs(*Akk) < criterion ) {
80 *Akk = (double)(-criterion);
83 *Akk = (double)criterion;
91 cblas_dcopy( m, Amk, 1, Akm, lda );
94 cblas_dscal(m, ( alpha ), Amk, 1 );
101 cblas_dsyrk(CblasColMajor, CblasLower, CblasNoTrans,
144 pastix_int_t *nbpivots,
147 pastix_int_t k, blocknbr, blocksize, matrixsize, col;
148 double *Akk, *Amk, *Akm, *Amm;
152 blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
154 for (k=0; k<blocknbr; k++) {
156 blocksize = pastix_imin(MAXSIZEOFBLOCKS, n-k*MAXSIZEOFBLOCKS);
157 Akk = A+(k*MAXSIZEOFBLOCKS)*(lda+1);
158 Amk = Akk + blocksize;
159 Akm = Akk + blocksize * lda;
160 Amm = Amk + blocksize * lda;
165 if ((k*MAXSIZEOFBLOCKS+blocksize) < n) {
167 matrixsize = n-(k*MAXSIZEOFBLOCKS+blocksize);
176 cblas_dtrsm(CblasColMajor,
177 CblasRight, CblasLower,
178 CblasTrans, CblasUnit,
179 matrixsize, blocksize,
184 for(col = 0; col < blocksize; col++) {
186 cblas_dcopy(matrixsize, Amk + col*lda, 1,
190 alpha = 1.0 / *(Akk + col*(lda+1));
191 cblas_dscal( matrixsize, (alpha),
196 cblas_dgemm(CblasColMajor,
197 CblasNoTrans, CblasNoTrans,
198 matrixsize, matrixsize, blocksize,
236 pastix_int_t ncols, stride;
237 pastix_int_t nbpivots = 0;
238 pastix_fixdbl_t time, flops;
241 double criterion = solvmtx->diagthreshold;
243 time = kernel_trace_start( PastixKernelSYTRF );
246 stride = (cblk->
cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->
stride;
248 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
254 assert( lrL->
rk == -1 );
255 assert( stride == lrL->
rkmax );
267 flops = FLOPS_DSYTRF( ncols );
268 kernel_trace_start_lvl2( PastixKernelLvl2SYTRF );
270 kernel_trace_stop_lvl2( flops );
272 kernel_trace_stop( cblk->
fblokptr->
inlast, PastixKernelSYTRF, ncols, 0, 0, flops, time );
275 pastix_atomic_add_32b( &(solvmtx->nbpivots), nbpivots );
329 pastix_int_t M, N, K, lda, ldb, ldc, ldd;
338 if ( cblk->
cblktype & CBLK_LAYOUT_2D ) {
355 for (iterblok=blok; iterblok<lblok; iterblok++) {
361 assert( fblok < fcblk[1].fblokptr );
383 pastix_cblk_lock( fcblk );
391 pastix_cblk_unlock( fcblk );
432 pastix_int_t nbpivots;
440 cblk, L, L, &(solvmtx->lowrank) );
442 if ( (DLt != NULL) && (cblk->
cblktype & CBLK_LAYOUT_2D) ) {
492 pastix_int_t nbpivots;
494 if ( !(cblk->
cblktype & CBLK_LAYOUT_2D) ) {
498 if (cblk->
cblktype & CBLK_COMPRESSED) {
502 assert( dataDLt == NULL );
513 for( ; blok < lblk; blok++ )
515 fcblk = solvmtx->cblktab + blok->
fcblknm;
517 if ( fcblk->
cblktype & CBLK_FANIN ) {
531 work, lwork, &(solvmtx->lowrank) );
static void core_dsytf2sp(pastix_int_t n, double *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the sequential static pivoting factorization of the symmetric matrix n-by-n A such that A = L * D * L^t.
int core_dgemdm(pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K, double alpha, const double *A, int LDA, const double *B, int LDB, double beta, double *C, int LDC, const double *D, int incD, double *WORK, int LWORK)
Perform one of the following matrix-matrix operations.
void core_dsytrfsp(pastix_int_t n, double *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the block static pivoting factorization of the symmetric matrix n-by-n A such that A = L * D * L^t.
void core_dsytrfsp1d_gemm(const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const double *L, double *C, double *work)
int cpucblk_dsytrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLt)
Compute the LDL^t factorization of one panel.
void cpucblk_dalloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
void cpucblk_dtrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
void cpucblk_dscalo(pastix_trans_t trans, SolverCblk *cblk, void *dataL, void *dataLD)
Copy the L term with scaling for the two-terms algorithm.
int cpucblk_dsytrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, double *DLt, double *work, pastix_int_t lwork)
Perform the LDL^t factorization of a given panel and apply all its updates.
void cpucblk_dalloc_lrws(const SolverCblk *cblk, pastix_lrblock_t *lrblok, double *ws)
Initialize the lrblock structures of all the blocks of the associated cblk from a workspace.
pastix_fixdbl_t cpucblk_dgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, double *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
void cpucblk_drelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
int cpucblk_dsytrfsp1d_sytrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the LDL^t factorization of the diagonal block in a panel.
The block low-rank structure to hold a matrix in low-rank form.
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
static int is_block_inside_fblock(const SolverBlok *blok, const SolverBlok *fblok)
Check if a block is included inside another one.
static void * cblk_getdataU(const SolverCblk *cblk)
Get the pointer to the data associated to the upper part of the cblk.
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
Solver column block structure.