26 #ifndef DOXYGEN_SHOULD_SKIP_THIS
27 #define MAXSIZEOFBLOCKS 64
80 if ( cabsf(*Akk) < criterion ) {
86 if ( crealf(*Akk) < 0.0 )
88 pastix_print_error(
"Negative diagonal term\n" );
95 cblas_cscal(n-k-1, CBLAS_SADDR( alpha ), Amk, 1 );
100 cblas_cher(CblasColMajor, CblasLower,
154 blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
156 for (k=0; k<blocknbr; k++) {
158 blocksize = pastix_imin(MAXSIZEOFBLOCKS, n-k*MAXSIZEOFBLOCKS);
159 tmp = A+(k*MAXSIZEOFBLOCKS)*(lda+1);
164 if ((k*MAXSIZEOFBLOCKS+blocksize) < n) {
166 tmp1 = tmp + blocksize;
167 tmp2 = tmp1 + blocksize * lda;
169 matrixsize = n-(k*MAXSIZEOFBLOCKS+blocksize);
175 cblas_ctrsm(CblasColMajor,
176 CblasRight, CblasLower,
177 CblasConjTrans, CblasNonUnit,
178 matrixsize, blocksize,
179 CBLAS_SADDR(cone), tmp, lda,
183 cblas_cherk(CblasColMajor, CblasLower, CblasNoTrans,
184 matrixsize, blocksize,
185 (
float)mcone, tmp1, lda,
186 (
float)cone, tmp2, lda);
234 stride = (cblk->
cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->
stride;
240 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
246 assert( lrL->
rk == -1 );
247 assert( stride == lrL->
rkmax );
253 flops = FLOPS_CPOTRF( ncols );
256 kernel_trace_stop_lvl2( flops );
261 pastix_atomic_add_32b( &(solvmtx->
nbpivots), nbpivots );
301 cblk, L, L, &(solvmtx->
lowrank) );
350 for( ; blok < lblk; blok++ )
354 if ( fcblk->
cblktype & CBLK_FANIN ) {
361 work, lwork, &(solvmtx->
lowrank) );
399 assert( cblk->
cblktype & CBLK_TASKS_2D );
406 for( i=0; blok < lblk; i++, blok++ )
408 assert( !((solvmtx->
cblktab + blok->
fcblknm)->cblktype & CBLK_RECV) );
412 while ( ( blok < lblk ) &&
413 ( blok[0].fcblknm == blok[1].fcblknm ) &&
414 ( blok[0].lcblknm == blok[1].lcblknm ) )
454 if ( fcbk->
cblktype & CBLK_FANIN ) {
464 work, lwork, &(solvmtx->
lowrank) );
469 while ( ( blok < lblk ) &&
470 ( blok[-1].fcblknm == blok[0].fcblknm ) &&
471 ( blok[-1].lcblknm == blok[0].lcblknm ) );
static void core_cpotf2sp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the sequential static pivoting Cholesky factorization of the n-by-n Hermitian matrix A = L * L^h (L^h being the conjugate transpose of L).
BEGIN_C_DECLS typedef int pastix_int_t
float _Complex pastix_complex32_t
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
static void kernel_trace_stop(int8_t inlast, pastix_ktype_t ktype, int m, int n, int k, double flops, double starttime)
Stop the trace of a single kernel.
static double kernel_trace_start(pastix_ktype_t ktype)
Start the trace of a single kernel.
void core_cpotrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting Cholesky factorization of the n-by-n Hermitian matrix A = L * L^h (L^h being the conjugate transpose of L).
int cpucblk_cpotrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the Cholesky factorization of a given panel and submit tasks for the subsequent updates.
void cpucblk_cpotrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex32_t *work, pastix_int_t lwork)
Apply the updates of the Cholesky factorization of a given panel.
pastix_fixdbl_t cpucblk_cgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, pastix_complex32_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
int cpucblk_cpotrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_int_t lwork)
Perform the Cholesky factorization of a given panel and apply all its updates.
void cpucblk_crelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
int cpucblk_cpotrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Compute the Cholesky factorization of one panel.
void cpucblk_ctrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
void cpucblk_calloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
int cpucblk_cpotrfsp1d_potrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the Cholesky factorization of the diagonal block in a panel.
The block low-rank structure to hold a matrix in low-rank form.
SolverBlok *restrict bloktab
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
SolverCblk *restrict cblktab
volatile int32_t nbpivots
Solver column block structure.
Solver column block structure.