22 #include "kernels_trace.h"
24 #ifndef DOXYGEN_SHOULD_SKIP_THIS
25 #define MAXSIZEOFBLOCKS 64
26 static pastix_complex64_t zone = 1.0;
27 static pastix_complex64_t mzone = -1.0;
67 pastix_complex64_t *A,
69 pastix_int_t *nbpivots,
73 pastix_complex64_t *Akk = A;
74 pastix_complex64_t *Amk = A+1;
75 pastix_complex64_t alpha;
78 if ( cabs(*Akk) < criterion ) {
79 (*Akk) = (pastix_complex64_t)criterion;
87 cblas_zscal(n-k-1, CBLAS_SADDR( alpha ), Amk, 1 );
92 cblas_zsyrk( CblasColMajor, CblasLower, CblasNoTrans,
94 CBLAS_SADDR( mzone ), Amk, lda,
95 CBLAS_SADDR( zone ), Akk, lda );
137 pastix_complex64_t *A,
139 pastix_int_t *nbpivots,
142 pastix_int_t k, blocknbr, blocksize, matrixsize;
143 pastix_complex64_t *tmp,*tmp1,*tmp2;
146 blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
148 for (k=0; k<blocknbr; k++) {
150 blocksize = pastix_imin(MAXSIZEOFBLOCKS, n-k*MAXSIZEOFBLOCKS);
151 tmp = A+(k*MAXSIZEOFBLOCKS)*(lda+1);
156 if ((k*MAXSIZEOFBLOCKS+blocksize) < n) {
158 tmp1 = tmp + blocksize;
159 tmp2 = tmp1 + blocksize * lda;
161 matrixsize = n-(k*MAXSIZEOFBLOCKS+blocksize);
167 cblas_ztrsm(CblasColMajor,
168 CblasRight, CblasLower,
169 CblasTrans, CblasNonUnit,
170 matrixsize, blocksize,
171 CBLAS_SADDR(zone), tmp, lda,
175 cblas_zsyrk(CblasColMajor, CblasLower, CblasNoTrans,
176 matrixsize, blocksize,
177 CBLAS_SADDR( mzone ), tmp1, lda,
178 CBLAS_SADDR( zone ), tmp2, lda);
216 pastix_int_t ncols, stride;
217 pastix_int_t nbpivots = 0;
218 pastix_fixdbl_t time, flops;
219 pastix_complex64_t *L;
221 double criterion = solvmtx->diagthreshold;
223 time = kernel_trace_start( PastixKernelPXTRF );
226 stride = (cblk->
cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->
stride;
232 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
238 assert( lrL->
rk == -1 );
239 assert( stride == lrL->
rkmax );
241 L = (pastix_complex64_t *)dataL;
245 flops = FLOPS_ZPOTRF( ncols );
246 kernel_trace_start_lvl2( PastixKernelLvl2PXTRF );
248 kernel_trace_stop_lvl2( flops );
250 kernel_trace_stop( cblk->
fblokptr->
inlast, PastixKernelPXTRF, ncols, 0, 0, flops, time );
253 pastix_atomic_add_32b( &(solvmtx->nbpivots), nbpivots );
288 pastix_int_t nbpivots;
293 cblk, L, L, &(solvmtx->lowrank) );
328 pastix_complex64_t *work,
334 pastix_int_t nbpivots;
342 for( ; blok < lblk; blok++ )
344 fcblk = (solvmtx->cblktab + blok->
fcblknm);
346 if ( fcblk->
cblktype & CBLK_FANIN ) {
353 work, lwork, &(solvmtx->lowrank) );
int cpucblk_zpxtrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Compute the LL^t factorization of one panel.
int cpucblk_zpxtrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex64_t *work, pastix_int_t lwork)
Perform the LL^t factorization of a given panel and apply all its updates.
static void core_zpxtf2sp(pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the sequential static-pivoting LL^t factorization of the n-by-n matrix A = L * L^t.
int cpucblk_zpxtrfsp1d_pxtrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the LL^t factorization of the diagonal block in a panel.
void core_zpxtrfsp(pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the block static-pivoting LL^t factorization of the n-by-n matrix A = L * L^t.
void cpucblk_ztrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
void cpucblk_zalloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
pastix_fixdbl_t cpucblk_zgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, pastix_complex64_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
void cpucblk_zrelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
The block low-rank structure to hold a matrix in low-rank form.
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
Solver column block structure.