23 #include "kernels_trace.h"
25 #ifndef DOXYGEN_SHOULD_SKIP_THIS
/*
 * Upper bound on the width of one diagonal block in the blocked factorization
 * loop of this file: the loop runs over pastix_iceil( n, MAXSIZEOFBLOCKS )
 * blocks, and each block is pastix_imin( MAXSIZEOFBLOCKS, remaining ) wide.
 */
26 #define MAXSIZEOFBLOCKS 64
/*
 * Single-precision +1 and -1 kept as file-scope objects.
 * NOTE(review): their uses are not visible in this excerpt; presumably they
 * are the alpha/beta scalars handed to the cblas_* calls -- confirm.
 */
27 static float sone = 1.0;
28 static float msone = -1.0;
69 pastix_int_t *nbpivots,
72 pastix_int_t k, minMN;
73 float *Akk, *Aik, alpha;
75 minMN = pastix_imin( m, n );
78 for (k=0; k<minMN; k++) {
81 if ( fabsf(*Akk) < criterion ) {
83 *Akk = (float)(-criterion);
86 *Akk = (float)criterion;
93 cblas_sscal(m-k-1, ( alpha ), Aik, 1 );
98 cblas_sger(CblasColMajor, m-k-1, n-k-1,
141 pastix_int_t *nbpivots,
144 pastix_int_t k, blocknbr, blocksize, matrixsize, tempm;
145 float *Akk, *Lik, *Ukj, *Aij;
147 blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
151 for (k=0; k<blocknbr; k++) {
153 tempm = n - k * MAXSIZEOFBLOCKS;
154 blocksize = pastix_imin(MAXSIZEOFBLOCKS, tempm);
155 Lik = Akk + blocksize;
156 Ukj = Akk + blocksize*lda;
157 Aij = Ukj + blocksize;
160 core_sgetf2sp( tempm, blocksize, Akk, lda, nbpivots, criterion );
162 matrixsize = tempm - blocksize;
163 if ( matrixsize > 0 ) {
166 cblas_strsm(CblasColMajor,
167 CblasLeft, CblasLower,
168 CblasNoTrans, CblasUnit,
169 blocksize, matrixsize,
174 cblas_sgemm(CblasColMajor,
175 CblasNoTrans, CblasNoTrans,
176 matrixsize, matrixsize, blocksize,
182 Akk += blocksize * (lda+1);
222 pastix_int_t ncols, stride;
223 pastix_int_t nbpivots = 0;
224 pastix_fixdbl_t time, flops;
228 float criterion = solvmtx->diagthreshold;
230 time = kernel_trace_start( PastixKernelGETRF );
233 stride = (cblk->
cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->
stride;
235 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
240 assert( (lrL->
rk == -1) && (lrU->
rk == -1) );
246 assert( stride == lrL->
rkmax );
247 assert( stride == lrU->
rkmax );
258 flops = FLOPS_SGETRF( ncols, ncols );
259 kernel_trace_start_lvl2( PastixKernelLvl2GETRF );
261 kernel_trace_stop_lvl2( flops );
266 kernel_trace_stop( cblk->
fblokptr->
inlast, PastixKernelGETRF, ncols, 0, 0, flops, time );
269 pastix_atomic_add_32b( &(solvmtx->nbpivots), nbpivots );
310 pastix_int_t nbpivots;
320 cblk, L, L, &(solvmtx->lowrank) );
323 cblk, U, U, &(solvmtx->lowrank) );
364 pastix_int_t nbpivots;
372 for( ; blok < lblk; blok++ )
374 fcblk = solvmtx->cblktab + blok->
fcblknm;
376 if ( fcblk->
cblktype & CBLK_FANIN ) {
384 work, lwork, &(solvmtx->lowrank) );
387 if ( blok+1 < lblk ) {
391 work, lwork, &(solvmtx->lowrank) );