24 #include "kernels_trace.h"
/* The definitions below are only seen when DOXYGEN_SHOULD_SKIP_THIS is not defined. */
26 #ifndef DOXYGEN_SHOULD_SKIP_THIS
/* Panel width of the blocked factorization: the column range is cut into
 * chunks of at most this many columns (see the pastix_iceil / pastix_imin
 * uses of this macro further down). */
27 #define MAXSIZEOFBLOCKS 64
/* Constants +1 and -1, used as the alpha/beta scalars of the
 * cblas_ztrsm and cblas_zherk calls in this file. */
28 static pastix_complex64_t zone = 1.0;
29 static pastix_complex64_t mzone = -1.0;
/* NOTE(review): only part of this routine is present here; its name, its
 * other parameters and several statements are missing. The comments below
 * describe only the statements that are shown. */
/* A: pointer to the complex matrix entries that are read and overwritten. */
69 pastix_complex64_t *A,
/* nbpivots: output pointer; presumably a counter of replaced diagonal
 * entries - TODO confirm, the increment is not shown here. */
71 pastix_int_t *nbpivots,
/* Akk starts on the first entry of A and Amk on the entry stored right after
 * it; presumably the current diagonal entry and the top of the sub-diagonal
 * part of the same column - TODO confirm against the missing loop code. */
75 pastix_complex64_t *Akk = A;
76 pastix_complex64_t *Amk = A+1;
77 pastix_complex64_t alpha;
/* When the modulus of the entry at Akk is below criterion, the entry is
 * overwritten with criterion itself. */
80 if ( cabs(*Akk) < criterion ) {
81 (*Akk) = (pastix_complex64_t)criterion;
/* An entry with a negative real part is reported as an error. */
86 if ( creal(*Akk) < 0.0 )
88 pastix_print_error(
"Negative diagonal term\n" );
/* Scale the n-k-1 contiguous entries starting at Amk by alpha. alpha is
 * assigned on a line that is not shown - presumably the inverse of the
 * factored diagonal entry, TODO confirm. */
95 cblas_zscal(n-k-1, CBLAS_SADDR( alpha ), Amk, 1 );
/* Hermitian rank-1 update on the lower triangle of a column-major matrix;
 * the remaining arguments of the call are not shown. */
100 cblas_zher(CblasColMajor, CblasLower,
/* NOTE(review): only part of this routine is present here; its name, its
 * other parameters and some statements (including the factorization of the
 * diagonal block itself) are missing. The comments below describe only the
 * statements that are shown. */
/* A: pointer to the complex matrix entries, addressed with leading dimension lda. */
145 pastix_complex64_t *A,
/* nbpivots: output pointer; not used by any line shown in this fragment. */
147 pastix_int_t *nbpivots,
150 pastix_int_t k, blocknbr, blocksize, matrixsize;
151 pastix_complex64_t *tmp,*tmp1,*tmp2;
/* Number of column panels; presumably ceil(n / MAXSIZEOFBLOCKS) - TODO
 * confirm the rounding performed by pastix_iceil. */
154 blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
156 for (k=0; k<blocknbr; k++) {
/* Width of panel k: MAXSIZEOFBLOCKS, or the number of columns left when
 * fewer remain (assuming pastix_imin returns the smaller argument). */
158 blocksize = pastix_imin(MAXSIZEOFBLOCKS, n-k*MAXSIZEOFBLOCKS);
/* Offset c*(lda+1) with c = k*MAXSIZEOFBLOCKS moves c columns right and c
 * rows down, i.e. to the top-left entry of diagonal block k. */
159 tmp = A+(k*MAXSIZEOFBLOCKS)*(lda+1);
/* Only when columns remain after this panel is there something to update. */
164 if ((k*MAXSIZEOFBLOCKS+blocksize) < n) {
/* tmp1: blocksize rows below tmp in the same column (panel under the
 * diagonal block). tmp2: blocksize columns to the right of tmp1 (top-left
 * entry of the trailing submatrix). */
166 tmp1 = tmp + blocksize;
167 tmp2 = tmp1 + blocksize * lda;
/* Order of the trailing part that follows the current panel. */
169 matrixsize = n-(k*MAXSIZEOFBLOCKS+blocksize);
/* Triangular solve from the right with the conjugate transpose of the lower,
 * non-unit triangular block at tmp, applied to a matrixsize-by-blocksize
 * right-hand side scaled by zone (+1). The right-hand-side arguments are on
 * a line that is not shown - presumably tmp1, TODO confirm. */
175 cblas_ztrsm(CblasColMajor,
176 CblasRight, CblasLower,
177 CblasConjTrans, CblasNonUnit,
178 matrixsize, blocksize,
179 CBLAS_SADDR(zone), tmp, lda,
/* Hermitian rank-blocksize update of the lower triangle of the trailing
 * submatrix: tmp2 := (-1) * tmp1 * tmp1^H + (+1) * tmp2. The complex
 * constants are cast to double because this BLAS routine takes real scalars. */
183 cblas_zherk(CblasColMajor, CblasLower, CblasNoTrans,
184 matrixsize, blocksize,
185 (
double)mzone, tmp1, lda,
186 (
double)zone, tmp2, lda);
/* NOTE(review): only part of this routine is present here; its signature,
 * the assignment of ncols and the factorization call itself are missing.
 * The comments below describe only the statements that are shown. */
224 pastix_int_t ncols, stride;
/* Local counter, added to solvmtx->nbpivots at the end of the fragment. */
225 pastix_int_t nbpivots = 0;
226 pastix_fixdbl_t time, flops;
227 pastix_complex64_t *L;
/* Threshold taken from the solver matrix structure. */
229 double criterion = solvmtx->diagthreshold;
/* Opens the PastixKernelPOTRF trace; the returned value is handed back to
 * kernel_trace_stop below. */
231 time = kernel_trace_start( PastixKernelPOTRF );
/* Leading dimension: ncols when the CBLK_LAYOUT_2D flag is set on the column
 * block, otherwise the stride stored in the column block. */
234 stride = (cblk->
cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->
stride;
/* Branch taken when the CBLK_COMPRESSED flag is set on the column block. */
240 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
/* Sanity checks on the low-rank descriptor lrL: rk must be -1 (presumably the
 * marker of a block kept in dense form - TODO confirm) and rkmax must match
 * the stride computed above. */
246 assert( lrL->
rk == -1 );
247 assert( stride == lrL->
rkmax );
/* Reinterpret the opaque dataL pointer as an array of complex coefficients. */
249 L = (pastix_complex64_t *)dataL;
/* Flop count attributed to the factorization of an ncols-wide block. */
253 flops = FLOPS_ZPOTRF( ncols );
/* Level-2 trace start/stop pair; the statement they enclose is not shown. */
254 kernel_trace_start_lvl2( PastixKernelLvl2POTRF );
256 kernel_trace_stop_lvl2( flops );
/* Closes the PastixKernelPOTRF trace with the size, flop count and start
 * time recorded above. */
258 kernel_trace_stop( cblk->
fblokptr->
inlast, PastixKernelPOTRF, ncols, 0, 0, flops, time );
/* Adds the local count into the shared counter of the solver matrix
 * through pastix_atomic_add_32b. */
261 pastix_atomic_add_32b( &(solvmtx->nbpivots), nbpivots );
/* NOTE(review): only two lines of this routine are present here; everything
 * else, including its signature, is missing. */
/* Declared without an initializer; its assignment is not shown. */
296 pastix_int_t nbpivots;
/* Tail of a call whose name and leading arguments are not shown: the same
 * pointer L is passed for two consecutive arguments, followed by the address
 * of the low-rank parameters stored in solvmtx. */
301 cblk, L, L, &(solvmtx->lowrank) );
/* NOTE(review): only part of this routine is present here; its name, its
 * other parameters and most of the loop body are missing. The comments below
 * describe only the statements that are shown. */
/* work: complex buffer forwarded, together with lwork, to the call at the
 * end of this fragment. */
336 pastix_complex64_t *work,
/* Declared without an initializer; its assignment is not shown. */
342 pastix_int_t nbpivots;
/* Walk the blocks from the current blok up to, but excluding, lblk. */
350 for( ; blok < lblk; blok++ )
/* Column block in solvmtx->cblktab selected by the index stored in the
 * current block. */
352 fcblk = solvmtx->cblktab + blok->
fcblknm;
/* Branch taken when that column block carries the CBLK_FANIN flag; the body
 * of the branch is not shown. */
354 if ( fcblk->
cblktype & CBLK_FANIN ) {
/* Tail of a call whose name and leading arguments are not shown. */
361 work, lwork, &(solvmtx->lowrank) );