#include "kernels_trace.h"

#ifndef DOXYGEN_SHOULD_SKIP_THIS
#define MAXSIZEOFBLOCKS 64
static pastix_complex64_t zone  =  1.0;
static pastix_complex64_t mzone = -1.0;
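/*
 * The unblocked kernel below factorizes a dense symmetric block, stored in the
 * lower triangle, as L * D * L^t, one column at a time. Diagonal entries whose
 * magnitude falls below `criterion` are replaced by +/- criterion and counted
 * in `nbpivots` (static pivoting: no row or column permutation is performed).
 */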
pastix_complex64_t *A,
pastix_int_t       *nbpivots,
    pastix_complex64_t *Akk = A;       /* Current diagonal element  A[k][k]   */
    pastix_complex64_t *Amk = A+1;     /* Column below the diagonal A[k+1][k] */
    pastix_complex64_t *Akm = A+lda;   /* Row right of the diagonal A[k][k+1] */
    pastix_complex64_t  alpha;
    for (k=0; k<n; k++, m--){
        /* Static pivoting: diagonal entries below the threshold are bumped to +/- criterion */
        if ( cabs(*Akk) < criterion ) {
            if ( creal(*Akk) < 0. ) {
                *Akk = (pastix_complex64_t)(-criterion);
            }
            else {
                *Akk = (pastix_complex64_t)criterion;
            }
        /* Save the column in the symmetric (upper) position before scaling it */
        cblas_zcopy( m, Amk, 1, Akm, lda );
        /* Scale by alpha = 1 / D(k,k) so that Amk holds L(k+1:n,k) */
        cblas_zscal( m, CBLAS_SADDR( alpha ), Amk, 1 );
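        /*
         * Rank-1 update of the trailing m-by-m submatrix:
         * A(k+1:n,k+1:n) -= D(k,k) * L(k+1:n,k) * L(k+1:n,k)^t.
         * At this point alpha is expected to hold -D(k,k) and Akk to point to
         * the next diagonal entry, i.e. the top-left corner of the trailing
         * block updated by the zsyrk call below.
         */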
        cblas_zsyrk(CblasColMajor, CblasLower, CblasNoTrans,
                    m, 1,
                    CBLAS_SADDR( alpha ), Amk, lda,
                    CBLAS_SADDR( zone ),  Akk, lda);
pastix_complex64_t *A,
pastix_int_t       *nbpivots,
    pastix_int_t k, blocknbr, blocksize, matrixsize, col;
    pastix_complex64_t *Akk, *Amk, *Akm, *Amm;
    pastix_complex64_t  alpha;
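    /*
     * Blocked L*D*L^t factorization: the n columns are processed by panels of
     * at most MAXSIZEOFBLOCKS columns; each step factorizes the diagonal
     * block, computes the panel below it, and updates the trailing submatrix.
     */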
    blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
    for (k=0; k<blocknbr; k++) {

        blocksize = pastix_imin(MAXSIZEOFBLOCKS, n-k*MAXSIZEOFBLOCKS);
        Akk = A+(k*MAXSIZEOFBLOCKS)*(lda+1); /* Diagonal block of the panel      */
        Amk = Akk + blocksize;               /* Sub-diagonal panel below Akk     */
        Akm = Akk + blocksize * lda;         /* Symmetric block right of Akk     */
        Amm = Amk + blocksize * lda;         /* Trailing submatrix to be updated */

        /* Factorize the diagonal block Akk with the unblocked kernel above */

        if ((k*MAXSIZEOFBLOCKS+blocksize) < n) {

            matrixsize = n-(k*MAXSIZEOFBLOCKS+blocksize);
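            /*
             * Compute the off-diagonal panel: a right-sided solve against the
             * unit lower triangular factor of the diagonal block leaves
             * D * L in Amk (the columns are not yet divided by the diagonal).
             */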
            cblas_ztrsm(CblasColMajor,
                        CblasRight, CblasLower,
                        CblasTrans, CblasUnit,
                        matrixsize, blocksize,
                        CBLAS_SADDR(zone), Akk, lda,
                                           Amk, lda);
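            /*
             * For each column of the panel, keep a copy of the D-scaled column
             * in the symmetric upper block Akm, then divide the column held in
             * Amk by its diagonal entry so that Amk finally contains L.
             */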
            for(col = 0; col < blocksize; col++) {
                cblas_zcopy(matrixsize, Amk + col*lda, 1,
                                        Akm + col,     lda);
                alpha = 1.0 / *(Akk + col*(lda+1));
                cblas_zscal( matrixsize, CBLAS_SADDR(alpha),
                             Amk + col*lda, 1 );
            }
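            /*
             * Trailing submatrix update: Amm -= L * (D * L)^t, using the
             * scaled panel in Amk as the left operand and the unscaled copies
             * stored in Akm as the right operand.
             */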
            cblas_zgemm(CblasColMajor,
                        CblasNoTrans, CblasNoTrans,
                        matrixsize, matrixsize, blocksize,
                        CBLAS_SADDR(mzone), Amk, lda,
                                            Akm, lda,
                        CBLAS_SADDR(zone),  Amm, lda);
    pastix_int_t        ncols, stride;
    pastix_int_t        nbpivots = 0;
    pastix_fixdbl_t     time, flops;
    pastix_complex64_t *L;
    double              criterion = solvmtx->diagthreshold;
    time = kernel_trace_start( PastixKernelSYTRF );
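    /*
     * With a 2D layout the diagonal block is stored contiguously with leading
     * dimension ncols; otherwise the 1D storage uses the full column stride of
     * the cblk.
     */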
    stride = (cblk->cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->stride;
    if ( cblk->cblktype & CBLK_COMPRESSED ) {
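        /*
         * Compressed cblk: the diagonal block is expected to be kept in
         * full-rank form (rk == -1) inside the low-rank structure, so that its
         * dense coefficients can be factorized in place.
         */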
        assert( lrL->rk == -1 );
        assert( stride == lrL->rkmax );
    }
    else {
        L = (pastix_complex64_t *)dataL;
    }
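    /*
     * Factorize the diagonal block in L*D*L^t; the call is wrapped in a
     * level-2 kernel trace event and its flop count is recorded.
     */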
    flops = FLOPS_ZSYTRF( ncols );
    kernel_trace_start_lvl2( PastixKernelLvl2SYTRF );
    core_zsytrfsp( ncols, L, stride, &nbpivots, criterion );
    kernel_trace_stop_lvl2( flops );
    kernel_trace_stop( cblk->fblokptr->inlast, PastixKernelSYTRF, ncols, 0, 0, flops, time );
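    /* Accumulate the local pivot count into the solver-wide counter atomically,
     * since several cblks may be factorized concurrently. */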
    pastix_atomic_add_32b( &(solvmtx->nbpivots), nbpivots );
const pastix_complex64_t *L,
pastix_complex64_t       *C,
pastix_complex64_t       *work )
    const pastix_complex64_t *blokA;
    const pastix_complex64_t *blokB;
    const pastix_complex64_t *blokD;
    pastix_complex64_t       *blokC;
    pastix_int_t M, N, K, lda, ldb, ldc, ldd;
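    /*
     * Update kernel: iterate over the off-diagonal blocks of the source cblk,
     * form each block product in the temporary `work` buffer (the blokD
     * pointer suggests the diagonal D is applied as part of the L*D*L^t
     * contribution), and accumulate the result into the facing cblk C.
     */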
    if ( cblk->cblktype & CBLK_LAYOUT_2D ) {
    for (iterblok=blok; iterblok<lblok; iterblok++) {
        assert( fblok < fcblk[1].fblokptr );
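        /*
         * The accumulation into the facing cblk is performed under its lock,
         * since several source cblks may contribute to the same fcblk
         * concurrently.
         */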
        pastix_cblk_lock( fcblk );
        pastix_cblk_unlock( fcblk );
    pastix_int_t nbpivots;
                     cblk, L, L, &(solvmtx->lowrank) );
    if ( (DLt != NULL) && (cblk->cblktype & CBLK_LAYOUT_2D) ) {
pastix_complex64_t *DLt,
pastix_complex64_t *work,
    pastix_int_t nbpivots;
    if ( !(cblk->cblktype & CBLK_LAYOUT_2D) ) {
    if (cblk->cblktype & CBLK_COMPRESSED) {
        assert( dataDLt == NULL );
    for( ; blok < lblk; blok++ )
        fcblk = solvmtx->cblktab + blok->fcblknm;
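        /*
         * The facing cblk may be a regular local cblk or, in the distributed
         * case, a fan-in cblk that buffers contributions destined for another
         * process.
         */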
        if ( fcblk->cblktype & CBLK_FANIN ) {
            work, lwork, &(solvmtx->lowrank) );