#include "kernels_trace.h"

#ifndef DOXYGEN_SHOULD_SKIP_THIS
#define MAXSIZEOFBLOCKS 64
static pastix_complex64_t zone  =  1.0;
static pastix_complex64_t mzone = -1.0;
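
/*
 * Unblocked LDL^h factorization of a diagonal block, one column at a time.
 * No pivoting is performed: a diagonal entry smaller than the threshold is
 * replaced by +/- criterion and counted in nbpivots (static pivoting).
 */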
               pastix_complex64_t *A,
               pastix_int_t       *nbpivots,

    pastix_int_t        k, m;
    pastix_complex64_t *Akk = A;     /* A[k  ][k  ] */
    pastix_complex64_t *Amk = A+1;   /* A[k+1][k  ] */
    pastix_complex64_t *Akm = A+lda; /* A[k  ][k+1] */
    pastix_complex64_t  zalpha;
    double              dalpha;

    m = n-1;
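    /* Eliminate the columns one by one: static pivoting on the diagonal entry,
     * scaling of the column, then Hermitian rank-1 update of the trailing part */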
    for (k=0; k<n; k++, m--){
        if ( cabs(*Akk) < criterion ) {
            if ( creal(*Akk) < 0. ) {
                *Akk = (pastix_complex64_t)(-criterion);
            }
            else {
                *Akk = (pastix_complex64_t)criterion;
            }
            (*nbpivots)++;
        }

        zalpha = 1.0 / (*Akk);
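
        /* Conjugate-copy the column Amk into the row Akm before scaling */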
        cblas_zcopy( m, Amk, 1, Akm, lda );
        LAPACKE_zlacgv_work( m, Akm, 1 );
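
        /* Scale the column by 1/A(k,k) to compute L((k+1):n,k) */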
        cblas_zscal(m, CBLAS_SADDR( zalpha ), Amk, 1 );
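
        /* Hermitian rank-1 update of the trailing submatrix with -D(k,k) * l_k * l_k^h */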
        dalpha = -1.0 * creal(*Akk);

        cblas_zher(CblasColMajor, CblasLower,
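
/*
 * Blocked variant of the factorization above: the diagonal block is processed
 * by panels of MAXSIZEOFBLOCKS columns; each small diagonal block is factorized,
 * the panel below it is solved and scaled, and the trailing submatrix is
 * updated with a single zgemm.
 */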
               pastix_complex64_t *A,
               pastix_int_t       *nbpivots,

    pastix_int_t k, blocknbr, blocksize, matrixsize, col;
    pastix_complex64_t *Akk, *Amk, *Akm, *Amm;
    pastix_complex64_t  alpha;
    blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
    for (k=0; k<blocknbr; k++) {

        blocksize = pastix_imin(MAXSIZEOFBLOCKS, n-k*MAXSIZEOFBLOCKS);
        Akk = A+(k*MAXSIZEOFBLOCKS)*(lda+1); /* L(k,  k  ) */
        Amk = Akk + blocksize;               /* L(k+1,k  ) */
        Akm = Akk + blocksize * lda;         /* L(k,  k+1) */
        Amm = Amk + blocksize * lda;         /* L(k+1,k+1) */
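
        /* Update the part of the panel below the current diagonal block, if any */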
        if ((k*MAXSIZEOFBLOCKS+blocksize) < n) {

            matrixsize = n-(k*MAXSIZEOFBLOCKS+blocksize);
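
            /* Solve the panel against the unit lower triangular L(k,k)^h:
             * Amk then holds L(k+1:n,k) * D(k,k) */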
            cblas_ztrsm(CblasColMajor,
                        CblasRight, CblasLower,
                        CblasConjTrans, CblasUnit,
                        matrixsize, blocksize,
                        CBLAS_SADDR(zone), Akk, lda,
                                           Amk, lda);
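
            /* For each column: keep a conjugated copy of L(k+1:n,k) * D(k,k) in Akm,
             * then scale Amk by 1/D(k,k) to obtain L(k+1:n,k) */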
            for(col = 0; col < blocksize; col++) {
                cblas_zcopy(matrixsize, Amk + col*lda, 1,
                                        Akm + col,     lda);
                LAPACKE_zlacgv_work( matrixsize, Akm + col, lda );

                alpha = 1.0 / *(Akk + col*(lda+1));
                cblas_zscal( matrixsize, CBLAS_SADDR(alpha),
                             Amk + col*lda, 1 );
            }
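
            /* Trailing update: Amm -= L(k+1:n,k) * ( L(k+1:n,k) * D(k,k) )^h */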
            cblas_zgemm(CblasColMajor,
                        CblasNoTrans, CblasNoTrans,
                        matrixsize, matrixsize, blocksize,
                        CBLAS_SADDR(mzone), Amk, lda,
                                            Akm, lda,
                        CBLAS_SADDR(zone),  Amm, lda);
        }
    }
}
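
/*
 * cblk-level wrapper: factorizes the diagonal block of one column block (cblk),
 * handling the low-rank (compressed) storage case and tracing the kernel.
 */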
    pastix_int_t        ncols, stride;
    pastix_int_t        nbpivots = 0;
    pastix_fixdbl_t     time, flops;
    pastix_complex64_t *L;

    double criterion = solvmtx->diagthreshold;

    time = kernel_trace_start( PastixKernelHETRF );
    stride = (cblk->cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->stride;

    if ( cblk->cblktype & CBLK_COMPRESSED ) {
        pastix_lrblock_t *lrL = (pastix_lrblock_t *)dataL;
        L = lrL->u; /* the diagonal block is stored dense in the low-rank structure */

        assert( lrL->rk == -1 );
        assert( stride == lrL->rkmax );
    }
    else {
        L = (pastix_complex64_t *)dataL;
    }
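
    /* Factorize the diagonal block, tracing the kernel at both trace levels */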
    flops = FLOPS_ZHETRF( ncols );
    kernel_trace_start_lvl2( PastixKernelLvl2HETRF );
    core_zhetrfsp( ncols, L, stride, &nbpivots, criterion ); /* blocked kernel defined above */
    kernel_trace_stop_lvl2( flops );

    kernel_trace_stop( cblk->fblokptr->inlast, PastixKernelHETRF, ncols, 0, 0, flops, time );
    pastix_atomic_add_32b( &(solvmtx->nbpivots), nbpivots );
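
/*
 * GEMM update kernel: applies the contribution of one off-diagonal block of a
 * factorized cblk to the facing column block fcblk, block per block.
 */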
                      const pastix_complex64_t *L,
                      pastix_complex64_t       *C,
                      pastix_complex64_t       *work )
{
    const pastix_complex64_t *blokA;
    const pastix_complex64_t *blokB;
    const pastix_complex64_t *blokD;
    pastix_complex64_t       *blokC;

    pastix_int_t M, N, K, lda, ldb, ldc, ldd;
    if ( cblk->cblktype & CBLK_LAYOUT_2D ) {
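        /* 2D layout: the coefficients are stored block per block, each block with
         * its own leading dimension */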
    for (iterblok=blok; iterblok<lblok; iterblok++) {

        /* The facing block must belong to fcblk */
        assert( fblok < fcblk[1].fblokptr );
    /* The facing cblk is updated under its lock */
    pastix_cblk_lock( fcblk );
    pastix_cblk_unlock( fcblk );
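
/*
 * Panel factorization of a cblk: the diagonal block is factorized, then the
 * off-diagonal blocks are solved against it; nbpivots returns the number of
 * static pivots.
 */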
    pastix_int_t nbpivots;

                     cblk, L, L, &(solvmtx->lowrank) );

    if ( (DLh != NULL) && (cblk->cblktype & CBLK_LAYOUT_2D) ) {
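
/*
 * Full 1D factorization of a cblk: panel factorization, then update of every
 * column block facing one of its off-diagonal blocks.
 */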
                    pastix_complex64_t *DLh,
                    pastix_complex64_t *work,

    pastix_int_t nbpivots;
    if ( !(cblk->cblktype & CBLK_LAYOUT_2D) ) {

    if (cblk->cblktype & CBLK_COMPRESSED) {

        assert( dataDLh == NULL );
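
    /* Loop over the off-diagonal blocks and update the corresponding facing cblks */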
    for( ; blok < lblk; blok++ )
    {
        fcblk = solvmtx->cblktab + blok->fcblknm;

        if ( fcblk->cblktype & CBLK_FANIN ) {
                             work, lwork, &(solvmtx->lowrank) );