23 #include "kernels_trace.h"
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 #define MAXSIZEOFBLOCKS 64 /* Maximum width of a diagonal block: the blocked factorization splits the n columns into ceil(n / MAXSIZEOFBLOCKS) blocks of at most this size. */
29 static pastix_complex64_t zone = 1.0; /* Constant 1, passed by address through CBLAS_SADDR as a scalar argument of the ztrsm and zgemm calls. */
30 static pastix_complex64_t mzone = -1.0; /* Constant -1, passed by address through CBLAS_SADDR as the scalar preceding the A operand in the zgemm call. */
65 pastix_complex64_t *A,
67 pastix_int_t *nbpivots,
70 pastix_int_t k, m, ret;
71 pastix_complex64_t *Akk = A;
72 pastix_complex64_t *Amk = A+1;
73 pastix_complex64_t *Akm = A+lda;
74 pastix_complex64_t zalpha;
78 for (k=0; k<n; k++, m--){
79 if ( cabs(*Akk) < criterion ) {
80 if ( creal(*Akk) < 0. ) {
81 *Akk = (pastix_complex64_t)(-criterion);
84 *Akk = (pastix_complex64_t)criterion;
89 zalpha = 1.0 / (*Akk);
91 cblas_zcopy( m, Amk, 1, Akm, lda );
92 ret = LAPACKE_zlacgv_work( m, Akm, 1 );
96 cblas_zscal(m, CBLAS_SADDR( zalpha ), Amk, 1 );
98 dalpha = -1.0 * creal(*Akk);
103 cblas_zher(CblasColMajor, CblasLower,
145 pastix_complex64_t *A,
147 pastix_int_t *nbpivots,
150 pastix_int_t k, blocknbr, blocksize, matrixsize, col, ret;
151 pastix_complex64_t *Akk, *Amk, *Akm, *Amm;
152 pastix_complex64_t alpha;
155 blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
157 for (k=0; k<blocknbr; k++) {
159 blocksize = pastix_imin(MAXSIZEOFBLOCKS, n-k*MAXSIZEOFBLOCKS);
160 Akk = A+(k*MAXSIZEOFBLOCKS)*(lda+1);
161 Amk = Akk + blocksize;
162 Akm = Akk + blocksize * lda;
163 Amm = Amk + blocksize * lda;
168 if ((k*MAXSIZEOFBLOCKS+blocksize) < n) {
170 matrixsize = n-(k*MAXSIZEOFBLOCKS+blocksize);
179 cblas_ztrsm(CblasColMajor,
180 CblasRight, CblasLower,
181 CblasConjTrans, CblasUnit,
182 matrixsize, blocksize,
183 CBLAS_SADDR(zone), Akk, lda,
187 for(col = 0; col < blocksize; col++) {
189 cblas_zcopy(matrixsize, Amk + col*lda, 1,
191 ret = LAPACKE_zlacgv_work( matrixsize, Akm + col, lda );
195 alpha = 1.0 / *(Akk + col*(lda+1));
196 cblas_zscal( matrixsize, CBLAS_SADDR(alpha),
201 cblas_zgemm(CblasColMajor,
202 CblasNoTrans, CblasNoTrans,
203 matrixsize, matrixsize, blocksize,
204 CBLAS_SADDR(mzone), Amk, lda,
206 CBLAS_SADDR(zone), Amm, lda);
242 pastix_int_t ncols, stride;
243 pastix_int_t nbpivots = 0;
244 pastix_fixdbl_t time, flops;
245 pastix_complex64_t *L;
247 double criterion = solvmtx->diagthreshold;
249 time = kernel_trace_start( PastixKernelHETRF );
252 stride = (cblk->
cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->
stride;
254 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
257 assert( lrL->
rk == -1 );
261 assert( stride == lrL->
rkmax );
263 L = (pastix_complex64_t *)dataL;
273 flops = FLOPS_ZHETRF( ncols );
274 kernel_trace_start_lvl2( PastixKernelLvl2HETRF );
276 kernel_trace_stop_lvl2( flops );
278 kernel_trace_stop( cblk->
fblokptr->
inlast, PastixKernelHETRF, ncols, 0, 0, flops, time );
281 pastix_atomic_add_32b( &(solvmtx->nbpivots), nbpivots );
323 const pastix_complex64_t *L,
324 pastix_complex64_t *C,
325 pastix_complex64_t *work )
330 const pastix_complex64_t *blokA;
331 const pastix_complex64_t *blokB;
332 const pastix_complex64_t *blokD;
333 pastix_complex64_t *blokC;
335 pastix_int_t M, N, K, lda, ldb, ldc, ldd;
344 if ( cblk->
cblktype & CBLK_LAYOUT_2D ) {
361 for (iterblok=blok; iterblok<lblok; iterblok++) {
367 assert( fblok < fcblk[1].fblokptr );
389 pastix_cblk_lock( fcblk );
397 pastix_cblk_unlock( fcblk );
441 pastix_int_t nbpivots;
449 cblk, L, L, &(solvmtx->lowrank) );
451 if ( (DLh != NULL) && (cblk->
cblktype & CBLK_LAYOUT_2D) ) {
493 pastix_complex64_t *DLh,
494 pastix_complex64_t *work,
501 pastix_int_t nbpivots;
503 if ( !(cblk->
cblktype & CBLK_LAYOUT_2D) ) {
507 if (cblk->
cblktype & CBLK_COMPRESSED) {
511 assert( dataDLh == NULL );
522 for( ; blok < lblk; blok++ )
524 fcblk = solvmtx->cblktab + blok->
fcblknm;
526 if ( fcblk->
cblktype & CBLK_FANIN ) {
540 work, lwork, &(solvmtx->lowrank) );
static void core_zhetf2sp(pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the sequential static pivoting factorization of the n-by-n Hermitian matrix A such that A = L * D * L^h.
int cpucblk_zhetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex64_t *DLh, pastix_complex64_t *work, pastix_int_t lwork)
Perform the LDL^h factorization of a given panel and apply all its updates.
int cpucblk_zhetrfsp1d_hetrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Computes the LDL^h factorization of the diagonal block in a panel.
int cpucblk_zhetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLh)
Compute the LDL^h factorization of one panel.
void core_zhetrfsp(pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the block static pivoting factorization of the n-by-n Hermitian matrix A such that A = L * D * L^h.
void core_zhetrfsp1d_gemm(const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const pastix_complex64_t *L, pastix_complex64_t *C, pastix_complex64_t *work)
int core_zgemdm(pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K, pastix_complex64_t alpha, const pastix_complex64_t *A, int LDA, const pastix_complex64_t *B, int LDB, pastix_complex64_t beta, pastix_complex64_t *C, int LDC, const pastix_complex64_t *D, int incD, pastix_complex64_t *WORK, int LWORK)
Perform one of the following matrix-matrix operations.
void cpucblk_ztrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
void cpucblk_zalloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
pastix_fixdbl_t cpucblk_zgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, pastix_complex64_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
void cpucblk_zrelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void cpucblk_zalloc_lrws(const SolverCblk *cblk, pastix_lrblock_t *lrblok, pastix_complex64_t *ws)
Initialize the lrblock structures of all the blocks of the associated cblk from a workspace.
void cpucblk_zscalo(pastix_trans_t trans, SolverCblk *cblk, void *dataL, void *dataLD)
Copy the L term with scaling for the two-terms algorithm.
The block low-rank structure to hold a matrix in low-rank form.
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
static int is_block_inside_fblock(const SolverBlok *blok, const SolverBlok *fblok)
Check if a block is included inside another one.
static void * cblk_getdataU(const SolverCblk *cblk)
Get the pointer to the data associated to the upper part of the cblk.
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
Solver column block structure.