29 #ifndef DOXYGEN_SHOULD_SKIP_THIS
30 #define MAXSIZEOFBLOCKS 64
31 static pastix_complex64_t zone = 1.0;
32 static pastix_complex64_t mzone = -1.0;
67 pastix_complex64_t *A,
73 pastix_complex64_t *Akk = A;
74 pastix_complex64_t *Amk = A+1;
75 pastix_complex64_t *Akm = A+lda;
76 pastix_complex64_t zalpha;
80 for (k=0; k<n; k++, m--){
81 if ( cabs(*Akk) < criterion ) {
82 if ( creal(*Akk) < 0. ) {
83 *Akk = (pastix_complex64_t)(-criterion);
86 *Akk = (pastix_complex64_t)criterion;
91 zalpha = 1.0 / (*Akk);
94 cblas_zcopy( m, Amk, 1, Akm, lda );
95 ret = LAPACKE_zlacgv_work( m, Akm, 1 );
99 cblas_zscal(m, CBLAS_SADDR( zalpha ), Amk, 1 );
101 dalpha = -1.0 * creal(*Akk);
106 cblas_zher(CblasColMajor, CblasLower,
148 pastix_complex64_t *A,
153 pastix_int_t k, blocknbr, blocksize, matrixsize, col, ret;
154 pastix_complex64_t *Akk, *Amk, *Akm, *Amm;
155 pastix_complex64_t alpha;
158 blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
160 for (k=0; k<blocknbr; k++) {
162 blocksize = pastix_imin(MAXSIZEOFBLOCKS, n-k*MAXSIZEOFBLOCKS);
163 Akk = A+(k*MAXSIZEOFBLOCKS)*(lda+1);
164 Amk = Akk + blocksize;
165 Akm = Akk + blocksize * lda;
166 Amm = Amk + blocksize * lda;
171 if ((k*MAXSIZEOFBLOCKS+blocksize) < n) {
173 matrixsize = n-(k*MAXSIZEOFBLOCKS+blocksize);
182 cblas_ztrsm(CblasColMajor,
183 CblasRight, CblasLower,
184 CblasConjTrans, CblasUnit,
185 matrixsize, blocksize,
186 CBLAS_SADDR(zone), Akk, lda,
190 for(col = 0; col < blocksize; col++) {
192 cblas_zcopy(matrixsize, Amk + col*lda, 1,
194 ret = LAPACKE_zlacgv_work( matrixsize, Akm + col, lda );
198 alpha = 1.0 / *(Akk + col*(lda+1));
199 cblas_zscal( matrixsize, CBLAS_SADDR(alpha),
204 cblas_zgemm(CblasColMajor,
205 CblasNoTrans, CblasNoTrans,
206 matrixsize, matrixsize, blocksize,
207 CBLAS_SADDR(mzone), Amk, lda,
209 CBLAS_SADDR(zone), Amm, lda);
248 pastix_complex64_t *L;
255 stride = (cblk->
cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->
stride;
257 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
263 assert( lrL->
rk == -1 );
264 assert( stride == lrL->
rkmax );
266 L = (pastix_complex64_t *)dataL;
276 flops = FLOPS_ZHETRF( ncols );
279 kernel_trace_stop_lvl2( flops );
284 pastix_atomic_add_32b( &(solvmtx->
nbpivots), nbpivots );
326 const pastix_complex64_t *L,
327 pastix_complex64_t *C,
328 pastix_complex64_t *work )
333 const pastix_complex64_t *blokA;
334 const pastix_complex64_t *blokB;
335 const pastix_complex64_t *blokD;
336 pastix_complex64_t *blokC;
347 if ( cblk->
cblktype & CBLK_LAYOUT_2D ) {
364 for (iterblok=blok; iterblok<lblok; iterblok++) {
370 assert( fblok < fcblk[1].fblokptr );
392 pastix_cblk_lock( fcblk );
400 pastix_cblk_unlock( fcblk );
452 cblk, L, L, &(solvmtx->
lowrank) );
454 if ( (DLh != NULL) && (cblk->
cblktype & CBLK_LAYOUT_2D) ) {
496 pastix_complex64_t *DLh,
497 pastix_complex64_t *work,
506 if ( !(cblk->
cblktype & CBLK_LAYOUT_2D) ) {
510 if (cblk->
cblktype & CBLK_COMPRESSED) {
514 assert( dataDLh == NULL );
525 for( ; blok < lblk; blok++ )
529 if ( fcblk->
cblktype & CBLK_FANIN ) {
543 work, lwork, &(solvmtx->
lowrank) );
581 assert( cblk->
cblktype & CBLK_TASKS_2D );
588 for( i=0; blok < lblk; i++, blok++ )
590 assert( !((solvmtx->
cblktab + blok->
fcblknm)->cblktype & CBLK_RECV) );
594 while ( ( blok < lblk ) &&
595 ( blok[0].fcblknm == blok[1].fcblknm ) &&
596 ( blok[0].lcblknm == blok[1].lcblknm ) )
628 pastix_complex64_t *work )
635 if ( fcbk->
cblktype & CBLK_FANIN ) {
649 while ( ( blok < lblk ) &&
650 ( blok[-1].fcblknm == blok[0].fcblknm ) &&
651 ( blok[-1].lcblknm == blok[0].lcblknm ) );
static void core_zhetf2sp(pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the sequential static pivoting factorization of the n-by-n Hermitian matrix A such that A = L * D * L^h.
int cpucblk_zhetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex64_t *DLh, pastix_complex64_t *work, pastix_int_t lwork)
Perform the LDL^h factorization of a given panel and apply all its updates.
int cpucblk_zhetrfsp1d_hetrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Computes the LDL^h factorization of the diagonal block in a panel.
int cpucblk_zhetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLh)
Compute the LDL^h factorization of one panel.
void cpucblk_zhetrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex64_t *work)
Apply the updates of the LDL^h factorization of a given panel.
void core_zhetrfsp(pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the block static pivoting factorization of the n-by-n Hermitian matrix A such that A = L * D * L^h.
void core_zhetrfsp1d_gemm(const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const pastix_complex64_t *L, pastix_complex64_t *C, pastix_complex64_t *work)
int cpucblk_zhetrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LDL^h factorization of a given panel and submit tasks for the subsequent updates.
BEGIN_C_DECLS typedef int pastix_int_t
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
static void kernel_trace_stop(int8_t inlast, pastix_ktype_t ktype, int m, int n, int k, double flops, double starttime)
Stop the trace of a single kernel.
static double kernel_trace_start(pastix_ktype_t ktype)
Start the trace of a single kernel.
int core_zgemdm(pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K, pastix_complex64_t alpha, const pastix_complex64_t *A, int LDA, const pastix_complex64_t *B, int LDB, pastix_complex64_t beta, pastix_complex64_t *C, int LDC, const pastix_complex64_t *D, int incD, pastix_complex64_t *WORK, int LWORK)
Perform one of the following matrix-matrix operations.
void cpucblk_ztrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
void cpucblk_zalloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
pastix_fixdbl_t cpucblk_zgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, pastix_complex64_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
void cpucblk_zrelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void cpucblk_zscalo(pastix_trans_t trans, const SolverCblk *cblk, void *dataL, void *dataLD)
Copy the L term with scaling for the two-terms algorithm.
void cpucblk_zalloc_lrws(const SolverCblk *cblk, pastix_lrblock_t *lrblok, pastix_complex64_t *ws)
Initialize the lrblock structures of all the blocks of the associated cblk from a workspace.
The block low-rank structure to hold a matrix in low-rank form.
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
static int is_block_inside_fblock(const SolverBlok *blok, const SolverBlok *fblok)
Check if a block is included inside another one.
SolverBlok *restrict bloktab
static void * cblk_getdataU(const SolverCblk *cblk)
Get the pointer to the data associated to the upper part of the cblk.
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
SolverCblk *restrict cblktab
volatile int32_t nbpivots
Solver column block structure.
Solver column block structure.