21 #ifndef _pastix_ccores_h_
22 #define _pastix_ccores_h_
24 #ifndef DOXYGEN_SHOULD_SKIP_THIS
25 #define pastix_cblk_lock( cblk_ ) pastix_atomic_lock( &((cblk_)->lock) )
26 #define pastix_cblk_unlock( cblk_ ) pastix_atomic_unlock( &((cblk_)->lock) )
40 pastix_complex32_t *A,
45 unsigned long long int seed );
48 const pastix_complex32_t *A,
50 pastix_complex32_t *B,
55 pastix_complex32_t alpha,
56 const pastix_complex32_t *A,
58 pastix_complex32_t beta,
59 pastix_complex32_t *B,
66 pastix_complex32_t alpha,
67 const pastix_complex32_t *A,
69 const pastix_complex32_t *B,
71 pastix_complex32_t beta,
72 pastix_complex32_t *C,
74 const pastix_complex32_t *D,
76 pastix_complex32_t *WORK,
84 pastix_complex32_t *A,
87 pastix_complex32_t *tau,
88 pastix_complex32_t *work,
97 pastix_complex32_t *A,
100 pastix_complex32_t *tau,
101 pastix_complex32_t *work,
105 pastix_int_t maxrank,
109 pastix_complex32_t *A,
111 pastix_complex32_t *tau,
112 pastix_complex32_t *B,
114 pastix_complex32_t *tau_b,
115 pastix_complex32_t *work,
119 pastix_int_t maxrank,
124 pastix_complex32_t *A,
127 pastix_complex32_t *tau,
128 pastix_complex32_t *work,
135 pastix_complex32_t alpha,
136 const pastix_complex32_t *A,
138 pastix_complex32_t beta,
139 pastix_complex32_t *B,
144 const pastix_complex32_t *A,
146 const pastix_complex32_t *D,
148 pastix_complex32_t *B,
159 pastix_complex32_t *U,
161 pastix_complex32_t *V,
169 pastix_complex32_t *U,
171 pastix_complex32_t *V,
181 pastix_complex32_t *U,
183 pastix_complex32_t *V,
192 pastix_complex32_t *A,
194 pastix_int_t *nbpivots,
197 pastix_complex32_t *A,
199 pastix_int_t *nbpivots,
202 pastix_complex32_t *A,
204 pastix_int_t *nbpivots,
206 #if defined(PRECISION_z) || defined(PRECISION_c)
208 pastix_complex32_t *A,
210 pastix_int_t *nbpivots,
214 pastix_complex32_t *A,
216 pastix_int_t *nbpivots,
234 const pastix_complex32_t *L1,
235 pastix_complex32_t *L2,
236 const pastix_complex32_t *U1,
237 pastix_complex32_t *U2 );
247 pastix_complex32_t *work,
266 pastix_int_t blok_mk,
267 pastix_int_t blok_nk,
268 pastix_int_t blok_mn,
304 pastix_complex32_t *work,
305 pastix_int_t lwork );
320 pastix_complex32_t *work,
321 pastix_int_t lwork );
327 #if defined(PRECISION_z) || defined(PRECISION_c)
341 pastix_complex32_t *work1,
342 pastix_complex32_t *work2,
343 pastix_int_t lwork );
358 pastix_complex32_t *work,
359 pastix_int_t lwork );
379 pastix_complex32_t *Dlt,
380 pastix_complex32_t *work,
381 pastix_int_t lwork );
390 pastix_complex32_t *ws );
401 const SolverMatrix *solvmtx,
402 const pastix_bcsc_t *bcsc,
403 pastix_int_t itercblk );
405 const SolverMatrix *solvmtx,
406 const pastix_bcsc_t *bcsc,
407 pastix_int_t itercblk,
408 const char *directory );
411 pastix_complex32_t *S,
432 SolverMatrix *solvmtx,
435 SolverMatrix *solvmtx,
440 SolverMatrix *solvmtx );
441 void cpucblk_cupdate_reqtab( SolverMatrix *solvmtx );
442 #if defined( PASTIX_WITH_MPI )
444 SolverMatrix *solvmtx,
446 void cpucblk_cisend_rhs_bwd( SolverMatrix *solvmtx,
450 void cpucblk_cmpi_rhs_fwd_progress(
const args_solve_t *enums,
451 SolverMatrix *solvmtx,
455 SolverMatrix *solvmtx,
461 SolverMatrix *solvmtx,
466 SolverMatrix *solvmtx,
469 void cpucblk_cmpi_rhs_bwd_progress(
const args_solve_t *enums,
470 SolverMatrix *solvmtx,
474 SolverMatrix *solvmtx,
480 SolverMatrix *solvmtx,
485 SolverMatrix *solvmtx,
492 pastix_complex32_t *work,
517 const SolverMatrix *solvmtx,
520 pastix_int_t *gain );
540 pastix_complex32_t *b,
549 const pastix_complex32_t *B,
551 pastix_complex32_t *C,
555 SolverMatrix *datacode,
559 SolverMatrix *datacode,
565 pastix_complex32_t *b,
567 pastix_complex32_t *work );
577 #if defined(PRECISION_z) || defined(PRECISION_c)
581 const pastix_complex32_t *L,
582 pastix_complex32_t *C,
583 pastix_complex32_t *work );
588 const pastix_complex32_t *L,
589 pastix_complex32_t *C,
590 pastix_complex32_t *work );
void core_chetrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting factorization of the hermitian matrix n-by-n A such that A = L * D ...
int cpucblk_chetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLh)
Compute the LDL^h factorization of one panel.
int cpucblk_chetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *DLh, pastix_complex32_t *work, pastix_int_t lwork)
Perform the LDL^h factorization of a given panel and apply all its updates.
void core_chetrfsp1d_gemm(const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const pastix_complex32_t *L, pastix_complex32_t *C, pastix_complex32_t *work)
int cpucblk_chetrfsp1d_hetrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Computes the LDL^h factorization of the diagonal block in a panel.
int cpucblk_cpxtrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Compute the LL^t factorization of one panel.
int cpucblk_cpxtrfsp1d_pxtrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the LL^t factorization of the diagonal block in a panel.
int cpucblk_cpxtrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_int_t lwork)
Perform the LL^t factorization of a given panel and apply all its updates.
int core_crqrcp(float tol, pastix_int_t maxrank, int refine, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *jpvt, pastix_complex32_t *tau, pastix_complex32_t *work, pastix_int_t lwork, float *rwork)
Compute a randomized QR factorization.
int core_ctradd(pastix_uplo_t uplo, pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex32_t alpha, const pastix_complex32_t *A, pastix_int_t LDA, pastix_complex32_t beta, pastix_complex32_t *B, pastix_int_t LDB)
Add two triangular matrices together as in PBLAS pctradd.
pastix_fixdbl_t core_clrorthu_cgs(pastix_int_t M1, pastix_int_t N1, pastix_int_t M2, pastix_int_t N2, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, pastix_complex32_t *U, pastix_int_t ldu, pastix_complex32_t *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using CGS.
void core_csytrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting factorization of the symmetric matrix n-by-n A such that A = L * D ...
int core_cgeadd(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex32_t alpha, const pastix_complex32_t *A, pastix_int_t LDA, pastix_complex32_t beta, pastix_complex32_t *B, pastix_int_t LDB)
Add two matrices together.
void core_cplrnt(int m, int n, pastix_complex32_t *A, int lda, int gM, int m0, int n0, unsigned long long int seed)
Generate a random tile.
int core_crqrrt(float tol, pastix_int_t maxrank, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_complex32_t *tau, pastix_complex32_t *B, pastix_int_t ldb, pastix_complex32_t *tau_b, pastix_complex32_t *work, pastix_int_t lwork, float normA)
Compute a randomized QR factorization with rotation technique.
int core_cscalo(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, const pastix_complex32_t *A, pastix_int_t lda, const pastix_complex32_t *D, pastix_int_t ldd, pastix_complex32_t *B, pastix_int_t ldb)
Scale a matrix by a diagonal out of place.
int core_cgemdm(pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K, pastix_complex32_t alpha, const pastix_complex32_t *A, int LDA, const pastix_complex32_t *B, int LDB, pastix_complex32_t beta, pastix_complex32_t *C, int LDC, const pastix_complex32_t *D, int incD, pastix_complex32_t *WORK, int LWORK)
Perform one of the following matrix-matrix operations.
pastix_fixdbl_t core_clrorthu_fullqr(pastix_int_t M, pastix_int_t N, pastix_int_t rank, pastix_complex32_t *U, pastix_int_t ldu, pastix_complex32_t *V, pastix_int_t ldv)
Try to orthognalize the u part of the low-rank form, and update the v part accordingly using full QR.
void core_cpotrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t .
pastix_fixdbl_t core_clrorthu_partialqr(pastix_int_t M, pastix_int_t N, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, pastix_complex32_t *U, pastix_int_t ldu, pastix_complex32_t *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using partial ...
void core_cgetmo(int m, int n, const pastix_complex32_t *A, int lda, pastix_complex32_t *B, int ldb)
Transposes a m-by-n matrix out of place using an extra workspace of size m-by-n.
void core_cpxtrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting LL^t factorization of the matrix n-by-n A = L * L^t .
int core_ctqrcp(float tol, pastix_int_t maxrank, int unused, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *jpvt, pastix_complex32_t *tau, pastix_complex32_t *work, pastix_int_t lwork, float *rwork)
Compute a randomized QR factorization with truncated updates.
void core_cgetrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting LU factorization of the matrix m-by-n A = L * U.
int core_cpqrcp(float tol, pastix_int_t maxrank, int full_update, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *jpvt, pastix_complex32_t *tau, pastix_complex32_t *work, pastix_int_t lwork, float *rwork)
Compute a rank-reavealing QR factorization.
void core_csytrfsp1d_gemm(const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const pastix_complex32_t *L, pastix_complex32_t *C, pastix_complex32_t *work)
void cpucblk_cfillin(pastix_coefside_t side, const SolverMatrix *solvmtx, const pastix_bcsc_t *bcsc, pastix_int_t itercblk)
Initialize the coeftab structure from the internal bcsc.
void cpucblk_crequest_cleanup(pastix_coefside_t side, pastix_int_t sched, SolverMatrix *solvmtx)
Waitall routine for current cblk request.
void cpucblk_crelease_rhs_fwd_deps(const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void cpucblk_calloc_lr(pastix_coefside_t side, SolverCblk *cblk, int rkmax)
Allocate the cblk structure to store the coefficient.
void cpucblk_cadd(pastix_coefside_t side, float alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const pastix_lr_t *lowrank)
Add two column bloks in full rank format.
void cpucblk_cmemory(pastix_coefside_t side, const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_int_t *orig, pastix_int_t *gain)
Return the memory gain of the low-rank form over the full-rank form for a single column-block.
pastix_fixdbl_t cpucblk_cgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, pastix_complex32_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
pastix_fixdbl_t cpublok_ctrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, pastix_int_t blok_m, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
void cpucblk_cscalo(pastix_trans_t trans, SolverCblk *cblk, void *dataL, void *dataLD)
Copy the L term with scaling for the two-terms algorithm.
int cpucblk_csytrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *Dlt, pastix_complex32_t *work, pastix_int_t lwork)
Perform the LDL^t factorization of a given panel and apply all its updates.
int cpucblk_cpotrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_int_t lwork)
Perform the Cholesky factorization of a given panel and apply all its updates.
int cpucblk_cgetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_int_t lwork)
Perform the LU factorization of a given panel and apply all its updates.
void cpucblk_crequest_rhs_fwd_cleanup(const args_solve_t *enums, pastix_int_t sched, SolverMatrix *solvmtx, pastix_rhs_t rhsb)
Waitall routine for current cblk request.
int cpucblk_cgeaddsp1d(const SolverCblk *cblk1, SolverCblk *cblk2, const pastix_complex32_t *L1, pastix_complex32_t *L2, const pastix_complex32_t *U1, pastix_complex32_t *U2)
Add two column blocks together.
void cpucblk_cuncompress(pastix_coefside_t side, SolverCblk *cblk)
Uncompress a single column block from low-rank format to full-rank format.
int cpucblk_cincoming_rhs_fwd_deps(int rank, const args_solve_t *enums, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
void cpucblk_calloc_fr(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
int cpucblk_cincoming_rhs_bwd_deps(int rank, const args_solve_t *enums, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
void cpucblk_crecv_rhs_forward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_rhs_t b)
Receive the rhs associated to a cblk->lcolidx to the remote node.
void cpucblk_crelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
int cpucblk_cpotrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Compute the Cholesky factorization of one panel.
void cpucblk_ctrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
void cpucblk_cdump(pastix_coefside_t side, const SolverCblk *cblk, FILE *stream)
Dump a single column block into a FILE in a human readale format.
void cpucblk_crequest_rhs_bwd_cleanup(const args_solve_t *enums, pastix_int_t sched, SolverMatrix *solvmtx, pastix_rhs_t rhsb)
Waitall routine for current cblk request.
void cpucblk_csend_rhs_backward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t b)
Send the rhs associated to a cblk->lcolidx to the remote node.
pastix_fixdbl_t cpublok_ccompress(const pastix_lr_t *lowrank, pastix_int_t M, pastix_int_t N, pastix_lrblock_t *blok)
Compress a single block from full-rank to low-rank format.
void cpucblk_csend_rhs_forward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t b)
Send the rhs associated to a cblk->lcolidx to the remote node.
void cpucblk_crelease_rhs_bwd_deps(const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void cpucblk_cinit(pastix_coefside_t side, const SolverMatrix *solvmtx, const pastix_bcsc_t *bcsc, pastix_int_t itercblk, const char *directory)
Fully initialize a single cblk.
int cpucblk_cdiff(pastix_coefside_t side, const SolverCblk *cblkA, SolverCblk *cblkB)
Compare two column blocks in full-rank format.
void cpublok_cscalo(pastix_trans_t trans, SolverCblk *cblk, pastix_int_t blok_m, const void *A, const void *dataD, void *dataB)
Copy the lower terms of the block with scaling for the two-terms algorithm.
void cpucblk_calloc_lrws(const SolverCblk *cblk, pastix_lrblock_t *lrblok, pastix_complex32_t *ws)
Initialize lrblock structure from a workspace from all blocks of the cblk associated.
void cpucblk_calloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
int cpucblk_csytrfsp1d_sytrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Computes the LDL^t factorization of the diagonal block in a panel.
int cpucblk_csytrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLt)
Compute the LDL^t factorization of one panel.
void cpucblk_cfree(pastix_coefside_t side, SolverCblk *cblk)
Free the cblk structure that store the coefficient.
int cpucblk_cincoming_deps(int mt_flag, pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
int cpucblk_cgetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *U)
Compute the LU factorization of one panel.
int cpucblk_cpotrfsp1d_potrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the Cholesky factorization of the diagonal block in a panel.
void cpucblk_crecv_rhs_backward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t b)
Receive the rhs associated to a cblk->lcolidx to the remote node.
void cpucblk_cgetschur(const SolverCblk *cblk, int upper_part, pastix_complex32_t *S, pastix_int_t lds)
Extract a cblk panel of the Schur complement to a dense lapack form.
pastix_int_t cpucblk_ccompress(const SolverMatrix *solvmtx, pastix_coefside_t side, int max_ilulvl, SolverCblk *cblk)
Compress a single column block from full-rank to low-rank format.
int cpucblk_cgetrfsp1d_getrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *U)
Compute the LU factorization of the diagonal block in a panel.
pastix_fixdbl_t cpublok_cgemmsp(pastix_trans_t trans, const SolverCblk *cblk, SolverCblk *fcblk, pastix_int_t blok_mk, pastix_int_t blok_nk, pastix_int_t blok_mn, const void *A, const void *B, void *C, const pastix_lr_t *lowrank)
Compute the CPU gemm associated to a couple of off-diagonal blocks.
Structure to define the type of function to use for the low-rank kernels and their parameters.
The block low-rank structure to hold a matrix in low-rank form.
void solve_cblk_cdiag(const SolverCblk *cblk, int nrhs, pastix_complex32_t *b, int ldb, pastix_complex32_t *work)
Apply the diagonal solve related to one cblk to all the right hand side.
void solve_cblk_ctrsmsp_backward(const args_solve_t *enums, SolverMatrix *datacode, SolverCblk *cblk, pastix_rhs_t b)
Apply a backward solve related to one cblk to all the right hand side.
void solve_blok_ctrsm(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, int nrhs, const void *dataA, pastix_complex32_t *b, int ldb)
Apply a solve trsm update related to a diagonal block of the matrix A.
void solve_cblk_ctrsmsp_forward(const args_solve_t *enums, SolverMatrix *datacode, const SolverCblk *cblk, pastix_rhs_t b)
Apply a forward solve related to one cblk to all the right hand side.
void solve_blok_cgemm(pastix_side_t side, pastix_trans_t trans, pastix_int_t nrhs, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcbk, const void *dataA, const pastix_complex32_t *B, pastix_int_t ldb, pastix_complex32_t *C, pastix_int_t ldc)
Apply a solve gemm update related to a single block of the matrix A.
enum pastix_diag_e pastix_diag_t
Diagonal.
enum pastix_uplo_e pastix_uplo_t
Upper/Lower part.
enum pastix_side_e pastix_side_t
Side of the operation.
enum pastix_trans_e pastix_trans_t
Transpostion.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
Solver column block structure.