25 #ifndef DOXYGEN_SHOULD_SKIP_THIS
26 #define MAXSIZEOFBLOCKS 64
27 static float sone = 1.0;
28 static float msone = -1.0;
73 float *Akk, *Aik, alpha;
75 minMN = pastix_imin( m, n );
78 for (k=0; k<minMN; k++) {
81 if ( fabsf(*Akk) < criterion ) {
83 *Akk = (float)(-criterion);
86 *Akk = (float)criterion;
93 cblas_sscal(m-k-1, ( alpha ), Aik, 1 );
98 cblas_sger(CblasColMajor, m-k-1, n-k-1,
145 float *Akk, *Lik, *Ukj, *Aij;
147 blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
151 for (k=0; k<blocknbr; k++) {
153 tempm = n - k * MAXSIZEOFBLOCKS;
154 blocksize = pastix_imin(MAXSIZEOFBLOCKS, tempm);
155 Lik = Akk + blocksize;
156 Ukj = Akk + blocksize*lda;
157 Aij = Ukj + blocksize;
160 core_sgetf2sp( tempm, blocksize, Akk, lda, nbpivots, criterion );
162 matrixsize = tempm - blocksize;
163 if ( matrixsize > 0 ) {
166 cblas_strsm(CblasColMajor,
167 CblasLeft, CblasLower,
168 CblasNoTrans, CblasUnit,
169 blocksize, matrixsize,
174 cblas_sgemm(CblasColMajor,
175 CblasNoTrans, CblasNoTrans,
176 matrixsize, matrixsize, blocksize,
182 Akk += blocksize * (lda+1);
233 stride = (cblk->
cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->
stride;
235 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
240 assert( (lrL->
rk == -1) && (lrU->
rk == -1) );
246 assert( stride == lrL->
rkmax );
247 assert( stride == lrU->
rkmax );
258 flops = FLOPS_SGETRF( ncols, ncols );
261 kernel_trace_stop_lvl2( flops );
269 pastix_atomic_add_32b( &(solvmtx->
nbpivots), nbpivots );
320 cblk, L, L, &(solvmtx->
lowrank) );
323 cblk, U, U, &(solvmtx->
lowrank) );
372 for( ; blok < lblk; blok++ )
376 if ( fcblk->
cblktype & CBLK_FANIN ) {
384 work, lwork, &(solvmtx->
lowrank) );
387 if ( blok+1 < lblk ) {
391 work, lwork, &(solvmtx->
lowrank) );
430 assert( cblk->
cblktype & CBLK_TASKS_2D );
437 for( i=0; blok < lblk; i++, blok++ )
439 assert( !((solvmtx->
cblktab + blok->
fcblknm)->cblktype & CBLK_RECV) );
443 while ( ( blok < lblk ) &&
444 ( blok[0].fcblknm == blok[1].fcblknm ) &&
445 ( blok[0].lcblknm == blok[1].lcblknm ) )
486 if ( fcbk->
cblktype & CBLK_FANIN ) {
496 work, lwork, &(solvmtx->
lowrank) );
499 if ( blok+1 < lblk ) {
503 work, lwork, &(solvmtx->
lowrank) );
508 while ( ( blok < lblk ) &&
509 ( blok[-1].fcblknm == blok[0].fcblknm ) &&
510 ( blok[-1].lcblknm == blok[0].lcblknm ) );
static void core_sgetf2sp(pastix_int_t m, pastix_int_t n, float *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the sequential static pivoting LU factorization of the m-by-n matrix A = L * U.
BEGIN_C_DECLS typedef int pastix_int_t
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
static void kernel_trace_stop(int8_t inlast, pastix_ktype_t ktype, int m, int n, int k, double flops, double starttime)
Stop the trace of a single kernel.
static double kernel_trace_start(pastix_ktype_t ktype)
Start the trace of a single kernel.
int core_sgeadd(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, float alpha, const float *A, pastix_int_t LDA, float beta, float *B, pastix_int_t LDB)
Add two matrices together.
void core_sgetmo(int m, int n, const float *A, int lda, float *B, int ldb)
Transposes an m-by-n matrix out of place using an extra workspace of size m-by-n.
void core_sgetrfsp(pastix_int_t n, float *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting LU factorization of the n-by-n matrix A = L * U.
void cpucblk_sgetrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, float *work, pastix_int_t lwork)
Apply the updates of the LU factorization of a given panel.
int cpucblk_sgetrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LU factorization of a given panel and submit tasks for the subsequent updates.
pastix_fixdbl_t cpucblk_sgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, float *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
int cpucblk_sgetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, float *work, pastix_int_t lwork)
Perform the LU factorization of a given panel and apply all its updates.
void cpucblk_strsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
void cpucblk_salloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
int cpucblk_sgetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *U)
Compute the LU factorization of one panel.
int cpucblk_sgetrfsp1d_getrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL, void *dataU)
Compute the LU factorization of the diagonal block in a panel.
void cpucblk_srelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
The block low-rank structure to hold a matrix in low-rank form.
SolverBlok *restrict bloktab
static void * cblk_getdataU(const SolverCblk *cblk)
Get the pointer to the data associated to the upper part of the cblk.
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
SolverCblk *restrict cblktab
volatile int32_t nbpivots
Solver column block structure.
Solver column block structure.