100 if ( lda < pastix_imax(1,M) )
108 if ( ldb < pastix_imax(1,M) ) {
113 #if defined(PRECISION_z) || defined(PRECISION_c)
115 for( j=0; j<N; j++, D += ldd ) {
117 for( i=0; i<M; i++, B++, A++ ) {
127 for( j=0; j<N; j++, D += ldd ) {
129 for( i=0; i<M; i++, B++, A++ ) {
196 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
203 for(; blok < lblk; blok++, lrL++, lrLD++) {
206 assert( lrLD->
rk == -1 );
210 lrLD->
rkmax = lrL->rkmax;
212 if ( lrL->rk == -1 ) {
213 assert( M == lrL->rkmax );
216 memcpy( lrLD->
u, lrL->u, lrL->rkmax * N *
sizeof(
float) );
226 memcpy( lrLD->
u, lrL->u, M * lrL->rk *
sizeof(
float) );
227 lrLD->
v = ((
float *)lrLD->
u) + M * lrL->rk;
228 memcpy( lrLD->
v, lrL->v, N * lrL->rkmax *
sizeof(
float) );
244 else if ( cblk->
cblktype & CBLK_LAYOUT_2D ) {
245 L = D = (
float *)dataL;
246 LD = (
float *)dataLD;
249 for(; blok < lblk; blok++) {
259 L = D = (
float *)dataL;
260 LD = (
float *)dataLD;
336 assert( cblk->
cblktype & CBLK_LAYOUT_2D );
338 blok = fblok + blok_m;
342 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
347 for (; (blok < lblok) && (blok->
fcblknm == cblk_m); blok++, lrA++, lrB++) {
352 lrB->
rkmax = lrA->rkmax;
354 if ( lrB->
rk == -1 ) {
355 assert( M == lrA->rkmax );
356 assert( NULL == lrA->v );
359 memcpy( lrB->
u, lrA->u, lrA->rkmax * N *
sizeof(
float) );
369 memcpy( lrB->
u, lrA->u, M * lrA->rk *
sizeof(
float) );
370 lrB->
v = ((
float *)lrB->
u) + M * lrA->rk;
371 memcpy( lrB->
v, lrA->v, N * lrA->rkmax *
sizeof(
float) );
380 lA, M, D, ldd, lB, M );
388 for (; (blok < lblok) && (blok->
fcblknm == cblk_m); blok++) {
389 lA = A + blok->
coefind - offset;
390 lB = B + blok->
coefind - offset;
395 lA, M, D, ldd, lB, M );
BEGIN_C_DECLS typedef int pastix_int_t
static void kernel_trace_stop(int8_t inlast, pastix_ktype_t ktype, int m, int n, int k, double flops, double starttime)
Stop the trace of a single kernel.
static double kernel_trace_start(pastix_ktype_t ktype)
Start the trace of a single kernel.
int core_sscalo(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, const float *A, pastix_int_t lda, const float *D, pastix_int_t ldd, float *B, pastix_int_t ldb)
Scale a matrix by a diagonal out of place.
void cpublok_sscalo(pastix_trans_t trans, const SolverCblk *cblk, pastix_int_t blok_m, const void *dataA, const void *dataD, void *dataB)
Copy the lower terms of the block with scaling for the two-terms algorithm.
void cpucblk_sscalo(pastix_trans_t trans, const SolverCblk *cblk, void *dataL, void *dataLD)
Copy the L term with scaling for the two-terms algorithm.
The block low-rank structure to hold a matrix in low-rank form.
enum pastix_trans_e pastix_trans_t
Transposition.
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Solver column block structure.