22 #include "kernels_trace.h"
99 if ( lda < pastix_imax(1,M) )
107 if ( ldb < pastix_imax(1,M) ) {
112 #if defined(PRECISION_z) || defined(PRECISION_c)
114 for( j=0; j<N; j++, D += ldd ) {
116 for( i=0; i<M; i++, B++, A++ ) {
126 for( j=0; j<N; j++, D += ldd ) {
128 for( i=0; i<M; i++, B++, A++ ) {
178 pastix_fixdbl_t time;
181 time = kernel_trace_start( PastixKernelSCALOCblk );
193 pastix_int_t ldl, ldd, ldld;
195 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
202 for(; blok < lblk; blok++, lrL++, lrLD++) {
205 assert( lrLD->
rk == -1 );
209 lrLD->
rkmax = lrL->rkmax;
211 if ( lrL->rk == -1 ) {
212 assert( M == lrL->rkmax );
215 memcpy( lrLD->
u, lrL->u, lrL->rkmax * N *
sizeof(
float) );
225 memcpy( lrLD->
u, lrL->u, M * lrL->rk *
sizeof(
float) );
226 lrLD->
v = ((
float *)lrLD->
u) + M * lrL->rk;
227 memcpy( lrLD->
v, lrL->v, N * lrL->rkmax *
sizeof(
float) );
243 else if ( cblk->
cblktype & CBLK_LAYOUT_2D ) {
244 L = D = (
float *)dataL;
245 LD = (
float *)dataLD;
248 for(; blok < lblk; blok++) {
258 L = D = (
float *)dataL;
259 LD = (
float *)dataLD;
272 kernel_trace_stop( cblk->
fblokptr->
inlast, PastixKernelSCALOCblk, M, N, 0, (pastix_fixdbl_t)(M*N), time );
323 pastix_int_t M, N, ldd, offset, cblk_m;
335 assert( cblk->
cblktype & CBLK_LAYOUT_2D );
337 blok = fblok + blok_m;
341 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
346 for (; (blok < lblok) && (blok->
fcblknm == cblk_m); blok++, lrA++, lrB++) {
351 lrB->
rkmax = lrA->rkmax;
353 if ( lrB->
rk == -1 ) {
354 assert( M == lrA->rkmax );
355 assert( NULL == lrA->v );
358 memcpy( lrB->
u, lrA->u, lrA->rkmax * N *
sizeof(
float) );
368 memcpy( lrB->
u, lrA->u, M * lrA->rk *
sizeof(
float) );
369 lrB->
v = ((
float *)lrB->
u) + M * lrA->rk;
370 memcpy( lrB->
v, lrA->v, N * lrA->rkmax *
sizeof(
float) );
379 lA, M, D, ldd, lB, M );
387 for (; (blok < lblok) && (blok->
fcblknm == cblk_m); blok++) {
388 lA = A + blok->
coefind - offset;
389 lB = B + blok->
coefind - offset;
394 lA, M, D, ldd, lB, M );
int core_sscalo(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, const float *A, pastix_int_t lda, const float *D, pastix_int_t ldd, float *B, pastix_int_t ldb)
Scale a matrix by a diagonal out of place.
void cpucblk_sscalo(pastix_trans_t trans, SolverCblk *cblk, void *dataL, void *dataLD)
Copy the L term with scaling for the two-terms algorithm.
void cpublok_sscalo(pastix_trans_t trans, SolverCblk *cblk, pastix_int_t blok_m, const void *dataA, const void *dataD, void *dataB)
Copy the lower terms of the block with scaling for the two-terms algorithm.
The block low-rank structure to hold a matrix in low-rank form.
enum pastix_trans_e pastix_trans_t
Transposition.
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Solver column block structure.