27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
29 typedef void ( *bcsc_dspmv_Ax_fct_t )(
const pastix_bcsc_t *,
42 if( beta != (
double)0.0 )
45 for( j=0; j<n; j++, y++ )
52 memset( y, 0, n *
sizeof(
double) );
57 __bcsc_dspmv_Ax(
const pastix_bcsc_t *bcsc,
67 __bcsc_dspmv_by( cblk->
colnbr, beta, y );
69 for( j=0; j<cblk->
colnbr; j++, y++ )
71 for( i=cblk->
coltab[j]; i< cblk->coltab[j+1]; i++ )
73 *y += alpha * A[i] * x[ bcsc->rowtab[i] ];
79 __bcsc_dspmv_Ax_ind(
const pastix_bcsc_t *bcsc,
86 const double *xptr = x;
89 __bcsc_dspmv_by( bcsc->gN, beta, y );
91 for( bloc=0; bloc<bcsc->cscfnbr; bloc++ )
93 for( j=0; j < bcsc->cscftab[bloc].colnbr; j++, xptr++ )
95 for( i = bcsc->cscftab[bloc].coltab[j]; i < bcsc->cscftab[bloc].coltab[j+1]; i++ )
97 y[ bcsc->rowtab[i] ] += alpha * A[i] * (*xptr);
#if defined(PRECISION_z) || defined(PRECISION_c)
/**
 * @brief Complex-precision variant of __bcsc_dspmv_Ax using the conjugate of
 *        each matrix entry: y = alpha * conj(A) * x + beta * y for one cblk.
 *
 * Only compiled for the complex precisions (PRECISION_z / PRECISION_c); in
 * this real (d) precision file the conjugate degenerates to the entry itself,
 * which is why the body reads `( A[i] )` after precision generation.
 *
 * Parameters are identical to __bcsc_dspmv_Ax.
 *
 * NOTE(review): reconstructed from a garbled fragment — confirm against the
 * original source.
 */
static inline void
__bcsc_dspmv_conjAx( const pastix_bcsc_t *bcsc,
                     const bcsc_cblk_t   *cblk,
                     double               alpha,
                     const double        *A,
                     const double        *x,
                     double               beta,
                     double              *y )
{
    pastix_int_t i, j;

    __bcsc_dspmv_by( cblk->colnbr, beta, y );
    for( j=0; j<cblk->colnbr; j++, y++ ) {
        for( i=cblk->coltab[j]; i< cblk->coltab[j+1]; i++ ) {
            *y += alpha * ( A[i] ) * x[ bcsc->rowtab[i] ];
        }
    }
}
#endif
131 const pastix_bcsc_t *bcsc,
139 bcsc_dspmv_Ax_fct_t zspmv_Ax = __bcsc_dspmv_Ax;
140 double *valptr = NULL;
168 cblk = bcsc->cscftab + begin;
169 valptr = (
double*)bcsc->Lvalues;
174 if ( bcsc->Uvalues != NULL ) {
175 valptr = (
double*)bcsc->Uvalues;
183 __bcsc_dspmv_Ax_ind( bcsc, alpha, valptr, x, beta, y );
186 #if defined(PRECISION_z) || defined(PRECISION_c)
192 zspmv_Ax = __bcsc_dspmv_conjAx;
196 for( bloc=begin; bloc<end; bloc++, cblk++ )
199 double *yptr = y + solv_cblk->
lcolidx;
201 assert( !(solv_cblk->
cblktype & (CBLK_FANIN|CBLK_RECV)) );
203 zspmv_Ax( bcsc, cblk, alpha, valptr, x, beta, yptr );
253 pastix_bcsc_t *bcsc = pastix_data->
bcsc;
256 if( (bcsc == NULL) || (y == NULL) || (x == NULL) ) {
260 __bcsc_dspmv_loop( solvmtx,
261 trans, alpha, bcsc, x, beta, y,
262 0, 0, bcsc->cscfnbr );
268 struct d_argument_spmv_s {
271 const pastix_bcsc_t *bcsc;
308 struct d_argument_spmv_s *arg = (
struct d_argument_spmv_s*)args;
309 const pastix_bcsc_t *bcsc = arg->bcsc;
317 begin = start_bloc[rank];
318 if ( rank == (size - 1) )
323 end = start_bloc[rank + 1];
326 __bcsc_dspmv_loop( arg->mtx,
327 arg->trans, arg->alpha, bcsc, arg->x,
328 arg->beta, arg->y + start_indexes[rank],
360 bcsc_dspmv_Ax_fct_t zspmv_Ax = __bcsc_dspmv_Ax;
361 struct d_argument_spmv_s *arg = (
struct d_argument_spmv_s*)args;
363 double alpha = arg->alpha;
364 const pastix_bcsc_t *bcsc = arg->bcsc;
365 const double *x = arg->x;
366 double beta = arg->beta;
368 double *valptr = NULL;
380 tasknbr = mtx->ttsknbr[rank];
381 tasktab = mtx->ttsktab[rank];
407 valptr = (
double*)bcsc->Lvalues;
412 if ( bcsc->Uvalues != NULL ) {
413 valptr = (
double*)bcsc->Uvalues;
421 __bcsc_dspmv_Ax_ind( bcsc, alpha, valptr, x, beta, y );
425 #if defined(PRECISION_z) || defined(PRECISION_c)
431 zspmv_Ax = __bcsc_dspmv_conjAx;
435 for (ii=0; ii<tasknbr; ii++)
437 task_id = tasktab[ii];
438 t = mtx->tasktab + task_id;
441 bcsc_cblk = bcsc->cscftab + solv_cblk->
bcscnum;
444 zspmv_Ax( bcsc, bcsc_cblk, alpha, valptr, x, beta, yptr );
473 struct d_argument_spmv_s *args )
477 pastix_bcsc_t *bcsc = pastix_data->
bcsc;
481 total = 2 * pastix_data->
csc->nnzexp - bcsc->gN;
483 total = pastix_data->
csc->nnzexp;
485 size = pastix_data->
isched->world_size;
486 ratio = pastix_iceil( total, size );
489 args->start_bloc[0] = 0;
490 args->start_indexes[0] = 0;
492 for ( bloc = 0, rank = 1; bloc < bcsc->cscfnbr; ++bloc, ++cblk )
494 if ( load >= ratio ) {
495 assert( rank < size );
497 args->start_bloc[rank] = bloc;
508 assert( total == 0 );
510 for ( ; rank < size; rank ++ ) {
511 args->start_bloc[rank] = bcsc->cscfnbr;
512 args->start_indexes[rank] = bcsc->gN;
559 pastix_bcsc_t *bcsc = pastix_data->
bcsc;
560 struct d_argument_spmv_s arg = { trans, alpha, bcsc, x, beta, y,
561 pastix_data->
solvmatr, NULL, NULL };
563 if( (bcsc == NULL) || (y == NULL) || (x == NULL) ) {
575 MALLOC_INTERN( arg.start_indexes, 2 * pastix_data->
isched->world_size,
pastix_int_t );
576 arg.start_bloc = arg.start_indexes + pastix_data->
isched->world_size;
582 memFree_null( arg.start_indexes );
636 const double *xglobal;
656 else if ( trans == transA ) {
660 pastix_print_error(
"bcsc_dspmv: incompatible trans and transA" );
675 if ( x != xglobal ) {
676 free( (
void*)xglobal );
BEGIN_C_DECLS typedef int pastix_int_t
void pthread_bcsc_dspmv_tasktab(isched_thread_t *ctx, void *args)
Compute the matrix-vector product y = alpha * op(A) * x + beta * y.
void bcsc_dspmv_get_balanced_indexes(const pastix_data_t *pastix_data, struct d_argument_spmv_s *args)
Initialize indexes for vector pointer and bloc indexes for parallel version of spmv.
void pthread_bcsc_dspmv(isched_thread_t *ctx, void *args)
Compute the matrix-vector product y = alpha * op(A) * x + beta * y.
void bcsc_dspmv_seq(const pastix_data_t *pastix_data, pastix_trans_t trans, double alpha, const double *x, double beta, double *y)
Compute the matrix-vector product y = alpha * A * x + beta * y (Sequential version)
void bcsc_dspmv_smp(const pastix_data_t *pastix_data, pastix_trans_t trans, double alpha, const double *x, double beta, double *y)
Perform y = alpha * A * x + beta * y (Parallel version)
const double * bvec_dgather_remote(const pastix_data_t *pastix_data, const double *y)
Gather a distributed right hand side (bvec storage) on all nodes.
void bcsc_dspmv(const pastix_data_t *pastix_data, pastix_trans_t trans, double alpha, const double *x, double beta, double *y)
Compute the matrix-vector product y = alpha * op(A) * x + beta * y.
Compressed colptr format for the bcsc.
enum pastix_trans_e pastix_trans_t
Transposition.
Main PaStiX data structure.
SolverCblk *restrict cblktab
Solver column block structure.
Solver column block structure.
The task structure for the numerical factorization.