27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
29 typedef void ( *bcsc_zspmv_Ax_fct_t )(
const pastix_bcsc_t *,
32 const pastix_complex64_t *,
33 const pastix_complex64_t *,
35 pastix_complex64_t * );
39 pastix_complex64_t beta,
40 pastix_complex64_t *y )
42 if( beta != (pastix_complex64_t)0.0 )
45 for( j=0; j<n; j++, y++ )
52 memset( y, 0, n *
sizeof(pastix_complex64_t) );
57 __bcsc_zspmv_Ax(
const pastix_bcsc_t *bcsc,
59 pastix_complex64_t alpha,
60 const pastix_complex64_t *A,
61 const pastix_complex64_t *x,
62 pastix_complex64_t beta,
63 pastix_complex64_t *y )
67 __bcsc_zspmv_by( cblk->
colnbr, beta, y );
69 for( j=0; j<cblk->
colnbr; j++, y++ )
71 for( i=cblk->
coltab[j]; i< cblk->coltab[j+1]; i++ )
73 *y += alpha * A[i] * x[ bcsc->rowtab[i] ];
79 __bcsc_zspmv_Ax_ind(
const pastix_bcsc_t *bcsc,
80 pastix_complex64_t alpha,
81 const pastix_complex64_t *A,
82 const pastix_complex64_t *x,
83 pastix_complex64_t beta,
84 pastix_complex64_t *y )
86 const pastix_complex64_t *xptr = x;
89 __bcsc_zspmv_by( bcsc->gN, beta, y );
91 for( bloc=0; bloc<bcsc->cscfnbr; bloc++ )
93 for( j=0; j < bcsc->cscftab[bloc].colnbr; j++, xptr++ )
95 for( i = bcsc->cscftab[bloc].coltab[j]; i < bcsc->cscftab[bloc].coltab[j+1]; i++ )
97 y[ bcsc->rowtab[i] ] += alpha * A[i] * (*xptr);
103 #if defined(PRECISION_z) || defined(PRECISION_c)
105 __bcsc_zspmv_conjAx(
const pastix_bcsc_t *bcsc,
107 pastix_complex64_t alpha,
108 const pastix_complex64_t *A,
109 const pastix_complex64_t *x,
110 pastix_complex64_t beta,
111 pastix_complex64_t *y )
115 __bcsc_zspmv_by( cblk->
colnbr, beta, y );
117 for( j=0; j<cblk->
colnbr; j++, y++ )
119 for( i=cblk->
coltab[j]; i< cblk->coltab[j+1]; i++ )
121 *y += alpha * conj( A[i] ) * x[ bcsc->rowtab[i] ];
130 pastix_complex64_t alpha,
131 const pastix_bcsc_t *bcsc,
132 const pastix_complex64_t *x,
133 pastix_complex64_t beta,
134 pastix_complex64_t *y,
139 bcsc_zspmv_Ax_fct_t zspmv_Ax = __bcsc_zspmv_Ax;
140 pastix_complex64_t *valptr = NULL;
168 cblk = bcsc->cscftab + begin;
169 valptr = (pastix_complex64_t*)bcsc->Lvalues;
174 if ( bcsc->Uvalues != NULL ) {
175 valptr = (pastix_complex64_t*)bcsc->Uvalues;
183 __bcsc_zspmv_Ax_ind( bcsc, alpha, valptr, x, beta, y );
186 #if defined(PRECISION_z) || defined(PRECISION_c)
192 zspmv_Ax = __bcsc_zspmv_conjAx;
196 for( bloc=begin; bloc<end; bloc++, cblk++ )
199 pastix_complex64_t *yptr = y + solv_cblk->
lcolidx;
201 assert( !(solv_cblk->
cblktype & (CBLK_FANIN|CBLK_RECV)) );
203 zspmv_Ax( bcsc, cblk, alpha, valptr, x, beta, yptr );
248 pastix_complex64_t alpha,
249 const pastix_complex64_t *x,
250 pastix_complex64_t beta,
251 pastix_complex64_t *y )
253 pastix_bcsc_t *bcsc = pastix_data->
bcsc;
256 if( (bcsc == NULL) || (y == NULL) || (x == NULL) ) {
260 __bcsc_zspmv_loop( solvmtx,
261 trans, alpha, bcsc, x, beta, y,
262 0, 0, bcsc->cscfnbr );
268 struct z_argument_spmv_s {
270 pastix_complex64_t alpha;
271 const pastix_bcsc_t *bcsc;
272 const pastix_complex64_t *x;
273 pastix_complex64_t beta;
274 pastix_complex64_t *y;
308 struct z_argument_spmv_s *arg = (
struct z_argument_spmv_s*)args;
309 const pastix_bcsc_t *bcsc = arg->bcsc;
317 begin = start_bloc[rank];
318 if ( rank == (size - 1) )
323 end = start_bloc[rank + 1];
326 __bcsc_zspmv_loop( arg->mtx,
327 arg->trans, arg->alpha, bcsc, arg->x,
328 arg->beta, arg->y + start_indexes[rank],
360 bcsc_zspmv_Ax_fct_t zspmv_Ax = __bcsc_zspmv_Ax;
361 struct z_argument_spmv_s *arg = (
struct z_argument_spmv_s*)args;
363 pastix_complex64_t alpha = arg->alpha;
364 const pastix_bcsc_t *bcsc = arg->bcsc;
365 const pastix_complex64_t *x = arg->x;
366 pastix_complex64_t beta = arg->beta;
367 pastix_complex64_t *y = arg->y;
368 pastix_complex64_t *valptr = NULL;
369 pastix_complex64_t *yptr;
380 tasknbr = mtx->ttsknbr[rank];
381 tasktab = mtx->ttsktab[rank];
407 valptr = (pastix_complex64_t*)bcsc->Lvalues;
412 if ( bcsc->Uvalues != NULL ) {
413 valptr = (pastix_complex64_t*)bcsc->Uvalues;
421 __bcsc_zspmv_Ax_ind( bcsc, alpha, valptr, x, beta, y );
425 #if defined(PRECISION_z) || defined(PRECISION_c)
431 zspmv_Ax = __bcsc_zspmv_conjAx;
435 for (ii=0; ii<tasknbr; ii++)
437 task_id = tasktab[ii];
438 t = mtx->tasktab + task_id;
441 bcsc_cblk = bcsc->cscftab + solv_cblk->
bcscnum;
444 zspmv_Ax( bcsc, bcsc_cblk, alpha, valptr, x, beta, yptr );
473 struct z_argument_spmv_s *args )
477 pastix_bcsc_t *bcsc = pastix_data->
bcsc;
481 total = 2 * pastix_data->
csc->nnzexp - bcsc->gN;
483 total = pastix_data->
csc->nnzexp;
485 size = pastix_data->
isched->world_size;
486 ratio = pastix_iceil( total, size );
489 args->start_bloc[0] = 0;
490 args->start_indexes[0] = 0;
492 for ( bloc = 0, rank = 1; bloc < bcsc->cscfnbr; ++bloc, ++cblk )
494 if ( load >= ratio ) {
495 assert( rank < size );
497 args->start_bloc[rank] = bloc;
508 assert( total == 0 );
510 for ( ; rank < size; rank ++ ) {
511 args->start_bloc[rank] = bcsc->cscfnbr;
512 args->start_indexes[rank] = bcsc->gN;
554 pastix_complex64_t alpha,
555 const pastix_complex64_t *x,
556 pastix_complex64_t beta,
557 pastix_complex64_t *y )
559 pastix_bcsc_t *bcsc = pastix_data->
bcsc;
560 struct z_argument_spmv_s arg = { trans, alpha, bcsc, x, beta, y,
561 pastix_data->
solvmatr, NULL, NULL };
563 if( (bcsc == NULL) || (y == NULL) || (x == NULL) ) {
575 MALLOC_INTERN( arg.start_indexes, 2 * pastix_data->
isched->world_size,
pastix_int_t );
576 arg.start_bloc = arg.start_indexes + pastix_data->
isched->world_size;
582 memFree_null( arg.start_indexes );
631 pastix_complex64_t alpha,
632 const pastix_complex64_t *x,
633 pastix_complex64_t beta,
634 pastix_complex64_t *y )
636 const pastix_complex64_t *xglobal;
656 else if ( trans == transA ) {
660 pastix_print_error(
"bcsc_zspmv: incompatible trans and transA" );
675 if ( x != xglobal ) {
676 free( (
void*)xglobal );
BEGIN_C_DECLS typedef int pastix_int_t
void bcsc_zspmv_get_balanced_indexes(const pastix_data_t *pastix_data, struct z_argument_spmv_s *args)
Initialize indexes for vector pointer and bloc indexes for parallel version of spmv.
void pthread_bcsc_zspmv(isched_thread_t *ctx, void *args)
Compute the matrix-vector product y = alpha * op(A) * x + beta * y.
void pthread_bcsc_zspmv_tasktab(isched_thread_t *ctx, void *args)
Compute the matrix-vector product y = alpha * op(A) * x + beta * y.
void bcsc_zspmv_smp(const pastix_data_t *pastix_data, pastix_trans_t trans, pastix_complex64_t alpha, const pastix_complex64_t *x, pastix_complex64_t beta, pastix_complex64_t *y)
Perform y = alpha * A * x + beta * y (Parallel version)
const pastix_complex64_t * bvec_zgather_remote(const pastix_data_t *pastix_data, const pastix_complex64_t *y)
Gather a distributed right hand side (bvec storage) on all nodes.
void bcsc_zspmv(const pastix_data_t *pastix_data, pastix_trans_t trans, pastix_complex64_t alpha, const pastix_complex64_t *x, pastix_complex64_t beta, pastix_complex64_t *y)
Compute the matrix-vector product y = alpha * op(A) * x + beta * y.
void bcsc_zspmv_seq(const pastix_data_t *pastix_data, pastix_trans_t trans, pastix_complex64_t alpha, const pastix_complex64_t *x, pastix_complex64_t beta, pastix_complex64_t *y)
Compute the matrix-vector product y = alpha * A * x + beta * y (Sequential version)
Compressed colptr format for the bcsc.
enum pastix_trans_e pastix_trans_t
Transposition.
Main PaStiX data structure.
SolverCblk *restrict cblktab
Solver column block structure.
Solver column block structure.
The task structure for the numerical factorization.