PaStiX Handbook 6.4.0
bvec_ccompute.c
1/**
2 *
3 * @file bvec_ccompute.c
4 *
5 * Functions computing operations on the BCSC.
6 *
7 * @copyright 2004-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Mathieu Faverge
12 * @author Pierre Ramet
13 * @author Xavier Lacoste
14 * @author Gregoire Pichon
15 * @author Theophile Terraz
16 * @author Tony Delarue
17 * @author Vincent Bridonneau
18 * @date 2024-07-05
19 * @generated from /builds/2mk6rsew/0/solverstack/pastix/bcsc/bvec_zcompute.c, normal z -> c, Tue Feb 25 14:36:02 2025
20 *
21 **/
22#include "common.h"
23#include <math.h>
24#include "lapacke.h"
25#include "bcsc/bcsc.h"
26#include "bcsc_c.h"
27#include "order/order_internal.h"
28#include "frobeniusupdate.h"
29#include "cblas.h"
30#include "blend/solver.h"
31
32#if defined(PASTIX_WITH_MPI)
33void
34bvec_cmpi_frb_merge( float *dist,
35 float *loc,
36 int *len,
37 MPI_Datatype *dtype )
38{
39 assert( *len == 2 );
40 frobenius_merge( dist[0], dist[1], loc, loc+1 );
41 (void)len;
42 (void)dtype;
43}
44#endif
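/*
 * Note on the norm accumulation used below: throughout this file the partial
 * result is kept as a (scale, sumsq) pair in the LAPACK-style scaled
 * sum-of-squares representation, so that the final value is
 *
 *     norm = scale * sqrtf( sumsq ),
 *
 * which avoids overflow and underflow when squaring large or tiny entries.
 * A minimal sketch of how two partial pairs would be combined under this
 * representation (the actual merge is provided by frobenius_merge(), and the
 * degenerate case scale == 0 is ignored here):
 *
 *     if ( scale1 >= scale2 ) {
 *         sumsq1 += sumsq2 * ( scale2 / scale1 ) * ( scale2 / scale1 );
 *     } else {
 *         sumsq1  = sumsq2 + sumsq1 * ( scale1 / scale2 ) * ( scale1 / scale2 );
 *         scale1  = scale2;
 *     }
 */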
45
46/**
47 *******************************************************************************
48 *
49 * @ingroup bcsc
50 *
51 * @brief Compute the norm 2 of a vector. (Sequential version)
52 *
53 *******************************************************************************
54 *
55 * @param[in] pastix_data
56 * Provide information to the parallel version, to know the global
57 * context (Number of thread, barrier, ...).
58 *
59 * @param[in] n
60 * The size of the vector x.
61 *
62 * @param[in] x
63 * The vector x of size n.
64 *
65 *******************************************************************************
66 *
67 * @retval the norm 2 of x.
68 *
69 *******************************************************************************/
70float
71bvec_cnrm2_seq( pastix_data_t *pastix_data,
 72 pastix_int_t n,
 73 const pastix_complex32_t *x )
74{
75 SolverMatrix *solvmtx = pastix_data->solvmatr;
76 SolverCblk *scblk;
77 pastix_bcsc_t *bcsc = pastix_data->bcsc;
78 bcsc_cblk_t *bcblk = bcsc->cscftab;
79 pastix_int_t cblknbr, colnbr;
80 float data[] = { 0., 1. }; /* Scale, Sum */
81 float norm;
82 const float *valptr;
83 pastix_int_t i, j;
84
85 cblknbr = bcsc->cscfnbr;
86 for( i = 0; i < cblknbr; i++, bcblk++ ) {
87 scblk = solvmtx->cblktab + bcblk->cblknum;
88 colnbr = cblk_colnbr( scblk );
89 valptr = (const float*)(x + scblk->lcolidx);
90
91 for( j=0; j < colnbr; j++, valptr++ )
92 {
93 /* Real part */
94 frobenius_update( 1, data, data + 1, valptr );
95#if defined(PRECISION_z) || defined(PRECISION_c)
96 /* Imaginary part */
97 valptr++;
98 frobenius_update( 1, data, data + 1, valptr );
99#endif
100 }
101 }
102
103#if defined(PASTIX_WITH_MPI)
104 {
105 MPI_Op merge;
106
107 MPI_Op_create( (MPI_User_function *)bvec_cmpi_frb_merge, 1, &merge );
108 MPI_Allreduce( MPI_IN_PLACE, data, 2, MPI_FLOAT, merge, solvmtx->solv_comm );
109 MPI_Op_free( &merge );
110 }
111#endif
112
113 norm = data[0] * sqrtf( data[1] );
114
115 (void)n;
116 return norm;
117}
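/*
 * Usage sketch (illustrative only; pastix_data, b and n are hypothetical
 * caller-side variables): normalizing a right-hand side with the routines
 * of this file once its norm is known.
 *
 *     float nrm = bvec_cnrm2_seq( pastix_data, n, b );
 *     if ( nrm > 0. ) {
 *         bvec_cscal_seq( pastix_data, n, 1. / nrm, b );
 *     }
 */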
118
119struct c_argument_nrm2_s
120{
121 pastix_int_t n;
122 const pastix_complex32_t *x;
123 pastix_atomic_lock_t lock;
124 float scale;
125 float sumsq;
126};
127
128/**
129 *******************************************************************************
130 *
131 * @ingroup bcsc
132 *
133 * @brief Compute the norm 2 of a vector. (Parallel version)
134 *
135 *******************************************************************************
136 *
137 * @param[in] ctx
138 * The context of the current thread
139 *
140 * @param[inout] args
 141 * The parameter providing the number of elements of x, the vector
 142 * whose norm 2 is to be computed, and the resulting norm value.
143 *
144 *******************************************************************************/
145static inline void
146pthread_bvec_cnrm2( isched_thread_t *ctx,
147 void *args )
148{
149 struct c_argument_nrm2_s *arg = (struct c_argument_nrm2_s*)args;
150 pastix_int_t n = arg->n;
151 const pastix_complex32_t *x = arg->x;
152 float scale = 0.;
153 float sumsq = 1.;
154 float *valptr = (float*)x;
155 pastix_int_t i, rank, size;
156 pastix_int_t begin, end;
157
158 size = ctx->global_ctx->world_size;
159 rank = ctx->rank;
160
161 begin = (n / size) * rank;
162 if (rank == (size - 1)) {
 163 end = n; /* The last rank handles the remainder */
164 } else {
165 end = (n / size) * (rank + 1);
166 }
167
168 valptr += begin;
169#if defined(PRECISION_z) || defined(PRECISION_c)
170 valptr += begin;
171#endif /* defined(PRECISION_z) || defined(PRECISION_c) */
172
173 for( i = begin; i < end; i++, valptr++ )
174 {
175 frobenius_update( 1, &scale, &sumsq, valptr );
176#if defined(PRECISION_z) || defined(PRECISION_c)
177 valptr ++;
178 frobenius_update( 1, &scale, &sumsq, valptr );
179#endif
180 }
181
182 /* If we computed something */
183 if ( scale != 0. ) {
184 pastix_atomic_lock( &(arg->lock) );
185 frobenius_merge( scale, sumsq, &(arg->scale), &(arg->sumsq) );
186 pastix_atomic_unlock( &(arg->lock) );
187 }
188}
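/*
 * Worked example of the block partitioning used by the pthread_* routines of
 * this file: with n = 10 and size = 4 threads, n / size = 2, so ranks 0, 1
 * and 2 handle the index ranges [0,2), [2,4) and [4,6), while the last rank
 * takes the remainder and handles [6,10).
 */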
189
190/**
191 *******************************************************************************
192 *
193 * @ingroup bcsc
194 *
195 * @brief Compute the norm 2 of a vector. (Parallel version)
196 *
197 *******************************************************************************
198 *
199 * @param[in] pastix_data
200 * Provide information to the parallel version, to know the global
201 * context (Number of thread, barrier, ...).
202 *
 203 * @param[in] n
 204 * The number of elements of x.
 205 *
 206 * @param[in] x
 207 * The vector whose norm 2 is to be computed.
208 *
209 *******************************************************************************
210 *
211 * @return The norm 2 of the vector
212 *
213 *******************************************************************************/
214float
215bvec_cnrm2_smp( pastix_data_t *pastix_data,
 216 pastix_int_t n,
217 const pastix_complex32_t *x )
218{
219 struct c_argument_nrm2_s arg = { n, x, PASTIX_ATOMIC_UNLOCKED, 0., 1. };
220 isched_parallel_call( pastix_data->isched, pthread_bvec_cnrm2, &arg );
221
222#if defined(PASTIX_WITH_MPI)
223 {
224 MPI_Op merge;
225
226 MPI_Op_create( (MPI_User_function *)bvec_cmpi_frb_merge, 1, &merge );
227 MPI_Allreduce( MPI_IN_PLACE, &(arg.scale), 2, MPI_FLOAT, merge, pastix_data->solvmatr->solv_comm );
228 MPI_Op_free( &merge );
229 }
230#endif
231
232 return arg.scale * sqrtf( arg.sumsq );
233}
234
235/**
236 *******************************************************************************
237 *
238 * @ingroup bcsc
239 *
240 * @brief Scale a vector by the scalar alpha. (Sequential version)
241 *
242 *******************************************************************************
243 *
244 * @param[in] pastix_data
245 * Provide information to the parallel version, to know the global
246 * context (Number of thread, barrier, ...).
247 *
248 * @param[in] n
249 * The size of the vector x.
250 *
251 * @param[in] alpha
252 * The scalar to scale the vector x.
253 *
254 * @param[inout] x
255 * The vector x to scale.
256 *
257 *******************************************************************************/
258void
259bvec_cscal_seq( pastix_data_t *pastix_data,
 260 pastix_int_t n,
 261 pastix_complex32_t alpha,
 262 pastix_complex32_t *x )
 263{
264#if defined(PASTIX_WITH_MPI) && 0
265 SolverMatrix *solvmtx = pastix_data->solvmatr;
266 SolverCblk *scblk = solvmtx->cblktab;
267 pastix_bcsc_t *bcsc = pastix_data->bcsc;
268 bcsc_cblk_t *bcblk = bcsc->cscftab;
269 pastix_int_t i, cblknbr;
270
271 cblknbr = bcsc->cscfnbr;
272 for( i = 0; i < cblknbr; i++, bcblk++ ) {
273 scblk = solvmtx->cblktab + bcblk->cblknum;
274 n = cblk_colnbr( scblk );
275
276 cblas_cscal( n, CBLAS_SADDR(alpha), x + scblk->lcolidx, 1 );
277 }
278#else
279 (void)pastix_data;
280 cblas_cscal( n, CBLAS_SADDR(alpha), x, 1 );
281#endif
282
283}
284
285struct c_argument_scal_s
286{
287 pastix_int_t n;
288 pastix_complex32_t alpha;
 289 pastix_complex32_t *x;
 290};
291
292/**
293 *******************************************************************************
294 *
295 * @ingroup bcsc
296 *
297 * @brief Scale a vector (Parallel version)
298 *
299 *******************************************************************************
300 *
301 * @param[in] ctx
 302 * The information about the number of threads and the rank of the current thread.
303 *
304 * @param[inout] args
 305 * The argument providing the number of elements of the vector, the
 306 * scaling parameter, and the vector to be scaled.
307 *
308 *******************************************************************************/
309static inline void
310pthread_bvec_cscal( isched_thread_t *ctx,
311 void *args )
312{
313 struct c_argument_scal_s *arg = (struct c_argument_scal_s*)args;
314 pastix_complex32_t *x = arg->x;
315 pastix_int_t n = arg->n;
316 pastix_complex32_t alpha = arg->alpha;
317 pastix_int_t size = ctx->global_ctx->world_size;
318 pastix_int_t rank;
319 pastix_int_t begin, end;
320
321 if( x == NULL ) {
322 return;
323 }
324
325 rank = (pastix_int_t)ctx->rank;
326 begin = (n/size) * rank;
327 if (rank == (size - 1)) {
328 end = n;
329 } else {
330 end = (n/size) * (rank + 1);
331 }
332
333 if ( (end - begin) > 0 ) {
334 cblas_cscal( end - begin, CBLAS_SADDR(alpha), x + begin, 1 );
335 }
336}
337
338/**
339 *******************************************************************************
340 *
341 * @ingroup bcsc
342 *
343 * @brief Scale a vector (Parallel version)
344 *
345 *******************************************************************************
346 *
347 * @param[in] pastix_data
348 * The information about sequential and parallel version (Number of
349 * thread, ...).
350 *
351 * @param[in] n
352 * The number of elements of the vector
353 *
354 * @param[in] alpha
355 * The scaling parameter
356 *
357 * @param[inout] x
358 * The vector to be scaled
359 *
360 *******************************************************************************/
361void
362bvec_cscal_smp( pastix_data_t *pastix_data,
 363 pastix_int_t n,
 364 pastix_complex32_t alpha,
 365 pastix_complex32_t *x )
 366{
367 struct c_argument_scal_s arg = {n, alpha, x};
368 isched_parallel_call( pastix_data->isched, pthread_bvec_cscal, &arg );
369}
370
371/**
372 *******************************************************************************
373 *
374 * @ingroup bcsc
375 *
376 * @brief Compute y <- alpha * x + y. (Sequential version)
377 *
378 *******************************************************************************
379 *
380 * @param[in] pastix_data
381 * The information about sequential and parallel version (Number of
382 * thread, ...).
 383 *
384 * @param[in] n
385 * The size of the vectors.
386 *
387 * @param[in] alpha
388 * A scalar.
389 *
390 * @param[in] x
391 * The vector x.
392 *
393 * @param[inout] y
394 * The vector y.
395 *
396 *******************************************************************************/
397void
398bvec_caxpy_seq( pastix_data_t *pastix_data,
 399 pastix_int_t n,
 400 pastix_complex32_t alpha,
 401 const pastix_complex32_t *x,
 402 pastix_complex32_t *y )
 403{
404#if defined(PASTIX_WITH_MPI) && 0
405 SolverMatrix *solvmtx = pastix_data->solvmatr;
406 SolverCblk *scblk = solvmtx->cblktab;
407 pastix_bcsc_t *bcsc = pastix_data->bcsc;
408 bcsc_cblk_t *bcblk = bcsc->cscftab;
409 pastix_int_t i, cblknbr;
410
411 cblknbr = bcsc->cscfnbr;
412 for( i = 0; i < cblknbr; i++, bcblk++ ){
413 scblk = solvmtx->cblktab + bcblk->cblknum;
414 n = cblk_colnbr( scblk );
415
416 cblas_caxpy( n, CBLAS_SADDR(alpha),
417 x + scblk->lcolidx, 1,
418 y + scblk->lcolidx, 1 );
419 }
420#else
421 (void)pastix_data;
422 cblas_caxpy( n, CBLAS_SADDR(alpha), x, 1, y, 1 );
423#endif
424}
425
426struct c_argument_axpy_s
427{
428 pastix_int_t n;
429 pastix_complex32_t alpha;
430 const pastix_complex32_t *x;
 431 pastix_complex32_t *y;
 432};
433
434/**
435 *******************************************************************************
436 *
437 * @ingroup bcsc_internal
438 *
439 * @brief Compute y <- alpha * x + y (Parallel version).
440 *
441 *******************************************************************************
442 *
443 * @param[in] ctx
 444 * The context of the current thread.
445 *
446 * @param[inout] args
447 * The parameter providing the size of the vectors, a scalar,
448 * the vectors x and y.
449 *
450 *******************************************************************************/
451static inline void
452pthread_bvec_caxpy( isched_thread_t *ctx,
453 void *args)
454{
455 struct c_argument_axpy_s *arg = (struct c_argument_axpy_s*)args;
456 pastix_int_t n = arg->n;
457 pastix_complex32_t alpha = arg->alpha;
458 const pastix_complex32_t *x = arg->x;
459 pastix_complex32_t *y = arg->y;
460 pastix_int_t rank, size;
461 pastix_int_t begin, end;
462
463 if( (y == NULL) || (x == NULL) ) {
464 return;
465 }
466
467 if( alpha == (pastix_complex32_t)0.0 ) {
468 return;
469 }
470
471 size = (pastix_int_t)ctx->global_ctx->world_size;
472 rank = (pastix_int_t)ctx->rank;
473
474 begin = (n/size) * rank;
475 if (rank == (size - 1)) {
476 end = n;
477 } else {
478 end = (n/size) * (rank + 1);
479 }
480
481 if ( (end - begin) > 0 ) {
482 cblas_caxpy( end - begin, CBLAS_SADDR(alpha), x + begin, 1, y + begin, 1 );
483 }
484}
485
486/**
487 *******************************************************************************
488 *
489 * @ingroup bcsc
490 *
491 * @brief Perform y = alpha * x + y (Parallel version)
492 *
493 *******************************************************************************
494 *
495 * @param[in] pastix_data
496 * The information about sequential and parallel version (Number of
497 * thread, ...).
498 *
499 * @param[in] n
500 * The number of elements of vectors x and y
501 *
502 * @param[in] alpha
503 * The scalar to scale x
504 *
505 * @param[in] x
506 * The vector to be scaled
507 *
508 * @param[inout] y
509 * The resulting solution
510 *
511 *******************************************************************************/
512void
513bvec_caxpy_smp( pastix_data_t *pastix_data,
 514 pastix_int_t n,
 515 pastix_complex32_t alpha,
 516 const pastix_complex32_t *x,
 517 pastix_complex32_t *y )
 518{
519 struct c_argument_axpy_s args = {n, alpha, x, y};
520 isched_parallel_call( pastix_data->isched, pthread_bvec_caxpy, &args );
521}
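/*
 * Usage sketch (illustrative only; pastix_data, n, alpha, p and x are
 * hypothetical caller-side variables): the axpy step x <- x + alpha * p
 * found in most iterative refinement schemes.
 *
 *     bvec_caxpy_smp( pastix_data, n, alpha, p, x );
 */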
522
523struct c_argument_dotc_s
524{
525 pastix_int_t n;
526 const pastix_complex32_t *x;
527 const pastix_complex32_t *y;
528 pastix_atomic_lock_t lock;
 529 pastix_complex32_t sum;
 530};
531
532#if defined(PRECISION_z) || defined(PRECISION_c)
533/**
534 *******************************************************************************
535 *
536 * @ingroup bcsc
537 *
538 * @brief Compute the scalar product conjf(x).y (Sequential version)
539 *
540 *******************************************************************************
541 *
542 * @param[in] pastix_data
543 * The information about sequential and parallel version (Number of
544 * thread, ...).
545 *
546 * @param[in] n
547 * The size of the vectors.
548 *
549 * @param[in] x
550 * The vector x.
551 *
552 * @param[in] y
553 * The vector y.
554 *
555 *******************************************************************************
556 *
557 * @retval the scalar product of conjf(x) and y.
558 *
559 *******************************************************************************/
560pastix_complex32_t
561bvec_cdotc_seq( pastix_data_t *pastix_data,
562 pastix_int_t n,
563 const pastix_complex32_t *x,
564 const pastix_complex32_t *y )
565{
566 SolverMatrix *solvmtx = pastix_data->solvmatr;
567 SolverCblk *scblk = solvmtx->cblktab;
568 pastix_bcsc_t *bcsc = pastix_data->bcsc;
569 bcsc_cblk_t *bcblk = bcsc->cscftab;
570 pastix_int_t i, j, cblknbr;
571 const pastix_complex32_t *xptr;
572 const pastix_complex32_t *yptr;
573 pastix_complex32_t r = 0.0;
574
575 cblknbr = bcsc->cscfnbr;
576 for( i = 0; i < cblknbr; i++, bcblk++ ) {
577 scblk = solvmtx->cblktab + bcblk->cblknum;
578 n = cblk_colnbr( scblk );
579
580 xptr = x + scblk->lcolidx;
581 yptr = y + scblk->lcolidx;
582 for( j=0; j<n; j++, xptr++, yptr++ ) {
583 r += conjf(*xptr) * (*yptr);
584 }
585 }
586
587#if defined(PASTIX_WITH_MPI)
588 MPI_Allreduce( MPI_IN_PLACE, &r, 1, PASTIX_MPI_COMPLEX32,
589 MPI_SUM, solvmtx->solv_comm );
590#endif
591
592 return r;
593}
594
595/**
596 *******************************************************************************
597 *
598 * @ingroup bcsc_internal
599 *
600 * @brief Compute the scalar product conjf(x).y. (Parallel version)
601 *
602 *******************************************************************************
603 *
604 * @param[in] ctx
605 * The context of the current thread
606 *
607 * @param[in] args
 608 * The argument providing the vectors x and y and their size.
609 *
610 *******************************************************************************/
611static inline void
612pthread_bvec_cdotc( isched_thread_t *ctx,
613 void *args )
614{
615 struct c_argument_dotc_s *arg = (struct c_argument_dotc_s*)args;
616 pastix_int_t n = arg->n;
617 int i;
618 const pastix_complex32_t *xptr = arg->x;
619 const pastix_complex32_t *yptr = arg->y;
620 pastix_int_t begin, end, rank, size;
621 pastix_complex32_t r = 0.0;
622
623 rank = (pastix_int_t)ctx->rank;
624 size = (pastix_int_t)ctx->global_ctx->world_size;
625
626 begin = (n/size) * rank;
627 if (rank != size - 1) {
628 end = (n/size) * (rank + 1);
 629 } else { /* The last rank handles the remainder of the sum */
630 end = n;
631 }
632
633 xptr += begin;
634 yptr += begin;
635
636 for ( i = begin; i < end; i++, xptr++, yptr++ )
637 {
638 r += conjf(*xptr) * (*yptr);
639 }
640
641 if ( cabsf(r) > 0. ) {
642 pastix_atomic_lock( &(arg->lock) );
643 arg->sum += r;
644 pastix_atomic_unlock( &(arg->lock) );
645 }
646}
647
648/**
649 *******************************************************************************
650 *
651 * @ingroup bcsc
652 *
653 * @brief Compute a scalar product between complex vectors: conjf(x).y
654 * (Parallel version)
655 *
656 *******************************************************************************
657 *
658 * @param[in] pastix_data
659 * The information about sequential and parallel version (Number of
660 * thread, ...).
661 *
 662 * @param[in] n
 663 * The number of elements of vectors x and y.
 664 *
 665 * @param[in] x
 666 * The first vector of the scalar product, which gets conjugated.
 667 *
 668 * @param[in] y
 669 * The second vector of the scalar product.
 670 *
 671 *******************************************************************************
 672 *
 673 * @return The scalar product of conjf(x) and y.
674 *******************************************************************************/
675pastix_complex32_t
676bvec_cdotc_smp( pastix_data_t *pastix_data,
677 pastix_int_t n,
678 const pastix_complex32_t *x,
679 const pastix_complex32_t *y )
680{
681 struct c_argument_dotc_s arg = {n, x, y, PASTIX_ATOMIC_UNLOCKED, 0.0};
682 isched_parallel_call( pastix_data->isched, pthread_bvec_cdotc, &arg );
683
684#if defined(PASTIX_WITH_MPI)
685 MPI_Allreduce( MPI_IN_PLACE, &(arg.sum), 1, PASTIX_MPI_COMPLEX32,
686 MPI_SUM, pastix_data->solvmatr->solv_comm );
687#endif
688
689 return arg.sum;
690}
691#endif
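/*
 * Usage sketch (illustrative only; pastix_data, n and r are hypothetical
 * caller-side variables): in a conjugate-gradient-like refinement, the
 * hermitian scalar product above typically provides the quantities used to
 * build the step size, e.g.
 *
 *     pastix_complex32_t rho = bvec_cdotc_smp( pastix_data, n, r, r );
 *
 * which, with both arguments equal, reduces to the squared norm 2 of r.
 */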
692
693/**
694 *******************************************************************************
695 *
696 * @ingroup bcsc
697 *
698 * @brief Compute the scalar product x.y. (Sequential version)
699 *
700 *******************************************************************************
701 *
702 * @param[in] pastix_data
703 * The information about sequential and parallel version (Number of
704 * thread, ...).
705 *
706 * @param[in] x
707 * The vector x.
708 *
709 * @param[in] y
710 * The vector y.
711 *
712 * @param[in] n
713 * The size of the vectors.
714 *
715 *******************************************************************************
716 *
717 * @retval the scalar product of x and y.
718 *
719 *******************************************************************************/
720pastix_complex32_t
721bvec_cdotu_seq( pastix_data_t *pastix_data,
 722 pastix_int_t n,
723 const pastix_complex32_t *x,
724 const pastix_complex32_t *y )
725{
726 SolverMatrix *solvmtx = pastix_data->solvmatr;
727 SolverCblk *scblk = solvmtx->cblktab;
728 pastix_bcsc_t *bcsc = pastix_data->bcsc;
729 bcsc_cblk_t *bcblk = bcsc->cscftab;
730 pastix_int_t i, j, cblknbr;
731 const pastix_complex32_t *xptr;
732 const pastix_complex32_t *yptr;
733 pastix_complex32_t r = 0.0;
734
735 cblknbr = bcsc->cscfnbr;
736 for( i = 0; i < cblknbr; i++, bcblk++ ) {
737 scblk = solvmtx->cblktab + bcblk->cblknum;
738 n = cblk_colnbr( scblk );
739
740 xptr = x + scblk->lcolidx;
741 yptr = y + scblk->lcolidx;
742 for( j=0; j<n; j++, xptr++, yptr++ ) {
743 r += (*xptr) * (*yptr);
744 }
745 }
746
747#if defined(PASTIX_WITH_MPI)
748 MPI_Allreduce( MPI_IN_PLACE, &r, 1, PASTIX_MPI_COMPLEX32,
749 MPI_SUM, solvmtx->solv_comm );
750#endif
751
752 return r;
753}
754
755/**
756 *******************************************************************************
757 *
758 * @ingroup bcsc_internal
759 *
760 * @brief Compute the scalar product x.y. (Parallel version)
761 *
762 *******************************************************************************
763 *
764 * @param[in] ctx
765 * The context of the current thread.
766 *
767 * @param[in] args
768 * The argument providing the vectors x and y and their size n.
769 *
770 *******************************************************************************/
771static inline void
772pthread_bvec_cdotu( isched_thread_t *ctx,
773 void *args )
774{
775 struct c_argument_dotc_s *arg = (struct c_argument_dotc_s*)args;
776 int i;
777 pastix_int_t n = arg->n;
778 const pastix_complex32_t *x = arg->x;
779 const pastix_complex32_t *y = arg->y;
780 const pastix_complex32_t *xptr;
781 const pastix_complex32_t *yptr;
782 pastix_complex32_t r = 0.0;
783 pastix_int_t size, rank;
784 pastix_int_t begin, end;
785
786 rank = (pastix_int_t)ctx->rank;
787 size = (pastix_int_t)ctx->global_ctx->world_size;
788
789 begin = (n / size) * rank;
790 if ( rank == (size - 1)) {
791 end = n;
792 } else {
793 end = (n / size) * (rank + 1);
794 }
795
796 xptr = x + begin;
797 yptr = y + begin;
798
799 for (i=begin; i<end; i++, xptr++, yptr++)
800 {
801 r += (*xptr) * (*yptr);
802 }
803
804 if ( cabsf(r) > 0. ) {
805 pastix_atomic_lock( &(arg->lock) );
806 arg->sum += r;
807 pastix_atomic_unlock( &(arg->lock) );
808 }
809}
810
811/**
812 *******************************************************************************
813 *
814 * @ingroup bcsc
815 *
816 * @brief Compute a regular scalar product x.y (Parallel version)
817 *
818 *******************************************************************************
819 *
820 * @param[in] pastix_data
821 * The information about sequential and parallel version (Number of
822 * thread, ...).
823 *
824 * @param[in] n
 825 * The number of elements of vectors x and y.
826 *
827 * @param[in] x
828 * The first vector of the scalar product
829 *
830 * @param[in] y
831 * The second vector of the scalar product
832 *
833 *******************************************************************************
834 *
 835 * @return The scalar product of x and y.
836 *
837 *******************************************************************************/
838pastix_complex32_t
839bvec_cdotu_smp( pastix_data_t *pastix_data,
 840 pastix_int_t n,
841 const pastix_complex32_t *x,
842 const pastix_complex32_t *y )
843{
844 struct c_argument_dotc_s arg = {n, x, y, PASTIX_ATOMIC_UNLOCKED, 0.0};
845 isched_parallel_call( pastix_data->isched, pthread_bvec_cdotu, &arg );
846
847#if defined(PASTIX_WITH_MPI)
848 MPI_Allreduce( MPI_IN_PLACE, &(arg.sum), 1, PASTIX_MPI_COMPLEX32,
849 MPI_SUM, pastix_data->solvmatr->solv_comm );
850#endif
851
852 return arg.sum;
853}
854
855/**
856 *******************************************************************************
857 *
858 * @ingroup bcsc
859 *
860 * @brief Copy a vector y = x (Sequential version)
861 *
862 *******************************************************************************
863 *
864 * @param[in] pastix_data
865 * The information about sequential and parallel version (Number of
866 * thread, ...).
867 *
868 * @param[in] n
869 * The number of elements of vectors x and y
870 *
871 * @param[in] x
872 * The vector to be copied
873 *
874 * @param[inout] y
875 * The vector copy of x
876 *
877 *******************************************************************************/
878void
879bvec_ccopy_seq( pastix_data_t *pastix_data,
 880 pastix_int_t n,
 881 const pastix_complex32_t *x,
 882 pastix_complex32_t *y )
 883{
884#if defined(PASTIX_WITH_MPI) && 0
885 SolverMatrix *solvmtx = pastix_data->solvmatr;
886 SolverCblk *scblk = solvmtx->cblktab;
887 pastix_bcsc_t *bcsc = pastix_data->bcsc;
888 bcsc_cblk_t *bcblk = bcsc->cscftab;
889 pastix_int_t i, cblknbr;
890
891 cblknbr = bcsc->cscfnbr;
892 for( i = 0; i < cblknbr; i++, bcblk++ ) {
893 scblk = solvmtx->cblktab + bcblk->cblknum;
894 n = cblk_colnbr( scblk );
895
896 memcpy( y + scblk->lcolidx, x + scblk->lcolidx, n * sizeof(pastix_complex32_t) );
897 }
898#else
899 (void)pastix_data;
900 memcpy( y, x, n * sizeof(pastix_complex32_t) );
901#endif
902}
903
904struct argument_copy_s {
905 pastix_int_t n;
906 const pastix_complex32_t *x;
 907 pastix_complex32_t *y;
 908};
909
910/**
911 *******************************************************************************
912 *
913 * @ingroup bcsc_internal
914 *
915 * @brief Copy a vector y = x (parallel version)
916 *
 917 * Performs the copy of x into y.
918 *
919 *******************************************************************************
920 *
921 * @param[in] ctx
922 * The context of the current thread
923 *
924 * @param[inout] args
 925 * The argument containing the destination vector, the source vector,
 926 * and their size.
927 *
928 *******************************************************************************/
929static inline void
930pthread_bvec_ccopy( isched_thread_t *ctx,
931 void *args )
932{
933 struct argument_copy_s *arg = (struct argument_copy_s*)args;
934 pastix_int_t n = arg->n;
935 pastix_int_t size, rank;
936 pastix_int_t begin, end;
937
938 size = (pastix_int_t)ctx->global_ctx->world_size;
939 rank = (pastix_int_t)ctx->rank;
940
941 begin = (n/size) * rank;
942
943 if (rank == (size - 1)) {
944 end = n;
945 } else {
946 end = (n/size) * (rank + 1);
947 }
948
949 if ( (end - begin) > 0 ) {
950 memcpy( arg->y + begin, arg->x + begin, (end - begin) * sizeof(pastix_complex32_t) );
951 }
952}
953
954/**
955 *******************************************************************************
956 *
957 * @ingroup bcsc
958 *
959 * @brief Copy a vector y = x (parallel version)
960 *
961 * Initialise argument for the parallel function
962 *
963 *******************************************************************************
964 *
965 * @param[in] pastix_data
966 * The information about sequential and parallel version (Number of
967 * thread, ...).
968 *
969 * @param[in] n
970 * The number of elements of vectors x and y
971 *
972 * @param[in] x
973 * The vector to be copied
974 *
975 * @param[inout] y
976 * The vector copy of x
977 *
978 *******************************************************************************/
979void
980bvec_ccopy_smp( pastix_data_t *pastix_data,
 981 pastix_int_t n,
 982 const pastix_complex32_t *x,
 983 pastix_complex32_t *y )
 984{
985 struct argument_copy_s args = {n, x, y};
986 isched_parallel_call( pastix_data->isched, pthread_bvec_ccopy, &args );
987}
988
989/**
990 *******************************************************************************
991 *
992 * @ingroup bcsc
993 *
994 * @brief Solve A x = b with A the sparse matrix
995 *
996 * In Complex64 and Double precision and if mixed-precision is enabled, solve
997 * SA sx = sb with SA the sparse matrix previously initialized respectively
998 * in Complex32 or Float precision.
999 *
1000 *******************************************************************************
1001 *
1002 * @param[in] pastix_data
1003 * The PaStiX data structure that describes the solver instance.
1004 *
1005 * @param[inout] b
1006 * On entry, the right hand side
1007 * On exit, the solution of the problem A x = b
1008 * @param[inout] work
1009 * On entry, if mixed-precision is disabled or incompatible with the
1010 * sparse matrix's precision, the normal solve function is called and work must
 1011 * be NULL. On exit, work stays NULL.
 1012 * If mixed-precision is enabled, work must be allocated as a vector
 1013 * half the size of b, so that the solve can be performed
 1014 * in mixed-precision. On exit, work is undefined.
1015 *
1016 *******************************************************************************/
1017void bcsc_cspsv( pastix_data_t *pastix_data,
 1018 pastix_complex32_t *b,
 1019 pastix_complex32_t *work )
1020{
1021 struct pastix_rhs_s rhsb = {
1022 .allocated = 0,
1023 .flttype = PastixComplex32,
1024 .m = pastix_data->bcsc->n,
1025 .n = 1,
1026 .ld = pastix_data->bcsc->n,
1027 .b = b,
1028 .cblkb = NULL,
1029 .rhs_comm = NULL,
1030 .Ploc2Pglob = NULL,
1031 };
1032 int rc;
1033
1034 pastix_data->iparm[IPARM_VERBOSE]--;
1035
1036#if defined(PRECISION_z) || defined(PRECISION_d)
1037 if ( pastix_data->iparm[IPARM_MIXED] )
1038 {
1039 pastix_int_t n = rhsb.m;
1040 pastix_int_t nrhs = rhsb.n;
1041
1042 rhsb.flttype = PastixComplex32;
1043 rhsb.b = work;
1044
1045 /* Copying b into work at half the precision */
1046 rc = LAPACKE_clag2z_work( LAPACK_COL_MAJOR, n, nrhs,
1047 b, n, work, n );
1048 assert( rc == 0 );
1049
1050 pastix_subtask_solve( pastix_data, &rhsb );
1051
1052 /* Reverting to normal precision after solving */
1053 rc = LAPACKE_clag2z_work( LAPACK_COL_MAJOR, n, nrhs,
1054 work, n, b, n );
1055 assert( rc == 0 );
1056 }
1057 else
1058#endif
1059 {
1060 assert(work == NULL);
1061 pastix_subtask_solve( pastix_data, &rhsb );
1062 }
1063
1064 if ( rhsb.cblkb != NULL ) {
1065 free( rhsb.cblkb );
1066 }
1067 pastix_data->iparm[IPARM_VERBOSE]++;
1068 (void)rc;
1069 (void)work;
1070}
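/*
 * Usage sketch (illustrative only; pastix_data and b are hypothetical
 * caller-side variables): when mixed-precision is disabled, the workspace
 * argument must simply be NULL, as documented above.
 *
 *     bcsc_cspsv( pastix_data, b, NULL );
 */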
1071
1072/**
1073 *******************************************************************************
1074 *
1075 * @ingroup bcsc
1076 *
1077 * @brief Compute \f[ y = \alpha A x + \beta y \f] (Sequential version)
1078 *
1079 *******************************************************************************
1080 *
1081 * @param[in] pastix_data
1082 * The information about sequential and parallel version (Number of
1083 * thread, ...).
1084 *
1085 * @param[in] m
1086 * The number of rows of the matrix A, and the size of y.
1087 *
1088 * @param[in] n
1089 * The number of columns of the matrix A, and the size of x.
1090 *
1091 * @param[in] alpha
1092 * The scalar alpha.
1093 *
1094 * @param[in] A
1095 * The dense matrix A of size lda-by-n.
1096 *
1097 * @param[in] lda
1098 * The leading dimension of the matrix A. lda >= max(1,m)
1099 *
1100 * @param[in] x
1101 * The vector x of size n.
1102 *
1103 * @param[in] beta
1104 * The scalar beta.
1105 *
1106 * @param[inout] y
1107 * On entry, the initial vector y of size m.
1108 * On exit, the updated vector.
1109 *
1110 *******************************************************************************/
1111void
1112bvec_cgemv_seq( pastix_data_t *pastix_data,
 1113 pastix_int_t m,
 1114 pastix_int_t n,
 1115 pastix_complex32_t alpha,
 1116 const pastix_complex32_t *A,
 1117 pastix_int_t lda,
 1118 const pastix_complex32_t *x,
 1119 pastix_complex32_t beta,
 1120 pastix_complex32_t *y )
 1121{
1122#if defined(PASTIX_WITH_MPI) && 0
1123 SolverMatrix *solvmtx = pastix_data->solvmatr;
1124 SolverCblk *scblk = solvmtx->cblktab;
1125 pastix_bcsc_t *bcsc = pastix_data->bcsc;
1126 bcsc_cblk_t *bcblk = bcsc->cscftab;
1127 pastix_int_t i, cblknbr;
1128
1129 cblknbr = bcsc->cscfnbr;
1130 for( i = 0; i < cblknbr; i++, bcblk++ ) {
1131 scblk = solvmtx->cblktab + bcblk->cblknum;
1132 m = cblk_colnbr( scblk );
1133
1134 cblas_cgemv( CblasColMajor, CblasNoTrans, m, n,
1135 CBLAS_SADDR(alpha), A + scblk->lcolidx, lda, x, 1,
1136 CBLAS_SADDR(beta), y + scblk->lcolidx, 1 );
1137 }
1138#else
1139 (void)pastix_data;
1140 cblas_cgemv( CblasColMajor, CblasNoTrans, m, n,
1141 CBLAS_SADDR(alpha), A, lda, x, 1,
1142 CBLAS_SADDR(beta), y, 1 );
1143#endif
1144
1145}
1146
1147struct c_gemv_s
1148{
1149 pastix_int_t m;
1150 pastix_int_t n;
1151 pastix_complex32_t alpha;
1152 const pastix_complex32_t *A;
1153 pastix_int_t lda;
1154 const pastix_complex32_t *x;
1155 pastix_complex32_t beta;
 1156 pastix_complex32_t *y;
 1157};
1158
1159/**
1160 *******************************************************************************
1161 *
1162 * @ingroup bcsc_internal
1163 *
1164 * @brief Compute \f[ y = \alpha A x + \beta y \f] (Parallel version)
1165 *
1166 * This is the function called by bvec_cgemv_smp to perform gemv
1167 * (Parallel version)
1168 *
1169 *******************************************************************************
1170 *
1171 * @param[in] ctx
 1172 * Information about the number of threads and the rank of the current thread.
1173 *
1174 * @param[inout] args
 1175 * The number of rows (m) and columns (n) of the matrix A, the vectors
 1176 * y (size m) and x (size n), and the scalars alpha and beta. The dense
 1177 * matrix A is of size lda-by-n, with leading dimension lda >= max(1,m).
 1178 * The result is stored in y.
1179 *
1180 *******************************************************************************/
1181static inline void
1182pthread_bvec_cgemv( isched_thread_t *ctx,
1183 void *args )
1184{
1185 struct c_gemv_s *arg = (struct c_gemv_s*)args;
1186 pastix_int_t m = arg->m;
1187 pastix_int_t sub_m;
1188 pastix_int_t n = arg->n;
1189 pastix_complex32_t alpha = arg->alpha;
1190 const pastix_complex32_t *A = arg->A;
1191 const pastix_complex32_t *Aptr;
1192 pastix_int_t lda = arg->lda;
1193 const pastix_complex32_t *x = arg->x;
1194 const pastix_complex32_t *xptr;
1195 pastix_complex32_t beta = arg->beta;
1196 pastix_complex32_t *y = arg->y;
1197 pastix_complex32_t *yptr;
1198 pastix_int_t size, rank;
1199
1200 size = (pastix_int_t)ctx->global_ctx->world_size;
1201 rank = (pastix_int_t)ctx->rank;
1202
1203 sub_m = (m / size);
1204
1205 /* Subdivision of A */
1206 Aptr = A + sub_m * rank;
1207
1208 xptr = x;
1209 yptr = y + sub_m * rank;
1210
1211 if (rank == (size - 1)) {
 1212 sub_m += m % size; /* The last thread handles the remaining rows */
1213 }
1214
1215 if ( sub_m > 0 ) {
1216 cblas_cgemv( CblasColMajor, CblasNoTrans, sub_m, n,
1217 CBLAS_SADDR(alpha), Aptr, lda, xptr, 1,
1218 CBLAS_SADDR(beta), yptr, 1 );
1219 }
1220}
1221
1222/**
1223 *******************************************************************************
1224 *
1225 * @ingroup bcsc
1226 *
1227 * @brief Compute \f[ y = \alpha A x + \beta y \f] (Parallel version)
1228 *
1229 *******************************************************************************
1230 *
1231 * @param[in] pastix_data
1232 * The information about sequential and parallel version (Number of
1233 * thread, ...).
1234 *
1235 * @param[in] m
1236 * The number of rows of the matrix A, and the size of y.
1237 *
1238 * @param[in] n
1239 * The number of columns of the matrix A, and the size of x.
1240 *
1241 * @param[in] alpha
1242 * The scalar alpha.
1243 *
1244 * @param[in] A
1245 * The dense matrix A of size lda-by-n.
1246 *
1247 * @param[in] lda
1248 * The leading dimension of the matrix A. lda >= max(1,m)
1249 *
1250 * @param[in] x
1251 * The vector x of size n.
1252 *
1253 * @param[in] beta
1254 * The scalar beta.
1255 *
1256 * @param[inout] y
1257 * On entry, the initial vector y of size m.
1258 * On exit, the updated vector.
1259 *
1260 *******************************************************************************/
1261void
1262bvec_cgemv_smp( pastix_data_t *pastix_data,
 1263 pastix_int_t m,
1264 pastix_int_t n,
1265 pastix_complex32_t alpha,
1266 const pastix_complex32_t *A,
1267 pastix_int_t lda,
1268 const pastix_complex32_t *x,
1269 pastix_complex32_t beta,
 1270 pastix_complex32_t *y )
 1271{
1272 struct c_gemv_s arg = {m, n, alpha, A, lda, x, beta, y};
1273
1274 isched_parallel_call( pastix_data->isched, pthread_bvec_cgemv, &arg );
1275}
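/*
 * Usage sketch (illustrative only; pastix_data, m, n, A, lda, x, b and r are
 * hypothetical caller-side variables): computing a residual r = b - A * x by
 * combining the copy and gemv routines of this file.
 *
 *     bvec_ccopy_seq( pastix_data, m, b, r );
 *     bvec_cgemv_seq( pastix_data, m, n, -1., A, lda, x, 1., r );
 */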
1276
1277/**
1278 *******************************************************************************
1279 *
1280 * @ingroup bcsc
1281 *
1282 * @brief Set to 0 remote coefficients
1283 *
1284 *******************************************************************************
1285 *
1286 * @param[in] pastix_data
1287 * The information about sequential and parallel version (Number of
1288 * thread, ...).
1289 *
1290 * @param[inout] y
1291 * On entry, the initial vector y of size m.
1292 * On exit, the y vector with remote section set to 0.
1293 *
1294 *******************************************************************************/
1295void
1296bvec_cnullify_remote( const pastix_data_t *pastix_data,
 1297 pastix_complex32_t *y )
 1298{
1299#if defined( PASTIX_WITH_MPI )
1300 const SolverMatrix *solvmtx = pastix_data->solvmatr;
1301 const SolverCblk *cblk = solvmtx->cblktab;
1302 pastix_int_t cblknbr;
1303 pastix_int_t i, lastindex = 0;
1304 pastix_int_t n = pastix_data->csc->gNexp;
1305
1306 cblknbr = solvmtx->cblknbr;
1307 for ( i = 0; i < cblknbr; i++, cblk++ ) {
1308 if ( cblk->cblktype & (CBLK_FANIN|CBLK_RECV) ) {
1309 continue;
1310 }
1311
1312 if ( cblk->fcolnum != lastindex ) {
 1313 /* Set to 0 all remote data between the previous local cblk and the current cblk */
1314 memset(
1315 y + lastindex, 0, ( cblk->fcolnum - lastindex ) * sizeof( pastix_complex32_t ) );
1316 }
1317 lastindex = cblk->lcolnum + 1;
1318 }
1319
1320 if ( lastindex < n ) {
 1321 /* Set to 0 all remote data between the last local cblk and the end of the vector */
1322 memset( y + lastindex, 0, ( n - lastindex ) * sizeof( pastix_complex32_t ) );
1323 }
1324#endif
1325 (void)pastix_data;
1326 (void)y;
1327}
1328
1329/**
1330 *******************************************************************************
1331 *
1332 * @ingroup bcsc
1333 *
1334 * @brief Gather a distributed right hand side (bvec storage) on all nodes.
1335 *
1336 *******************************************************************************
1337 *
1338 * @param[in] pastix_data
1339 * The information about sequential and parallel version (Number of
1340 * thread, ...).
1341 *
1342 * @param[inout] y
1343 * On entry, the local portion of the vector y.
1344 * On exit, the complete vector y.
1345 *
1346 *******************************************************************************
1347 *
 1348 * @retval The gathered right hand side: a newly allocated global copy of y when compiled with MPI support, y itself otherwise.
1349 *
1350 *******************************************************************************/
1351const pastix_complex32_t *
1352bvec_cgather_remote( const pastix_data_t *pastix_data,
 1353 const pastix_complex32_t *y )
1354{
1355#if defined( PASTIX_WITH_MPI )
1356 const SolverMatrix *solvmtx = pastix_data->solvmatr;
1357 const SolverCblk *cblk = solvmtx->cblktab;
1358 pastix_complex32_t *yglobal = NULL;
1359 pastix_complex32_t *yto, *ytmp;
1360 const pastix_complex32_t *yfr;
1361 pastix_int_t c, i, cblki, cblknbr;
1362 MPI_Request request_c = MPI_REQUEST_NULL;
1363 MPI_Request request_n = MPI_REQUEST_NULL;
1364 pastix_int_t *all_n, *all_cblknbr, *indices;
1365 pastix_int_t max_n = 0;
1366 pastix_int_t max_cblknbr = 0;
1367 pastix_int_t gn = pastix_data->bcsc->gN;
1368 pastix_int_t ln = pastix_data->bcsc->n;
1369 pastix_int_t lcblknbr, colnbr;
1370
1371 if ( ln != 0 ) {
1372 yglobal = malloc( gn * sizeof(pastix_complex32_t) );
1373#if !defined(NDEBUG)
1374 memset( yglobal, 0xff, gn * sizeof(pastix_complex32_t) );
1375#endif
1376 }
1377 all_n = malloc( pastix_data->procnbr * sizeof(pastix_int_t) );
1378 all_cblknbr = malloc( pastix_data->procnbr * sizeof(pastix_int_t) );
1379
1380 MPI_Allgather( &ln, 1, PASTIX_MPI_INT,
1381 all_n, 1, PASTIX_MPI_INT, pastix_data->pastix_comm );
1382 lcblknbr = solvmtx->cblknbr - solvmtx->faninnbr - solvmtx->recvnbr;
1383 MPI_Allgather( &lcblknbr, 1, PASTIX_MPI_INT,
1384 all_cblknbr, 1, PASTIX_MPI_INT, pastix_data->pastix_comm );
1385
1386 for( c=0; c<pastix_data->procnbr; c++ )
1387 {
1388 max_n = pastix_imax( max_n, all_n[c] );
1389 max_cblknbr = pastix_imax( max_cblknbr, all_cblknbr[c] );
1390 }
1391
1392 ytmp = malloc( max_n * sizeof(pastix_complex32_t) );
1393 indices = malloc( max_cblknbr * 2 * sizeof(pastix_int_t) );
1394
1395 for( c=0; c<pastix_data->procnbr; c++ )
1396 {
1397 if ( all_n[c] == 0 ) {
1398 continue;
1399 }
1400
1401 if ( c == pastix_data->procnum ) {
1402 MPI_Ibcast( (pastix_complex32_t*)y, ln, PASTIX_MPI_COMPLEX32, c, pastix_data->pastix_comm, &request_n );
1403
1404 cblknbr = solvmtx->cblknbr;
1405 cblk = solvmtx->cblktab;
1406 cblki = 0;
1407 for ( i = 0; i < cblknbr; i++, cblk++ ) {
1408 if ( cblk->cblktype & (CBLK_FANIN|CBLK_RECV) ) {
1409 continue;
1410 }
1411
1412 yfr = y + cblk->lcolidx;
1413 yto = yglobal + cblk->fcolnum;
1414
1415 memcpy( yto, yfr, cblk_colnbr( cblk ) * sizeof( pastix_complex32_t ) );
1416
1417 indices[ 2*cblki ] = cblk->fcolnum;
1418 indices[ 2*cblki+1 ] = cblk->lcolnum;
1419 cblki++;
1420 }
1421 assert( cblki == lcblknbr );
1422
1423 MPI_Ibcast( indices, 2 * lcblknbr, PASTIX_MPI_INT, c, pastix_data->pastix_comm, &request_c );
1424 MPI_Wait( &request_n, MPI_STATUS_IGNORE );
1425 MPI_Wait( &request_c, MPI_STATUS_IGNORE );
1426 }
1427 else {
1428 MPI_Ibcast( ytmp, all_n[c], PASTIX_MPI_COMPLEX32, c, pastix_data->pastix_comm, &request_n );
1429 MPI_Ibcast( indices, all_cblknbr[c] * 2, PASTIX_MPI_INT, c, pastix_data->pastix_comm, &request_c );
1430 MPI_Wait( &request_n, MPI_STATUS_IGNORE );
1431 MPI_Wait( &request_c, MPI_STATUS_IGNORE );
1432
 1433 /* If ln = 0, there is no local computation, so no need to store into yglobal */
1434 if ( ln != 0 ) {
1435 yfr = ytmp;
1436 for ( i = 0; i < all_cblknbr[c]; i++ ) {
1437 yto = yglobal + indices[2*i];
1438
1439 colnbr = indices[2*i+1] - indices[2*i] + 1;
1440 memcpy( yto, yfr, colnbr * sizeof( pastix_complex32_t ) );
1441
1442 yfr += colnbr;
1443 }
1444 }
1445 }
1446 }
1447
1448 free( all_n );
1449 free( all_cblknbr );
1450 free( ytmp );
1451 free( indices );
1452
1453 return yglobal;
1454#else
1455 (void)pastix_data;
1456 return y;
1457#endif
1458}
1459
1460/**
1461 *******************************************************************************
1462 *
1463 * @ingroup bcsc
1464 *
1465 * @brief Apply an all reduce of the vector on all nodes
1466 *
1467 *******************************************************************************
1468 *
1469 * @param[in] pastix_data
1470 * The information about sequential and parallel version (Number of
1471 * thread, ...).
1472 *
1473 * @param[inout] y
1474 * On entry, the initial vector y of size m.
 1475 * On exit, the sum of the local contributions of y over all nodes.
1476 *
1477 *******************************************************************************/
1478void
1479bvec_callreduce( const pastix_data_t *pastix_data,
 1480 pastix_complex32_t *y )
 1481{
1482#if defined( PASTIX_WITH_MPI )
1483 /* Reduce the partial sums on all nodes */
1484 MPI_Allreduce( MPI_IN_PLACE,
1485 y, pastix_data->csc->gNexp,
1486 PASTIX_MPI_COMPLEX32, MPI_SUM,
1487 pastix_data->inter_node_comm );
1488#endif
1489 (void)pastix_data;
1490 (void)y;
1491}