PaStiX Handbook  6.4.0
bvec_ccompute.c
Go to the documentation of this file.
1 /**
2  *
3  * @file bvec_ccompute.c
4  *
5  * Functions computing operations on the BCSC.
6  *
7  * @copyright 2004-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.4.0
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Xavier Lacoste
14  * @author Gregoire Pichon
15  * @author Theophile Terraz
16  * @author Tony Delarue
17  * @author Vincent Bridonneau
18  * @date 2024-07-05
19  * @generated from /builds/solverstack/pastix/bcsc/bvec_zcompute.c, normal z -> c, Fri Jul 12 15:10:18 2024
20  *
21  **/
22 #include "common.h"
23 #include <math.h>
24 #include "lapacke.h"
25 #include "bcsc/bcsc.h"
26 #include "bcsc_c.h"
27 #include "order/order_internal.h"
28 #include "frobeniusupdate.h"
29 #include "cblas.h"
30 #include "blend/solver.h"
31 
32 #if defined(PASTIX_WITH_MPI)
33 void
34 bvec_cmpi_frb_merge( float *dist,
35  float *loc,
36  int *len,
37  MPI_Datatype *dtype )
38 {
39  assert( *len == 2 );
40  frobenius_merge( dist[0], dist[1], loc, loc+1 );
41  (void)len;
42  (void)dtype;
43 }
44 #endif
45 
46 /**
47  *******************************************************************************
48  *
49  * @ingroup bcsc
50  *
51  * @brief Compute the norm 2 of a vector. (Sequential version)
52  *
53  *******************************************************************************
54  *
55  * @param[in] pastix_data
56  * Provide information to the parallel version, to know the global
57  * context (Number of thread, barrier, ...).
58  *
59  * @param[in] n
60  * The size of the vector x.
61  *
62  * @param[in] x
63  * The vector x of size n.
64  *
65  *******************************************************************************
66  *
67  * @retval the norm 2 of x.
68  *
69  *******************************************************************************/
70 float
72  pastix_int_t n,
73  const pastix_complex32_t *x )
74 {
75  SolverMatrix *solvmtx = pastix_data->solvmatr;
76  SolverCblk *scblk;
77  pastix_bcsc_t *bcsc = pastix_data->bcsc;
78  bcsc_cblk_t *bcblk = bcsc->cscftab;
79  pastix_int_t cblknbr, colnbr;
80  float data[] = { 0., 1. }; /* Scale, Sum */
81  float norm;
82  const float *valptr;
83  pastix_int_t i, j;
84 
85  cblknbr = bcsc->cscfnbr;
86  for( i = 0; i < cblknbr; i++, bcblk++ ) {
87  scblk = solvmtx->cblktab + bcblk->cblknum;
88  colnbr = cblk_colnbr( scblk );
89  valptr = (const float*)(x + scblk->lcolidx);
90 
91  for( j=0; j < colnbr; j++, valptr++ )
92  {
93  /* Real part */
94  frobenius_update( 1, data, data + 1, valptr );
95 #if defined(PRECISION_z) || defined(PRECISION_c)
96  /* Imaginary part */
97  valptr++;
98  frobenius_update( 1, data, data + 1, valptr );
99 #endif
100  }
101  }
102 
103 #if defined(PASTIX_WITH_MPI)
104  {
105  MPI_Op merge;
106 
107  MPI_Op_create( (MPI_User_function *)bvec_cmpi_frb_merge, 1, &merge );
108  MPI_Allreduce( MPI_IN_PLACE, data, 2, MPI_FLOAT, merge, solvmtx->solv_comm );
109  MPI_Op_free( &merge );
110  }
111 #endif
112 
113  norm = data[0] * sqrtf( data[1] );
114 
115  (void)n;
116  return norm;
117 }
118 
119 struct c_argument_nrm2_s
120 {
121  pastix_int_t n;
122  const pastix_complex32_t *x;
123  pastix_atomic_lock_t lock;
124  float scale;
125  float sumsq;
126 };
127 
128 /**
129  *******************************************************************************
130  *
131  * @ingroup bcsc
132  *
133  * @brief Compute the norm 2 of a vector. (Parallel version)
134  *
135  *******************************************************************************
136  *
137  * @param[in] ctx
138  * The context of the current thread
139  *
140  * @param[inout] args
141  * The parameter that providenumber of elements of x,
142  * and the vector which norm2 is to be computed, and the norm value
143  *
144  *******************************************************************************/
145 static inline void
146 pthread_bvec_cnrm2( isched_thread_t *ctx,
147  void *args )
148 {
149  struct c_argument_nrm2_s *arg = (struct c_argument_nrm2_s*)args;
150  pastix_int_t n = arg->n;
151  const pastix_complex32_t *x = arg->x;
152  float scale = 0.;
153  float sumsq = 1.;
154  float *valptr = (float*)x;
155  pastix_int_t i, rank, size;
156  pastix_int_t begin, end;
157 
158  size = ctx->global_ctx->world_size;
159  rank = ctx->rank;
160 
161  begin = (n / size) * rank;
162  if (rank == (size - 1)) {
163  end = n; /* One iteration more */
164  } else {
165  end = (n / size) * (rank + 1);
166  }
167 
168  valptr += begin;
169 #if defined(PRECISION_z) || defined(PRECISION_c)
170  valptr += begin;
171 #endif /* defined(PRECISION_z) || defined(PRECISION_c) */
172 
173  for( i = begin; i < end; i++, valptr++ )
174  {
175  frobenius_update( 1, &scale, &sumsq, valptr );
176 #if defined(PRECISION_z) || defined(PRECISION_c)
177  valptr ++;
178  frobenius_update( 1, &scale, &sumsq, valptr );
179 #endif
180  }
181 
182  /* If we computed something */
183  if ( scale != 0. ) {
184  pastix_atomic_lock( &(arg->lock) );
185  frobenius_merge( scale, sumsq, &(arg->scale), &(arg->sumsq) );
186  pastix_atomic_unlock( &(arg->lock) );
187  }
188 }
189 
190 /**
191  *******************************************************************************
192  *
193  * @ingroup bcsc
194  *
195  * @brief Compute the norm 2 of a vector. (Parallel version)
196  *
197  *******************************************************************************
198  *
199  * @param[in] pastix_data
200  * Provide information to the parallel version, to know the global
201  * context (Number of thread, barrier, ...).
202  *
203  * @param[in] x
204  * The vector which norm2 is to be computed
205  *
206  * @param[in] n
207  * The number of elements of x
208  *
209  *******************************************************************************
210  *
211  * @return The norm 2 of the vector
212  *
213  *******************************************************************************/
214 float
216  pastix_int_t n,
217  const pastix_complex32_t *x )
218 {
219  struct c_argument_nrm2_s arg = { n, x, PASTIX_ATOMIC_UNLOCKED, 0., 1. };
220  isched_parallel_call( pastix_data->isched, pthread_bvec_cnrm2, &arg );
221 
222 #if defined(PASTIX_WITH_MPI)
223  {
224  MPI_Op merge;
225 
226  MPI_Op_create( (MPI_User_function *)bvec_cmpi_frb_merge, 1, &merge );
227  MPI_Allreduce( MPI_IN_PLACE, &(arg.scale), 2, MPI_FLOAT, merge, pastix_data->solvmatr->solv_comm );
228  MPI_Op_free( &merge );
229  }
230 #endif
231 
232  return arg.scale * sqrtf( arg.sumsq );
233 }
234 
235 /**
236  *******************************************************************************
237  *
238  * @ingroup bcsc
239  *
240  * @brief Scale a vector by the scalar alpha. (Sequential version)
241  *
242  *******************************************************************************
243  *
244  * @param[in] pastix_data
245  * Provide information to the parallel version, to know the global
246  * context (Number of thread, barrier, ...).
247  *
248  * @param[in] n
249  * The size of the vector x.
250  *
251  * @param[in] alpha
252  * The scalar to scale the vector x.
253  *
254  * @param[inout] x
255  * The vector x to scale.
256  *
257  *******************************************************************************/
258 void
260  pastix_int_t n,
261  pastix_complex32_t alpha,
262  pastix_complex32_t *x )
263 {
264 #if defined(PASTIX_WITH_MPI) && 0
265  SolverMatrix *solvmtx = pastix_data->solvmatr;
266  SolverCblk *scblk = solvmtx->cblktab;
267  pastix_bcsc_t *bcsc = pastix_data->bcsc;
268  bcsc_cblk_t *bcblk = bcsc->cscftab;
269  pastix_int_t i, cblknbr;
270 
271  cblknbr = bcsc->cscfnbr;
272  for( i = 0; i < cblknbr; i++, bcblk++ ) {
273  scblk = solvmtx->cblktab + bcblk->cblknum;
274  n = cblk_colnbr( scblk );
275 
276  cblas_cscal( n, CBLAS_SADDR(alpha), x + scblk->lcolidx, 1 );
277  }
278 #else
279  (void)pastix_data;
280  cblas_cscal( n, CBLAS_SADDR(alpha), x, 1 );
281 #endif
282 
283 }
284 
285 struct c_argument_scal_s
286 {
287  pastix_int_t n;
288  pastix_complex32_t alpha;
290 };
291 
292 /**
293  *******************************************************************************
294  *
295  * @ingroup bcsc
296  *
297  * @brief Scale a vector (Parallel version)
298  *
299  *******************************************************************************
300  *
301  * @param[in] ctx
302  * The information about number of thread and rank of the actual thread
303  *
304  * @param[inout] args
305  * The argument providing number of elements of the vector, the scaling
306  * parameter and, and the vector to be scaled
307  *
308  *******************************************************************************/
309 static inline void
310 pthread_bvec_cscal( isched_thread_t *ctx,
311  void *args )
312 {
313  struct c_argument_scal_s *arg = (struct c_argument_scal_s*)args;
314  pastix_complex32_t *x = arg->x;
315  pastix_int_t n = arg->n;
316  pastix_complex32_t alpha = arg->alpha;
317  pastix_int_t size = ctx->global_ctx->world_size;
318  pastix_int_t rank;
319  pastix_int_t begin, end;
320 
321  if( x == NULL ) {
322  return;
323  }
324 
325  rank = (pastix_int_t)ctx->rank;
326  begin = (n/size) * rank;
327  if (rank == (size - 1)) {
328  end = n;
329  } else {
330  end = (n/size) * (rank + 1);
331  }
332 
333  if ( (end - begin) > 0 ) {
334  cblas_cscal( end - begin, CBLAS_SADDR(alpha), x + begin, 1 );
335  }
336 }
337 
338 /**
339  *******************************************************************************
340  *
341  * @ingroup bcsc
342  *
343  * @brief Scale a vector (Parallel version)
344  *
345  *******************************************************************************
346  *
347  * @param[in] pastix_data
348  * The information about sequential and parallel version (Number of
349  * thread, ...).
350  *
351  * @param[in] n
352  * The number of elements of the vector
353  *
354  * @param[in] alpha
355  * The scaling parameter
356  *
357  * @param[inout] x
358  * The vector to be scaled
359  *
360  *******************************************************************************/
361 void
363  pastix_int_t n,
364  pastix_complex32_t alpha,
365  pastix_complex32_t *x )
366 {
367  struct c_argument_scal_s arg = {n, alpha, x};
368  isched_parallel_call( pastix_data->isched, pthread_bvec_cscal, &arg );
369 }
370 
371 /**
372  *******************************************************************************
373  *
374  * @ingroup bcsc
375  *
376  * @brief Compute y <- alpha * x + y. (Sequential version)
377  *
378  *******************************************************************************
379  *
380  * @param[in] pastix_data
381  * The information about sequential and parallel version (Number of
382  * thread, ...).
383 -*
384  * @param[in] n
385  * The size of the vectors.
386  *
387  * @param[in] alpha
388  * A scalar.
389  *
390  * @param[in] x
391  * The vector x.
392  *
393  * @param[inout] y
394  * The vector y.
395  *
396  *******************************************************************************/
397 void
399  pastix_int_t n,
400  pastix_complex32_t alpha,
401  const pastix_complex32_t *x,
403 {
404 #if defined(PASTIX_WITH_MPI) && 0
405  SolverMatrix *solvmtx = pastix_data->solvmatr;
406  SolverCblk *scblk = solvmtx->cblktab;
407  pastix_bcsc_t *bcsc = pastix_data->bcsc;
408  bcsc_cblk_t *bcblk = bcsc->cscftab;
409  pastix_int_t i, cblknbr;
410 
411  cblknbr = bcsc->cscfnbr;
412  for( i = 0; i < cblknbr; i++, bcblk++ ){
413  scblk = solvmtx->cblktab + bcblk->cblknum;
414  n = cblk_colnbr( scblk );
415 
416  cblas_caxpy( n, CBLAS_SADDR(alpha),
417  x + scblk->lcolidx, 1,
418  y + scblk->lcolidx, 1 );
419  }
420 #else
421  (void)pastix_data;
422  cblas_caxpy( n, CBLAS_SADDR(alpha), x, 1, y, 1 );
423 #endif
424 }
425 
426 struct c_argument_axpy_s
427 {
428  pastix_int_t n;
429  pastix_complex32_t alpha;
430  const pastix_complex32_t *x;
432 };
433 
434 /**
435  *******************************************************************************
436  *
437  * @ingroup bcsc_internal
438  *
439  * @brief Compute y <- alpha * x + y (Parallel version).
440  *
441  *******************************************************************************
442  *
443  * @param[in] ctx
444  * The context about the current thread
445  *
446  * @param[inout] args
447  * The parameter providing the size of the vectors, a scalar,
448  * the vectors x and y.
449  *
450  *******************************************************************************/
451 static inline void
452 pthread_bvec_caxpy( isched_thread_t *ctx,
453  void *args)
454 {
455  struct c_argument_axpy_s *arg = (struct c_argument_axpy_s*)args;
456  pastix_int_t n = arg->n;
457  pastix_complex32_t alpha = arg->alpha;
458  const pastix_complex32_t *x = arg->x;
459  pastix_complex32_t *y = arg->y;
460  pastix_int_t rank, size;
461  pastix_int_t begin, end;
462 
463  if( (y == NULL) || (x == NULL) ) {
464  return;
465  }
466 
467  if( alpha == (pastix_complex32_t)0.0 ) {
468  return;
469  }
470 
471  size = (pastix_int_t)ctx->global_ctx->world_size;
472  rank = (pastix_int_t)ctx->rank;
473 
474  begin = (n/size) * rank;
475  if (rank == (size - 1)) {
476  end = n;
477  } else {
478  end = (n/size) * (rank + 1);
479  }
480 
481  if ( (end - begin) > 0 ) {
482  cblas_caxpy( end - begin, CBLAS_SADDR(alpha), x + begin, 1, y + begin, 1 );
483  }
484 }
485 
486 /**
487  *******************************************************************************
488  *
489  * @ingroup bcsc
490  *
491  * @brief Perform y = alpha * x + y (Parallel version)
492  *
493  *******************************************************************************
494  *
495  * @param[in] pastix_data
496  * The information about sequential and parallel version (Number of
497  * thread, ...).
498  *
499  * @param[in] n
500  * The number of elements of vectors x and y
501  *
502  * @param[in] alpha
503  * The scalar to scale x
504  *
505  * @param[in] x
506  * The vector to be scaled
507  *
508  * @param[inout] y
509  * The resulting solution
510  *
511  *******************************************************************************/
512 void
514  pastix_int_t n,
515  pastix_complex32_t alpha,
516  const pastix_complex32_t *x,
517  pastix_complex32_t *y )
518 {
519  struct c_argument_axpy_s args = {n, alpha, x, y};
520  isched_parallel_call( pastix_data->isched, pthread_bvec_caxpy, &args );
521 }
522 
523 struct c_argument_dotc_s
524 {
525  pastix_int_t n;
526  const pastix_complex32_t *x;
527  const pastix_complex32_t *y;
528  pastix_atomic_lock_t lock;
529  pastix_complex32_t sum;
530 };
531 
532 #if defined(PRECISION_z) || defined(PRECISION_c)
533 /**
534  *******************************************************************************
535  *
536  * @ingroup bcsc
537  *
538  * @brief Compute the scalar product conjf(x).y (Sequential version)
539  *
540  *******************************************************************************
541  *
542  * @param[in] pastix_data
543  * The information about sequential and parallel version (Number of
544  * thread, ...).
545  *
546  * @param[in] n
547  * The size of the vectors.
548  *
549  * @param[in] x
550  * The vector x.
551  *
552  * @param[in] y
553  * The vector y.
554  *
555  *******************************************************************************
556  *
557  * @retval the scalar product of conjf(x) and y.
558  *
559  *******************************************************************************/
561 bvec_cdotc_seq( pastix_data_t *pastix_data,
562  pastix_int_t n,
563  const pastix_complex32_t *x,
564  const pastix_complex32_t *y )
565 {
566  SolverMatrix *solvmtx = pastix_data->solvmatr;
567  SolverCblk *scblk = solvmtx->cblktab;
568  pastix_bcsc_t *bcsc = pastix_data->bcsc;
569  bcsc_cblk_t *bcblk = bcsc->cscftab;
570  pastix_int_t i, j, cblknbr;
571  const pastix_complex32_t *xptr;
572  const pastix_complex32_t *yptr;
573  pastix_complex32_t r = 0.0;
574 
575  cblknbr = bcsc->cscfnbr;
576  for( i = 0; i < cblknbr; i++, bcblk++ ) {
577  scblk = solvmtx->cblktab + bcblk->cblknum;
578  n = cblk_colnbr( scblk );
579 
580  xptr = x + scblk->lcolidx;
581  yptr = y + scblk->lcolidx;
582  for( j=0; j<n; j++, xptr++, yptr++ ) {
583  r += conjf(*xptr) * (*yptr);
584  }
585  }
586 
587 #if defined(PASTIX_WITH_MPI)
588  MPI_Allreduce( MPI_IN_PLACE, &r, 1, PASTIX_MPI_COMPLEX32,
589  MPI_SUM, solvmtx->solv_comm );
590 #endif
591 
592  return r;
593 }
594 
595 /**
596  *******************************************************************************
597  *
598  * @ingroup bcsc_internal
599  *
600  * @brief Compute the scalar product conjf(x).y. (Parallel version)
601  *
602  *******************************************************************************
603  *
604  * @param[in] ctx
605  * The context of the current thread
606  *
607  * @param[in] args
608  * The argument provding the vectors x and y and their size.
609  *
610  *******************************************************************************/
611 static inline void
612 pthread_bvec_cdotc( isched_thread_t *ctx,
613  void *args )
614 {
615  struct c_argument_dotc_s *arg = (struct c_argument_dotc_s*)args;
616  pastix_int_t n = arg->n;
617  int i;
618  const pastix_complex32_t *xptr = arg->x;
619  const pastix_complex32_t *yptr = arg->y;
620  pastix_int_t begin, end, rank, size;
621  pastix_complex32_t r = 0.0;
622 
623  rank = (pastix_int_t)ctx->rank;
624  size = (pastix_int_t)ctx->global_ctx->world_size;
625 
626  begin = (n/size) * rank;
627  if (rank != size - 1) {
628  end = (n/size) * (rank + 1);
629  } else { /*The last one computes the calcul for the rest of the sum*/
630  end = n;
631  }
632 
633  xptr += begin;
634  yptr += begin;
635 
636  for ( i = begin; i < end; i++, xptr++, yptr++ )
637  {
638  r += conjf(*xptr) * (*yptr);
639  }
640 
641  if ( cabsf(r) > 0. ) {
642  pastix_atomic_lock( &(arg->lock) );
643  arg->sum += r;
644  pastix_atomic_unlock( &(arg->lock) );
645  }
646 }
647 
648 /**
649  *******************************************************************************
650  *
651  * @ingroup bcsc
652  *
653  * @brief Compute a scalar product between complex vectors: conjf(x).y
654  * (Parallel version)
655  *
656  *******************************************************************************
657  *
658  * @param[in] pastix_data
659  * The information about sequential and parallel version (Number of
660  * thread, ...).
661  *
662  * @param[in] n
663  * The number of elements of vectors x, y and r
664  *
665  * @param[in] y
666  * The first vector of the scalar product
667  *
668  * @param[in] n
669  * The second vector of the scalar product
670  *
671  * @param[out] r
672  * The result of the scalar product
673  *
674  *******************************************************************************/
676 bvec_cdotc_smp( pastix_data_t *pastix_data,
677  pastix_int_t n,
678  const pastix_complex32_t *x,
679  const pastix_complex32_t *y )
680 {
681  struct c_argument_dotc_s arg = {n, x, y, PASTIX_ATOMIC_UNLOCKED, 0.0};
682  isched_parallel_call( pastix_data->isched, pthread_bvec_cdotc, &arg );
683 
684 #if defined(PASTIX_WITH_MPI)
685  MPI_Allreduce( MPI_IN_PLACE, &(arg.sum), 1, PASTIX_MPI_COMPLEX32,
686  MPI_SUM, pastix_data->solvmatr->solv_comm );
687 #endif
688 
689  return arg.sum;
690 }
691 #endif
692 
693 /**
694  *******************************************************************************
695  *
696  * @ingroup bcsc
697  *
698  * @brief Compute the scalar product x.y. (Sequential version)
699  *
700  *******************************************************************************
701  *
702  * @param[in] pastix_data
703  * The information about sequential and parallel version (Number of
704  * thread, ...).
705  *
706  * @param[in] x
707  * The vector x.
708  *
709  * @param[in] y
710  * The vector y.
711  *
712  * @param[in] n
713  * The size of the vectors.
714  *
715  *******************************************************************************
716  *
717  * @retval the scalar product of x and y.
718  *
719  *******************************************************************************/
722  pastix_int_t n,
723  const pastix_complex32_t *x,
724  const pastix_complex32_t *y )
725 {
726  SolverMatrix *solvmtx = pastix_data->solvmatr;
727  SolverCblk *scblk = solvmtx->cblktab;
728  pastix_bcsc_t *bcsc = pastix_data->bcsc;
729  bcsc_cblk_t *bcblk = bcsc->cscftab;
730  pastix_int_t i, j, cblknbr;
731  const pastix_complex32_t *xptr;
732  const pastix_complex32_t *yptr;
733  pastix_complex32_t r = 0.0;
734 
735  cblknbr = bcsc->cscfnbr;
736  for( i = 0; i < cblknbr; i++, bcblk++ ) {
737  scblk = solvmtx->cblktab + bcblk->cblknum;
738  n = cblk_colnbr( scblk );
739 
740  xptr = x + scblk->lcolidx;
741  yptr = y + scblk->lcolidx;
742  for( j=0; j<n; j++, xptr++, yptr++ ) {
743  r += (*xptr) * (*yptr);
744  }
745  }
746 
747 #if defined(PASTIX_WITH_MPI)
748  MPI_Allreduce( MPI_IN_PLACE, &r, 1, PASTIX_MPI_COMPLEX32,
749  MPI_SUM, solvmtx->solv_comm );
750 #endif
751 
752  return r;
753 }
754 
755 /**
756  *******************************************************************************
757  *
758  * @ingroup bcsc_internal
759  *
760  * @brief Compute the scalar product x.y. (Parallel version)
761  *
762  *******************************************************************************
763  *
764  * @param[in] ctx
765  * The context of the current thread.
766  *
767  * @param[in] args
768  * The argument providing the vectors x and y and their size n.
769  *
770  *******************************************************************************/
771 static inline void
772 pthread_bvec_cdotu( isched_thread_t *ctx,
773  void *args )
774 {
775  struct c_argument_dotc_s *arg = (struct c_argument_dotc_s*)args;
776  int i;
777  pastix_int_t n = arg->n;
778  const pastix_complex32_t *x = arg->x;
779  const pastix_complex32_t *y = arg->y;
780  const pastix_complex32_t *xptr;
781  const pastix_complex32_t *yptr;
782  pastix_complex32_t r = 0.0;
783  pastix_int_t size, rank;
784  pastix_int_t begin, end;
785 
786  rank = (pastix_int_t)ctx->rank;
787  size = (pastix_int_t)ctx->global_ctx->world_size;
788 
789  begin = (n / size) * rank;
790  if ( rank == (size - 1)) {
791  end = n;
792  } else {
793  end = (n / size) * (rank + 1);
794  }
795 
796  xptr = x + begin;
797  yptr = y + begin;
798 
799  for (i=begin; i<end; i++, xptr++, yptr++)
800  {
801  r += (*xptr) * (*yptr);
802  }
803 
804  if ( cabsf(r) > 0. ) {
805  pastix_atomic_lock( &(arg->lock) );
806  arg->sum += r;
807  pastix_atomic_unlock( &(arg->lock) );
808  }
809 }
810 
811 /**
812  *******************************************************************************
813  *
814  * @ingroup bcsc
815  *
816  * @brief Compute a regular scalar product x.y (Parallel version)
817  *
818  *******************************************************************************
819  *
820  * @param[in] pastix_data
821  * The information about sequential and parallel version (Number of
822  * thread, ...).
823  *
824  * @param[in] n
825  * The number of elements of vectors x, y and r
826  *
827  * @param[in] x
828  * The first vector of the scalar product
829  *
830  * @param[in] y
831  * The second vector of the scalar product
832  *
833  *******************************************************************************
834  *
835  * @return The allocated vector
836  *
837  *******************************************************************************/
840  pastix_int_t n,
841  const pastix_complex32_t *x,
842  const pastix_complex32_t *y )
843 {
844  struct c_argument_dotc_s arg = {n, x, y, PASTIX_ATOMIC_UNLOCKED, 0.0};
845  isched_parallel_call( pastix_data->isched, pthread_bvec_cdotu, &arg );
846 
847 #if defined(PASTIX_WITH_MPI)
848  MPI_Allreduce( MPI_IN_PLACE, &(arg.sum), 1, PASTIX_MPI_COMPLEX32,
849  MPI_SUM, pastix_data->solvmatr->solv_comm );
850 #endif
851 
852  return arg.sum;
853 }
854 
855 /**
856  *******************************************************************************
857  *
858  * @ingroup bcsc
859  *
860  * @brief Copy a vector y = x (Sequential version)
861  *
862  *******************************************************************************
863  *
864  * @param[in] pastix_data
865  * The information about sequential and parallel version (Number of
866  * thread, ...).
867  *
868  * @param[in] n
869  * The number of elements of vectors x and y
870  *
871  * @param[in] x
872  * The vector to be copied
873  *
874  * @param[inout] y
875  * The vector copy of x
876  *
877  *******************************************************************************/
878 void
880  pastix_int_t n,
881  const pastix_complex32_t *x,
882  pastix_complex32_t *y )
883 {
884 #if defined(PASTIX_WITH_MPI) && 0
885  SolverMatrix *solvmtx = pastix_data->solvmatr;
886  SolverCblk *scblk = solvmtx->cblktab;
887  pastix_bcsc_t *bcsc = pastix_data->bcsc;
888  bcsc_cblk_t *bcblk = bcsc->cscftab;
889  pastix_int_t i, cblknbr;
890 
891  cblknbr = bcsc->cscfnbr;
892  for( i = 0; i < cblknbr; i++, bcblk++ ) {
893  scblk = solvmtx->cblktab + bcblk->cblknum;
894  n = cblk_colnbr( scblk );
895 
896  memcpy( y + scblk->lcolidx, x + scblk->lcolidx, n * sizeof(pastix_complex32_t) );
897  }
898 #else
899  (void)pastix_data;
900  memcpy( y, x, n * sizeof(pastix_complex32_t) );
901 #endif
902 }
903 
904 struct argument_copy_s {
905  pastix_int_t n;
906  const pastix_complex32_t *x;
908 };
909 
910 /**
911  *******************************************************************************
912  *
913  * @ingroup bcsc_internal
914  *
915  * @brief Copy a vector y = x (parallel version)
916  *
917  * Perform the coopy of x into y.
918  *
919  *******************************************************************************
920  *
921  * @param[in] ctx
922  * The context of the current thread
923  *
924  * @param[inout] args
925  * The argument containing the vector copy and the one to copy and their
926  * size.
927  *
928  *******************************************************************************/
929 static inline void
930 pthread_bvec_ccopy( isched_thread_t *ctx,
931  void *args )
932 {
933  struct argument_copy_s *arg = (struct argument_copy_s*)args;
934  pastix_int_t n = arg->n;
935  pastix_int_t size, rank;
936  pastix_int_t begin, end;
937 
938  size = (pastix_int_t)ctx->global_ctx->world_size;
939  rank = (pastix_int_t)ctx->rank;
940 
941  begin = (n/size) * rank;
942 
943  if (rank == (size - 1)) {
944  end = n;
945  } else {
946  end = (n/size) * (rank + 1);
947  }
948 
949  if ( (end - begin) > 0 ) {
950  memcpy( arg->y + begin, arg->x + begin, (end - begin) * sizeof(pastix_complex32_t) );
951  }
952 }
953 
954 /**
955  *******************************************************************************
956  *
957  * @ingroup bcsc
958  *
959  * @brief Copy a vector y = x (parallel version)
960  *
961  * Initialise argument for the parallel function
962  *
963  *******************************************************************************
964  *
965  * @param[in] pastix_data
966  * The information about sequential and parallel version (Number of
967  * thread, ...).
968  *
969  * @param[in] n
970  * The number of elements of vectors x and y
971  *
972  * @param[in] x
973  * The vector to be copied
974  *
975  * @param[inout] y
976  * The vector copy of x
977  *
978  *******************************************************************************/
979 void
981  pastix_int_t n,
982  const pastix_complex32_t *x,
983  pastix_complex32_t *y )
984 {
985  struct argument_copy_s args = {n, x, y};
986  isched_parallel_call( pastix_data->isched, pthread_bvec_ccopy, &args );
987 }
988 
989 /**
990  *******************************************************************************
991  *
992  * @ingroup bcsc
993  *
994  * @brief Solve A x = b with A the sparse matrix
995  *
996  * In Complex64 and Double precision and if mixed-precision is enabled, solve
997  * SA sx = sb with SA the sparse matrix previously initialized respectively
998  * in Complex32 or Float precision.
999  *
1000  *******************************************************************************
1001  *
1002  * @param[in] pastix_data
1003  * The PaStiX data structure that describes the solver instance.
1004  *
1005  * @param[inout] b
1006  * On entry, the right hand side
1007  * On exit, the solution of the problem A x = b
1008  * @param[inout] work
1009  * On entry, if mixed-precision is disabled or incompatible with the
1010  * sparse matrix's precision, the normal solve function is called and work must
1011  * be NULL. On exit, works stays NULL
1012  * If mixed-precision is enabled, work must be allocated as a vector
1013  * half the size of b, so that the solve function can be performed in
1014  * in mixed-precision. On exit, work is undefined.
1015  *
1016  *******************************************************************************/
1017 void bcsc_cspsv( pastix_data_t *pastix_data,
1018  pastix_complex32_t *b,
1019  pastix_complex32_t *work )
1020 {
1021  struct pastix_rhs_s rhsb = {
1022  .allocated = 0,
1023  .flttype = PastixComplex32,
1024  .m = pastix_data->bcsc->n,
1025  .n = 1,
1026  .ld = pastix_data->bcsc->n,
1027  .b = b,
1028  .cblkb = NULL,
1029  .rhs_comm = NULL,
1030  .Ploc2Pglob = NULL,
1031  };
1032  int rc;
1033 
1034  pastix_data->iparm[IPARM_VERBOSE]--;
1035 
1036 #if defined(PRECISION_z) || defined(PRECISION_d)
1037  if ( pastix_data->iparm[IPARM_MIXED] )
1038  {
1039  pastix_int_t n = rhsb.m;
1040  pastix_int_t nrhs = rhsb.n;
1041 
1042  rhsb.flttype = PastixComplex32;
1043  rhsb.b = work;
1044 
1045  /* Copying b into work at half the precision */
1046  rc = LAPACKE_clag2z_work( LAPACK_COL_MAJOR, n, nrhs,
1047  b, n, work, n );
1048  assert( rc == 0 );
1049 
1050  pastix_subtask_solve( pastix_data, &rhsb );
1051 
1052  /* Reverting to normal precision after solving */
1053  rc = LAPACKE_clag2z_work( LAPACK_COL_MAJOR, n, nrhs,
1054  work, n, b, n );
1055  assert( rc == 0 );
1056  }
1057  else
1058 #endif
1059  {
1060  assert(work == NULL);
1061  pastix_subtask_solve( pastix_data, &rhsb );
1062  }
1063 
1064  if ( rhsb.cblkb != NULL ) {
1065  free( rhsb.cblkb );
1066  }
1067  pastix_data->iparm[IPARM_VERBOSE]++;
1068  (void)rc;
1069  (void)work;
1070 }
1071 
1072 /**
1073  *******************************************************************************
1074  *
1075  * @ingroup bcsc
1076  *
1077  * @brief Compute \f[ y = \alpha A x + \beta y \f] (Sequential version)
1078  *
1079  *******************************************************************************
1080  *
1081  * @param[in] pastix_data
1082  * The information about sequential and parallel version (Number of
1083  * thread, ...).
1084  *
1085  * @param[in] m
1086  * The number of rows of the matrix A, and the size of y.
1087  *
1088  * @param[in] n
1089  * The number of columns of the matrix A, and the size of x.
1090  *
1091  * @param[in] alpha
1092  * The scalar alpha.
1093  *
1094  * @param[in] A
1095  * The dense matrix A of size lda-by-n.
1096  *
1097  * @param[in] lda
1098  * The leading dimension of the matrix A. lda >= max(1,m)
1099  *
1100  * @param[in] x
1101  * The vector x of size n.
1102  *
1103  * @param[in] beta
1104  * The scalar beta.
1105  *
1106  * @param[inout] y
1107  * On entry, the initial vector y of size m.
1108  * On exit, the updated vector.
1109  *
1110  *******************************************************************************/
1111 void
1113  pastix_int_t m,
1114  pastix_int_t n,
1115  pastix_complex32_t alpha,
1116  const pastix_complex32_t *A,
1117  pastix_int_t lda,
1118  const pastix_complex32_t *x,
1119  pastix_complex32_t beta,
1120  pastix_complex32_t *y )
1121 {
1122 #if defined(PASTIX_WITH_MPI) && 0
1123  SolverMatrix *solvmtx = pastix_data->solvmatr;
1124  SolverCblk *scblk = solvmtx->cblktab;
1125  pastix_bcsc_t *bcsc = pastix_data->bcsc;
1126  bcsc_cblk_t *bcblk = bcsc->cscftab;
1127  pastix_int_t i, cblknbr;
1128 
1129  cblknbr = bcsc->cscfnbr;
1130  for( i = 0; i < cblknbr; i++, bcblk++ ) {
1131  scblk = solvmtx->cblktab + bcblk->cblknum;
1132  m = cblk_colnbr( scblk );
1133 
1134  cblas_cgemv( CblasColMajor, CblasNoTrans, m, n,
1135  CBLAS_SADDR(alpha), A + scblk->lcolidx, lda, x, 1,
1136  CBLAS_SADDR(beta), y + scblk->lcolidx, 1 );
1137  }
1138 #else
1139  (void)pastix_data;
1140  cblas_cgemv( CblasColMajor, CblasNoTrans, m, n,
1141  CBLAS_SADDR(alpha), A, lda, x, 1,
1142  CBLAS_SADDR(beta), y, 1 );
1143 #endif
1144 
1145 }
1146 
1147 struct c_gemv_s
1148 {
1149  pastix_int_t m;
1150  pastix_int_t n;
1151  pastix_complex32_t alpha;
1152  const pastix_complex32_t *A;
1153  pastix_int_t lda;
1154  const pastix_complex32_t *x;
1155  pastix_complex32_t beta;
1156  pastix_complex32_t *y;
1157 };
1158 
1159 /**
1160  *******************************************************************************
1161  *
1162  * @ingroup bcsc_internal
1163  *
1164  * @brief Compute \f[ y = \alpha A x + \beta y \f] (Parallel version)
1165  *
1166  * This is the function called by bvec_cgemv_smp to perform gemv
1167  * (Parallel version)
1168  *
1169  *******************************************************************************
1170  *
1171  * @param[in] ctx
1172  * Information about number of thread and rank of current thread.
1173  *
1174  * @param[inout] args
1175  * The number of rows (m) and columns (n) of the matrix A, vecteors y
1176  * (size m) and x (size n), scalars alpha and beta. The dense matrix A
1177  * of size lda-by-n, and its leading dimension lda >= max(1,m), and
1178  * the vector x of size n. The result is stored in y.
1179  *
1180  *******************************************************************************/
1181 static inline void
1182 pthread_bvec_cgemv( isched_thread_t *ctx,
1183  void *args )
1184 {
1185  struct c_gemv_s *arg = (struct c_gemv_s*)args;
1186  pastix_int_t m = arg->m;
1187  pastix_int_t sub_m;
1188  pastix_int_t n = arg->n;
1189  pastix_complex32_t alpha = arg->alpha;
1190  const pastix_complex32_t *A = arg->A;
1191  const pastix_complex32_t *Aptr;
1192  pastix_int_t lda = arg->lda;
1193  const pastix_complex32_t *x = arg->x;
1194  const pastix_complex32_t *xptr;
1195  pastix_complex32_t beta = arg->beta;
1196  pastix_complex32_t *y = arg->y;
1197  pastix_complex32_t *yptr;
1198  pastix_int_t size, rank;
1199 
1200  size = (pastix_int_t)ctx->global_ctx->world_size;
1201  rank = (pastix_int_t)ctx->rank;
1202 
1203  sub_m = (m / size);
1204 
1205  /* Subdivision of A */
1206  Aptr = A + sub_m * rank;
1207 
1208  xptr = x;
1209  yptr = y + sub_m * rank;
1210 
1211  if (rank == (size - 1)) {
1212  sub_m += m % size; /* Last thread has to do more tasks */
1213  }
1214 
1215  if ( sub_m > 0 ) {
1216  cblas_cgemv( CblasColMajor, CblasNoTrans, sub_m, n,
1217  CBLAS_SADDR(alpha), Aptr, lda, xptr, 1,
1218  CBLAS_SADDR(beta), yptr, 1 );
1219  }
1220 }
1221 
1222 /**
1223  *******************************************************************************
1224  *
1225  * @ingroup bcsc
1226  *
1227  * @brief Compute \f[ y = \alpha A x + \beta y \f] (Parallel version)
1228  *
1229  *******************************************************************************
1230  *
1231  * @param[in] pastix_data
1232  * The information about sequential and parallel version (Number of
1233  * thread, ...).
1234  *
1235  * @param[in] m
1236  * The number of rows of the matrix A, and the size of y.
1237  *
1238  * @param[in] n
1239  * The number of columns of the matrix A, and the size of x.
1240  *
1241  * @param[in] alpha
1242  * The scalar alpha.
1243  *
1244  * @param[in] A
1245  * The dense matrix A of size lda-by-n.
1246  *
1247  * @param[in] lda
1248  * The leading dimension of the matrix A. lda >= max(1,m)
1249  *
1250  * @param[in] x
1251  * The vector x of size n.
1252  *
1253  * @param[in] beta
1254  * The scalar beta.
1255  *
1256  * @param[inout] y
1257  * On entry, the initial vector y of size m.
1258  * On exit, the updated vector.
1259  *
1260  *******************************************************************************/
1261 void
1263  pastix_int_t m,
1264  pastix_int_t n,
1265  pastix_complex32_t alpha,
1266  const pastix_complex32_t *A,
1267  pastix_int_t lda,
1268  const pastix_complex32_t *x,
1269  pastix_complex32_t beta,
1270  pastix_complex32_t *y )
1271 {
1272  struct c_gemv_s arg = {m, n, alpha, A, lda, x, beta, y};
1273 
1274  isched_parallel_call( pastix_data->isched, pthread_bvec_cgemv, &arg );
1275 }
1276 
1277 /**
1278  *******************************************************************************
1279  *
1280  * @ingroup bcsc
1281  *
1282  * @brief Set to 0 remote coefficients
1283  *
1284  *******************************************************************************
1285  *
1286  * @param[in] pastix_data
1287  * The information about sequential and parallel version (Number of
1288  * thread, ...).
1289  *
1290  * @param[inout] y
1291  * On entry, the initial vector y of size m.
1292  * On exit, the y vector with remote section set to 0.
1293  *
1294  *******************************************************************************/
1295 void
1297  pastix_complex32_t *y )
1298 {
1299 #if defined( PASTIX_WITH_MPI )
1300  const SolverMatrix *solvmtx = pastix_data->solvmatr;
1301  const SolverCblk *cblk = solvmtx->cblktab;
1302  pastix_int_t cblknbr;
1303  pastix_int_t i, lastindex = 0;
1304  pastix_int_t n = pastix_data->csc->gNexp;
1305 
1306  cblknbr = solvmtx->cblknbr;
1307  for ( i = 0; i < cblknbr; i++, cblk++ ) {
1308  if ( cblk->cblktype & (CBLK_FANIN|CBLK_RECV) ) {
1309  continue;
1310  }
1311 
1312  if ( cblk->fcolnum != lastindex ) {
1313  /* Set to 0 all remote data bewtween previous local cblk, and current cblk */
1314  memset(
1315  y + lastindex, 0, ( cblk->fcolnum - lastindex ) * sizeof( pastix_complex32_t ) );
1316  }
1317  lastindex = cblk->lcolnum + 1;
1318  }
1319 
1320  if ( lastindex < n ) {
1321  /* Set to 0 all remote data bewtween previous local cblk, and current cblk */
1322  memset( y + lastindex, 0, ( n - lastindex ) * sizeof( pastix_complex32_t ) );
1323  }
1324 #endif
1325  (void)pastix_data;
1326  (void)y;
1327 }
1328 
1329 /**
1330  *******************************************************************************
1331  *
1332  * @ingroup bcsc
1333  *
1334  * @brief Gather a distributed right hand side (bvec storage) on all nodes.
1335  *
1336  *******************************************************************************
1337  *
1338  * @param[in] pastix_data
1339  * The information about sequential and parallel version (Number of
1340  * thread, ...).
1341  *
1342  * @param[inout] y
1343  * On entry, the local portion of the vector y.
1344  * On exit, the complete vector y.
1345  *
1346  *******************************************************************************
1347  *
1348  * @retval TODO
1349  *
1350  *******************************************************************************/
1351 const pastix_complex32_t *
1353  const pastix_complex32_t *y )
1354 {
1355 #if defined( PASTIX_WITH_MPI )
1356  const SolverMatrix *solvmtx = pastix_data->solvmatr;
1357  const SolverCblk *cblk = solvmtx->cblktab;
1358  pastix_complex32_t *yglobal = NULL;
1359  pastix_complex32_t *yto, *ytmp;
1360  const pastix_complex32_t *yfr;
1361  pastix_int_t c, i, cblki, cblknbr;
1362  MPI_Request request_c = MPI_REQUEST_NULL;
1363  MPI_Request request_n = MPI_REQUEST_NULL;
1364  pastix_int_t *all_n, *all_cblknbr, *indices;
1365  pastix_int_t max_n = 0;
1366  pastix_int_t max_cblknbr = 0;
1367  pastix_int_t gn = pastix_data->bcsc->gN;
1368  pastix_int_t ln = pastix_data->bcsc->n;
1369  pastix_int_t lcblknbr, colnbr;
1370 
1371  if ( ln != 0 ) {
1372  yglobal = malloc( gn * sizeof(pastix_complex32_t) );
1373 #if !defined(NDEBUG)
1374  memset( yglobal, 0xff, gn * sizeof(pastix_complex32_t) );
1375 #endif
1376  }
1377  all_n = malloc( pastix_data->procnbr * sizeof(pastix_int_t) );
1378  all_cblknbr = malloc( pastix_data->procnbr * sizeof(pastix_int_t) );
1379 
1380  MPI_Allgather( &ln, 1, PASTIX_MPI_INT,
1381  all_n, 1, PASTIX_MPI_INT, pastix_data->pastix_comm );
1382  lcblknbr = solvmtx->cblknbr - solvmtx->faninnbr - solvmtx->recvnbr;
1383  MPI_Allgather( &lcblknbr, 1, PASTIX_MPI_INT,
1384  all_cblknbr, 1, PASTIX_MPI_INT, pastix_data->pastix_comm );
1385 
1386  for( c=0; c<pastix_data->procnbr; c++ )
1387  {
1388  max_n = pastix_imax( max_n, all_n[c] );
1389  max_cblknbr = pastix_imax( max_cblknbr, all_cblknbr[c] );
1390  }
1391 
1392  ytmp = malloc( max_n * sizeof(pastix_complex32_t) );
1393  indices = malloc( max_cblknbr * 2 * sizeof(pastix_int_t) );
1394 
1395  for( c=0; c<pastix_data->procnbr; c++ )
1396  {
1397  if ( all_n[c] == 0 ) {
1398  continue;
1399  }
1400 
1401  if ( c == pastix_data->procnum ) {
1402  MPI_Ibcast( (pastix_complex32_t*)y, ln, PASTIX_MPI_COMPLEX32, c, pastix_data->pastix_comm, &request_n );
1403 
1404  cblknbr = solvmtx->cblknbr;
1405  cblk = solvmtx->cblktab;
1406  cblki = 0;
1407  for ( i = 0; i < cblknbr; i++, cblk++ ) {
1408  if ( cblk->cblktype & (CBLK_FANIN|CBLK_RECV) ) {
1409  continue;
1410  }
1411 
1412  yfr = y + cblk->lcolidx;
1413  yto = yglobal + cblk->fcolnum;
1414 
1415  memcpy( yto, yfr, cblk_colnbr( cblk ) * sizeof( pastix_complex32_t ) );
1416 
1417  indices[ 2*cblki ] = cblk->fcolnum;
1418  indices[ 2*cblki+1 ] = cblk->lcolnum;
1419  cblki++;
1420  }
1421  assert( cblki == lcblknbr );
1422 
1423  MPI_Ibcast( indices, 2 * lcblknbr, PASTIX_MPI_INT, c, pastix_data->pastix_comm, &request_c );
1424  MPI_Wait( &request_n, MPI_STATUS_IGNORE );
1425  MPI_Wait( &request_c, MPI_STATUS_IGNORE );
1426  }
1427  else {
1428  MPI_Ibcast( ytmp, all_n[c], PASTIX_MPI_COMPLEX32, c, pastix_data->pastix_comm, &request_n );
1429  MPI_Ibcast( indices, all_cblknbr[c] * 2, PASTIX_MPI_INT, c, pastix_data->pastix_comm, &request_c );
1430  MPI_Wait( &request_n, MPI_STATUS_IGNORE );
1431  MPI_Wait( &request_c, MPI_STATUS_IGNORE );
1432 
1433  /* If ln = 0, there are no local computation so no need to store in yglobal */
1434  if ( ln != 0 ) {
1435  yfr = ytmp;
1436  for ( i = 0; i < all_cblknbr[c]; i++ ) {
1437  yto = yglobal + indices[2*i];
1438 
1439  colnbr = indices[2*i+1] - indices[2*i] + 1;
1440  memcpy( yto, yfr, colnbr * sizeof( pastix_complex32_t ) );
1441 
1442  yfr += colnbr;
1443  }
1444  }
1445  }
1446  }
1447 
1448  free( all_n );
1449  free( all_cblknbr );
1450  free( ytmp );
1451  free( indices );
1452 
1453  return yglobal;
1454 #else
1455  (void)pastix_data;
1456  return y;
1457 #endif
1458 }
1459 
1460 /**
1461  *******************************************************************************
1462  *
1463  * @ingroup bcsc
1464  *
1465  * @brief Apply an all reduce of the vector on all nodes
1466  *
1467  *******************************************************************************
1468  *
1469  * @param[in] pastix_data
1470  * The information about sequential and parallel version (Number of
1471  * thread, ...).
1472  *
1473  * @param[inout] y
1474  * On entry, the initial vector y of size m.
1475  * On exit, the y vector with remote section set to 0.
1476  *
1477  *******************************************************************************/
1478 void
1479 bvec_callreduce( const pastix_data_t *pastix_data,
1480  pastix_complex32_t *y )
1481 {
1482 #if defined( PASTIX_WITH_MPI )
1483  /* Reduce the partial sums on all nodes */
1484  MPI_Allreduce( MPI_IN_PLACE,
1485  y, pastix_data->csc->gNexp,
1486  PASTIX_MPI_COMPLEX32, MPI_SUM,
1487  pastix_data->inter_node_comm );
1488 #endif
1489  (void)pastix_data;
1490  (void)y;
1491 }
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
float _Complex pastix_complex32_t
Definition: datatypes.h:76
static void pthread_bvec_cdotu(isched_thread_t *ctx, void *args)
Compute the scalar product x.y. (Parallel version)
static void pthread_bvec_cgemv(isched_thread_t *ctx, void *args)
Compute.
static void pthread_bvec_caxpy(isched_thread_t *ctx, void *args)
Compute y <- alpha * x + y (Parallel version).
static void pthread_bvec_ccopy(isched_thread_t *ctx, void *args)
Copy a vector y = x (parallel version)
pastix_int_t cblknum
Definition: bcsc.h:112
void bvec_cgemv_smp(pastix_data_t *pastix_data, pastix_int_t m, pastix_int_t n, pastix_complex32_t alpha, const pastix_complex32_t *A, pastix_int_t lda, const pastix_complex32_t *x, pastix_complex32_t beta, pastix_complex32_t *y)
Compute.
float bvec_cnrm2_seq(pastix_data_t *pastix_data, pastix_int_t n, const pastix_complex32_t *x)
Compute the norm 2 of a vector. (Sequential version)
Definition: bvec_ccompute.c:71
const pastix_complex32_t * bvec_cgather_remote(const pastix_data_t *pastix_data, const pastix_complex32_t *y)
Gather a distributed right hand side (bvec storage) on all nodes.
void bvec_cscal_seq(pastix_data_t *pastix_data, pastix_int_t n, pastix_complex32_t alpha, pastix_complex32_t *x)
Scale a vector by the scalar alpha. (Sequential version)
void bvec_callreduce(const pastix_data_t *pastix_data, pastix_complex32_t *y)
Apply an all reduce of the vector on all nodes.
void bvec_ccopy_smp(pastix_data_t *pastix_data, pastix_int_t n, const pastix_complex32_t *x, pastix_complex32_t *y)
Copy a vector y = x (parallel version)
void bvec_ccopy_seq(pastix_data_t *pastix_data, pastix_int_t n, const pastix_complex32_t *x, pastix_complex32_t *y)
Copy a vector y = x (Sequential version)
void bvec_caxpy_seq(pastix_data_t *pastix_data, pastix_int_t n, pastix_complex32_t alpha, const pastix_complex32_t *x, pastix_complex32_t *y)
Compute y <- alpha * x + y. (Sequential version)
void bcsc_cspsv(pastix_data_t *pastix_data, pastix_complex32_t *b, pastix_complex32_t *work)
Solve A x = b with A the sparse matrix.
pastix_complex32_t bvec_cdotu_smp(pastix_data_t *pastix_data, pastix_int_t n, const pastix_complex32_t *x, const pastix_complex32_t *y)
Compute a regular scalar product x.y (Parallel version)
static void pthread_bvec_cnrm2(isched_thread_t *ctx, void *args)
Compute the norm 2 of a vector. (Parallel version)
pastix_complex32_t bvec_cdotu_seq(pastix_data_t *pastix_data, pastix_int_t n, const pastix_complex32_t *x, const pastix_complex32_t *y)
Compute the scalar product x.y. (Sequential version)
void bvec_cgemv_seq(pastix_data_t *pastix_data, pastix_int_t m, pastix_int_t n, pastix_complex32_t alpha, const pastix_complex32_t *A, pastix_int_t lda, const pastix_complex32_t *x, pastix_complex32_t beta, pastix_complex32_t *y)
Compute.
float bvec_cnrm2_smp(pastix_data_t *pastix_data, pastix_int_t n, const pastix_complex32_t *x)
Compute the norm 2 of a vector. (Parallel version)
void bvec_caxpy_smp(pastix_data_t *pastix_data, pastix_int_t n, pastix_complex32_t alpha, const pastix_complex32_t *x, pastix_complex32_t *y)
Perform y = alpha * x + y (Parallel version)
static void pthread_bvec_cscal(isched_thread_t *ctx, void *args)
Scale a vector (Parallel version)
void bvec_cscal_smp(pastix_data_t *pastix_data, pastix_int_t n, pastix_complex32_t alpha, pastix_complex32_t *x)
Scale a vector (Parallel version)
void bvec_cnullify_remote(const pastix_data_t *pastix_data, pastix_complex32_t *y)
Set to 0 remote coefficients.
Compressed colptr format for the bcsc.
Definition: bcsc.h:110
@ IPARM_MIXED
Definition: api.h:139
@ IPARM_VERBOSE
Definition: api.h:36
int pastix_subtask_solve(pastix_data_t *pastix_data, pastix_rhs_t b)
Solve the given problem without applying the permutation.
PASTIX_Comm pastix_comm
Definition: pastixdata.h:76
void ** cblkb
Definition: pastixdata.h:162
SolverMatrix * solvmatr
Definition: pastixdata.h:103
pastix_int_t * iparm
Definition: pastixdata.h:70
const spmatrix_t * csc
Definition: pastixdata.h:90
pastix_coeftype_t flttype
Definition: pastixdata.h:157
isched_t * isched
Definition: pastixdata.h:86
PASTIX_Comm inter_node_comm
Definition: pastixdata.h:78
pastix_int_t m
Definition: pastixdata.h:158
pastix_bcsc_t * bcsc
Definition: pastixdata.h:102
pastix_int_t n
Definition: pastixdata.h:159
int8_t allocated
Definition: pastixdata.h:156
Main PaStiX data structure.
Definition: pastixdata.h:68
Main PaStiX RHS structure.
Definition: pastixdata.h:155
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:329
pastix_int_t cblknbr
Definition: solver.h:211
pastix_int_t faninnbr
Definition: solver.h:213
pastix_int_t recvnbr
Definition: solver.h:216
pastix_int_t lcolidx
Definition: solver.h:170
SolverCblk *restrict cblktab
Definition: solver.h:228
int8_t cblktype
Definition: solver.h:164
pastix_int_t lcolnum
Definition: solver.h:167
pastix_int_t fcolnum
Definition: solver.h:166
Solver column block structure.
Definition: solver.h:161
Solver column block structure.
Definition: solver.h:203