PaStiX Handbook  6.2.1
core_cgeadd.c
Go to the documentation of this file.
1 /**
2  *
3  * @file core_cgeadd.c
4  *
5  * PaStiX kernel routines
6  *
7  * @copyright 2010-2015 Univ. of Tennessee, Univ. of California Berkeley and
8  * Univ. of Colorado Denver. All rights reserved.
9  * @copyright 2012-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
10  * Univ. Bordeaux. All rights reserved.
11  *
12  * @version 6.1.0
13  * @author Mathieu Faverge
14  * @author Gregoire Pichon
15  * @author Pierre Ramet
16  * @author Xavier Lacoste
17  * @date 2019-11-12
18  * @generated from /builds/solverstack/pastix/kernels/core_zgeadd.c, normal z -> c, Tue Apr 12 09:38:35 2022
19  *
20  **/
21 #include "common.h"
22 #include "blend/solver.h"
23 #include "pastix_ccores.h"
24 #include "cblas.h"
25 
26 /**
27  ******************************************************************************
28  *
29  * @brief Add two matrices together.
30  *
31  * Perform the operation: B <- alpha * op(A) + B
32  *
33  *******************************************************************************
34  *
35  * @param[in] trans
36  * @arg PastixNoTrans: No transpose, op( A ) = A;
37  * @arg PastixTrans: Transpose, op( A ) = A';
38  * @arg PastixConjTrans: Conjugate Transpose, op( A ) = conjf(A').
39  *
40  * @param[in] M
41  * Number of rows of the matrix B.
42  * Number of rows of the matrix A, if trans == PastixNoTrans, number of
43  * columns of A otherwise.
44  *
45  * @param[in] N
46  * Number of columns of the matrix B.
47  * Number of columns of the matrix A, if trans == PastixNoTrans, number
48  * of rows of A otherwise.
49  *
50  * @param[in] alpha
51  * Scalar factor of A.
52  *
53  * @param[in] A
54  * Matrix of size LDA-by-N, if trans == PastixNoTrans, LDA-by-M,
55  * otherwise.
56  *
57  * @param[in] LDA
58  * Leading dimension of the array A. LDA >= max(1,K).
59  * K = M if trans == PastixNoTrans, K = N otherwise.
60  *
61  * @param[in] beta
62  * Scalar factor of B.
63  *
64  * @param[inout] B
65  * Matrix of size LDB-by-N.
66  *
67  * @param[in] LDB
68  * Leading dimension of the array B. LDB >= max(1,M)
69  *
70  *******************************************************************************
71  *
72  * @retval PASTIX_SUCCESS successful exit
73  * @retval <0 if -i, the i-th argument had an illegal value
74  * @retval 1, not yet implemented
75  *
76  ******************************************************************************/
77 int
79  pastix_int_t M,
80  pastix_int_t N,
81  pastix_complex32_t alpha,
82  const pastix_complex32_t *A,
83  pastix_int_t LDA,
84  pastix_complex32_t beta,
85  pastix_complex32_t *B,
86  pastix_int_t LDB)
87 {
88  int i, j;
89 
90 #if !defined(NDEBUG)
91  if ((trans < PastixNoTrans) ||
92  (trans > PastixConjTrans))
93  {
94  return -1;
95  }
96 
97  if (M < 0) {
98  return -2;
99  }
100  if (N < 0) {
101  return -3;
102  }
103  if ( ((trans == PastixNoTrans) && (LDA < pastix_imax(1,M)) && (M > 0)) ||
104  ((trans != PastixNoTrans) && (LDA < pastix_imax(1,N)) && (N > 0)) )
105  {
106  return -6;
107  }
108  if ( (LDB < pastix_imax(1,M)) && (M > 0) ) {
109  return -8;
110  }
111 #endif
112 
113  switch( trans ) {
114 #if defined(PRECISION_z) || defined(PRECISION_c)
115  case PastixConjTrans:
116  if ( alpha == 0.0 ) {
117  for (j=0; j<N; j++) {
118  for(i=0; i<M; i++, B++) {
119  *B = beta * (*B);
120  }
121  B += LDB-M;
122  }
123  }
124  else if ( beta == 0.0 ) {
125  for (j=0; j<N; j++, A++) {
126  for(i=0; i<M; i++, B++) {
127  *B = alpha * conjf(A[LDA*i]);
128  }
129  B += LDB-M;
130  }
131  }
132  else {
133  for (j=0; j<N; j++, A++) {
134  for(i=0; i<M; i++, B++) {
135  *B = beta * (*B) + alpha * conjf(A[LDA*i]);
136  }
137  B += LDB-M;
138  }
139  }
140  break;
141 #endif /* defined(PRECISION_z) || defined(PRECISION_c) */
142 
143  case PastixTrans:
144  if ( alpha == 0.0 ) {
145  for (j=0; j<N; j++) {
146  for(i=0; i<M; i++, B++) {
147  *B = beta * (*B);
148  }
149  B += LDB-M;
150  }
151  }
152  else if ( beta == 0.0 ) {
153  for (j=0; j<N; j++, A++) {
154  for(i=0; i<M; i++, B++) {
155  *B = alpha * A[LDA*i];
156  }
157  B += LDB-M;
158  }
159  }
160  else {
161  for (j=0; j<N; j++, A++) {
162  for(i=0; i<M; i++, B++) {
163  *B = beta * (*B) + alpha * A[LDA*i];
164  }
165  B += LDB-M;
166  }
167  }
168  break;
169 
170  case PastixNoTrans:
171  default:
172  if ( alpha == 0.0 ) {
173  for (j=0; j<N; j++) {
174  for(i=0; i<M; i++, B++) {
175  *B = beta * (*B);
176  }
177  B += LDB-M;
178  }
179  }
180  else if ( beta == 0.0 ) {
181  for (j=0; j<N; j++) {
182  for(i=0; i<M; i++, B++, A++) {
183  *B = alpha * (*A);
184  }
185  A += LDA-M;
186  B += LDB-M;
187  }
188  }
189  else {
190  for (j=0; j<N; j++) {
191  for(i=0; i<M; i++, B++, A++) {
192  *B = beta * (*B) + alpha * (*A);
193  }
194  A += LDA-M;
195  B += LDB-M;
196  }
197  }
198  }
199  return PASTIX_SUCCESS;
200 }
201 
202 /**
203  ******************************************************************************
204  *
205  * @brief Add two column blocks together.
206  *
207  * Perform the operation: cblk2 <- cblk1 + cblk2 using core_cgeadd().
208  *
209  *******************************************************************************
210  *
211  * @param[in] cblk1
212  * The pointer to the data structure that describes the panel to add.
213  * Next column blok must be accessible through cblk1[1].
214  *
215  * @param[in] cblk2
216  * The pointer to the data structure that describes the panel in which
217  * we add.
218  * Next column blok must be accessible through cblk2[1].
219  *
220  * @param[in] L
221  * The pointer to the lower matrix storing the coefficients of the
222  * panel. Must be of size cblk.stride -by- cblk.width
223  *
224  * @param[inout] Cl
225  * The pointer to the lower matrix storing the coefficients of the
226  * updated panel. Must be of size cblk.stride -by- cblk.width
227  * On exit Cl = Cl + L.
228  *
229  * @param[in] U
230  * The pointer to the upper matrix storing the coefficients of the
231  * panel. Must be of size cblk.stride -by- cblk.width. Ignored if
232  * NULL.
233  *
234  * @param[inout] Cu
235  * The pointer to the upper matrix storing the coefficients of the
236  * updated panel. Must be of size cblk.stride -by- cblk.width
237  * On exit Cu = Cu + U.
238  *
239  *******************************************************************************
240  *
241  * @retval PASTIX_SUCCESS on success.
242  *
243  ******************************************************************************/
244 int
246  SolverCblk *cblk2,
247  const pastix_complex32_t *L,
248  pastix_complex32_t *Cl,
249  const pastix_complex32_t *U,
250  pastix_complex32_t *Cu )
251 {
252  SolverBlok *iterblok;
253  SolverBlok *firstblok;
254  SolverBlok *lastblok;
255  SolverBlok *fblok;
256  pastix_int_t ncol1 = cblk1->lcolnum - cblk1->fcolnum + 1;
257  const pastix_complex32_t *ga;
258  pastix_complex32_t *gb;
259 
260  assert(0 /* Outdated */);
261 
262  firstblok = cblk1->fblokptr;
263  lastblok = cblk1[1].fblokptr;
264  fblok = cblk2->fblokptr;
265 
266  assert(cblk1->fcolnum >= cblk2->fcolnum);
267 
268  for (iterblok = firstblok; iterblok < lastblok; iterblok++) {
269  pastix_int_t nrow;
270  /* Find facing bloknum */
271  while (!is_block_inside_fblock( iterblok, fblok )) {
272  fblok++;
273  assert( fblok < cblk2[1].fblokptr );
274  }
275  ga = L + iterblok->coefind;
276  gb = Cl + cblk2->stride*(cblk1->fcolnum-cblk2->fcolnum) +
277  fblok->coefind +
278  iterblok->frownum - fblok->frownum;
279  nrow = iterblok->lrownum - iterblok->frownum + 1;
281  nrow, ncol1,
282  1.0, ga, cblk1->stride,
283  1.0, gb, cblk2->stride );
284  if (U != NULL) {
285  ga = U + iterblok->coefind;
286  gb = Cu + cblk2->stride*(cblk1->fcolnum-cblk2->fcolnum) +
287  fblok->coefind +
288  iterblok->frownum - fblok->frownum;
290  nrow, ncol1,
291  1.0, ga, cblk1->stride,
292  1.0, gb, cblk2->stride );
293  }
294  }
295  return PASTIX_SUCCESS;
296 }
solver_blok_s::frownum
pastix_int_t frownum
Definition: solver.h:112
solver.h
PastixTrans
@ PastixTrans
Definition: api.h:425
solver_cblk_s::fblokptr
SolverBlok * fblokptr
Definition: solver.h:134
solver_cblk_s::stride
pastix_int_t stride
Definition: solver.h:135
solver_cblk_s
Solver column block structure.
Definition: solver.h:127
pastix_trans_t
enum pastix_trans_e pastix_trans_t
Transpostion.
solver_blok_s
Solver block structure.
Definition: solver.h:107
PastixNoTrans
@ PastixNoTrans
Definition: api.h:424
PastixConjTrans
@ PastixConjTrans
Definition: api.h:426
solver_cblk_s::lcolnum
pastix_int_t lcolnum
Definition: solver.h:133
is_block_inside_fblock
static int is_block_inside_fblock(const SolverBlok *blok, const SolverBlok *fblok)
Check if a block is included inside another one.
Definition: solver.h:426
pastix_ccores.h
PASTIX_SUCCESS
@ PASTIX_SUCCESS
Definition: api.h:346
solver_blok_s::coefind
pastix_int_t coefind
Definition: solver.h:114
solver_blok_s::lrownum
pastix_int_t lrownum
Definition: solver.h:113
solver_cblk_s::fcolnum
pastix_int_t fcolnum
Definition: solver.h:132
core_cgeadd
int core_cgeadd(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex32_t alpha, const pastix_complex32_t *A, pastix_int_t LDA, pastix_complex32_t beta, pastix_complex32_t *B, pastix_int_t LDB)
Add two matrices together.
Definition: core_cgeadd.c:78
cpucblk_cgeaddsp1d
int cpucblk_cgeaddsp1d(const SolverCblk *cblk1, SolverCblk *cblk2, const pastix_complex32_t *L, pastix_complex32_t *Cl, const pastix_complex32_t *U, pastix_complex32_t *Cu)
Add two column blocks together.
Definition: core_cgeadd.c:245