PaStiX Handbook  6.3.2
core_sgeadd.c
Go to the documentation of this file.
1 /**
2  *
3  * @file core_sgeadd.c
4  *
5  * PaStiX kernel routines
6  *
7  * @copyright 2010-2015 Univ. of Tennessee, Univ. of California Berkeley and
8  * Univ. of Colorado Denver. All rights reserved.
9  * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
10  * Univ. Bordeaux. All rights reserved.
11  *
12  * @version 6.3.2
13  * @author Mathieu Faverge
14  * @author Gregoire Pichon
15  * @author Pierre Ramet
16  * @author Xavier Lacoste
17  * @date 2023-07-21
18  * @generated from /builds/solverstack/pastix/kernels/core_zgeadd.c, normal z -> s, Wed Dec 13 12:09:15 2023
19  *
20  **/
21 #include "common.h"
22 #include "blend/solver.h"
23 #include "pastix_scores.h"
24 #include "cblas.h"
25 
26 /**
27  ******************************************************************************
28  *
29  * @brief Add two matrices together.
30  *
31  * Perform the operation: B <- alpha * op(A) + B
32  *
33  *******************************************************************************
34  *
35  * @param[in] trans
36  * @arg PastixNoTrans: No transpose, op( A ) = A;
37  * @arg PastixTrans: Transpose, op( A ) = A';
38  * @arg PastixTrans: Conjugate Transpose, op( A ) = (A').
39  *
40  * @param[in] M
41  * Number of rows of the matrix B.
42  * Number of rows of the matrix A, if trans == PastixNoTrans, number of
43  * columns of A otherwise.
44  *
45  * @param[in] N
46  * Number of columns of the matrix B.
47  * Number of columns of the matrix A, if trans == PastixNoTrans, number
48  * of rows of A otherwise.
49  *
50  * @param[in] alpha
51  * Scalar factor of A.
52  *
53  * @param[in] A
54  * Matrix of size LDA-by-N, if trans == PastixNoTrans, LDA-by-M,
55  * otherwise.
56  *
57  * @param[in] LDA
58  * Leading dimension of the array A. LDA >= max(1,K).
59  * K = M if trans == PastixNoTrans, K = N otherwise.
60  *
61  * @param[in] beta
62  * Scalar factor of B.
63  *
64  * @param[inout] B
65  * Matrix of size LDB-by-N.
66  *
67  * @param[in] LDB
68  * Leading dimension of the array B. LDB >= max(1,M)
69  *
70  *******************************************************************************
71  *
72  * @retval PASTIX_SUCCESS successful exit
73  * @retval <0 if -i, the i-th argument had an illegal value
74  * @retval 1, not yet implemented
75  *
76  ******************************************************************************/
77 int
79  pastix_int_t M,
80  pastix_int_t N,
81  float alpha,
82  const float *A,
83  pastix_int_t LDA,
84  float beta,
85  float *B,
86  pastix_int_t LDB)
87 {
88  int i, j;
89 
90 #if !defined(NDEBUG)
91  if ((trans < PastixNoTrans) ||
92  (trans > PastixTrans))
93  {
94  return -1;
95  }
96 
97  if (M < 0) {
98  return -2;
99  }
100  if (N < 0) {
101  return -3;
102  }
103  if ( ((trans == PastixNoTrans) && (LDA < pastix_imax(1,M)) && (M > 0)) ||
104  ((trans != PastixNoTrans) && (LDA < pastix_imax(1,N)) && (N > 0)) )
105  {
106  return -6;
107  }
108  if ( (LDB < pastix_imax(1,M)) && (M > 0) ) {
109  return -8;
110  }
111 #endif
112 
113  switch( trans ) {
114 #if defined(PRECISION_z) || defined(PRECISION_c)
115  case PastixTrans:
116  if ( alpha == 0.0 ) {
117  for (j=0; j<N; j++) {
118  for(i=0; i<M; i++, B++) {
119  *B = beta * (*B);
120  }
121  B += LDB-M;
122  }
123  }
124  else if ( beta == 0.0 ) {
125  for (j=0; j<N; j++, A++) {
126  for(i=0; i<M; i++, B++) {
127  *B = alpha * (A[LDA*i]);
128  }
129  B += LDB-M;
130  }
131  }
132  else {
133  for (j=0; j<N; j++, A++) {
134  for(i=0; i<M; i++, B++) {
135  *B = beta * (*B) + alpha * (A[LDA*i]);
136  }
137  B += LDB-M;
138  }
139  }
140  break;
141 #endif /* defined(PRECISION_z) || defined(PRECISION_c) */
142 
143  case PastixTrans:
144  if ( alpha == 0.0 ) {
145  for (j=0; j<N; j++) {
146  for(i=0; i<M; i++, B++) {
147  *B = beta * (*B);
148  }
149  B += LDB-M;
150  }
151  }
152  else if ( beta == 0.0 ) {
153  for (j=0; j<N; j++, A++) {
154  for(i=0; i<M; i++, B++) {
155  *B = alpha * A[LDA*i];
156  }
157  B += LDB-M;
158  }
159  }
160  else {
161  for (j=0; j<N; j++, A++) {
162  for(i=0; i<M; i++, B++) {
163  *B = beta * (*B) + alpha * A[LDA*i];
164  }
165  B += LDB-M;
166  }
167  }
168  break;
169 
170  case PastixNoTrans:
171  default:
172  if ( alpha == 0.0 ) {
173  for (j=0; j<N; j++) {
174  for(i=0; i<M; i++, B++) {
175  *B = beta * (*B);
176  }
177  B += LDB-M;
178  }
179  }
180  else if ( beta == 0.0 ) {
181  for (j=0; j<N; j++) {
182  for(i=0; i<M; i++, B++, A++) {
183  *B = alpha * (*A);
184  }
185  A += LDA-M;
186  B += LDB-M;
187  }
188  }
189  else {
190  for (j=0; j<N; j++) {
191  for(i=0; i<M; i++, B++, A++) {
192  *B = beta * (*B) + alpha * (*A);
193  }
194  A += LDA-M;
195  B += LDB-M;
196  }
197  }
198  }
199  return PASTIX_SUCCESS;
200 }
201 
202 /**
203  ******************************************************************************
204  *
205  * @brief Add two column blocks together.
206  *
207  * Perform the operation: cblk2 <- cblk1 + cblk2 using core_sgeadd().
208  *
209  *******************************************************************************
210  *
211  * @param[in] cblk1
212  * The pointer to the data structure that describes the panel to add.
213  * Next column blok must be accessible through cblk1[1].
214  *
215  * @param[in] cblk2
216  * The pointer to the data structure that describes the panel in which
217  * we add.
218  * Next column blok must be accessible through cblk2[1].
219  *
220  * @param[in] L
221  * The pointer to the lower matrix storing the coefficients of the
222  * panel. Must be of size cblk.stride -by- cblk.width
223  *
224  * @param[inout] Cl
225  * The pointer to the lower matrix storing the coefficients of the
226  * updated panel. Must be of size cblk.stride -by- cblk.width
227  * On exit Cl = Cl + L.
228  *
229  * @param[in] U
230  * The pointer to the upper matrix storing the coefficients of the
231  * panel. Must be of size cblk.stride -by- cblk.width. Ignored if
232  * NULL.
233  *
234  * @param[inout] Cu
235  * The pointer to the upper matrix storing the coefficients of the
236  * updated panel. Must be of size cblk.stride -by- cblk.width
237  * On exit Cu = Cu + U.
238  *
239  *******************************************************************************
240  *
241  * @retval PASTIX_SUCCESS on success.
242  *
243  ******************************************************************************/
244 int
246  SolverCblk *cblk2,
247  const float *L,
248  float *Cl,
249  const float *U,
250  float *Cu )
251 {
252  SolverBlok *iterblok;
253  SolverBlok *firstblok;
254  SolverBlok *lastblok;
255  SolverBlok *fblok;
256  pastix_int_t ncol1 = cblk1->lcolnum - cblk1->fcolnum + 1;
257  const float *ga;
258  float *gb;
259 
260  assert(0 /* Outdated */);
261 
262  firstblok = cblk1->fblokptr;
263  lastblok = cblk1[1].fblokptr;
264  fblok = cblk2->fblokptr;
265 
266  assert(cblk1->fcolnum >= cblk2->fcolnum);
267 
268  for (iterblok = firstblok; iterblok < lastblok; iterblok++) {
269  pastix_int_t nrow;
270  /* Find facing bloknum */
271  while (!is_block_inside_fblock( iterblok, fblok )) {
272  fblok++;
273  assert( fblok < cblk2[1].fblokptr );
274  }
275  ga = L + iterblok->coefind;
276  gb = Cl + cblk2->stride*(cblk1->fcolnum-cblk2->fcolnum) +
277  fblok->coefind +
278  iterblok->frownum - fblok->frownum;
279  nrow = iterblok->lrownum - iterblok->frownum + 1;
281  nrow, ncol1,
282  1.0, ga, cblk1->stride,
283  1.0, gb, cblk2->stride );
284  if (U != NULL) {
285  ga = U + iterblok->coefind;
286  gb = Cu + cblk2->stride*(cblk1->fcolnum-cblk2->fcolnum) +
287  fblok->coefind +
288  iterblok->frownum - fblok->frownum;
290  nrow, ncol1,
291  1.0, ga, cblk1->stride,
292  1.0, gb, cblk2->stride );
293  }
294  }
295  return PASTIX_SUCCESS;
296 }
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
int core_sgeadd(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, float alpha, const float *A, pastix_int_t LDA, float beta, float *B, pastix_int_t LDB)
Add two matrices together.
Definition: core_sgeadd.c:78
int cpucblk_sgeaddsp1d(const SolverCblk *cblk1, SolverCblk *cblk2, const float *L, float *Cl, const float *U, float *Cu)
Add two column blocks together.
Definition: core_sgeadd.c:245
enum pastix_trans_e pastix_trans_t
Transpostion.
@ PastixNoTrans
Definition: api.h:445
@ PastixTrans
Definition: api.h:446
@ PASTIX_SUCCESS
Definition: api.h:367
pastix_int_t lrownum
Definition: solver.h:143
static int is_block_inside_fblock(const SolverBlok *blok, const SolverBlok *fblok)
Check if a block is included inside another one.
Definition: solver.h:499
pastix_int_t frownum
Definition: solver.h:142
pastix_int_t coefind
Definition: solver.h:144
SolverBlok * fblokptr
Definition: solver.h:163
pastix_int_t stride
Definition: solver.h:164
pastix_int_t lcolnum
Definition: solver.h:162
pastix_int_t fcolnum
Definition: solver.h:161
Solver block structure.
Definition: solver.h:137
Solver column block structure.
Definition: solver.h:156