PaStiX Handbook  6.3.0
cpucblk_zadd.c
Go to the documentation of this file.
1 /**
2  *
3  * @file cpucblk_zadd.c
4  *
5  * Precision dependent routines to add different cblks.
6  *
7  * @copyright 2015-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.0
11  * @author Pierre Ramet
12  * @author Mathieu Faverge
13  * @author Tony Delarue
14  * @date 2023-01-16
15  *
16  * @generated from /builds/solverstack/pastix/kernels/cpucblk_zadd.c, normal z -> z, Mon Aug 28 13:40:38 2023
17  *
18  **/
19 #include "common/common.h"
20 #include "blend/solver.h"
21 #include "kernels_trace.h"
22 #include "pastix_zcores.h"
23 #include "pastix_zlrcores.h"
24 
25 /**
26  *******************************************************************************
27  *
28  * @brief Add two column bloks in full rank format.
29  *
30  * The second cblk is overwritten by the sum of the two column blocks.
31  * B <- alpha * A + B
32  *
33  *******************************************************************************
34  *
35  * @param[in] side
36  * Define which side of the cblk must be tested.
37  * @arg PastixLCoef if lower part
38  * @arg PastixUCoef if upper part
39  *
40  * @param[in] alpha
41  * The scalar alpha
42  *
43  * @param[in] cblkA
44  * The column block of the A matrix.
45  *
46  * @param[inout] cblkB
47  * The column block of the B matrix
48  * On exit, cblkB coefficient arrays are overwritten by the result of
49  * alpha * A + B.
50  *
51  * @param[in] work
52  * TODO
53  *
54  * @param[in] lwork
55  * TODO
56  *
57  * @param[in] lowrank
58  * TODO
59  *
60  *******************************************************************************
61  *
62  * @return The number of flops of the operation.
63  *
64  *******************************************************************************/
65 static inline pastix_fixdbl_t
67  pastix_int_t alpha,
68  const SolverCblk *cblkA,
69  SolverCblk *cblkB,
70  pastix_complex64_t *work,
71  pastix_int_t lwork,
72  const pastix_lr_t *lowrank )
73 {
74  const SolverBlok *blokA = cblkA->fblokptr;
75  const SolverBlok *blokB = cblkB->fblokptr;
76  const SolverBlok *lblokA = cblkA[1].fblokptr;
77  const SolverBlok *lblokB = cblkB[1].fblokptr;
78  pastix_complex64_t *A;
79  pastix_int_t shift;
80  pastix_fixdbl_t flops = 0.;
81  core_zlrmm_t params;
82  pastix_lrblock_t lrA;
83 
84  assert( !(cblkA->cblktype & CBLK_COMPRESSED) );
85  assert( cblkB->cblktype & CBLK_COMPRESSED );
86  assert( cblkA->cblktype & CBLK_LAYOUT_2D );
87 
88  if ( side == PastixUCoef ) {
89  A = cblkA->ucoeftab;
90  shift = 1;
91  }
92  else {
93  A = cblkA->lcoeftab;
94  shift = 0;
95  }
96 
97  assert( A != NULL );
98 
99  params.lowrank = lowrank;
100  params.transA = PastixNoTrans; /* Unused */
101  params.transB = PastixNoTrans; /* Unused */
102  params.K = -1; /* Unused */
103  params.alpha = alpha;
104  params.A = NULL; /* Unused */
105  params.B = NULL; /* Unused */
106  params.beta = 1.0;
107  params.work = work;
108  params.lwork = lwork;
109  params.lwused = 0;
110  params.lock = &(cblkB->lock);
111 
112  /* Dimensions on N */
113  params.N = cblk_colnbr( cblkA );
114  params.Cn = cblk_colnbr( cblkB );
115  params.offy = cblkA->fcolnum - cblkB->fcolnum;
116 
117  lrA.rk = -1;
118  lrA.v = NULL;
119 
120  for (; blokA < lblokA; blokA++) {
121 
122  /* Find facing bloknum */
123  while ( !is_block_inside_fblock( blokA, blokB ) && (blokB < lblokB) ) {
124  blokB++;
125  }
126 
127  assert( is_block_inside_fblock( blokA, blokB ) && (blokB <= lblokB) );
128 
129  lrA.u = A + blokA->coefind;
130  lrA.rkmax = blok_rownbr( blokA );
131 
132  /* Dimensions on M */
133  params.M = blok_rownbr( blokA );
134  params.Cm = blok_rownbr( blokB );
135  params.offx = blokA->frownum - blokB->frownum;
136  params.C = blokB->LRblock[shift];
137 
138  flops += core_zlradd( &params, &lrA,
139  PastixNoTrans, 0 );
140  }
141  return flops;
142 }
143 
144 /**
145  *******************************************************************************
146  *
147  * @brief Add two column bloks in full rank format.
148  *
149  * The second cblk is overwritten by the sum of the two column blocks.
150  * B <- alpha * A + B
151  *
152  *******************************************************************************
153  *
154  * @param[in] side
155  * Define which side of the cblk must be tested.
156  * @arg PastixLCoef if lower part
157  * @arg PastixUCoef if upper part
158  *
159  * @param[in] alpha
160  * The scalar alpha
161  *
162  * @param[in] cblkA
163  * The column block of the A matrix.
164  *
165  * @param[inout] cblkB
166  * The column block of the B matrix
167  * On exit, cblkB coefficient arrays are overwritten by the result of
168  * alpha * A + B.
169  *
170  * @param[in] work
171  * TODO
172  *
173  * @param[in] lwork
174  * TODO
175  *
176  * @param[in] lowrank
177  * TODO
178  *
179  *******************************************************************************
180  *
181  * @return The number of flops of the operation.
182  *
183  *******************************************************************************/
184 static inline pastix_fixdbl_t
186  pastix_int_t alpha,
187  const SolverCblk *cblkA,
188  SolverCblk *cblkB,
189  pastix_complex64_t *work,
190  pastix_int_t lwork,
191  const pastix_lr_t *lowrank )
192 {
193  const SolverBlok *blokA = cblkA->fblokptr;
194  const SolverBlok *blokB = cblkB->fblokptr;
195  const SolverBlok *lblokA = cblkA[1].fblokptr;
196  const SolverBlok *lblokB = cblkB[1].fblokptr;
197  pastix_int_t shift;
198  pastix_fixdbl_t flops = 0.;
199  core_zlrmm_t params;
200 
201  assert( (cblkA->cblktype & CBLK_COMPRESSED) );
202  assert( (cblkB->cblktype & CBLK_COMPRESSED) );
203 
204  shift = (side == PastixUCoef) ? 1 : 0;
205 
206  params.lowrank = lowrank;
207  params.transA = PastixNoTrans; /* Unused */
208  params.transB = PastixNoTrans; /* Unused */
209  params.K = -1; /* Unused */
210  params.alpha = alpha;
211  params.A = NULL; /* Unused */
212  params.B = NULL; /* Unused */
213  params.beta = 1.0;
214  params.work = work;
215  params.lwork = lwork;
216  params.lwused = 0;
217  params.lock = &(cblkB->lock);
218 
219  /* Dimensions on N */
220  params.N = cblk_colnbr( cblkA );
221  params.Cn = cblk_colnbr( cblkB );
222  params.offy = cblkA->fcolnum - cblkB->fcolnum;
223 
224  for (; blokA < lblokA; blokA++) {
225 
226  /* Find facing bloknum */
227  while ( !is_block_inside_fblock( blokA, blokB ) && (blokB < lblokB) ) {
228  blokB++;
229  }
230 
231  assert( is_block_inside_fblock( blokA, blokB ) && (blokB <= lblokB) );
232 
233  /* Dimensions on M */
234  params.M = blok_rownbr( blokA );
235  params.Cm = blok_rownbr( blokB );
236  params.offx = blokA->frownum - blokB->frownum;
237  params.C = blokB->LRblock[shift];
238  flops += core_zlradd( &params, blokA->LRblock[shift], PastixNoTrans, PASTIX_LRM3_ORTHOU );
239  }
240  return flops;
241 }
242 
243 /**
244  *******************************************************************************
245  *
246  * @brief Add two column bloks in full rank format.
247  *
248  * The second cblk is overwritten by the sum of the two column blocks.
249  * B <- alpha * A + B
250  *
251  *******************************************************************************
252  *
253  * @param[in] side
254  * Define which side of the cblk must be tested.
255  * @arg PastixLCoef if lower part
256  * @arg PastixUCoef if upper part
257  *
258  * @param[in] alpha
259  * The scalar alpha
260  *
261  * @param[in] cblkA
262  * The column block of the A matrix.
263  *
264  * @param[inout] cblkB
265  * The column block of the B matrix
266  * On exit, cblkB coefficient arrays are overwritten by the result of
267  * alpha * A + B.
268  *
269  *******************************************************************************
270  *
271  * @return The number of flops of the operation.
272  *
273  *******************************************************************************/
274 static inline pastix_fixdbl_t
276  pastix_int_t alpha,
277  const SolverCblk *cblkA,
278  SolverCblk *cblkB )
279 {
280  pastix_complex64_t *A, *B;
281  pastix_int_t n = cblk_colnbr( cblkA );
282  pastix_int_t m = cblkA->stride;
283  pastix_fixdbl_t flops = m * n;
284 
285  assert( !(cblkA->cblktype & CBLK_COMPRESSED) );
286  assert( !(cblkB->cblktype & CBLK_COMPRESSED) );
287 
288  if ( side == PastixUCoef ) {
289  A = cblkA->ucoeftab;
290  B = cblkB->ucoeftab;
291  }
292  else {
293  A = cblkA->lcoeftab;
294  B = cblkB->lcoeftab;
295  }
296 
297  assert( (A != NULL) && (B != NULL) );
298 
299  /* If the cblk matches */
300  if ( (n == cblk_colnbr( cblkB )) &&
301  (m == cblkB->stride) ) {
302 
303  pastix_cblk_lock( cblkB );
304  core_zgeadd( PastixNoTrans, m, n,
305  alpha, A, m,
306  1., B, m );
307  pastix_cblk_unlock( cblkB );
308  }
309  else {
310  pastix_complex64_t *bA, *bB;
311  const SolverBlok *blokA = cblkA->fblokptr;
312  const SolverBlok *blokB = cblkB->fblokptr;
313  const SolverBlok *lblokA = cblkA[1].fblokptr;
314  const SolverBlok *lblokB = cblkB[1].fblokptr;
315  pastix_int_t lda, ldb;
316 
317  /* Both cblk A and B must be stored in 2D */
318  assert( cblkA->cblktype & CBLK_LAYOUT_2D );
319  assert( cblkB->cblktype & CBLK_LAYOUT_2D );
320 
321  for (; blokA < lblokA; blokA++) {
322 
323  /* Find facing bloknum */
324  while ( !is_block_inside_fblock( blokA, blokB ) && (blokB < lblokB) ) {
325  blokB++;
326  }
327 
328  assert( is_block_inside_fblock( blokA, blokB ) && (blokB <= lblokB) );
329 
330  bA = A + blokA->coefind;
331  bB = B + blokB->coefind;
332  lda = blok_rownbr( blokA );
333  ldb = blok_rownbr( blokB );
334 
335  bB = bB + ldb * ( cblkA->fcolnum - cblkB->fcolnum ) + ( blokA->frownum - blokB->frownum );
336  m = lda;
337 
338  pastix_cblk_lock( cblkB );
339  core_zgeadd( PastixNoTrans, m, n,
340  alpha, bA, lda,
341  1., bB, ldb );
342  pastix_cblk_unlock( cblkB );
343  }
344  }
345  return flops;
346 }
347 
348 /**
349  *******************************************************************************
350  *
351  * @brief Add two column bloks in full rank format.
352  *
353  * The second cblk is overwritten by the sum of the two column blocks.
354  * B <- alpha * A + B
355  *
356  *******************************************************************************
357  *
358  * @param[in] side
359  * Define which side of the cblk must be tested.
360  * @arg PastixLCoef if lower part only
361  * @arg PastixUCoef if upper part only
362  *
363  * @param[in] alpha
364  * The scalar alpha
365  *
366  * @param[in] cblkA
367  * The column block of the A matrix.
368  *
369  * @param[inout] cblkB
370  * The column block of the B matrix
371  * On exit, cblkB coefficient arrays are overwritten by the result of
372  * alpha * A + B.
373  *
374  * @param[in] lowrank
375  * TODO
376  *
377  *******************************************************************************/
378 void
380  double alpha,
381  const SolverCblk *cblkA,
382  SolverCblk *cblkB,
383  const pastix_lr_t *lowrank )
384 {
385  pastix_ktype_t ktype = PastixKernelGEADDCblkFRFR;
386  pastix_fixdbl_t time, flops = 0.0;
387  pastix_int_t m = cblkA->stride;
388  pastix_int_t n = cblk_colnbr( cblkA );
389 
390  if ( side == PastixLUCoef ) {
391  n *= 2;
392  }
393 
394  if ( cblkB->cblktype & CBLK_COMPRESSED ) {
395  if ( cblkA->cblktype & CBLK_COMPRESSED ) {
396  ktype = PastixKernelGEADDCblkLRLR;
397  time = kernel_trace_start( ktype );
398  flops = cpucblk_zadd_lrlr( side, alpha, cblkA, cblkB,
399  NULL, 0, lowrank );
400  }
401  else {
402  ktype = PastixKernelGEADDCblkFRLR;
403  time = kernel_trace_start( ktype );
404  flops = cpucblk_zadd_frlr( side, alpha, cblkA, cblkB,
405  NULL, 0, lowrank );
406  }
407  }
408  else {
409  if ( cblkA->cblktype & CBLK_COMPRESSED ) {
410  assert(0 /* We do not add a compressed cblk to a non compressed cblk */);
411  time = kernel_trace_start( ktype );
412  }
413  else {
414  ktype = PastixKernelGEADDCblkFRFR;
415  time = kernel_trace_start( ktype );
416  flops = cpucblk_zadd_frfr( side, alpha, cblkA, cblkB );
417  }
418  }
419 
420  kernel_trace_stop( cblkB->fblokptr->inlast, ktype, m, n, 0, flops, time );
421 }
422 
static pastix_fixdbl_t cpucblk_zadd_frfr(pastix_coefside_t side, pastix_int_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB)
Add two column blocks in full rank format.
Definition: cpucblk_zadd.c:275
static pastix_fixdbl_t cpucblk_zadd_frlr(pastix_coefside_t side, pastix_int_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB, pastix_complex64_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add a full-rank column block into a compressed (low-rank) column block.
Definition: cpucblk_zadd.c:66
static pastix_fixdbl_t cpucblk_zadd_lrlr(pastix_coefside_t side, pastix_int_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB, pastix_complex64_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add two column blocks in low-rank (compressed) format.
Definition: cpucblk_zadd.c:185
int core_zgeadd(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex64_t alpha, const pastix_complex64_t *A, pastix_int_t LDA, pastix_complex64_t beta, pastix_complex64_t *B, pastix_int_t LDB)
Add two matrices together.
Definition: core_zgeadd.c:78
void cpucblk_zadd(pastix_coefside_t side, double alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const pastix_lr_t *lowrank)
Add two column blocks, whatever their storage format (full rank or compressed).
Definition: cpucblk_zadd.c:379
const pastix_lrblock_t * A
pastix_complex64_t alpha
pastix_trans_t transB
pastix_atomic_lock_t * lock
const pastix_lrblock_t * B
const pastix_lr_t * lowrank
pastix_int_t M
pastix_int_t offy
pastix_int_t lwused
pastix_trans_t transA
pastix_int_t Cm
pastix_int_t lwork
pastix_int_t N
pastix_lrblock_t * C
pastix_complex64_t beta
pastix_int_t K
pastix_int_t offx
pastix_complex64_t * work
pastix_int_t Cn
pastix_fixdbl_t core_zlradd(core_zlrmm_t *params, const pastix_lrblock_t *A, pastix_trans_t transV, int infomask)
Perform the addition of two low-rank matrices.
Definition: core_zlr2xx.c:383
Structure to store all the parameters of the core_zlrmm family functions.
#define PASTIX_LRM3_ORTHOU
Macro to specify if the U part of a low-rank matrix is orthogonal or not (Used in LRMM functions).
Structure to define the type of function to use for the low-rank kernels and their parameters.
The block low-rank structure to hold a matrix in low-rank form.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
@ PastixLUCoef
Definition: api.h:480
@ PastixUCoef
Definition: api.h:479
@ PastixNoTrans
Definition: api.h:447
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
Definition: solver.h:389
void * ucoeftab
Definition: solver.h:172
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:323
static int is_block_inside_fblock(const SolverBlok *blok, const SolverBlok *fblok)
Check if a block is included inside another one.
Definition: solver.h:498
pastix_int_t frownum
Definition: solver.h:142
pastix_atomic_lock_t lock
Definition: solver.h:157
pastix_int_t coefind
Definition: solver.h:144
SolverBlok * fblokptr
Definition: solver.h:163
pastix_lrblock_t * LRblock[2]
Definition: solver.h:150
int8_t inlast
Definition: solver.h:146
pastix_int_t stride
Definition: solver.h:164
int8_t cblktype
Definition: solver.h:159
void * lcoeftab
Definition: solver.h:171
pastix_int_t fcolnum
Definition: solver.h:161
Solver block structure.
Definition: solver.h:137
Solver column block structure.
Definition: solver.h:156