PaStiX Handbook  6.2.1
cpucblk_dadd.c
Go to the documentation of this file.
1 /**
2  *
3  * @file cpucblk_dadd.c
4  *
5  * Precision dependent routines to add different cblks.
6  *
7  * @copyright 2015-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.2.0
11  * @author Pierre Ramet
12  * @author Mathieu Faverge
13  * @author Tony Delarue
14  * @date 2021-01-03
15  *
16  * @generated from /builds/solverstack/pastix/kernels/cpucblk_zadd.c, normal z -> d, Tue Apr 12 09:38:40 2022
17  *
18  **/
19 #include "common/common.h"
20 #include "blend/solver.h"
21 #include "kernels_trace.h"
22 #include "pastix_dcores.h"
23 #include "pastix_dlrcores.h"
24 
25 /**
26  *******************************************************************************
27  *
28  * @brief Add two column bloks in full rank format.
29  *
30  * The second cblk is overwritten by the sum of the two column blocks.
31  * B <- alpha * A + B
32  *
33  *******************************************************************************
34  *
35  * @param[in] side
36  * Define which side of the cblk must be tested.
37  * @arg PastixLCoef if lower part
38  * @arg PastixUCoef if upper part
39  *
40  * @param[in] alpha
41  * The scalar alpha
42  *
43  * @param[in] cblkA
44  * The column block of the A matrix.
45  *
46  * @param[inout] cblkB
47  * The column block of the B matrix
48  * On exit, cblkB coefficient arrays are overwritten by the result of
49  * alpha * A + B.
50  *
51  *******************************************************************************
52  *
53  * @return The number of flops of the operation.
54  *
55  *******************************************************************************/
56 static inline pastix_fixdbl_t
58  pastix_int_t alpha,
59  const SolverCblk *cblkA,
60  SolverCblk *cblkB,
61  double *work,
62  pastix_int_t lwork,
63  const pastix_lr_t *lowrank )
64 {
65  const SolverBlok *blokA = cblkA->fblokptr;
66  const SolverBlok *blokB = cblkB->fblokptr;
67  const SolverBlok *lblokA = cblkA[1].fblokptr;
68  const SolverBlok *lblokB = cblkB[1].fblokptr;
69  double *A;
70  pastix_int_t shift;
71  pastix_fixdbl_t flops = 0.;
72  core_dlrmm_t params;
73  pastix_lrblock_t lrA;
74 
75  assert( !(cblkA->cblktype & CBLK_COMPRESSED) );
76  assert( cblkB->cblktype & CBLK_COMPRESSED );
77  assert( cblkA->cblktype & CBLK_LAYOUT_2D );
78 
79  if ( side == PastixUCoef ) {
80  A = cblkA->ucoeftab;
81  shift = 1;
82  }
83  else {
84  A = cblkA->lcoeftab;
85  shift = 0;
86  }
87 
88  assert( A != NULL );
89 
90  params.lowrank = lowrank;
91  params.transA = PastixNoTrans; /* Unused */
92  params.transB = PastixNoTrans; /* Unused */
93  params.K = -1; /* Unused */
94  params.alpha = alpha;
95  params.A = NULL; /* Unused */
96  params.B = NULL; /* Unused */
97  params.beta = 1.0;
98  params.work = work;
99  params.lwork = lwork;
100  params.lwused = 0;
101  params.lock = &(cblkB->lock);
102 
103  /* Dimensions on N */
104  params.N = cblk_colnbr( cblkA );
105  params.Cn = cblk_colnbr( cblkB );
106  params.offy = cblkA->fcolnum - cblkB->fcolnum;
107 
108  lrA.rk = -1;
109  lrA.v = NULL;
110 
111  for (; blokA < lblokA; blokA++) {
112 
113  /* Find facing bloknum */
114  while ( !is_block_inside_fblock( blokA, blokB ) && (blokB < lblokB) ) {
115  blokB++;
116  }
117 
118  assert( is_block_inside_fblock( blokA, blokB ) && (blokB <= lblokB) );
119 
120  lrA.u = A + blokA->coefind;
121  lrA.rkmax = blok_rownbr( blokA );
122 
123  /* Dimensions on M */
124  params.M = blok_rownbr( blokA );
125  params.Cm = blok_rownbr( blokB );
126  params.offx = blokA->frownum - blokB->frownum;
127  params.C = blokB->LRblock[shift];
128 
129  flops += core_dlradd( &params, &lrA,
130  PastixNoTrans, 0 );
131  }
132  return flops;
133 }
134 
135 /**
136  *******************************************************************************
137  *
138  * @brief Add two column bloks in full rank format.
139  *
140  * The second cblk is overwritten by the sum of the two column blocks.
141  * B <- alpha * A + B
142  *
143  *******************************************************************************
144  *
145  * @param[in] side
146  * Define which side of the cblk must be tested.
147  * @arg PastixLCoef if lower part
148  * @arg PastixUCoef if upper part
149  *
150  * @param[in] alpha
151  * The scalar alpha
152  *
153  * @param[in] cblkA
154  * The column block of the A matrix.
155  *
156  * @param[inout] cblkB
157  * The column block of the B matrix
158  * On exit, cblkB coefficient arrays are overwritten by the result of
159  * alpha * A + B.
160  *
161  *******************************************************************************
162  *
163  * @return The number of flops of the operation.
164  *
165  *******************************************************************************/
166 static inline pastix_fixdbl_t
168  pastix_int_t alpha,
169  const SolverCblk *cblkA,
170  SolverCblk *cblkB,
171  double *work,
172  pastix_int_t lwork,
173  const pastix_lr_t *lowrank )
174 {
175  const SolverBlok *blokA = cblkA->fblokptr;
176  const SolverBlok *blokB = cblkB->fblokptr;
177  const SolverBlok *lblokA = cblkA[1].fblokptr;
178  const SolverBlok *lblokB = cblkB[1].fblokptr;
179  pastix_int_t shift;
180  pastix_fixdbl_t flops = 0.;
181  core_dlrmm_t params;
182 
183  assert( (cblkA->cblktype & CBLK_COMPRESSED) );
184  assert( (cblkB->cblktype & CBLK_COMPRESSED) );
185 
186  shift = (side == PastixUCoef) ? 1 : 0;
187 
188  params.lowrank = lowrank;
189  params.transA = PastixNoTrans; /* Unused */
190  params.transB = PastixNoTrans; /* Unused */
191  params.K = -1; /* Unused */
192  params.alpha = alpha;
193  params.A = NULL; /* Unused */
194  params.B = NULL; /* Unused */
195  params.beta = 1.0;
196  params.work = work;
197  params.lwork = lwork;
198  params.lwused = 0;
199  params.lock = &(cblkB->lock);
200 
201  /* Dimensions on N */
202  params.N = cblk_colnbr( cblkA );
203  params.Cn = cblk_colnbr( cblkB );
204  params.offy = cblkA->fcolnum - cblkB->fcolnum;
205 
206  for (; blokA < lblokA; blokA++) {
207 
208  /* Find facing bloknum */
209  while ( !is_block_inside_fblock( blokA, blokB ) && (blokB < lblokB) ) {
210  blokB++;
211  }
212 
213  assert( is_block_inside_fblock( blokA, blokB ) && (blokB <= lblokB) );
214 
215  /* Dimensions on M */
216  params.M = blok_rownbr( blokA );
217  params.Cm = blok_rownbr( blokB );
218  params.offx = blokA->frownum - blokB->frownum;
219  params.C = blokB->LRblock[shift];
220  flops += core_dlradd( &params, blokA->LRblock[shift], PastixNoTrans, PASTIX_LRM3_ORTHOU );
221  }
222  return flops;
223 }
224 
225 /**
226  *******************************************************************************
227  *
228  * @brief Add two column bloks in full rank format.
229  *
230  * The second cblk is overwritten by the sum of the two column blocks.
231  * B <- alpha * A + B
232  *
233  *******************************************************************************
234  *
235  * @param[in] side
236  * Define which side of the cblk must be tested.
237  * @arg PastixLCoef if lower part
238  * @arg PastixUCoef if upper part
239  *
240  * @param[in] alpha
241  * The scalar alpha
242  *
243  * @param[in] cblkA
244  * The column block of the A matrix.
245  *
246  * @param[inout] cblkB
247  * The column block of the B matrix
248  * On exit, cblkB coefficient arrays are overwritten by the result of
249  * alpha * A + B.
250  *
251  *******************************************************************************
252  *
253  * @return The number of flops of the operation.
254  *
255  *******************************************************************************/
256 static inline pastix_fixdbl_t
258  pastix_int_t alpha,
259  const SolverCblk *cblkA,
260  SolverCblk *cblkB )
261 {
262  double *A, *B;
263  pastix_int_t n = cblk_colnbr( cblkA );
264  pastix_int_t m = cblkA->stride;
265  pastix_fixdbl_t flops = m * n;
266 
267  assert( !(cblkA->cblktype & CBLK_COMPRESSED) );
268  assert( !(cblkB->cblktype & CBLK_COMPRESSED) );
269 
270  if ( side == PastixUCoef ) {
271  A = cblkA->ucoeftab;
272  B = cblkB->ucoeftab;
273  }
274  else {
275  A = cblkA->lcoeftab;
276  B = cblkB->lcoeftab;
277  }
278 
279  assert( (A != NULL) && (B != NULL) );
280 
281  /* If the cblk matches */
282  if ( (n == cblk_colnbr( cblkB )) &&
283  (m == cblkB->stride) ) {
284 
285  pastix_cblk_lock( cblkB );
286  core_dgeadd( PastixNoTrans, m, n,
287  alpha, A, m,
288  1., B, m );
289  pastix_cblk_unlock( cblkB );
290  }
291  else {
292  double *bA, *bB;
293  const SolverBlok *blokA = cblkA->fblokptr;
294  const SolverBlok *blokB = cblkB->fblokptr;
295  const SolverBlok *lblokA = cblkA[1].fblokptr;
296  const SolverBlok *lblokB = cblkB[1].fblokptr;
297  pastix_int_t lda, ldb;
298 
299  /* Both cblk A and B must be stored in 2D */
300  assert( cblkA->cblktype & CBLK_LAYOUT_2D );
301  assert( cblkB->cblktype & CBLK_LAYOUT_2D );
302 
303  for (; blokA < lblokA; blokA++) {
304 
305  /* Find facing bloknum */
306  while ( !is_block_inside_fblock( blokA, blokB ) && (blokB < lblokB) ) {
307  blokB++;
308  }
309 
310  assert( is_block_inside_fblock( blokA, blokB ) && (blokB <= lblokB) );
311 
312  bA = A + blokA->coefind;
313  bB = B + blokB->coefind;
314  lda = blok_rownbr( blokA );
315  ldb = blok_rownbr( blokB );
316 
317  bB = bB + ldb * ( cblkA->fcolnum - cblkB->fcolnum ) + ( blokA->frownum - blokB->frownum );
318  m = lda;
319 
320  pastix_cblk_lock( cblkB );
321  core_dgeadd( PastixNoTrans, m, n,
322  alpha, bA, lda,
323  1., bB, ldb );
324  pastix_cblk_unlock( cblkB );
325  }
326  }
327  return flops;
328 }
329 
330 /**
331  *******************************************************************************
332  *
333  * @brief Add two column bloks in full rank format.
334  *
335  * The second cblk is overwritten by the sum of the two column blocks.
336  * B <- alpha * A + B
337  *
338  *******************************************************************************
339  *
340  * @param[in] side
341  * Define which side of the cblk must be tested.
342  * @arg PastixLCoef if lower part only
343  * @arg PastixUCoef if upper part only
344  *
345  * @param[in] alpha
346  * The scalar alpha
347  *
348  * @param[in] cblkA
349  * The column block of the A matrix.
350  *
351  * @param[inout] cblkB
352  * The column block of the B matrix
353  * On exit, cblkB coefficient arrays are overwritten by the result of
354  * alpha * A + B.
355  *
356  *******************************************************************************/
357 void
359  double alpha,
360  const SolverCblk *cblkA,
361  SolverCblk *cblkB,
362  const pastix_lr_t *lowrank )
363 {
364  pastix_ktype_t ktype = PastixKernelGEADDCblkFRFR;
365  pastix_fixdbl_t time, flops = 0.0;
366  pastix_int_t m = cblkA->stride;
367  pastix_int_t n = cblk_colnbr( cblkA );
368 
369  if ( side == PastixLUCoef ) {
370  n *= 2;
371  }
372 
373  if ( cblkB->cblktype & CBLK_COMPRESSED ) {
374  if ( cblkA->cblktype & CBLK_COMPRESSED ) {
375  ktype = PastixKernelGEADDCblkLRLR;
376  time = kernel_trace_start( ktype );
377  flops = cpucblk_dadd_lrlr( side, alpha, cblkA, cblkB,
378  NULL, 0, lowrank );
379  }
380  else {
381  ktype = PastixKernelGEADDCblkFRLR;
382  time = kernel_trace_start( ktype );
383  flops = cpucblk_dadd_frlr( side, alpha, cblkA, cblkB,
384  NULL, 0, lowrank );
385  }
386  }
387  else {
388  if ( cblkA->cblktype & CBLK_COMPRESSED ) {
389  assert(0 /* We do not add a compressed cblk to a non compressed cblk */);
390  time = kernel_trace_start( ktype );
391  }
392  else {
393  ktype = PastixKernelGEADDCblkFRFR;
394  time = kernel_trace_start( ktype );
395  flops = cpucblk_dadd_frfr( side, alpha, cblkA, cblkB );
396  }
397  }
398 
399  kernel_trace_stop( cblkB->fblokptr->inlast, ktype, m, n, 0, flops, time );
400 }
401 
solver_blok_s::frownum
pastix_int_t frownum
Definition: solver.h:112
solver.h
core_dlrmm_s::work
double * work
Definition: pastix_dlrcores.h:96
blok_rownbr
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
Definition: solver.h:313
cblk_colnbr
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:247
core_dlrmm_s::Cm
pastix_int_t Cm
Definition: pastix_dlrcores.h:87
pastix_lr_s
Structure to define the type of function to use for the low-rank kernels and their parameters.
Definition: pastix_lowrank.h:147
solver_cblk_s::fblokptr
SolverBlok * fblokptr
Definition: solver.h:134
core_dlrmm_s::lock
pastix_atomic_lock_t * lock
Definition: pastix_dlrcores.h:99
pastix_lrblock_s::v
void * v
Definition: pastix_lowrank.h:116
solver_cblk_s::stride
pastix_int_t stride
Definition: solver.h:135
pastix_lrblock_s::u
void * u
Definition: pastix_lowrank.h:115
solver_cblk_s
Solver column block structure.
Definition: solver.h:127
pastix_coefside_t
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
cpucblk_dadd_lrlr
static pastix_fixdbl_t cpucblk_dadd_lrlr(pastix_coefside_t side, pastix_int_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB, double *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add two column bloks in full rank format.
Definition: cpucblk_dadd.c:167
solver_blok_s
Solver block structure.
Definition: solver.h:107
core_dlrmm_s::C
pastix_lrblock_t * C
Definition: pastix_dlrcores.h:95
pastix_lrblock_s
The block low-rank structure to hold a matrix in low-rank form.
Definition: pastix_lowrank.h:112
solver_cblk_s::ucoeftab
void * ucoeftab
Definition: solver.h:143
core_dlrmm_s::transA
pastix_trans_t transA
Definition: pastix_dlrcores.h:82
core_dlrmm_s::Cn
pastix_int_t Cn
Definition: pastix_dlrcores.h:88
PastixNoTrans
@ PastixNoTrans
Definition: api.h:424
core_dlrmm_s::offx
pastix_int_t offx
Definition: pastix_dlrcores.h:89
is_block_inside_fblock
static int is_block_inside_fblock(const SolverBlok *blok, const SolverBlok *fblok)
Check if a block is included inside another one.
Definition: solver.h:426
core_dlrmm_s
Structure to store all the parameters of the core_dlrmm family functions.
Definition: pastix_dlrcores.h:80
core_dlrmm_s::N
pastix_int_t N
Definition: pastix_dlrcores.h:85
core_dlrmm_s::beta
double beta
Definition: pastix_dlrcores.h:94
solver_cblk_s::lcoeftab
void * lcoeftab
Definition: solver.h:142
core_dlrmm_s::lwork
pastix_int_t lwork
Definition: pastix_dlrcores.h:97
pastix_dcores.h
PASTIX_LRM3_ORTHOU
#define PASTIX_LRM3_ORTHOU
Macro to specify if the U part of a low-rank matrix is orthogonal or not (Used in LRMM functions).
Definition: pastix_lowrank.h:41
PastixUCoef
@ PastixUCoef
Definition: api.h:457
core_dlradd
pastix_fixdbl_t core_dlradd(core_dlrmm_t *params, const pastix_lrblock_t *AB, pastix_trans_t transV, int infomask)
Perform the addition of two low-rank matrices.
Definition: core_dlr2xx.c:383
core_dlrmm_s::M
pastix_int_t M
Definition: pastix_dlrcores.h:84
cpucblk_dadd_frlr
static pastix_fixdbl_t cpucblk_dadd_frlr(pastix_coefside_t side, pastix_int_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB, double *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add two column bloks in full rank format.
Definition: cpucblk_dadd.c:57
core_dlrmm_s::B
const pastix_lrblock_t * B
Definition: pastix_dlrcores.h:93
core_dlrmm_s::lwused
pastix_int_t lwused
Definition: pastix_dlrcores.h:98
core_dlrmm_s::transB
pastix_trans_t transB
Definition: pastix_dlrcores.h:83
solver_blok_s::coefind
pastix_int_t coefind
Definition: solver.h:114
pastix_lrblock_s::rk
int rk
Definition: pastix_lowrank.h:113
core_dlrmm_s::alpha
double alpha
Definition: pastix_dlrcores.h:91
solver_cblk_s::cblktype
int8_t cblktype
Definition: solver.h:130
PastixLUCoef
@ PastixLUCoef
Definition: api.h:458
solver_blok_s::inlast
int8_t inlast
Definition: solver.h:117
solver_cblk_s::lock
pastix_atomic_lock_t lock
Definition: solver.h:128
solver_cblk_s::fcolnum
pastix_int_t fcolnum
Definition: solver.h:132
solver_blok_s::LRblock
pastix_lrblock_t * LRblock[2]
Definition: solver.h:121
core_dlrmm_s::K
pastix_int_t K
Definition: pastix_dlrcores.h:86
pastix_lrblock_s::rkmax
int rkmax
Definition: pastix_lowrank.h:114
cpucblk_dadd
void cpucblk_dadd(pastix_coefside_t side, double alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const pastix_lr_t *lowrank)
Add two column bloks in full rank format.
Definition: cpucblk_dadd.c:358
pastix_dlrcores.h
cpucblk_dadd_frfr
static pastix_fixdbl_t cpucblk_dadd_frfr(pastix_coefside_t side, pastix_int_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB)
Add two column bloks in full rank format.
Definition: cpucblk_dadd.c:257
core_dlrmm_s::A
const pastix_lrblock_t * A
Definition: pastix_dlrcores.h:92
core_dlrmm_s::lowrank
const pastix_lr_t * lowrank
Definition: pastix_dlrcores.h:81
core_dgeadd
int core_dgeadd(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, double alpha, const double *A, pastix_int_t LDA, double beta, double *B, pastix_int_t LDB)
Add two matrices together.
Definition: core_dgeadd.c:78
core_dlrmm_s::offy
pastix_int_t offy
Definition: pastix_dlrcores.h:90