PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
cpucblk_zadd.c
Go to the documentation of this file.
1/**
2 *
3 * @file cpucblk_zadd.c
4 *
5 * Precision dependent routines to add different cblks.
6 *
7 * @copyright 2015-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Pierre Ramet
12 * @author Mathieu Faverge
13 * @author Tony Delarue
14 * @author Alycia Lisito
15 * @author Nolan Bredel
16 * @date 2024-07-05
17 *
18 * @generated from /builds/2mk6rsew/0/solverstack/pastix/kernels/cpucblk_zadd.c, normal z -> z, Tue Feb 25 14:35:00 2025
19 *
20 **/
21#include "common/common.h"
22#include "blend/solver.h"
23#include "kernels_trace.h"
24#include "pastix_zcores.h"
25#include "pastix_zlrcores.h"
26
27/**
28 *******************************************************************************
29 *
30 * @brief Add a column blok in full rank format to a column blok in low rank
31 * format.
32 *
33 * The second cblk is overwritten by the sum of the two column blocks.
34 * B <- alpha * A + B
35 *
36 *******************************************************************************
37 *
38 * @param[in] alpha
39 * The scalar alpha
40 *
41 * @param[in] cblkA
42 * The column block of the A matrix.
43 *
44 * @param[inout] cblkB
45 * The column block of the B matrix
46 * On exit, cblkB coefficient arrays are overwritten by the result of
47 * alpha * A + B.
48 *
49 * @param[inout] A
50 * The pointer to the coeftab of the cblk.lcoeftab matrix storing the
51 * coefficients of the panel when the Lower part is computed,
52 * cblk.ucoeftab otherwise. Must be of size cblk.stride -by- cblk.width
53 *
54 * @param[in] lrB
55 * Pointer to the low-rank representation of the column block B.
56 * Must be followed by the low-rank representation of the following blocks.
57 *
58 * @param[in] work
59 * Temporary memory buffer.
60 *
61 * @param[in] lwork
62 * Temporary workspace dimension.
63 *
64 * @param[in] lowrank
65 * The structure with low-rank parameters.
66 *
67 *******************************************************************************
68 *
69 * @return The number of flops of the operation.
70 *
71 *******************************************************************************/
72static inline pastix_fixdbl_t
73cpucblk_zadd_frlr( pastix_complex64_t alpha,
74 const SolverCblk *cblkA,
75 SolverCblk *cblkB,
76 const pastix_complex64_t *A,
78 pastix_complex64_t *work,
79 pastix_int_t lwork,
80 const pastix_lr_t *lowrank )
81{
82 const SolverBlok *blokA = cblkA->fblokptr;
83 const SolverBlok *blokB = cblkB->fblokptr;
84 const SolverBlok *lblokA = cblkA[1].fblokptr;
85 const SolverBlok *lblokB = cblkB[1].fblokptr;
86 pastix_fixdbl_t flops = 0.;
87 core_zlrmm_t params;
89
90 assert( !(cblkA->cblktype & CBLK_COMPRESSED) );
91 assert( cblkB->cblktype & CBLK_COMPRESSED );
92 assert( cblkA->cblktype & CBLK_LAYOUT_2D );
93
94 assert( A != NULL );
95
96 params.lowrank = lowrank;
97 params.transA = PastixNoTrans; /* Unused */
98 params.transB = PastixNoTrans; /* Unused */
99 params.K = -1; /* Unused */
100 params.alpha = alpha;
101 params.A = NULL; /* Unused */
102 params.B = NULL; /* Unused */
103 params.beta = 1.0;
104 params.work = work;
105 params.lwork = lwork;
106 params.lwused = 0;
107 params.lock = &(cblkB->lock);
108
109 /* Dimensions on N */
110 params.N = cblk_colnbr( cblkA );
111 params.Cn = cblk_colnbr( cblkB );
112 params.offy = cblkA->fcolnum - cblkB->fcolnum;
113
114 lrA.rk = -1;
115 lrA.v = NULL;
116
117 for (; blokA < lblokA; blokA++) {
118
119 /* Find facing bloknum */
120 while ( !is_block_inside_fblock( blokA, blokB ) && (blokB < lblokB) ) {
121 blokB++; lrB++;
122 }
123
124 assert( is_block_inside_fblock( blokA, blokB ) && (blokB <= lblokB) );
125
126 lrA.u = (pastix_complex64_t*)A + blokA->coefind;
127 lrA.rkmax = blok_rownbr( blokA );
128
129 /* Dimensions on M */
130 params.M = blok_rownbr( blokA );
131 params.Cm = blok_rownbr( blokB );
132 params.offx = blokA->frownum - blokB->frownum;
133 params.C = lrB;
134
135 flops += core_zlradd( &params, &lrA,
136 PastixNoTrans, 0 );
137 }
138 return flops;
139}
140
141/**
142 *******************************************************************************
143 *
144 * @brief Add two column bloks in low rank format.
145 *
146 * The second cblk is overwritten by the sum of the two column blocks.
147 * B <- alpha * A + B
148 *
149 *******************************************************************************
150 *
151 * @param[in] alpha
152 * The scalar alpha
153 *
154 * @param[in] cblkA
155 * The column block of the A matrix.
156 *
157 * @param[inout] cblkB
158 * The column block of the B matrix
159 * On exit, cblkB coefficient arrays are overwritten by the result of
160 * alpha * A + B.
161 *
162 * @param[in] lrA
163 * Pointer to the low-rank representation of the column block A.
164 * Must be followed by the low-rank representation of the following blocks.
165 *
166 * @param[in] lrB
167 * Pointer to the low-rank representation of the column block B.
168 * Must be followed by the low-rank representation of the following blocks.
169 *
170 * @param[in] work
171 * Temporary memory buffer.
172 *
173 * @param[in] lwork
174 * Temporary workspace dimension.
175 *
176 * @param[in] lowrank
177 * The structure with low-rank parameters.
178 *
179 *******************************************************************************
180 *
181 * @return The number of flops of the operation.
182 *
183 *******************************************************************************/
184static inline pastix_fixdbl_t
185cpucblk_zadd_lrlr( pastix_complex64_t alpha,
186 const SolverCblk *cblkA,
187 SolverCblk *cblkB,
188 const pastix_lrblock_t *lrA,
189 pastix_lrblock_t *lrB,
190 pastix_complex64_t *work,
191 pastix_int_t lwork,
192 const pastix_lr_t *lowrank )
193{
194 const SolverBlok *blokA = cblkA->fblokptr;
195 const SolverBlok *blokB = cblkB->fblokptr;
196 const SolverBlok *lblokA = cblkA[1].fblokptr;
197 const SolverBlok *lblokB = cblkB[1].fblokptr;
198 pastix_fixdbl_t flops = 0.;
199 core_zlrmm_t params;
200
201 assert( (cblkA->cblktype & CBLK_COMPRESSED) );
202 assert( (cblkB->cblktype & CBLK_COMPRESSED) );
203
204 params.lowrank = lowrank;
205 params.transA = PastixNoTrans; /* Unused */
206 params.transB = PastixNoTrans; /* Unused */
207 params.K = -1; /* Unused */
208 params.alpha = alpha;
209 params.A = NULL; /* Unused */
210 params.B = NULL; /* Unused */
211 params.beta = 1.0;
212 params.work = work;
213 params.lwork = lwork;
214 params.lwused = 0;
215 params.lock = &(cblkB->lock);
216
217 /* Dimensions on N */
218 params.N = cblk_colnbr( cblkA );
219 params.Cn = cblk_colnbr( cblkB );
220 params.offy = cblkA->fcolnum - cblkB->fcolnum;
221
222 for (; blokA < lblokA; blokA++, lrA++) {
223
224 /* Find facing bloknum */
225 while ( !is_block_inside_fblock( blokA, blokB ) && (blokB < lblokB) ) {
226 blokB++; lrB++;
227 }
228
229 assert( is_block_inside_fblock( blokA, blokB ) && (blokB <= lblokB) );
230
231 /* Dimensions on M */
232 params.M = blok_rownbr( blokA );
233 params.Cm = blok_rownbr( blokB );
234 params.offx = blokA->frownum - blokB->frownum;
235 params.C = lrB;
236 flops += core_zlradd( &params, lrA, PastixNoTrans, PASTIX_LRM3_ORTHOU );
237 }
238 return flops;
239}
240
241/**
242 *******************************************************************************
243 *
244 * @brief Add two column bloks in full rank format.
245 *
246 * The second cblk is overwritten by the sum of the two column blocks.
247 * B <- alpha * A + B
248 *
249 *******************************************************************************
250 *
251 * @param[in] alpha
252 * The scalar alpha
253 *
254 * @param[in] cblkA
255 * The column block of the A matrix.
256 *
257 * @param[inout] cblkB
258 * The column block of the B matrix
259 * On exit, cblkB coefficient arrays are overwritten by the result of
260 * alpha * A + B.
261 *
262 * @param[inout] A
263 * The pointer to the coeftab of the cblk.lcoeftab matrix storing the
264 * coefficients of the panel when the Lower part is computed,
265 * cblk.ucoeftab otherwise. Must be of size cblk.stride -by- cblk.width
266 *
267 * @param[inout] B
268 * The pointer to the coeftab of the cblk.lcoeftab matrix storing
269 * the coefficients of the panel, if Symmetric/Hermitian cases or if
270 * upper part is computed; cblk.ucoeftab otherwise. Must be of size
271 * cblk.stride -by- cblk.width
272 *
273 *******************************************************************************
274 *
275 * @return The number of flops of the operation.
276 *
277 *******************************************************************************/
278static inline pastix_fixdbl_t
279cpucblk_zadd_frfr( pastix_complex64_t alpha,
280 const SolverCblk *cblkA,
281 SolverCblk *cblkB,
282 const pastix_complex64_t *A,
283 pastix_complex64_t *B )
284{
285 pastix_int_t n = cblk_colnbr( cblkA );
286 pastix_int_t m = cblkA->stride;
287 pastix_fixdbl_t flops = m * n;
288
289 assert( !(cblkA->cblktype & CBLK_COMPRESSED) );
290 assert( !(cblkB->cblktype & CBLK_COMPRESSED) );
291
292 assert( (A != NULL) && (B != NULL) );
293
294 /* If the cblk matches */
295 if ( (n == cblk_colnbr( cblkB )) &&
296 (m == cblkB->stride) ) {
297
298 pastix_cblk_lock( cblkB );
300 alpha, A, m,
301 1., B, m );
302 pastix_cblk_unlock( cblkB );
303 }
304 else {
305 const pastix_complex64_t *bA;
306 pastix_complex64_t *bB;
307 const SolverBlok *blokA = cblkA->fblokptr;
308 const SolverBlok *blokB = cblkB->fblokptr;
309 const SolverBlok *lblokA = cblkA[1].fblokptr;
310 const SolverBlok *lblokB = cblkB[1].fblokptr;
311 pastix_int_t lda, ldb;
312
313 /* Both cblk A and B must be stored in 2D */
314 assert( cblkA->cblktype & CBLK_LAYOUT_2D );
315 assert( cblkB->cblktype & CBLK_LAYOUT_2D );
316
317 for (; blokA < lblokA; blokA++) {
318
319 /* Find facing bloknum */
320 while ( !is_block_inside_fblock( blokA, blokB ) && (blokB < lblokB) ) {
321 blokB++;
322 }
323
324 assert( is_block_inside_fblock( blokA, blokB ) && (blokB <= lblokB) );
325
326 bA = A + blokA->coefind;
327 bB = B + blokB->coefind;
328 lda = blok_rownbr( blokA );
329 ldb = blok_rownbr( blokB );
330
331 bB = bB + ldb * ( cblkA->fcolnum - cblkB->fcolnum ) + ( blokA->frownum - blokB->frownum );
332 m = lda;
333
334 pastix_cblk_lock( cblkB );
336 alpha, bA, lda,
337 1., bB, ldb );
338 pastix_cblk_unlock( cblkB );
339 }
340 }
341 return flops;
342}
343
344/**
345 *******************************************************************************
346 *
347 * @brief Add two column bloks in full rank format.
348 *
349 * The second cblk is overwritten by the sum of the two column blocks.
350 * B <- alpha * A + B
351 *
352 *******************************************************************************
353 *
354 * @param[in] alpha
355 * The scalar alpha
356 *
357 * @param[in] cblkA
358 * The column block of the A matrix.
359 *
360 * @param[inout] cblkB
361 * The column block of the B matrix
362 * On exit, cblkB coefficient arrays are overwritten by the result of
363 * alpha * A + B.
364 *
365 * @param[inout] A
366 * The pointer to the coeftab of the cblk.lcoeftab matrix storing the
367 * coefficients of the panel when the Lower part is computed,
368 * cblk.ucoeftab otherwise. Must be of size cblk.stride -by- cblk.width
369 *
370 * @param[in] B
371 * The pointer to the coeftab of the cblk.lcoeftab matrix storing
372 * the coefficients of the panel, if Symmetric/Hermitian cases or if
373 * upper part is computed; cblk.ucoeftab otherwise. Must be of size
374 * cblk.stride -by- cblk.width
375 *
376 * @param[in] work
377 * Temporary memory buffer.
378 *
379 * @param[in] lwork
380 * Temporary workspace dimension.
381 *
382 * @param[in] lowrank
383 * The structure with low-rank parameters.
384 *
385 *******************************************************************************
386 *
387 * @return The number of flops of the operation.
388 *
389 *******************************************************************************/
391cpucblk_zadd( pastix_complex64_t alpha,
392 const SolverCblk *cblkA,
393 SolverCblk *cblkB,
394 const void *A,
395 void *B,
396 pastix_complex64_t *work,
397 pastix_int_t lwork,
398 const pastix_lr_t *lowrank )
399{
401 pastix_fixdbl_t time, flops = 0.0;
402 pastix_int_t m = cblkA->stride;
403 pastix_int_t n = cblk_colnbr( cblkA );
404
405 if ( cblkB->cblktype & CBLK_COMPRESSED ) {
406 if ( cblkA->cblktype & CBLK_COMPRESSED ) {
408 time = kernel_trace_start( ktype );
409 flops = cpucblk_zadd_lrlr( alpha, cblkA, cblkB,
410 A, B, work, lwork, lowrank );
411 }
412 else {
414 time = kernel_trace_start( ktype );
415 flops = cpucblk_zadd_frlr( alpha, cblkA, cblkB,
416 A, B, work, lwork, lowrank );
417 }
418 }
419 else {
420 if ( cblkA->cblktype & CBLK_COMPRESSED ) {
421 assert(0); /* We do not add a compressed cblk to a non compressed cblk */
422 return 0.; /* Avoids compilation and coverity warning */
423 }
424 else {
426 time = kernel_trace_start( ktype );
427 flops = cpucblk_zadd_frfr( alpha, cblkA, cblkB, A, B );
428 }
429 }
430
431 kernel_trace_stop( cblkB->fblokptr->inlast, ktype, m, n, 0, flops, time );
432 return flops;
433}
434
static pastix_fixdbl_t cpucblk_zadd_frfr(pastix_complex64_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const pastix_complex64_t *A, pastix_complex64_t *B)
Add two column bloks in full rank format.
static pastix_fixdbl_t cpucblk_zadd_lrlr(pastix_complex64_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const pastix_lrblock_t *lrA, pastix_lrblock_t *lrB, pastix_complex64_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add two column bloks in low rank format.
static pastix_fixdbl_t cpucblk_zadd_frlr(pastix_complex64_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const pastix_complex64_t *A, pastix_lrblock_t *lrB, pastix_complex64_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add a column blok in full rank format to a column blok in low rank format.
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
double pastix_fixdbl_t
Definition datatypes.h:65
enum pastix_ktype_e pastix_ktype_t
List of the Level 1 events that may be traced in PaStiX.
static void kernel_trace_stop(int8_t inlast, pastix_ktype_t ktype, int m, int n, int k, double flops, double starttime)
Stop the trace of a single kernel.
static double kernel_trace_start(pastix_ktype_t ktype)
Start the trace of a single kernel.
@ PastixKernelGEADDCblkFRFR
@ PastixKernelGEADDCblkLRLR
@ PastixKernelGEADDCblkFRLR
int core_zgeadd(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex64_t alpha, const pastix_complex64_t *A, pastix_int_t LDA, pastix_complex64_t beta, pastix_complex64_t *B, pastix_int_t LDB)
Add two matrices together.
Definition core_zgeadd.c:78
pastix_fixdbl_t cpucblk_zadd(pastix_complex64_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const void *A, void *B, pastix_complex64_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add two column bloks in full rank format.
const pastix_lrblock_t * A
pastix_complex64_t alpha
pastix_trans_t transB
pastix_atomic_lock_t * lock
const pastix_lrblock_t * B
const pastix_lr_t * lowrank
pastix_int_t M
pastix_int_t offy
pastix_int_t lwused
pastix_trans_t transA
pastix_int_t Cm
pastix_int_t lwork
pastix_int_t N
pastix_lrblock_t * C
pastix_complex64_t beta
pastix_int_t K
pastix_int_t offx
pastix_complex64_t * work
pastix_int_t Cn
pastix_fixdbl_t core_zlradd(core_zlrmm_t *params, const pastix_lrblock_t *A, pastix_trans_t transV, int infomask)
Perform the addition of two low-rank matrices.
Structure to store all the parameters of the core_zlrmm family functions.
#define PASTIX_LRM3_ORTHOU
Macro to specify if the U part of a low-rank matrix is orthogonal or not (Used in LRMM functions).
Structure to define the type of function to use for the low-rank kernels and their parameters.
The block low-rank structure to hold a matrix in low-rank form.
@ PastixNoTrans
Definition api.h:445
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
Definition solver.h:395
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition solver.h:329
static int is_block_inside_fblock(const SolverBlok *blok, const SolverBlok *fblok)
Check if a block is included inside another one.
Definition solver.h:504
pastix_int_t frownum
Definition solver.h:147
pastix_atomic_lock_t lock
Definition solver.h:162
pastix_int_t coefind
Definition solver.h:149
SolverBlok * fblokptr
Definition solver.h:168
int8_t inlast
Definition solver.h:151
pastix_int_t stride
Definition solver.h:169
int8_t cblktype
Definition solver.h:164
pastix_int_t fcolnum
Definition solver.h:166
Solver block structure.
Definition solver.h:141
Solver column block structure.
Definition solver.h:161