PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
core_cscalo.c
Go to the documentation of this file.
1/**
2 *
3 * @file core_cscalo.c
4 *
5 * PaStiX kernel routines
6 *
7 * @copyright 2010-2015 Univ. of Tennessee, Univ. of California Berkeley and
8 * Univ. of Colorado Denver. All rights reserved.
9 * @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
10 * Univ. Bordeaux. All rights reserved.
11 *
12 * @version 6.4.0
13 * @author Mathieu Faverge
14 * @author Nolan Bredel
15 * @date 2024-07-05
16 * @generated from /builds/2mk6rsew/0/solverstack/pastix/kernels/core_zscalo.c, normal z -> c, Tue Feb 25 14:34:52 2025
17 *
18 **/
19#include "common.h"
20#include "blend/solver.h"
21#include "pastix_ccores.h"
22#include "cblas.h"
23#include "kernels_trace.h"
24
25/**
26 ******************************************************************************
27 *
28 * @brief Scale a matrix by a diagonal out of place
29 *
30 * Perform the operation: B <- op(A) * D, where A is a general matrix, and D a
31 * diagonal matrix.
32 *
33 *******************************************************************************
34 *
35 * @param[in] trans
36 * @arg PastixNoTrans: No transpose, op( A ) = A;
37 * @arg PastixTrans: Transpose, op( A ) = A;
38 * @arg PastixConjTrans: Conjugate Transpose, op( A ) = conjf(A).
39 *
40 * @param[in] M
41 * Number of rows of the matrix B.
42 * Number of rows of the matrix A.
43 *
44 * @param[in] N
45 * Number of columns of the matrix B.
46 * Number of columns of the matrix A.
47 *
48 * @param[in] A
49 * Matrix of size lda-by-N.
50 *
51 * @param[in] lda
52 * Leading dimension of the array A. lda >= max(1,M).
53 *
54 * @param[in] D
55 * Diagonal matrix of size ldd-by-N.
56 *
57 * @param[in] ldd
58 * Leading dimension of the array D. ldd >= 1.
59 *
60 * @param[inout] B
61 * Matrix of size LDB-by-N.
62 *
63 * @param[in] ldb
64 * Leading dimension of the array B. ldb >= max(1,M)
65 *
66 *******************************************************************************
67 *
68 * @retval PASTIX_SUCCESS successful exit
69 * @retval <0 if -i, the i-th argument had an illegal value
70 * @retval 1, not yet implemented
71 *
72 ******************************************************************************/
73int
77 const pastix_complex32_t *A,
78 pastix_int_t lda,
79 const pastix_complex32_t *D,
80 pastix_int_t ldd,
82 pastix_int_t ldb )
83{
85 pastix_int_t i, j;
86
87#if !defined(NDEBUG)
88 if ((trans < PastixNoTrans) ||
89 (trans > PastixConjTrans))
90 {
91 return -1;
92 }
93
94 if (M < 0) {
95 return -2;
96 }
97 if (N < 0) {
98 return -3;
99 }
100 if ( lda < pastix_imax(1,M) )
101 {
102 return -5;
103 }
104 if ( ldd < 1 )
105 {
106 return -7;
107 }
108 if ( ldb < pastix_imax(1,M) ) {
109 return -9;
110 }
111#endif
112
113#if defined(PRECISION_z) || defined(PRECISION_c)
114 if (trans == PastixConjTrans) {
115 for( j=0; j<N; j++, D += ldd ) {
116 alpha = *D;
117 for( i=0; i<M; i++, B++, A++ ) {
118 *B = conjf(*A) * alpha;
119 }
120 A += lda - M;
121 B += ldb - M;
122 }
123 }
124 else
125#endif
126 {
127 for( j=0; j<N; j++, D += ldd ) {
128 alpha = *D;
129 for( i=0; i<M; i++, B++, A++ ) {
130 *B = (*A) * alpha;
131 }
132 A += lda - M;
133 B += ldb - M;
134 }
135 }
136
137 (void)trans;
138 return PASTIX_SUCCESS;
139}
140
141/**
142 *******************************************************************************
143 *
144 * @brief Copy the L term with scaling for the two-terms algorithm
145 *
146 * Performs LD = op(L) * D
147 *
148 *******************************************************************************
149 *
150 * @param[in] trans
151 * @arg PastixNoTrans: No transpose, op( L ) = L;
152 * @arg PastixTrans: Transpose, op( L ) = L;
153 * @arg PastixConjTrans: Conjugate Transpose, op( L ) = conjf(L).
154 *
155 * @param[in] cblk
156 * Pointer to the structure representing the panel to factorize in the
157 * cblktab array. Next column blok must be accessible through cblk[1].
158 *
159 * @param[inout] dataL
160 * The pointer to the correct representation of lower part of the data.
161 * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
162 * - pastix_lr_block if the block is compressed.
163 *
164 * @param[inout] dataLD
165 * The pointer to the correct representation of LD.
166 * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
167 * - pastix_lr_block if the block is compressed.
168 *
169 *******************************************************************************/
170void
172 const SolverCblk *cblk,
173 void *dataL,
174 void *dataLD )
175{
176 const SolverBlok *blok, *lblk;
177 pastix_int_t M, N;
178 pastix_lrblock_t *lrL, *lrLD;
179 pastix_fixdbl_t time;
181
183
184 N = cblk_colnbr( cblk );
185
186 blok = cblk->fblokptr + 1; /* Firt off-diagonal block */
187 lblk = cblk[1].fblokptr; /* Next diagonal block */
188
189 /* if there are off-diagonal supernodes in the column */
190 if ( blok < lblk )
191 {
192 const pastix_complex32_t *L;
193 const pastix_complex32_t *D;
194 pastix_int_t ldl, ldd, ldld;
195
196 if ( cblk->cblktype & CBLK_COMPRESSED ) {
197 lrL = (pastix_lrblock_t *)dataL;
198 lrLD = (pastix_lrblock_t *)dataLD;
199 D = lrL->u;
200 ldd = N+1;
201
202 lrL++; lrLD++;
203 for(; blok < lblk; blok++, lrL++, lrLD++) {
204 M = blok_rownbr( blok );
205
206 assert( lrLD->rk == -1 );
207
208 /* Copy L in LD */
209 lrLD->rk = lrL->rk;
210 lrLD->rkmax = lrL->rkmax;
211
212 if ( lrL->rk == -1 ) {
213 assert( M == lrL->rkmax );
214
215 /* Initialize the workspace */
216 memcpy( lrLD->u, lrL->u, lrL->rkmax * N * sizeof(pastix_complex32_t) );
217 lrLD->v = NULL;
218
219 L = lrL->u;
220 LD = lrLD->u;
221 }
222 else {
223 /*
224 * Initialize the workspace
225 */
226 memcpy( lrLD->u, lrL->u, M * lrL->rk * sizeof(pastix_complex32_t) );
227 lrLD->v = ((pastix_complex32_t *)lrLD->u) + M * lrL->rk;
228 memcpy( lrLD->v, lrL->v, N * lrL->rkmax * sizeof(pastix_complex32_t) );
229
230 L = lrL->v;
231 LD = lrLD->v;
232 M = lrLD->rkmax;
233 }
234
235 ldl = M;
236 ldld = M;
237
238 /* Compute LD = L * D */
239 core_cscalo( trans, M, N,
240 L, ldl, D, ldd,
241 LD, ldld );
242 }
243 }
244 else if ( cblk->cblktype & CBLK_LAYOUT_2D ) {
245 L = D = (pastix_complex32_t *)dataL;
246 LD = (pastix_complex32_t *)dataLD;
247 ldd = N+1;
248
249 for(; blok < lblk; blok++) {
250 M = blok_rownbr( blok );
251
252 /* Compute LD = L * D */
253 core_cscalo( trans, M, N,
254 L + blok->coefind, M, D, ldd,
255 LD + blok->coefind, M );
256 }
257 }
258 else {
259 L = D = (pastix_complex32_t *)dataL;
260 LD = (pastix_complex32_t *)dataLD;
261 ldl = cblk->stride;
262 ldd = cblk->stride+1;
263
264 M = cblk->stride - N;
265 LD = LD + blok->coefind;
266 ldld = cblk->stride;
267
268 core_cscalo( trans, M, N, L + blok->coefind, ldl, D, ldd, LD, ldld );
269 }
270 }
271
272 M = cblk->stride - N;
274}
275
276/**
277 *******************************************************************************
278 *
279 * @brief Copy the lower terms of the block with scaling for the two-terms
280 * algorithm.
281 *
282 * Performs B = op(A) * D
283 *
284 *******************************************************************************
285 *
286 * @param[in] trans
287 * @arg PastixNoTrans: No transpose, op( A ) = A;
288 * @arg PastixTrans: Transpose, op( A ) = A;
289 * @arg PastixConjTrans: Conjugate Transpose, op( A ) = conjf(A).
290 *
291 * @param[in] cblk
292 * Pointer to the structure representing the panel to factorize in the
293 * cblktab array. Next column blok must be accessible through cblk[1].
294 *
295 * @param[in] blok_m
296 * Index of the off-diagonal block to be solved in the cblk. All blocks
297 * facing the same cblk, in the current column block will be solved.
298 *
299 * @param[in] dataA
300 * The pointer to the correct representation of data of A.
301 * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
302 * - pastix_lr_block if the block is compressed.
303 *
304 * @param[in] dataD
305 * The pointer to the correct representation of data of D.
306 * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
307 * - pastix_lr_block if the block is compressed.
308 *
309 * @param[inout] dataB
310 * The pointer to the correct representation of data of B.
311 * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
312 * - pastix_lr_block if the block is compressed.
313 *
314 *******************************************************************************/
315void
317 const SolverCblk *cblk,
318 pastix_int_t blok_m,
319 const void *dataA,
320 const void *dataD,
321 void *dataB )
322{
323 const SolverBlok *fblok, *lblok, *blok;
324 pastix_int_t M, N, ldd, offset, cblk_m;
325 const pastix_complex32_t *lA;
326 pastix_lrblock_t *lrD, *lrB, *lrA;
327 pastix_complex32_t *D, *B, *A;
329
330 N = cblk_colnbr( cblk );
331 fblok = cblk[0].fblokptr; /* The diagonal block */
332 lblok = cblk[1].fblokptr; /* The diagonal block of the next cblk */
333 ldd = blok_rownbr( fblok ) + 1;
334
335 assert( blok_rownbr(fblok) == N );
336 assert( cblk->cblktype & CBLK_LAYOUT_2D );
337
338 blok = fblok + blok_m;
339 offset = blok->coefind;
340 cblk_m = blok->fcblknm;
341
342 if ( cblk->cblktype & CBLK_COMPRESSED ) {
343 lrA = (pastix_lrblock_t *)dataA;
344 lrD = (pastix_lrblock_t *)dataD;
345 lrB = (pastix_lrblock_t *)dataB;
346 D = lrD->u;
347 for (; (blok < lblok) && (blok->fcblknm == cblk_m); blok++, lrA++, lrB++) {
348 M = blok_rownbr( blok );
349
350 /* Copy A in B */
351 lrB->rk = lrA->rk;
352 lrB->rkmax = lrA->rkmax;
353
354 if ( lrB->rk == -1 ) {
355 assert( M == lrA->rkmax );
356 assert( NULL == lrA->v );
357
358 /* Initialize the workspace */
359 memcpy( lrB->u, lrA->u, lrA->rkmax * N * sizeof(pastix_complex32_t) );
360 lrB->v = NULL;
361
362 lA = lrA->u;
363 lB = lrB->u;
364 }
365 else {
366 /*
367 * Initialize the workspace
368 */
369 memcpy( lrB->u, lrA->u, M * lrA->rk * sizeof(pastix_complex32_t) );
370 lrB->v = ((pastix_complex32_t *)lrB->u) + M * lrA->rk;
371 memcpy( lrB->v, lrA->v, N * lrA->rkmax * sizeof(pastix_complex32_t) );
372
373 lA = lrA->v;
374 lB = lrB->v;
375 M = lrA->rkmax;
376 }
377
378 /* Compute B = op(A) * D */
379 core_cscalo( trans, M, N,
380 lA, M, D, ldd, lB, M );
381 }
382 }
383 else {
384 A = (pastix_complex32_t *)dataA;
385 D = (pastix_complex32_t *)dataD;
386 B = (pastix_complex32_t *)dataB;
387
388 for (; (blok < lblok) && (blok->fcblknm == cblk_m); blok++) {
389 lA = A + blok->coefind - offset;
390 lB = B + blok->coefind - offset;
391 M = blok_rownbr(blok);
392
393 /* Compute B = op(A) * D */
394 core_cscalo( trans, M, N,
395 lA, M, D, ldd, lB, M );
396 }
397 }
398}
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
float _Complex pastix_complex32_t
Definition datatypes.h:76
double pastix_fixdbl_t
Definition datatypes.h:65
static void kernel_trace_stop(int8_t inlast, pastix_ktype_t ktype, int m, int n, int k, double flops, double starttime)
Stop the trace of a single kernel.
static double kernel_trace_start(pastix_ktype_t ktype)
Start the trace of a single kernel.
@ PastixKernelSCALOCblk
int core_cscalo(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, const pastix_complex32_t *A, pastix_int_t lda, const pastix_complex32_t *D, pastix_int_t ldd, pastix_complex32_t *B, pastix_int_t ldb)
Scale a matrix by a diagonal out of place.
Definition core_cscalo.c:74
void cpucblk_cscalo(pastix_trans_t trans, const SolverCblk *cblk, void *dataL, void *dataLD)
Copy the L term with scaling for the two-terms algorithm.
void cpublok_cscalo(pastix_trans_t trans, const SolverCblk *cblk, pastix_int_t blok_m, const void *dataA, const void *dataD, void *dataB)
Copy the lower terms of the block with scaling for the two-terms algorithm.
The block low-rank structure to hold a matrix in low-rank form.
enum pastix_trans_e pastix_trans_t
Transpostion.
@ PastixConjTrans
Definition api.h:447
@ PastixNoTrans
Definition api.h:445
@ PASTIX_SUCCESS
Definition api.h:367
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
Definition solver.h:395
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition solver.h:329
pastix_int_t fcblknm
Definition solver.h:144
pastix_int_t coefind
Definition solver.h:149
SolverBlok * fblokptr
Definition solver.h:168
int8_t inlast
Definition solver.h:151
pastix_int_t stride
Definition solver.h:169
int8_t cblktype
Definition solver.h:164
Solver block structure.
Definition solver.h:141
Solver column block structure.
Definition solver.h:161