PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
core_cpxtrfsp.c
Go to the documentation of this file.
1/**
2 *
3 * @file core_cpxtrfsp.c
4 *
5 * PaStiX kernel routines for LL^t factorization.
6 *
7 * @copyright 2011-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Mathieu Faverge
12 * @author Pierre Ramet
13 * @author Xavier Lacoste
14 * @date 2024-07-05
15 * @generated from /builds/2mk6rsew/0/solverstack/pastix/kernels/core_zpxtrfsp.c, normal z -> c, Tue Feb 25 14:34:51 2025
16 *
17 **/
18#include "common.h"
19#include "cblas.h"
20#include "blend/solver.h"
21#include "pastix_ccores.h"
22#include "kernels_trace.h"
23
24#ifndef DOXYGEN_SHOULD_SKIP_THIS
25#define MAXSIZEOFBLOCKS 64
26static pastix_complex32_t cone = 1.0;
27static pastix_complex32_t mcone = -1.0;
28#endif /* DOXYGEN_SHOULD_SKIP_THIS */
29
30/**
31 *******************************************************************************
32 *
33 * @ingroup kernel_blas_lapack_null
34 *
35 * @brief Compute the sequential static pivoting LL^t factorization of the
36 * matrix n-by-n A = L * L^t .
37 *
38 *******************************************************************************
39 *
40 * @param[in] n
41 * The number of rows and columns of the matrix A.
42 *
43 * @param[inout] A
44 * The matrix A to factorize with LL^t factorization. The matrix
45 * is of size lda -by- n.
46 *
47 * @param[in] lda
48 * The leading dimension of the matrix A.
49 *
50 * @param[inout] nbpivots
51 * Pointer to the number of piovting operations made during
52 * factorization. It is updated during this call
53 *
54 * @param[in] criterion
55 * Threshold use for static pivoting. If diagonal value is under this
56 * threshold, its value is replaced by the threshold and the number of
57 * pivots is incremented.
58 *
59 *******************************************************************************
60 *
61 * @warning This routine will fail if it discovers a null or negative value on
62 * the diagonal during factorization.
63 *
64 *******************************************************************************/
65static inline void
68 pastix_int_t lda,
69 pastix_int_t *nbpivots,
70 float criterion )
71{
73 pastix_complex32_t *Akk = A; /* A [k ][k] */
74 pastix_complex32_t *Amk = A+1; /* A [k+1][k] */
76
77 for (k=0; k<n; k++){
78 if ( cabsf(*Akk) < criterion ) {
79 (*Akk) = (pastix_complex32_t)criterion;
80 (*nbpivots)++;
81 }
82
83 *Akk = csqrtf(*Akk);
84 alpha = 1.0 / (*Akk);
85
86 /* Scale the diagonal to compute L((k+1):n,k) */
87 cblas_cscal(n-k-1, CBLAS_SADDR( alpha ), Amk, 1 );
88
89 /* Move to next Akk */
90 Akk += (lda+1);
91
92 cblas_csyrk( CblasColMajor, CblasLower, CblasNoTrans,
93 n-k-1, 1,
94 CBLAS_SADDR( mcone ), Amk, lda,
95 CBLAS_SADDR( cone ), Akk, lda );
96
97 /* Move to next Amk */
98 Amk = Akk+1;
99 }
100}
101
102/**
103 *******************************************************************************
104 *
105 * @brief Compute the block static pivoting LL^t factorization of the matrix
106 * n-by-n A = L * L^t .
107 *
108 *******************************************************************************
109 *
110 * @param[in] n
111 * The number of rows and columns of the matrix A.
112 *
113 * @param[inout] A
114 * The matrix A to factorize with LL^t factorization. The matrix
115 * is of size lda -by- n.
116 *
117 * @param[in] lda
118 * The leading dimension of the matrix A.
119 *
120 * @param[inout] nbpivots
121 * Pointer to the number of piovting operations made during
122 * factorization. It is updated during this call
123 *
124 * @param[in] criterion
125 * Threshold use for static pivoting. If diagonal value is under this
126 * threshold, its value is replaced by the threshold and the nu,ber of
127 * pivots is incremented.
128 *
129 *******************************************************************************
130 *
131 * @warning This routine will fail if it discovers a null or negative value on
132 * the diagonal during factorization.
133 *
134 *******************************************************************************/
135void
138 pastix_int_t lda,
139 pastix_int_t *nbpivots,
140 float criterion )
141{
142 pastix_int_t k, blocknbr, blocksize, matrixsize;
143 pastix_complex32_t *tmp,*tmp1,*tmp2;
144
145 /* diagonal supernode is divided into MAXSIZEOFBLOCK-by-MAXSIZEOFBLOCKS blocks */
146 blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
147
148 for (k=0; k<blocknbr; k++) {
149
150 blocksize = pastix_imin(MAXSIZEOFBLOCKS, n-k*MAXSIZEOFBLOCKS);
151 tmp = A+(k*MAXSIZEOFBLOCKS)*(lda+1); /* Lk,k */
152
153 /* Factorize the diagonal block Akk*/
154 core_cpxtf2sp(blocksize, tmp, lda, nbpivots, criterion);
155
156 if ((k*MAXSIZEOFBLOCKS+blocksize) < n) {
157
158 tmp1 = tmp + blocksize; /* Lk+1,k */
159 tmp2 = tmp1 + blocksize * lda; /* Lk+1,k+1 */
160
161 matrixsize = n-(k*MAXSIZEOFBLOCKS+blocksize);
162
163 /* Compute the column L(k+1:n,k) = (L(k,k)D(k,k))^{-1}A(k+1:n,k) */
164 /* 1) Compute A(k+1:n,k) = A(k+1:n,k)L(k,k)^{-T} = D(k,k)L(k+1:n,k) */
165 /* input: L(k,k) in tmp, A(k+1:n,k) in tmp1 */
166 /* output: A(k+1:n,k) in tmp1 */
167 cblas_ctrsm(CblasColMajor,
168 CblasRight, CblasLower,
169 CblasTrans, CblasNonUnit,
170 matrixsize, blocksize,
171 CBLAS_SADDR(cone), tmp, lda,
172 tmp1, lda);
173
174 /* Update Ak+1k+1 = Ak+1k+1 - Lk+1k * Lk+1kT */
175 cblas_csyrk(CblasColMajor, CblasLower, CblasNoTrans,
176 matrixsize, blocksize,
177 CBLAS_SADDR( mcone ), tmp1, lda,
178 CBLAS_SADDR( cone ), tmp2, lda);
179 }
180 }
181}
182
183/**
184 *******************************************************************************
185 *
186 * @brief Compute the LL^t factorization of the diagonal block in a panel.
187 *
188 * @warning This routine will fail if it discovers a null or negative value on
189 * the diagonal during factorization.
190 *
191 *******************************************************************************
192 *
193 * @param[in] solvmtx
194 * Solver Matrix structure of the problem
195 *
196 * @param[in] cblk
197 * Pointer to the structure representing the panel to factorize in the
198 * cblktab array. Next column blok must be accessible through cblk[1].
199 *
200 * @param[inout] dataL
201 * The pointer to the correct representation of the lower part of the data.
202 * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
203 * - pastix_lr_block if the block is compressed.
204 *
205 *******************************************************************************
206 *
207 * @return The number of static pivoting performed during the diagonal block
208 * factorization.
209 *
210 *******************************************************************************/
211int
213 SolverCblk *cblk,
214 void *dataL )
215{
216 pastix_int_t ncols, stride;
217 pastix_int_t nbpivots = 0;
218 pastix_fixdbl_t time, flops;
220 pastix_lrblock_t *lrL;
221 float criterion = solvmtx->diagthreshold;
222
224
225 ncols = cblk->lcolnum - cblk->fcolnum + 1;
226 stride = (cblk->cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->stride;
227
228 /* check if diagonal column block */
229 assert( cblk->fcolnum == cblk->fblokptr->frownum );
230 assert( cblk->lcolnum == cblk->fblokptr->lrownum );
231
232 if ( cblk->cblktype & CBLK_COMPRESSED ) {
233 /* dataL is a LRblock */
234 lrL = (pastix_lrblock_t *)dataL;
235 L = lrL->u;
236 stride = ncols;
237
238 assert( lrL->rk == -1 );
239 assert( stride == lrL->rkmax );
240 } else {
241 L = (pastix_complex32_t *)dataL;
242 }
243
244 /* Factorize diagonal block */
245 flops = FLOPS_CPOTRF( ncols );
246 kernel_trace_start_lvl2( PastixKernelLvl2PXTRF );
247 core_cpxtrfsp(ncols, L, stride, &nbpivots, criterion );
248 kernel_trace_stop_lvl2( flops );
249
250 kernel_trace_stop( cblk->fblokptr->inlast, PastixKernelPXTRF, ncols, 0, 0, flops, time );
251
252 if ( nbpivots ) {
253 pastix_atomic_add_32b( &(solvmtx->nbpivots), nbpivots );
254 }
255 return nbpivots;
256}
257
258/**
259 *******************************************************************************
260 *
261 * @brief Compute the LL^t factorization of one panel.
262 *
263 *******************************************************************************
264 *
265 * @param[in] solvmtx
266 * Solver Matrix structure of the problem
267 *
268 * @param[in] cblk
269 * Pointer to the structure representing the panel to factorize in the
270 * cblktab array. Next column blok must be accessible through cblk[1].
271 *
272 * @param[inout] L
273 * The pointer to the correct representation of the lower part of the data.
274 * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
275 * - pastix_lr_block if the block is compressed.
276 *
277 *******************************************************************************
278 *
279 * @return The number of static pivoting during factorization of the diagonal
280 * block.
281 *
282 *******************************************************************************/
283int
285 SolverCblk *cblk,
286 void *L )
287{
288 pastix_int_t nbpivots;
289 nbpivots = cpucblk_cpxtrfsp1d_pxtrf( solvmtx, cblk, L );
290
293 cblk, L, L, &(solvmtx->lowrank) );
294 return nbpivots;
295}
296
297
298/**
299 *******************************************************************************
300 *
301 * @brief Perform the LL^t factorization of a given panel and apply all its
302 * updates.
303 *
304 *******************************************************************************
305 *
306 * @param[in] solvmtx
307 * Solver Matrix structure of the problem
308 *
309 * @param[in] cblk
310 * Pointer to the structure representing the panel to factorize in the
311 * cblktab array. Next column blok must be accessible through cblk[1].
312 *
313 * @param[in] work
314 * Temporary memory buffer.
315 *
316 * @param[in] lwork
317 * Temporary workspace dimension.
318 *
319 *******************************************************************************
320 *
321 * @return The number of static pivoting during factorization of the diagonal
322 * block.
323 *
324 *******************************************************************************/
325int
327 SolverCblk *cblk,
328 pastix_complex32_t *work,
329 pastix_int_t lwork )
330{
331 void *L = cblk_getdataL( cblk );
332 SolverCblk *fcblk;
333 SolverBlok *blok, *lblk;
334 pastix_int_t nbpivots;
335
336 nbpivots = cpucblk_cpxtrfsp1d_panel( solvmtx, cblk, L );
337
338 blok = cblk->fblokptr + 1; /* First off-diagonal block */
339 lblk = cblk[1].fblokptr; /* Next diagonal block */
340
341 /* If there are off-diagonal blocks, perform the updates */
342 for( ; blok < lblk; blok++ )
343 {
344 fcblk = solvmtx->cblktab + blok->fcblknm;
345
346 if ( fcblk->cblktype & CBLK_FANIN ) {
347 cpucblk_calloc( PastixLCoef, fcblk );
348 }
349
351 cblk, blok, fcblk,
352 L, L, cblk_getdataL( fcblk ),
353 work, lwork, &(solvmtx->lowrank) );
354
355 cpucblk_crelease_deps( PastixLCoef, solvmtx, cblk, fcblk );
356 }
357
358 return nbpivots;
359}
360
361/**
362 *******************************************************************************
363 *
364 * @brief Perform the LL^t factorization of a given panel.
365 *
366 *******************************************************************************
367 *
368 * @param[in] solvmtx
369 * Solver Matrix structure of the problem
370 *
371 * @param[in] cblk
372 * Pointer to the structure representing the panel to factorize in the
373 * cblktab array. Next column blok must be accessible through cblk[1].
374 *
375 *******************************************************************************
376 *
377 * @return The number of static pivoting during factorization of the diagonal
378 * block.
379 *
380 *******************************************************************************/
381int
383 SolverCblk *cblk )
384{
385 void *L = cblk_getdataL( cblk );
386 SolverBlok *blok, *lblk;
387 pastix_int_t i, nbpivots;
388 pastix_queue_t *queue = solvmtx->computeQueue[ cblk->threadid ];
389
390 assert( cblk->cblktype & CBLK_TASKS_2D );
391 nbpivots = cpucblk_cpxtrfsp1d_panel( solvmtx, cblk, L );
392
393 blok = cblk->fblokptr + 1; /* this diagonal block */
394 lblk = cblk[1].fblokptr; /* the next diagonal block */
395
396 /* if there are off-diagonal supernodes in the column */
397 for( i=0; blok < lblk; i++, blok++ )
398 {
399 assert( !((solvmtx->cblktab + blok->fcblknm)->cblktype & CBLK_RECV) );
400 pqueuePush1( queue, - (blok - solvmtx->bloktab) - 1, cblk->priority + i );
401
402 /* Skip blocks facing the same cblk */
403 while ( ( blok < lblk ) &&
404 ( blok[0].fcblknm == blok[1].fcblknm ) &&
405 ( blok[0].lcblknm == blok[1].lcblknm ) )
406 {
407 blok++;
408 }
409 }
410
411 return nbpivots;
412}
413
414/**
415 *******************************************************************************
416 *
417 * @brief Apply the updates of the LL^t factorisation of a given panel.
418 *
419 *******************************************************************************
420 *
421 * @param[in] solvmtx
422 * Solver Matrix structure of the problem
423 *
424 * @param[in] blok
425 * Pointer to the blok where the update start.
426 *
427 * @param[in] work
428 * Temporary memory buffer.
429 *
430 * @param[in] lwork
431 * Temporary workspace dimension.
432 *
433 *******************************************************************************/
434void
436 SolverBlok *blok,
437 pastix_complex32_t *work,
438 pastix_int_t lwork )
439{
440 SolverCblk *cblk = solvmtx->cblktab + blok->lcblknm;
441 SolverCblk *fcbk = solvmtx->cblktab + blok->fcblknm;
442 SolverBlok *lblk = cblk[1].fblokptr; /* the next diagonal block */
443 void *L = cblk_getdataL( cblk );
444
445 if ( fcbk->cblktype & CBLK_FANIN ) {
447 }
448
449 do
450 {
451 /* Update on L */
453 cblk, blok, fcbk,
454 L, L, cblk_getdataL( fcbk ),
455 work, lwork, &(solvmtx->lowrank) );
456
457 cpucblk_crelease_deps( PastixLCoef, solvmtx, cblk, fcbk );
458 blok++;
459 }
460 while ( ( blok < lblk ) &&
461 ( blok[-1].fcblknm == blok[0].fcblknm ) &&
462 ( blok[-1].lcblknm == blok[0].lcblknm ) );
463}
int cpucblk_cpxtrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Compute the LL^t factorization of one panel.
static void core_cpxtf2sp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the sequential static pivoting LL^t factorization of the matrix n-by-n A = L * L^t .
void cpucblk_cpxtrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex32_t *work, pastix_int_t lwork)
Apply the updates of the LL^t factorisation of a given panel.
int cpucblk_cpxtrfsp1d_pxtrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the LL^t factorization of the diagonal block in a panel.
int cpucblk_cpxtrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LL^t factorization of a given panel.
int cpucblk_cpxtrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_int_t lwork)
Perform the LL^t factorization of a given panel and apply all its updates.
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
float _Complex pastix_complex32_t
Definition datatypes.h:76
double pastix_fixdbl_t
Definition datatypes.h:65
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition queue.h:64
Queue structure.
Definition queue.h:38
static void kernel_trace_stop(int8_t inlast, pastix_ktype_t ktype, int m, int n, int k, double flops, double starttime)
Stop the trace of a single kernel.
static double kernel_trace_start(pastix_ktype_t ktype)
Start the trace of a single kernel.
@ PastixKernelLvl2PXTRF
@ PastixKernelPXTRF
void core_cpxtrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting LL^t factorization of the matrix n-by-n A = L * L^t .
pastix_fixdbl_t cpucblk_cgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, pastix_complex32_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
void cpucblk_crelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void cpucblk_ctrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
void cpucblk_calloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
The block low-rank structure to hold a matrix in low-rank form.
@ PastixLCoef
Definition api.h:478
@ PastixLower
Definition api.h:467
@ PastixRight
Definition api.h:496
@ PastixNonUnit
Definition api.h:487
@ PastixTrans
Definition api.h:446
pastix_int_t lrownum
Definition solver.h:148
pastix_lr_t lowrank
Definition solver.h:236
pastix_int_t priority
Definition solver.h:183
pastix_int_t fcblknm
Definition solver.h:144
SolverBlok *restrict bloktab
Definition solver.h:229
pastix_int_t frownum
Definition solver.h:147
SolverBlok * fblokptr
Definition solver.h:168
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
Definition solver.h:342
pastix_int_t lcblknm
Definition solver.h:143
int8_t inlast
Definition solver.h:151
SolverCblk *restrict cblktab
Definition solver.h:228
pastix_int_t stride
Definition solver.h:169
int8_t cblktype
Definition solver.h:164
pastix_int_t lcolnum
Definition solver.h:167
double diagthreshold
Definition solver.h:238
volatile int32_t nbpivots
Definition solver.h:239
pastix_int_t fcolnum
Definition solver.h:166
Solver block structure.
Definition solver.h:141
Solver column block structure.
Definition solver.h:161
Solver column block structure.
Definition solver.h:203