PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
core_cpotrfsp.c
Go to the documentation of this file.
1/**
2 *
3 * @file core_cpotrfsp.c
4 *
5 * PaStiX kernel routines for Cholesky factorization.
6 *
7 * @copyright 2011-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Mathieu Faverge
12 * @author Pierre Ramet
13 * @author Xavier Lacoste
14 * @author Gregoire Pichon
15 * @author Nolan Bredel
16 * @date 2024-07-05
17 * @generated from /builds/2mk6rsew/0/solverstack/pastix/kernels/core_zpotrfsp.c, normal z -> c, Tue Feb 25 14:34:51 2025
18 *
19 **/
20#include "common.h"
21#include "cblas.h"
22#include "blend/solver.h"
23#include "pastix_ccores.h"
24#include "kernels_trace.h"
25
26#ifndef DOXYGEN_SHOULD_SKIP_THIS
27#define MAXSIZEOFBLOCKS 64
28static pastix_complex32_t cone = 1.0;
29static pastix_complex32_t mcone = -1.0;
30#endif /* DOXYGEN_SHOULD_SKIP_THIS */
31
32/**
33 *******************************************************************************
34 *
35 * @ingroup kernel_blas_lapack_null
36 *
37 * @brief Compute the sequential static pivoting Cholesky factorization of the
38 * matrix n-by-n A = L * L^t .
39 *
40 *******************************************************************************
41 *
42 * @param[in] n
43 * The number of rows and columns of the matrix A.
44 *
45 * @param[inout] A
46 * The matrix A to factorize with Cholesky factorization. The matrix
47 * is of size lda -by- n.
48 *
49 * @param[in] lda
50 * The leading dimension of the matrix A.
51 *
52 * @param[inout] nbpivots
53 * Pointer to the number of piovting operations made during
54 * factorization. It is updated during this call
55 *
56 * @param[in] criterion
57 * Threshold use for static pivoting. If diagonal value is under this
58 * threshold, its value is replaced by the threshold and the number of
59 * pivots is incremented.
60 *
61 *******************************************************************************
62 *
63 * @warning This routine will fail if it discovers a null or negative value on
64 * the diagonal during factorization.
65 *
66 *******************************************************************************/
67static inline void
70 pastix_int_t lda,
71 pastix_int_t *nbpivots,
72 float criterion )
73{
75 pastix_complex32_t *Akk = A; /* A [k ][k] */
76 pastix_complex32_t *Amk = A+1; /* A [k+1][k] */
78
79 for (k=0; k<n; k++){
80 if ( cabsf(*Akk) < criterion ) {
81 (*Akk) = (pastix_complex32_t)criterion;
82 (*nbpivots)++;
83 }
84
85 /* Hermitian matrices, so imaginary part should be 0 */
86 if ( crealf(*Akk) < 0.0 )
87 {
88 pastix_print_error( "Negative diagonal term\n" );
89 }
90
91 *Akk = csqrtf(*Akk);
92 alpha = 1.0 / (*Akk);
93
94 /* Scale the diagonal to compute L((k+1):n,k) */
95 cblas_cscal(n-k-1, CBLAS_SADDR( alpha ), Amk, 1 );
96
97 /* Move to next Akk */
98 Akk += (lda+1);
99
100 cblas_cher(CblasColMajor, CblasLower,
101 n-k-1, -1.0,
102 Amk, 1,
103 Akk, lda);
104
105 /* Move to next Amk */
106 Amk = Akk+1;
107 }
108}
109
110/**
111 *******************************************************************************
112 *
113 * @brief Compute the block static pivoting Cholesky factorization of the matrix
114 * n-by-n A = L * L^t .
115 *
116 *******************************************************************************
117 *
118 * @param[in] n
119 * The number of rows and columns of the matrix A.
120 *
121 * @param[inout] A
122 * The matrix A to factorize with Cholesky factorization. The matrix
123 * is of size lda -by- n.
124 *
125 * @param[in] lda
126 * The leading dimension of the matrix A.
127 *
128 * @param[inout] nbpivots
129 * Pointer to the number of piovting operations made during
130 * factorization. It is updated during this call
131 *
132 * @param[in] criterion
133 * Threshold use for static pivoting. If diagonal value is under this
134 * threshold, its value is replaced by the threshold and the nu,ber of
135 * pivots is incremented.
136 *
137 *******************************************************************************
138 *
139 * @warning This routine will fail if it discovers a null or negative value on
140 * the diagonal during factorization.
141 *
142 *******************************************************************************/
143void
146 pastix_int_t lda,
147 pastix_int_t *nbpivots,
148 float criterion )
149{
150 pastix_int_t k, blocknbr, blocksize, matrixsize;
151 pastix_complex32_t *tmp,*tmp1,*tmp2;
152
153 /* diagonal supernode is divided into MAXSIZEOFBLOCK-by-MAXSIZEOFBLOCKS blocks */
154 blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
155
156 for (k=0; k<blocknbr; k++) {
157
158 blocksize = pastix_imin(MAXSIZEOFBLOCKS, n-k*MAXSIZEOFBLOCKS);
159 tmp = A+(k*MAXSIZEOFBLOCKS)*(lda+1); /* Lk,k */
160
161 /* Factorize the diagonal block Akk*/
162 core_cpotf2sp(blocksize, tmp, lda, nbpivots, criterion);
163
164 if ((k*MAXSIZEOFBLOCKS+blocksize) < n) {
165
166 tmp1 = tmp + blocksize; /* Lk+1,k */
167 tmp2 = tmp1 + blocksize * lda; /* Lk+1,k+1 */
168
169 matrixsize = n-(k*MAXSIZEOFBLOCKS+blocksize);
170
171 /* Compute the column L(k+1:n,k) = (L(k,k)D(k,k))^{-1}A(k+1:n,k) */
172 /* 1) Compute A(k+1:n,k) = A(k+1:n,k)L(k,k)^{-T} = D(k,k)L(k+1:n,k) */
173 /* input: L(k,k) in tmp, A(k+1:n,k) in tmp1 */
174 /* output: A(k+1:n,k) in tmp1 */
175 cblas_ctrsm(CblasColMajor,
176 CblasRight, CblasLower,
177 CblasConjTrans, CblasNonUnit,
178 matrixsize, blocksize,
179 CBLAS_SADDR(cone), tmp, lda,
180 tmp1, lda);
181
182 /* Update Ak+1k+1 = Ak+1k+1 - Lk+1k * Lk+1kT */
183 cblas_cherk(CblasColMajor, CblasLower, CblasNoTrans,
184 matrixsize, blocksize,
185 (float)mcone, tmp1, lda,
186 (float)cone, tmp2, lda);
187 }
188 }
189}
190
191/**
192 *******************************************************************************
193 *
194 * @brief Compute the Cholesky factorization of the diagonal block in a panel.
195 *
196 * @warning This routine will fail if it discovers a null or negative value on
197 * the diagonal during factorization.
198 *
199 *******************************************************************************
200 *
201 * @param[in] solvmtx
202 * Solver Matrix structure of the problem
203 *
204 * @param[in] cblk
205 * Pointer to the structure representing the panel to factorize in the
206 * cblktab array. Next column blok must be accessible through cblk[1].
207 *
208 * @param[inout] dataL
209 * The pointer to the correct representation of the lower part of the data.
210 * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
211 * - pastix_lr_block if the block is compressed.
212 *
213 *******************************************************************************
214 *
215 * @return The number of static pivoting performed during the diagonal block
216 * factorization.
217 *
218 *******************************************************************************/
219int
221 SolverCblk *cblk,
222 void *dataL )
223{
224 pastix_int_t ncols, stride;
225 pastix_int_t nbpivots = 0;
226 pastix_fixdbl_t time, flops;
228 pastix_lrblock_t *lrL;
229 float criterion = solvmtx->diagthreshold;
230
232
233 ncols = cblk->lcolnum - cblk->fcolnum + 1;
234 stride = (cblk->cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->stride;
235
236 /* check if diagonal column block */
237 assert( cblk->fcolnum == cblk->fblokptr->frownum );
238 assert( cblk->lcolnum == cblk->fblokptr->lrownum );
239
240 if ( cblk->cblktype & CBLK_COMPRESSED ) {
241 /* dataL is a LRblock */
242 lrL = (pastix_lrblock_t *)dataL;
243 L = lrL->u;
244 stride = ncols;
245
246 assert( lrL->rk == -1 );
247 assert( stride == lrL->rkmax );
248 } else {
249 L = (pastix_complex32_t *)dataL;
250 }
251
252 /* Factorize diagonal block */
253 flops = FLOPS_CPOTRF( ncols );
254 kernel_trace_start_lvl2( PastixKernelLvl2POTRF );
255 core_cpotrfsp(ncols, L, stride, &nbpivots, criterion );
256 kernel_trace_stop_lvl2( flops );
257
258 kernel_trace_stop( cblk->fblokptr->inlast, PastixKernelPOTRF, ncols, 0, 0, flops, time );
259
260 if ( nbpivots ) {
261 pastix_atomic_add_32b( &(solvmtx->nbpivots), nbpivots );
262 }
263 return nbpivots;
264}
265
266/**
267 *******************************************************************************
268 *
269 * @brief Compute the Cholesky factorization of one panel.
270 *
271 *******************************************************************************
272 *
273 * @param[in] solvmtx
274 * Solver Matrix structure of the problem
275 *
276 * @param[in] cblk
277 * Pointer to the structure representing the panel to factorize in the
278 * cblktab array. Next column blok must be accessible through cblk[1].
279 *
280 * @param[inout] L
281 * The pointer to the correct representation of the lower part of the data.
282 * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
283 * - pastix_lr_block if the block is compressed.
284 *
285 *******************************************************************************
286 *
287 * @return The number of static pivoting during factorization of the diagonal
288 * block.
289 *
290 *******************************************************************************/
291int
293 SolverCblk *cblk,
294 void *L )
295{
296 pastix_int_t nbpivots;
297 nbpivots = cpucblk_cpotrfsp1d_potrf( solvmtx, cblk, L );
298
301 cblk, L, L, &(solvmtx->lowrank) );
302 return nbpivots;
303}
304
305
306/**
307 *******************************************************************************
308 *
309 * @brief Perform the Cholesky factorization of a given panel and apply all its
310 * updates.
311 *
312 *******************************************************************************
313 *
314 * @param[in] solvmtx
315 * Solver Matrix structure of the problem
316 *
317 * @param[in] cblk
318 * Pointer to the structure representing the panel to factorize in the
319 * cblktab array. Next column blok must be accessible through cblk[1].
320 *
321 * @param[in] work
322 * Temporary memory buffer.
323 *
324 * @param[in] lwork
325 * Temporary workspace dimension.
326 *
327 *******************************************************************************
328 *
329 * @return The number of static pivoting during factorization of the diagonal
330 * block.
331 *
332 *******************************************************************************/
333int
335 SolverCblk *cblk,
336 pastix_complex32_t *work,
337 pastix_int_t lwork )
338{
339 void *L = cblk_getdataL( cblk );
340 SolverCblk *fcblk;
341 SolverBlok *blok, *lblk;
342 pastix_int_t nbpivots;
343
344 nbpivots = cpucblk_cpotrfsp1d_panel( solvmtx, cblk, L );
345
346 blok = cblk->fblokptr + 1; /* First off-diagonal block */
347 lblk = cblk[1].fblokptr; /* Next diagonal block */
348
349 /* If there are off-diagonal blocks, perform the updates */
350 for( ; blok < lblk; blok++ )
351 {
352 fcblk = solvmtx->cblktab + blok->fcblknm;
353
354 if ( fcblk->cblktype & CBLK_FANIN ) {
355 cpucblk_calloc( PastixLCoef, fcblk );
356 }
357
359 cblk, blok, fcblk,
360 L, L, cblk_getdataL( fcblk ),
361 work, lwork, &(solvmtx->lowrank) );
362
363 cpucblk_crelease_deps( PastixLCoef, solvmtx, cblk, fcblk );
364 }
365
366 return nbpivots;
367}
368
369/**
370 *******************************************************************************
371 *
372 * @brief Perform the Cholesky factorization of a given panel and submit tasks
373 * for the subsequent updates.
374 *
375 *******************************************************************************
376 *
377 * @param[in] solvmtx
378 * Solver Matrix structure of the problem
379 *
380 * @param[in] cblk
381 * Pointer to the structure representing the panel to factorize in the
382 * cblktab array. Next column blok must be accessible through cblk[1].
383 *
384 *******************************************************************************
385 *
386 * @return The number of static pivoting during factorization of the diagonal
387 * block.
388 *
389 *******************************************************************************/
390int
392 SolverCblk *cblk )
393{
394 void *L = cblk_getdataL( cblk );
395 SolverBlok *blok, *lblk;
396 pastix_int_t i, nbpivots;
397 pastix_queue_t *queue = solvmtx->computeQueue[ cblk->threadid ];
398
399 assert( cblk->cblktype & CBLK_TASKS_2D );
400 nbpivots = cpucblk_cpotrfsp1d_panel( solvmtx, cblk, L );
401
402 blok = cblk->fblokptr + 1; /* this diagonal block */
403 lblk = cblk[1].fblokptr; /* the next diagonal block */
404
405 /* if there are off-diagonal supernodes in the column */
406 for( i=0; blok < lblk; i++, blok++ )
407 {
408 assert( !((solvmtx->cblktab + blok->fcblknm)->cblktype & CBLK_RECV) );
409 pqueuePush1( queue, - (blok - solvmtx->bloktab) - 1, cblk->priority + i );
410
411 /* Skip blocks facing the same cblk */
412 while ( ( blok < lblk ) &&
413 ( blok[0].fcblknm == blok[1].fcblknm ) &&
414 ( blok[0].lcblknm == blok[1].lcblknm ) )
415 {
416 blok++;
417 }
418 }
419
420 return nbpivots;
421}
422
423/**
424 *******************************************************************************
425 *
426 * @brief Apply the updates of the cholesky factorisation of a given panel.
427 *
428 *******************************************************************************
429 *
430 * @param[in] solvmtx
431 * Solver Matrix structure of the problem
432 *
433 * @param[in] blok
434 * Pointer to the blok where the update start.
435 *
436 * @param[in] work
437 * Temporary memory buffer.
438 *
439 * @param[in] lwork
440 * Temporary workspace dimension.
441 *
442 *******************************************************************************/
443void
445 SolverBlok *blok,
446 pastix_complex32_t *work,
447 pastix_int_t lwork )
448{
449 SolverCblk *cblk = solvmtx->cblktab + blok->lcblknm;
450 SolverCblk *fcbk = solvmtx->cblktab + blok->fcblknm;
451 SolverBlok *lblk = cblk[1].fblokptr; /* the next diagonal block */
452 void *L = cblk_getdataL( cblk );
453
454 if ( fcbk->cblktype & CBLK_FANIN ) {
456 }
457
458 do
459 {
460 /* Update on L */
462 cblk, blok, fcbk,
463 L, L, cblk_getdataL( fcbk ),
464 work, lwork, &(solvmtx->lowrank) );
465
466 cpucblk_crelease_deps( PastixLCoef, solvmtx, cblk, fcbk );
467 blok++;
468 }
469 while ( ( blok < lblk ) &&
470 ( blok[-1].fcblknm == blok[0].fcblknm ) &&
471 ( blok[-1].lcblknm == blok[0].lcblknm ) );
472}
static void core_cpotf2sp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the sequential static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t .
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
float _Complex pastix_complex32_t
Definition datatypes.h:76
double pastix_fixdbl_t
Definition datatypes.h:65
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition queue.h:64
Queue structure.
Definition queue.h:38
static void kernel_trace_stop(int8_t inlast, pastix_ktype_t ktype, int m, int n, int k, double flops, double starttime)
Stop the trace of a single kernel.
static double kernel_trace_start(pastix_ktype_t ktype)
Start the trace of a single kernel.
@ PastixKernelLvl2POTRF
@ PastixKernelPOTRF
void core_cpotrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t .
int cpucblk_cpotrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the Cholesky factorization of a given panel and submit tasks for the subsequent updates.
void cpucblk_cpotrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex32_t *work, pastix_int_t lwork)
Apply the updates of the cholesky factorisation of a given panel.
pastix_fixdbl_t cpucblk_cgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, pastix_complex32_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
int cpucblk_cpotrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_int_t lwork)
Perform the Cholesky factorization of a given panel and apply all its updates.
void cpucblk_crelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
int cpucblk_cpotrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Compute the Cholesky factorization of one panel.
void cpucblk_ctrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
void cpucblk_calloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
int cpucblk_cpotrfsp1d_potrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the Cholesky factorization of the diagonal block in a panel.
The block low-rank structure to hold a matrix in low-rank form.
@ PastixLCoef
Definition api.h:478
@ PastixLower
Definition api.h:467
@ PastixRight
Definition api.h:496
@ PastixNonUnit
Definition api.h:487
@ PastixConjTrans
Definition api.h:447
pastix_int_t lrownum
Definition solver.h:148
pastix_lr_t lowrank
Definition solver.h:236
pastix_int_t priority
Definition solver.h:183
pastix_int_t fcblknm
Definition solver.h:144
SolverBlok *restrict bloktab
Definition solver.h:229
pastix_int_t frownum
Definition solver.h:147
SolverBlok * fblokptr
Definition solver.h:168
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
Definition solver.h:342
pastix_int_t lcblknm
Definition solver.h:143
int8_t inlast
Definition solver.h:151
SolverCblk *restrict cblktab
Definition solver.h:228
pastix_int_t stride
Definition solver.h:169
int8_t cblktype
Definition solver.h:164
pastix_int_t lcolnum
Definition solver.h:167
double diagthreshold
Definition solver.h:238
volatile int32_t nbpivots
Definition solver.h:239
pastix_int_t fcolnum
Definition solver.h:166
Solver block structure.
Definition solver.h:141
Solver column block structure.
Definition solver.h:161
Solver column block structure.
Definition solver.h:203