PaStiX Handbook  6.4.0
core_cpotrfsp.c
Go to the documentation of this file.
1 /**
2  *
3  * @file core_cpotrfsp.c
4  *
5  * PaStiX kernel routines for Cholesky factorization.
6  *
7  * @copyright 2011-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.4.0
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Xavier Lacoste
14  * @author Gregoire Pichon
15  * @author Nolan Bredel
16  * @date 2024-07-05
17  * @generated from /builds/solverstack/pastix/kernels/core_zpotrfsp.c, normal z -> c, Thu Aug 29 14:20:17 2024
18  *
19  **/
20 #include "common.h"
21 #include "cblas.h"
22 #include "blend/solver.h"
23 #include "pastix_ccores.h"
24 #include "kernels_trace.h"
25 
26 #ifndef DOXYGEN_SHOULD_SKIP_THIS
27 #define MAXSIZEOFBLOCKS 64
28 static pastix_complex32_t cone = 1.0;
29 static pastix_complex32_t mcone = -1.0;
30 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
31 
32 /**
33  *******************************************************************************
34  *
35  * @ingroup kernel_blas_lapack_null
36  *
37  * @brief Compute the sequential static pivoting Cholesky factorization of the
38  * matrix n-by-n A = L * L^t .
39  *
40  *******************************************************************************
41  *
42  * @param[in] n
43  * The number of rows and columns of the matrix A.
44  *
45  * @param[inout] A
46  * The matrix A to factorize with Cholesky factorization. The matrix
47  * is of size lda -by- n.
48  *
49  * @param[in] lda
50  * The leading dimension of the matrix A.
51  *
52  * @param[inout] nbpivots
53  * Pointer to the number of piovting operations made during
54  * factorization. It is updated during this call
55  *
56  * @param[in] criterion
57  * Threshold use for static pivoting. If diagonal value is under this
58  * threshold, its value is replaced by the threshold and the number of
59  * pivots is incremented.
60  *
61  *******************************************************************************
62  *
63  * @warning This routine will fail if it discovers a null or negative value on
64  * the diagonal during factorization.
65  *
66  *******************************************************************************/
67 static inline void
70  pastix_int_t lda,
71  pastix_int_t *nbpivots,
72  float criterion )
73 {
74  pastix_int_t k;
75  pastix_complex32_t *Akk = A; /* A [k ][k] */
76  pastix_complex32_t *Amk = A+1; /* A [k+1][k] */
77  pastix_complex32_t alpha;
78 
79  for (k=0; k<n; k++){
80  if ( cabsf(*Akk) < criterion ) {
81  (*Akk) = (pastix_complex32_t)criterion;
82  (*nbpivots)++;
83  }
84 
85  /* Hermitian matrices, so imaginary part should be 0 */
86  if ( crealf(*Akk) < 0.0 )
87  {
88  pastix_print_error( "Negative diagonal term\n" );
89  }
90 
91  *Akk = csqrtf(*Akk);
92  alpha = 1.0 / (*Akk);
93 
94  /* Scale the diagonal to compute L((k+1):n,k) */
95  cblas_cscal(n-k-1, CBLAS_SADDR( alpha ), Amk, 1 );
96 
97  /* Move to next Akk */
98  Akk += (lda+1);
99 
100  cblas_cher(CblasColMajor, CblasLower,
101  n-k-1, -1.0,
102  Amk, 1,
103  Akk, lda);
104 
105  /* Move to next Amk */
106  Amk = Akk+1;
107  }
108 }
109 
110 /**
111  *******************************************************************************
112  *
113  * @brief Compute the block static pivoting Cholesky factorization of the matrix
114  * n-by-n A = L * L^t .
115  *
116  *******************************************************************************
117  *
118  * @param[in] n
119  * The number of rows and columns of the matrix A.
120  *
121  * @param[inout] A
122  * The matrix A to factorize with Cholesky factorization. The matrix
123  * is of size lda -by- n.
124  *
125  * @param[in] lda
126  * The leading dimension of the matrix A.
127  *
128  * @param[inout] nbpivots
129  * Pointer to the number of piovting operations made during
130  * factorization. It is updated during this call
131  *
132  * @param[in] criterion
133  * Threshold use for static pivoting. If diagonal value is under this
134  * threshold, its value is replaced by the threshold and the nu,ber of
135  * pivots is incremented.
136  *
137  *******************************************************************************
138  *
139  * @warning This routine will fail if it discovers a null or negative value on
140  * the diagonal during factorization.
141  *
142  *******************************************************************************/
143 void
146  pastix_int_t lda,
147  pastix_int_t *nbpivots,
148  float criterion )
149 {
150  pastix_int_t k, blocknbr, blocksize, matrixsize;
151  pastix_complex32_t *tmp,*tmp1,*tmp2;
152 
153  /* diagonal supernode is divided into MAXSIZEOFBLOCK-by-MAXSIZEOFBLOCKS blocks */
154  blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
155 
156  for (k=0; k<blocknbr; k++) {
157 
158  blocksize = pastix_imin(MAXSIZEOFBLOCKS, n-k*MAXSIZEOFBLOCKS);
159  tmp = A+(k*MAXSIZEOFBLOCKS)*(lda+1); /* Lk,k */
160 
161  /* Factorize the diagonal block Akk*/
162  core_cpotf2sp(blocksize, tmp, lda, nbpivots, criterion);
163 
164  if ((k*MAXSIZEOFBLOCKS+blocksize) < n) {
165 
166  tmp1 = tmp + blocksize; /* Lk+1,k */
167  tmp2 = tmp1 + blocksize * lda; /* Lk+1,k+1 */
168 
169  matrixsize = n-(k*MAXSIZEOFBLOCKS+blocksize);
170 
171  /* Compute the column L(k+1:n,k) = (L(k,k)D(k,k))^{-1}A(k+1:n,k) */
172  /* 1) Compute A(k+1:n,k) = A(k+1:n,k)L(k,k)^{-T} = D(k,k)L(k+1:n,k) */
173  /* input: L(k,k) in tmp, A(k+1:n,k) in tmp1 */
174  /* output: A(k+1:n,k) in tmp1 */
175  cblas_ctrsm(CblasColMajor,
176  CblasRight, CblasLower,
177  CblasConjTrans, CblasNonUnit,
178  matrixsize, blocksize,
179  CBLAS_SADDR(cone), tmp, lda,
180  tmp1, lda);
181 
182  /* Update Ak+1k+1 = Ak+1k+1 - Lk+1k * Lk+1kT */
183  cblas_cherk(CblasColMajor, CblasLower, CblasNoTrans,
184  matrixsize, blocksize,
185  (float)mcone, tmp1, lda,
186  (float)cone, tmp2, lda);
187  }
188  }
189 }
190 
191 /**
192  *******************************************************************************
193  *
194  * @brief Compute the Cholesky factorization of the diagonal block in a panel.
195  *
196  * @warning This routine will fail if it discovers a null or negative value on
197  * the diagonal during factorization.
198  *
199  *******************************************************************************
200  *
201  * @param[in] solvmtx
202  * Solver Matrix structure of the problem
203  *
204  * @param[in] cblk
205  * Pointer to the structure representing the panel to factorize in the
206  * cblktab array. Next column blok must be accessible through cblk[1].
207  *
208  * @param[inout] dataL
209  * The pointer to the correct representation of the lower part of the data.
210  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
211  * - pastix_lr_block if the block is compressed.
212  *
213  *******************************************************************************
214  *
215  * @return The number of static pivoting performed during the diagonal block
216  * factorization.
217  *
218  *******************************************************************************/
219 int
221  SolverCblk *cblk,
222  void *dataL )
223 {
224  pastix_int_t ncols, stride;
225  pastix_int_t nbpivots = 0;
226  pastix_fixdbl_t time, flops;
228  pastix_lrblock_t *lrL;
229  float criterion = solvmtx->diagthreshold;
230 
232 
233  ncols = cblk->lcolnum - cblk->fcolnum + 1;
234  stride = (cblk->cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->stride;
235 
236  /* check if diagonal column block */
237  assert( cblk->fcolnum == cblk->fblokptr->frownum );
238  assert( cblk->lcolnum == cblk->fblokptr->lrownum );
239 
240  if ( cblk->cblktype & CBLK_COMPRESSED ) {
241  /* dataL is a LRblock */
242  lrL = (pastix_lrblock_t *)dataL;
243  L = lrL->u;
244  stride = ncols;
245 
246  assert( lrL->rk == -1 );
247  assert( stride == lrL->rkmax );
248  } else {
249  L = (pastix_complex32_t *)dataL;
250  }
251 
252  /* Factorize diagonal block */
253  flops = FLOPS_CPOTRF( ncols );
254  kernel_trace_start_lvl2( PastixKernelLvl2POTRF );
255  core_cpotrfsp(ncols, L, stride, &nbpivots, criterion );
256  kernel_trace_stop_lvl2( flops );
257 
258  kernel_trace_stop( cblk->fblokptr->inlast, PastixKernelPOTRF, ncols, 0, 0, flops, time );
259 
260  if ( nbpivots ) {
261  pastix_atomic_add_32b( &(solvmtx->nbpivots), nbpivots );
262  }
263  return nbpivots;
264 }
265 
266 /**
267  *******************************************************************************
268  *
269  * @brief Compute the Cholesky factorization of one panel.
270  *
271  *******************************************************************************
272  *
273  * @param[in] solvmtx
274  * Solver Matrix structure of the problem
275  *
276  * @param[in] cblk
277  * Pointer to the structure representing the panel to factorize in the
278  * cblktab array. Next column blok must be accessible through cblk[1].
279  *
280  * @param[inout] L
281  * The pointer to the correct representation of the lower part of the data.
282  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
283  * - pastix_lr_block if the block is compressed.
284  *
285  *******************************************************************************
286  *
287  * @return The number of static pivoting during factorization of the diagonal
288  * block.
289  *
290  *******************************************************************************/
291 int
293  SolverCblk *cblk,
294  void *L )
295 {
296  pastix_int_t nbpivots;
297  nbpivots = cpucblk_cpotrfsp1d_potrf( solvmtx, cblk, L );
298 
301  cblk, L, L, &(solvmtx->lowrank) );
302  return nbpivots;
303 }
304 
305 
306 /**
307  *******************************************************************************
308  *
309  * @brief Perform the Cholesky factorization of a given panel and apply all its
310  * updates.
311  *
312  *******************************************************************************
313  *
314  * @param[in] solvmtx
315  * Solver Matrix structure of the problem
316  *
317  * @param[in] cblk
318  * Pointer to the structure representing the panel to factorize in the
319  * cblktab array. Next column blok must be accessible through cblk[1].
320  *
321  * @param[in] work
322  * Temporary memory buffer.
323  *
324  * @param[in] lwork
325  * Temporary workspace dimension.
326  *
327  *******************************************************************************
328  *
329  * @return The number of static pivoting during factorization of the diagonal
330  * block.
331  *
332  *******************************************************************************/
333 int
335  SolverCblk *cblk,
336  pastix_complex32_t *work,
337  pastix_int_t lwork )
338 {
339  void *L = cblk_getdataL( cblk );
340  SolverCblk *fcblk;
341  SolverBlok *blok, *lblk;
342  pastix_int_t nbpivots;
343 
344  nbpivots = cpucblk_cpotrfsp1d_panel( solvmtx, cblk, L );
345 
346  blok = cblk->fblokptr + 1; /* First off-diagonal block */
347  lblk = cblk[1].fblokptr; /* Next diagonal block */
348 
349  /* If there are off-diagonal blocks, perform the updates */
350  for( ; blok < lblk; blok++ )
351  {
352  fcblk = solvmtx->cblktab + blok->fcblknm;
353 
354  if ( fcblk->cblktype & CBLK_FANIN ) {
355  cpucblk_calloc( PastixLCoef, fcblk );
356  }
357 
359  cblk, blok, fcblk,
360  L, L, cblk_getdataL( fcblk ),
361  work, lwork, &(solvmtx->lowrank) );
362 
363  cpucblk_crelease_deps( PastixLCoef, solvmtx, cblk, fcblk );
364  }
365 
366  return nbpivots;
367 }
368 
369 /**
370  *******************************************************************************
371  *
372  * @brief Perform the Cholesky factorization of a given panel and submit tasks
373  * for the subsequent updates.
374  *
375  *******************************************************************************
376  *
377  * @param[in] solvmtx
378  * Solver Matrix structure of the problem
379  *
380  * @param[in] cblk
381  * Pointer to the structure representing the panel to factorize in the
382  * cblktab array. Next column blok must be accessible through cblk[1].
383  *
384  *******************************************************************************
385  *
386  * @return The number of static pivoting during factorization of the diagonal
387  * block.
388  *
389  *******************************************************************************/
390 int
392  SolverCblk *cblk )
393 {
394  void *L = cblk_getdataL( cblk );
395  SolverBlok *blok, *lblk;
396  pastix_int_t i, nbpivots;
397  pastix_queue_t *queue = solvmtx->computeQueue[ cblk->threadid ];
398 
399  assert( cblk->cblktype & CBLK_TASKS_2D );
400  nbpivots = cpucblk_cpotrfsp1d_panel( solvmtx, cblk, L );
401 
402  blok = cblk->fblokptr + 1; /* this diagonal block */
403  lblk = cblk[1].fblokptr; /* the next diagonal block */
404 
405  /* if there are off-diagonal supernodes in the column */
406  for( i=0; blok < lblk; i++, blok++ )
407  {
408  assert( !((solvmtx->cblktab + blok->fcblknm)->cblktype & CBLK_RECV) );
409  pqueuePush1( queue, - (blok - solvmtx->bloktab) - 1, cblk->priority + i );
410 
411  /* Skip blocks facing the same cblk */
412  while ( ( blok < lblk ) &&
413  ( blok[0].fcblknm == blok[1].fcblknm ) &&
414  ( blok[0].lcblknm == blok[1].lcblknm ) )
415  {
416  blok++;
417  }
418  }
419 
420  return nbpivots;
421 }
422 
423 /**
424  *******************************************************************************
425  *
426  * @brief Apply the updates of the cholesky factorisation of a given panel.
427  *
428  *******************************************************************************
429  *
430  * @param[in] solvmtx
431  * Solver Matrix structure of the problem
432  *
433  * @param[in] blok
434  * Pointer to the blok where the update start.
435  *
436  * @param[in] work
437  * Temporary memory buffer.
438  *
439  * @param[in] lwork
440  * Temporary workspace dimension.
441  *
442  *******************************************************************************/
443 void
445  SolverBlok *blok,
446  pastix_complex32_t *work,
447  pastix_int_t lwork )
448 {
449  SolverCblk *cblk = solvmtx->cblktab + blok->lcblknm;
450  SolverCblk *fcbk = solvmtx->cblktab + blok->fcblknm;
451  SolverBlok *lblk = cblk[1].fblokptr; /* the next diagonal block */
452  void *L = cblk_getdataL( cblk );
453 
454  if ( fcbk->cblktype & CBLK_FANIN ) {
455  cpucblk_calloc( PastixLCoef, fcbk );
456  }
457 
458  do
459  {
460  /* Update on L */
462  cblk, blok, fcbk,
463  L, L, cblk_getdataL( fcbk ),
464  work, lwork, &(solvmtx->lowrank) );
465 
466  cpucblk_crelease_deps( PastixLCoef, solvmtx, cblk, fcbk );
467  blok++;
468  }
469  while ( ( blok < lblk ) &&
470  ( blok[-1].fcblknm == blok[0].fcblknm ) &&
471  ( blok[-1].lcblknm == blok[0].lcblknm ) );
472 }
static void core_cpotf2sp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the sequential static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t .
Definition: core_cpotrfsp.c:68
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
float _Complex pastix_complex32_t
Definition: datatypes.h:76
double pastix_fixdbl_t
Definition: datatypes.h:65
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition: queue.h:64
Queue structure.
Definition: queue.h:38
static void kernel_trace_stop(int8_t inlast, pastix_ktype_t ktype, int m, int n, int k, double flops, double starttime)
Stop the trace of a single kernel.
static double kernel_trace_start(pastix_ktype_t ktype)
Start the trace of a single kernel.
Definition: kernels_trace.h:87
@ PastixKernelLvl2POTRF
Definition: kernels_enums.h:89
@ PastixKernelPOTRF
Definition: kernels_enums.h:50
void core_cpotrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t .
int cpucblk_cpotrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the Cholesky factorization of a given panel and submit tasks for the subsequent updates.
void cpucblk_cpotrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex32_t *work, pastix_int_t lwork)
Apply the updates of the cholesky factorisation of a given panel.
pastix_fixdbl_t cpucblk_cgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, pastix_complex32_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
int cpucblk_cpotrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_int_t lwork)
Perform the Cholesky factorization of a given panel and apply all its updates.
void cpucblk_crelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
int cpucblk_cpotrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Compute the Cholesky factorization of one panel.
void cpucblk_ctrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
Definition: core_ctrsmsp.c:356
void cpucblk_calloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
int cpucblk_cpotrfsp1d_potrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the Cholesky factorization of the diagonal block in a panel.
The block low-rank structure to hold a matrix in low-rank form.
@ PastixLCoef
Definition: api.h:478
@ PastixLower
Definition: api.h:467
@ PastixRight
Definition: api.h:496
@ PastixNonUnit
Definition: api.h:487
@ PastixConjTrans
Definition: api.h:447
pastix_int_t lrownum
Definition: solver.h:148
pastix_lr_t lowrank
Definition: solver.h:236
pastix_int_t priority
Definition: solver.h:183
pastix_int_t fcblknm
Definition: solver.h:144
SolverBlok *restrict bloktab
Definition: solver.h:229
pastix_int_t frownum
Definition: solver.h:147
SolverBlok * fblokptr
Definition: solver.h:168
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
Definition: solver.h:342
pastix_int_t lcblknm
Definition: solver.h:143
int threadid
Definition: solver.h:182
int8_t inlast
Definition: solver.h:151
SolverCblk *restrict cblktab
Definition: solver.h:228
pastix_int_t stride
Definition: solver.h:169
int8_t cblktype
Definition: solver.h:164
pastix_int_t lcolnum
Definition: solver.h:167
double diagthreshold
Definition: solver.h:238
volatile int32_t nbpivots
Definition: solver.h:239
pastix_int_t fcolnum
Definition: solver.h:166
Solver block structure.
Definition: solver.h:141
Solver column block structure.
Definition: solver.h:161
Solver column block structure.
Definition: solver.h:203