PaStiX Handbook  6.4.0
core_cpxtrfsp.c
Go to the documentation of this file.
1 /**
2  *
3  * @file core_cpxtrfsp.c
4  *
5  * PaStiX kernel routines for LL^t factorization.
6  *
7  * @copyright 2011-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.4.0
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Xavier Lacoste
14  * @date 2024-07-05
15  * @generated from /builds/solverstack/pastix/kernels/core_zpxtrfsp.c, normal z -> c, Tue Oct 8 14:17:18 2024
16  *
17  **/
18 #include "common.h"
19 #include "cblas.h"
20 #include "blend/solver.h"
21 #include "pastix_ccores.h"
22 #include "kernels_trace.h"
23 
24 #ifndef DOXYGEN_SHOULD_SKIP_THIS
25 #define MAXSIZEOFBLOCKS 64
26 static pastix_complex32_t cone = 1.0;
27 static pastix_complex32_t mcone = -1.0;
28 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
29 
30 /**
31  *******************************************************************************
32  *
33  * @ingroup kernel_blas_lapack_null
34  *
35  * @brief Compute the sequential static pivoting LL^t factorization of the
36  * matrix n-by-n A = L * L^t .
37  *
38  *******************************************************************************
39  *
40  * @param[in] n
41  * The number of rows and columns of the matrix A.
42  *
43  * @param[inout] A
44  * The matrix A to factorize with LL^t factorization. The matrix
45  * is of size lda -by- n.
46  *
47  * @param[in] lda
48  * The leading dimension of the matrix A.
49  *
50  * @param[inout] nbpivots
51  * Pointer to the number of piovting operations made during
52  * factorization. It is updated during this call
53  *
54  * @param[in] criterion
55  * Threshold use for static pivoting. If diagonal value is under this
56  * threshold, its value is replaced by the threshold and the number of
57  * pivots is incremented.
58  *
59  *******************************************************************************
60  *
61  * @warning This routine will fail if it discovers a null or negative value on
62  * the diagonal during factorization.
63  *
64  *******************************************************************************/
65 static inline void
68  pastix_int_t lda,
69  pastix_int_t *nbpivots,
70  float criterion )
71 {
72  pastix_int_t k;
73  pastix_complex32_t *Akk = A; /* A [k ][k] */
74  pastix_complex32_t *Amk = A+1; /* A [k+1][k] */
75  pastix_complex32_t alpha;
76 
77  for (k=0; k<n; k++){
78  if ( cabsf(*Akk) < criterion ) {
79  (*Akk) = (pastix_complex32_t)criterion;
80  (*nbpivots)++;
81  }
82 
83  *Akk = csqrtf(*Akk);
84  alpha = 1.0 / (*Akk);
85 
86  /* Scale the diagonal to compute L((k+1):n,k) */
87  cblas_cscal(n-k-1, CBLAS_SADDR( alpha ), Amk, 1 );
88 
89  /* Move to next Akk */
90  Akk += (lda+1);
91 
92  cblas_csyrk( CblasColMajor, CblasLower, CblasNoTrans,
93  n-k-1, 1,
94  CBLAS_SADDR( mcone ), Amk, lda,
95  CBLAS_SADDR( cone ), Akk, lda );
96 
97  /* Move to next Amk */
98  Amk = Akk+1;
99  }
100 }
101 
102 /**
103  *******************************************************************************
104  *
105  * @brief Compute the block static pivoting LL^t factorization of the matrix
106  * n-by-n A = L * L^t .
107  *
108  *******************************************************************************
109  *
110  * @param[in] n
111  * The number of rows and columns of the matrix A.
112  *
113  * @param[inout] A
114  * The matrix A to factorize with LL^t factorization. The matrix
115  * is of size lda -by- n.
116  *
117  * @param[in] lda
118  * The leading dimension of the matrix A.
119  *
120  * @param[inout] nbpivots
121  * Pointer to the number of piovting operations made during
122  * factorization. It is updated during this call
123  *
124  * @param[in] criterion
125  * Threshold use for static pivoting. If diagonal value is under this
126  * threshold, its value is replaced by the threshold and the nu,ber of
127  * pivots is incremented.
128  *
129  *******************************************************************************
130  *
131  * @warning This routine will fail if it discovers a null or negative value on
132  * the diagonal during factorization.
133  *
134  *******************************************************************************/
135 void
138  pastix_int_t lda,
139  pastix_int_t *nbpivots,
140  float criterion )
141 {
142  pastix_int_t k, blocknbr, blocksize, matrixsize;
143  pastix_complex32_t *tmp,*tmp1,*tmp2;
144 
145  /* diagonal supernode is divided into MAXSIZEOFBLOCK-by-MAXSIZEOFBLOCKS blocks */
146  blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
147 
148  for (k=0; k<blocknbr; k++) {
149 
150  blocksize = pastix_imin(MAXSIZEOFBLOCKS, n-k*MAXSIZEOFBLOCKS);
151  tmp = A+(k*MAXSIZEOFBLOCKS)*(lda+1); /* Lk,k */
152 
153  /* Factorize the diagonal block Akk*/
154  core_cpxtf2sp(blocksize, tmp, lda, nbpivots, criterion);
155 
156  if ((k*MAXSIZEOFBLOCKS+blocksize) < n) {
157 
158  tmp1 = tmp + blocksize; /* Lk+1,k */
159  tmp2 = tmp1 + blocksize * lda; /* Lk+1,k+1 */
160 
161  matrixsize = n-(k*MAXSIZEOFBLOCKS+blocksize);
162 
163  /* Compute the column L(k+1:n,k) = (L(k,k)D(k,k))^{-1}A(k+1:n,k) */
164  /* 1) Compute A(k+1:n,k) = A(k+1:n,k)L(k,k)^{-T} = D(k,k)L(k+1:n,k) */
165  /* input: L(k,k) in tmp, A(k+1:n,k) in tmp1 */
166  /* output: A(k+1:n,k) in tmp1 */
167  cblas_ctrsm(CblasColMajor,
168  CblasRight, CblasLower,
169  CblasTrans, CblasNonUnit,
170  matrixsize, blocksize,
171  CBLAS_SADDR(cone), tmp, lda,
172  tmp1, lda);
173 
174  /* Update Ak+1k+1 = Ak+1k+1 - Lk+1k * Lk+1kT */
175  cblas_csyrk(CblasColMajor, CblasLower, CblasNoTrans,
176  matrixsize, blocksize,
177  CBLAS_SADDR( mcone ), tmp1, lda,
178  CBLAS_SADDR( cone ), tmp2, lda);
179  }
180  }
181 }
182 
183 /**
184  *******************************************************************************
185  *
186  * @brief Compute the LL^t factorization of the diagonal block in a panel.
187  *
188  * @warning This routine will fail if it discovers a null or negative value on
189  * the diagonal during factorization.
190  *
191  *******************************************************************************
192  *
193  * @param[in] solvmtx
194  * Solver Matrix structure of the problem
195  *
196  * @param[in] cblk
197  * Pointer to the structure representing the panel to factorize in the
198  * cblktab array. Next column blok must be accessible through cblk[1].
199  *
200  * @param[inout] dataL
201  * The pointer to the correct representation of the lower part of the data.
202  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
203  * - pastix_lr_block if the block is compressed.
204  *
205  *******************************************************************************
206  *
207  * @return The number of static pivoting performed during the diagonal block
208  * factorization.
209  *
210  *******************************************************************************/
211 int
213  SolverCblk *cblk,
214  void *dataL )
215 {
216  pastix_int_t ncols, stride;
217  pastix_int_t nbpivots = 0;
218  pastix_fixdbl_t time, flops;
220  pastix_lrblock_t *lrL;
221  float criterion = solvmtx->diagthreshold;
222 
224 
225  ncols = cblk->lcolnum - cblk->fcolnum + 1;
226  stride = (cblk->cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->stride;
227 
228  /* check if diagonal column block */
229  assert( cblk->fcolnum == cblk->fblokptr->frownum );
230  assert( cblk->lcolnum == cblk->fblokptr->lrownum );
231 
232  if ( cblk->cblktype & CBLK_COMPRESSED ) {
233  /* dataL is a LRblock */
234  lrL = (pastix_lrblock_t *)dataL;
235  L = lrL->u;
236  stride = ncols;
237 
238  assert( lrL->rk == -1 );
239  assert( stride == lrL->rkmax );
240  } else {
241  L = (pastix_complex32_t *)dataL;
242  }
243 
244  /* Factorize diagonal block */
245  flops = FLOPS_CPOTRF( ncols );
246  kernel_trace_start_lvl2( PastixKernelLvl2PXTRF );
247  core_cpxtrfsp(ncols, L, stride, &nbpivots, criterion );
248  kernel_trace_stop_lvl2( flops );
249 
250  kernel_trace_stop( cblk->fblokptr->inlast, PastixKernelPXTRF, ncols, 0, 0, flops, time );
251 
252  if ( nbpivots ) {
253  pastix_atomic_add_32b( &(solvmtx->nbpivots), nbpivots );
254  }
255  return nbpivots;
256 }
257 
258 /**
259  *******************************************************************************
260  *
261  * @brief Compute the LL^t factorization of one panel.
262  *
263  *******************************************************************************
264  *
265  * @param[in] solvmtx
266  * Solver Matrix structure of the problem
267  *
268  * @param[in] cblk
269  * Pointer to the structure representing the panel to factorize in the
270  * cblktab array. Next column blok must be accessible through cblk[1].
271  *
272  * @param[inout] L
273  * The pointer to the correct representation of the lower part of the data.
274  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
275  * - pastix_lr_block if the block is compressed.
276  *
277  *******************************************************************************
278  *
279  * @return The number of static pivoting during factorization of the diagonal
280  * block.
281  *
282  *******************************************************************************/
283 int
285  SolverCblk *cblk,
286  void *L )
287 {
288  pastix_int_t nbpivots;
289  nbpivots = cpucblk_cpxtrfsp1d_pxtrf( solvmtx, cblk, L );
290 
293  cblk, L, L, &(solvmtx->lowrank) );
294  return nbpivots;
295 }
296 
297 
298 /**
299  *******************************************************************************
300  *
301  * @brief Perform the LL^t factorization of a given panel and apply all its
302  * updates.
303  *
304  *******************************************************************************
305  *
306  * @param[in] solvmtx
307  * Solver Matrix structure of the problem
308  *
309  * @param[in] cblk
310  * Pointer to the structure representing the panel to factorize in the
311  * cblktab array. Next column blok must be accessible through cblk[1].
312  *
313  * @param[in] work
314  * Temporary memory buffer.
315  *
316  * @param[in] lwork
317  * Temporary workspace dimension.
318  *
319  *******************************************************************************
320  *
321  * @return The number of static pivoting during factorization of the diagonal
322  * block.
323  *
324  *******************************************************************************/
325 int
327  SolverCblk *cblk,
328  pastix_complex32_t *work,
329  pastix_int_t lwork )
330 {
331  void *L = cblk_getdataL( cblk );
332  SolverCblk *fcblk;
333  SolverBlok *blok, *lblk;
334  pastix_int_t nbpivots;
335 
336  nbpivots = cpucblk_cpxtrfsp1d_panel( solvmtx, cblk, L );
337 
338  blok = cblk->fblokptr + 1; /* First off-diagonal block */
339  lblk = cblk[1].fblokptr; /* Next diagonal block */
340 
341  /* If there are off-diagonal blocks, perform the updates */
342  for( ; blok < lblk; blok++ )
343  {
344  fcblk = solvmtx->cblktab + blok->fcblknm;
345 
346  if ( fcblk->cblktype & CBLK_FANIN ) {
347  cpucblk_calloc( PastixLCoef, fcblk );
348  }
349 
351  cblk, blok, fcblk,
352  L, L, cblk_getdataL( fcblk ),
353  work, lwork, &(solvmtx->lowrank) );
354 
355  cpucblk_crelease_deps( PastixLCoef, solvmtx, cblk, fcblk );
356  }
357 
358  return nbpivots;
359 }
360 
361 /**
362  *******************************************************************************
363  *
364  * @brief Perform the LL^t factorization of a given panel.
365  *
366  *******************************************************************************
367  *
368  * @param[in] solvmtx
369  * Solver Matrix structure of the problem
370  *
371  * @param[in] cblk
372  * Pointer to the structure representing the panel to factorize in the
373  * cblktab array. Next column blok must be accessible through cblk[1].
374  *
375  *******************************************************************************
376  *
377  * @return The number of static pivoting during factorization of the diagonal
378  * block.
379  *
380  *******************************************************************************/
381 int
383  SolverCblk *cblk )
384 {
385  void *L = cblk_getdataL( cblk );
386  SolverBlok *blok, *lblk;
387  pastix_int_t i, nbpivots;
388  pastix_queue_t *queue = solvmtx->computeQueue[ cblk->threadid ];
389 
390  assert( cblk->cblktype & CBLK_TASKS_2D );
391  nbpivots = cpucblk_cpxtrfsp1d_panel( solvmtx, cblk, L );
392 
393  blok = cblk->fblokptr + 1; /* this diagonal block */
394  lblk = cblk[1].fblokptr; /* the next diagonal block */
395 
396  /* if there are off-diagonal supernodes in the column */
397  for( i=0; blok < lblk; i++, blok++ )
398  {
399  assert( !((solvmtx->cblktab + blok->fcblknm)->cblktype & CBLK_RECV) );
400  pqueuePush1( queue, - (blok - solvmtx->bloktab) - 1, cblk->priority + i );
401 
402  /* Skip blocks facing the same cblk */
403  while ( ( blok < lblk ) &&
404  ( blok[0].fcblknm == blok[1].fcblknm ) &&
405  ( blok[0].lcblknm == blok[1].lcblknm ) )
406  {
407  blok++;
408  }
409  }
410 
411  return nbpivots;
412 }
413 
414 /**
415  *******************************************************************************
416  *
417  * @brief Apply the updates of the LL^t factorisation of a given panel.
418  *
419  *******************************************************************************
420  *
421  * @param[in] solvmtx
422  * Solver Matrix structure of the problem
423  *
424  * @param[in] blok
425  * Pointer to the blok where the update start.
426  *
427  * @param[in] work
428  * Temporary memory buffer.
429  *
430  * @param[in] lwork
431  * Temporary workspace dimension.
432  *
433  *******************************************************************************/
434 void
436  SolverBlok *blok,
437  pastix_complex32_t *work,
438  pastix_int_t lwork )
439 {
440  SolverCblk *cblk = solvmtx->cblktab + blok->lcblknm;
441  SolverCblk *fcbk = solvmtx->cblktab + blok->fcblknm;
442  SolverBlok *lblk = cblk[1].fblokptr; /* the next diagonal block */
443  void *L = cblk_getdataL( cblk );
444 
445  if ( fcbk->cblktype & CBLK_FANIN ) {
446  cpucblk_calloc( PastixLCoef, fcbk );
447  }
448 
449  do
450  {
451  /* Update on L */
453  cblk, blok, fcbk,
454  L, L, cblk_getdataL( fcbk ),
455  work, lwork, &(solvmtx->lowrank) );
456 
457  cpucblk_crelease_deps( PastixLCoef, solvmtx, cblk, fcbk );
458  blok++;
459  }
460  while ( ( blok < lblk ) &&
461  ( blok[-1].fcblknm == blok[0].fcblknm ) &&
462  ( blok[-1].lcblknm == blok[0].lcblknm ) );
463 }
int cpucblk_cpxtrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Compute the LL^t factorization of one panel.
static void core_cpxtf2sp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the sequential static pivoting LL^t factorization of the matrix n-by-n A = L * L^t .
Definition: core_cpxtrfsp.c:66
void cpucblk_cpxtrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex32_t *work, pastix_int_t lwork)
Apply the updates of the LL^t factorisation of a given panel.
int cpucblk_cpxtrfsp1d_pxtrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the LL^t factorization of the diagonal block in a panel.
int cpucblk_cpxtrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LL^t factorization of a given panel.
int cpucblk_cpxtrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_int_t lwork)
Perform the LL^t factorization of a given panel and apply all its updates.
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
float _Complex pastix_complex32_t
Definition: datatypes.h:76
double pastix_fixdbl_t
Definition: datatypes.h:65
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition: queue.h:64
Queue structure.
Definition: queue.h:38
static void kernel_trace_stop(int8_t inlast, pastix_ktype_t ktype, int m, int n, int k, double flops, double starttime)
Stop the trace of a single kernel.
static double kernel_trace_start(pastix_ktype_t ktype)
Start the trace of a single kernel.
Definition: kernels_trace.h:87
@ PastixKernelLvl2PXTRF
Definition: kernels_enums.h:90
@ PastixKernelPXTRF
Definition: kernels_enums.h:51
void core_cpxtrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting LL^t factorization of the matrix n-by-n A = L * L^t .
pastix_fixdbl_t cpucblk_cgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, pastix_complex32_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
void cpucblk_crelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void cpucblk_ctrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
Definition: core_ctrsmsp.c:356
void cpucblk_calloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
The block low-rank structure to hold a matrix in low-rank form.
@ PastixLCoef
Definition: api.h:478
@ PastixLower
Definition: api.h:467
@ PastixRight
Definition: api.h:496
@ PastixNonUnit
Definition: api.h:487
@ PastixTrans
Definition: api.h:446
pastix_int_t lrownum
Definition: solver.h:148
pastix_lr_t lowrank
Definition: solver.h:236
pastix_int_t priority
Definition: solver.h:183
pastix_int_t fcblknm
Definition: solver.h:144
SolverBlok *restrict bloktab
Definition: solver.h:229
pastix_int_t frownum
Definition: solver.h:147
SolverBlok * fblokptr
Definition: solver.h:168
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
Definition: solver.h:342
pastix_int_t lcblknm
Definition: solver.h:143
int threadid
Definition: solver.h:182
int8_t inlast
Definition: solver.h:151
SolverCblk *restrict cblktab
Definition: solver.h:228
pastix_int_t stride
Definition: solver.h:169
int8_t cblktype
Definition: solver.h:164
pastix_int_t lcolnum
Definition: solver.h:167
double diagthreshold
Definition: solver.h:238
volatile int32_t nbpivots
Definition: solver.h:239
pastix_int_t fcolnum
Definition: solver.h:166
Solver block structure.
Definition: solver.h:141
Solver column block structure.
Definition: solver.h:161
Solver column block structure.
Definition: solver.h:203