PaStiX Handbook  6.3.0
core_dsytrfsp.c
Go to the documentation of this file.
1 /**
2  *
3  * @file core_dsytrfsp.c
4  *
5  * PaStiX kernel routines for LDL^t factorization.
6  *
7  * @copyright 2011-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.0
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Xavier Lacoste
14  * @author Gregoire Pichon
15  * @date 2021-08-24
16  * @generated from /builds/solverstack/pastix/kernels/core_zsytrfsp.c, normal z -> d, Thu Jun 8 12:22:40 2023
17  *
18  **/
19 #include "common.h"
20 #include "cblas.h"
21 #include "blend/solver.h"
22 #include "pastix_dcores.h"
23 #include "kernels_trace.h"
24 
25 #include <lapacke.h>
26 
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 #define MAXSIZEOFBLOCKS 64 /* Width of the diagonal tiles used by the blocked factorization core_dsytrfsp() */
29 static double done = 1.0; /* +1.0 scalar passed as alpha/beta to the CBLAS calls of this file */
30 static double mdone = -1.0; /* -1.0 scalar passed as alpha to the trailing dgemm update */
31 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
32 
33 /**
34  *******************************************************************************
35  *
36  * @ingroup kernel_blas_lapack_null
37  *
38  * @brief Compute the sequential static pivoting factorization of the symmetric
39  * matrix n-by-n A such that A = L * D * L^t.
40  *
41  *******************************************************************************
42  *
43  * @param[in] n
44  * The number of rows and columns of the matrix A.
45  *
46  * @param[inout] A
47  * The matrix A to factorize with LDL^t factorization. The matrix
48  * is of size lda -by- n.
49  *
50  * @param[in] lda
51  * The leading dimension of the matrix A.
52  *
53  * @param[inout] nbpivots
54  * Pointer to the number of piovting operations made during
55  * factorization. It is updated during this call
56  *
57  * @param[in] criterion
58  * Threshold use for static pivoting. If diagonal value is under this
59  * threshold, its value is replaced by the threshold and the number of
60  * pivots is incremented.
61  *
62  *******************************************************************************/
63 static inline void
64 core_dsytf2sp( pastix_int_t n,
65  double *A,
66  pastix_int_t lda,
67  pastix_int_t *nbpivots,
68  double criterion )
69 {
70  pastix_int_t k, m;
71  double *Akk = A; /* A [k ][k ] */
72  double *Amk = A+1; /* A [k+1][k ] */
73  double *Akm = A+lda; /* A [k ][k+1] */
74  double alpha;
75 
76  m = n-1;
77  for (k=0; k<n; k++, m--){
78  if ( fabs(*Akk) < criterion ) {
79  if ( (*Akk) < 0. ) {
80  *Akk = (double)(-criterion);
81  }
82  else {
83  *Akk = (double)criterion;
84  }
85  (*nbpivots)++;
86  }
87 
88  alpha = 1.0 / (*Akk);
89 
90  /* Transpose the column before scaling */
91  cblas_dcopy( m, Amk, 1, Akm, lda );
92 
93  /* Scale the diagonal to compute L((k+1):n,k) */
94  cblas_dscal(m, ( alpha ), Amk, 1 );
95 
96  alpha = -(*Akk);
97 
98  /* Move to next Akk */
99  Akk += (lda+1);
100 
101  cblas_dsyrk(CblasColMajor, CblasLower, CblasNoTrans,
102  m, 1,
103  ( alpha ), Amk, lda,
104  ( done ), Akk, lda);
105 
106  /* Move to next Amk */
107  Amk = Akk+1;
108  Akm = Akk+lda;
109  }
110 }
111 
112 /**
113  *******************************************************************************
114  *
115  * @brief Compute the block static pivoting factorization of the symmetric
116  * matrix n-by-n A such that A = L * D * L^t.
117  *
118  *******************************************************************************
119  *
120  * @param[in] n
121  * The number of rows and columns of the matrix A.
122  *
123  * @param[inout] A
124  * The matrix A to factorize with LDL^t factorization. The matrix
125  * is of size lda -by- n.
126  *
127  * @param[in] lda
128  * The leading dimension of the matrix A.
129  *
130  * @param[inout] nbpivots
131  * Pointer to the number of piovting operations made during
132  * factorization. It is updated during this call
133  *
134  * @param[in] criterion
135  * Threshold use for static pivoting. If diagonal value is under this
136  * threshold, its value is replaced by the threshold and the nu,ber of
137  * pivots is incremented.
138  *
139  *******************************************************************************/
140 void
141 core_dsytrfsp( pastix_int_t n,
142  double *A,
143  pastix_int_t lda,
144  pastix_int_t *nbpivots,
145  double criterion )
146 {
147  pastix_int_t k, blocknbr, blocksize, matrixsize, col;
148  double *Akk, *Amk, *Akm, *Amm;
149  double alpha;
150 
151  /* diagonal supernode is divided into MAXSIZEOFBLOCK-by-MAXSIZEOFBLOCKS blocks */
152  blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
153 
154  for (k=0; k<blocknbr; k++) {
155 
156  blocksize = pastix_imin(MAXSIZEOFBLOCKS, n-k*MAXSIZEOFBLOCKS);
157  Akk = A+(k*MAXSIZEOFBLOCKS)*(lda+1); /* Lk, k */
158  Amk = Akk + blocksize; /* Lk+1,k */
159  Akm = Akk + blocksize * lda; /* Lk, k+1 */
160  Amm = Amk + blocksize * lda; /* Lk+1,k+1 */
161 
162  /* Factorize the diagonal block Akk*/
163  core_dsytf2sp(blocksize, Akk, lda, nbpivots, criterion);
164 
165  if ((k*MAXSIZEOFBLOCKS+blocksize) < n) {
166 
167  matrixsize = n-(k*MAXSIZEOFBLOCKS+blocksize);
168 
169  /*
170  * Solve the lower rectangle below the diagonal block
171  * L(k+1:n,k) = (L(k,k) D(k,k))^{-1} A(k+1:n,k)
172  */
173  /* 1) Compute A(k+1:n,k) = A(k+1:n,k)L(k,k)^{-T} = D(k,k)L(k+1:n,k) */
174  /* input: L(k,k) in tmp, A(k+1:n,k) in tmp1 */
175  /* output: A(k+1:n,k) in tmp1 */
176  cblas_dtrsm(CblasColMajor,
177  CblasRight, CblasLower,
178  CblasTrans, CblasUnit,
179  matrixsize, blocksize,
180  (done), Akk, lda,
181  Amk, lda);
182 
183  /* Compute L(k+1:n,k) = A(k+1:n,k)D(k,k)^{-1} */
184  for(col = 0; col < blocksize; col++) {
185  /* copy L(k+1+col:n,k+col)*D(k+col,k+col) into work(:,col) */
186  cblas_dcopy(matrixsize, Amk + col*lda, 1,
187  Akm + col, lda);
188 
189  /* compute L(k+1+col:n,k+col) = A(k+1+col:n,k+col)D(k+col,k+col)^{-1} */
190  alpha = 1.0 / *(Akk + col*(lda+1));
191  cblas_dscal( matrixsize, (alpha),
192  Amk + col*lda, 1 );
193  }
194 
195  /* Update A(k+1:n,k+1:n) = A(k+1:n,k+1:n) - (L(k+1:n,k)*D(k,k))*L(k+1:n,k)^T */
196  cblas_dgemm(CblasColMajor,
197  CblasNoTrans, CblasNoTrans,
198  matrixsize, matrixsize, blocksize,
199  (mdone), Amk, lda,
200  Akm, lda,
201  (done), Amm, lda);
202  }
203  }
204 }
205 
206 /**
207  *******************************************************************************
208  *
209  * @brief Computes the LDL^t factorization of the diagonal block in a panel.
210  *
211  *******************************************************************************
212  *
213  * @param[in] solvmtx
214  * Solver Matrix structure of the problem
215  *
216  * @param[in] cblk
217  * Pointer to the structure representing the panel to factorize in the
218  * cblktab array. Next column blok must be accessible through cblk[1].
219  *
220  * @param[inout] dataL
221  * The pointer to the correct representation of lower part of the data.
222  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
223  * - pastix_lr_block if the block is compressed.
224  *
225  *******************************************************************************
226  *
227  * @return The number of static pivoting performed during the diagonal block
228  * factorization.
229  *
230  *******************************************************************************/
231 int
232 cpucblk_dsytrfsp1d_sytrf( SolverMatrix *solvmtx,
233  SolverCblk *cblk,
234  void *dataL )
235 {
236  pastix_int_t ncols, stride;
237  pastix_int_t nbpivots = 0;
238  pastix_fixdbl_t time, flops;
239  double *L;
240  pastix_lrblock_t *lrL;
241  double criterion = solvmtx->diagthreshold;
242 
243  time = kernel_trace_start( PastixKernelSYTRF );
244 
245  ncols = cblk->lcolnum - cblk->fcolnum + 1;
246  stride = (cblk->cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->stride;
247 
248  if ( cblk->cblktype & CBLK_COMPRESSED ) {
249  /* dataL is a LRblock */
250  lrL = (pastix_lrblock_t *)dataL;
251  L = lrL->u;
252  stride = ncols;
253 
254  assert( lrL->rk == -1 );
255  assert( stride == lrL->rkmax );
256  } else {
257  L = (double *)dataL;
258  }
259 
260  /*
261  * Factorize diagonal block in L D L^t
262  *
263  * - lower part holds L
264  * - diagonal holds D
265  * - uppert part holds (DL^t)
266  */
267  flops = FLOPS_DSYTRF( ncols );
268  kernel_trace_start_lvl2( PastixKernelLvl2SYTRF );
269  core_dsytrfsp( ncols, L, stride, &nbpivots, criterion );
270  kernel_trace_stop_lvl2( flops );
271 
272  kernel_trace_stop( cblk->fblokptr->inlast, PastixKernelSYTRF, ncols, 0, 0, flops, time );
273 
274  if ( nbpivots ) {
275  pastix_atomic_add_32b( &(solvmtx->nbpivots), nbpivots );
276  }
277  return nbpivots;
278 }
279 
280 /**
281  *******************************************************************************
282  *
283  * core_dsytrfsp1d_gemm - Computes the LDL^t factorization of one panel and
284  * apply all the trsm updates to this panel.
285  *
286  *******************************************************************************
287  *
288  * @param[in] cblk
289  * The pointer to the data structure that describes the panel from
290  * which we compute the contributions. Next column blok must be
291  * accessible through cblk[1].
292  *
293  * @param[in] blok
294  * The pointer to the data structure that describes the blok from which
295  * we compute the contributions.
296  *
297  * @param[in] fcblk
298  * The pointer to the data structure that describes the panel on
299  * which we compute the contributions. Next column blok must be
300  * accessible through fcblk[1].
301  *
302  * @param[inout] L
303  * The pointer to the matrix storing the coefficients of the
304  * panel. Must be of size cblk.stride -by- cblk.width
305  *
306  * @param[inout] C
307  * The pointer to the matrix storing the coefficients of the
308  * target.
309  *
310  * @param[inout] work
311  * Temporary buffer used in core_dgemdm().
312  *
313  *******************************************************************************/
315  const SolverBlok *blok,
316  SolverCblk *fcblk,
317  const double *L,
318  double *C,
319  double *work )
320 {
321  const SolverBlok *iterblok;
322  const SolverBlok *fblok;
323  const SolverBlok *lblok;
324  const double *blokA;
325  const double *blokB;
326  const double *blokD;
327  double *blokC;
328 
329  pastix_int_t M, N, K, lda, ldb, ldc, ldd;
330 
331  /* Get the panel update dimensions */
332  K = cblk_colnbr( cblk );
333  N = blok_rownbr( blok );
334 
335  /* Get info for diagonal, and the B block */
336  blokD = L;
337  blokB = L + blok->coefind;
338  if ( cblk->cblktype & CBLK_LAYOUT_2D ) {
339  ldb = N;
340  ldd = K + 1;
341  }
342  else {
343  ldb = cblk->stride;
344  ldd = cblk->stride + 1;
345  }
346 
347  /*
348  * Add contribution to C in fcblk:
349  * Get the first facing block of the distant panel, and the last block of
350  * the current cblk
351  */
352  fblok = fcblk->fblokptr;
353  lblok = cblk[1].fblokptr;
354 
355  for (iterblok=blok; iterblok<lblok; iterblok++) {
356 
357  /* Find facing blok */
358  while (!is_block_inside_fblock( iterblok, fblok ))
359  {
360  fblok++;
361  assert( fblok < fcblk[1].fblokptr );
362  }
363 
364  /* Get the A block and its dimensions */
365  M = blok_rownbr( iterblok );
366  blokA = L + iterblok->coefind;
367  lda = (cblk->cblktype & CBLK_LAYOUT_2D) ? M : cblk->stride;
368 
369  /* Get the C block */
370  ldc = (fcblk->cblktype & CBLK_LAYOUT_2D) ? blok_rownbr(fblok) : fcblk->stride;
371 
372  blokC = C + fblok->coefind
373  + iterblok->frownum - fblok->frownum
374  + (blok->frownum - fcblk->fcolnum) * ldc;
375 
376  {
377  pastix_int_t ldw;
378  int ret;
379 
380  /* Compute ldw which should never be larger than SOLVE_COEFMAX */
381  ldw = (M+1) * K;
382 
383  pastix_cblk_lock( fcblk );
385  M, N, K,
386  -1.0, blokA, lda,
387  blokB, ldb,
388  1.0, blokC, ldc,
389  blokD, ldd,
390  work, ldw );
391  pastix_cblk_unlock( fcblk );
392  assert(ret == PASTIX_SUCCESS);
393  (void)ret;
394  }
395  }
396 }
397 
398 /**
399  *******************************************************************************
400  *
401  * @brief Compute the LDL^t factorization of one panel.
402  *
403  *******************************************************************************
404  *
405  * @param[in] solvmtx
406  * Solver Matrix structure of the problem
407  *
408  * @param[in] cblk
409  * Pointer to the structure representing the panel to factorize in the
410  * cblktab array. Next column blok must be accessible through cblk[1].
411  *
412  * @param[inout] L
413  * The pointer to the matrix storing the coefficients of the
414  * panel. Must be of size cblk.stride -by- cblk.width
415  *
416  * @param[inout] DLt
417  * The pointer to the upper matrix storing the coefficients the
418  * temporary DL^t product. Must be of size cblk.stride -by- cblk.width
419  *
420  *******************************************************************************
421  *
422  * @return The number of static pivoting during factorization of the diagonal
423  * block.
424  *
425  *******************************************************************************/
426 int
427 cpucblk_dsytrfsp1d_panel( SolverMatrix *solvmtx,
428  SolverCblk *cblk,
429  void *L,
430  void *DLt )
431 {
432  pastix_int_t nbpivots;
433  nbpivots = cpucblk_dsytrfsp1d_sytrf( solvmtx, cblk, L );
434 
435  /*
436  * We exploit the fact that (DL^t) is stored in the upper triangle part of L
437  */
440  cblk, L, L, &(solvmtx->lowrank) );
441 
442  if ( (DLt != NULL) && (cblk->cblktype & CBLK_LAYOUT_2D) ) {
443 
444  /* Copy L into the temporary buffer and multiply by D */
445  cpucblk_dscalo( PastixNoTrans, cblk, L, DLt );
446  }
447  return nbpivots;
448 }
449 
450 
451 /**
452  *******************************************************************************
453  *
454  * @brief Perform the LDL^t factorization of a given panel and apply all its
455  * updates.
456  *
457  *******************************************************************************
458  *
459  * @param[in] solvmtx
460  * Solver Matrix structure of the problem
461  *
462  * @param[in] cblk
463  * Pointer to the structure representing the panel to factorize in the
464  * cblktab array. Next column blok must be accessible through cblk[1].
465  *
466  * @param[in] DLt
467  * Temporary memory buffer to store the transpose of DLt.
468  *
469  * @param[in] work
470  * Temporary memory buffer.
471  *
472  * @param[in] lwork
473  * Temporary workspace dimension.
474  *
475  *******************************************************************************
476  *
477  * @return The number of static pivoting during factorization of the diagonal
478  * block.
479  *
480  *******************************************************************************/
481 int
482 cpucblk_dsytrfsp1d( SolverMatrix *solvmtx,
483  SolverCblk *cblk,
484  double *DLt,
485  double *work,
486  pastix_int_t lwork )
487 {
488  void *dataL = cblk_getdataL( cblk );
489  void *dataDLt = cblk_getdataU( cblk );
490  SolverCblk *fcblk;
491  SolverBlok *blok, *lblk;
492  pastix_int_t nbpivots;
493 
494  if ( !(cblk->cblktype & CBLK_LAYOUT_2D) ) {
495  DLt = NULL;
496  }
497  else {
498  if (cblk->cblktype & CBLK_COMPRESSED) {
499  cpucblk_dalloc_lrws( cblk, dataDLt, DLt );
500  }
501  else {
502  assert( dataDLt == NULL );
503  dataDLt = DLt;
504  }
505  }
506 
507  /* if there are off-diagonal supernodes in the column */
508  nbpivots = cpucblk_dsytrfsp1d_panel( solvmtx, cblk, dataL, dataDLt );
509 
510  blok = cblk->fblokptr+1; /* this diagonal block */
511  lblk = cblk[1].fblokptr; /* the next diagonal block */
512 
513  for( ; blok < lblk; blok++ )
514  {
515  fcblk = solvmtx->cblktab + blok->fcblknm;
516 
517  if ( fcblk->cblktype & CBLK_FANIN ) {
518  cpucblk_dalloc( PastixLCoef, fcblk );
519  }
520 
521  /* Update on L */
522  if ( DLt == NULL ) {
523  core_dsytrfsp1d_gemm( cblk, blok, fcblk,
524  dataL, fcblk->lcoeftab,
525  work );
526  }
527  else {
529  cblk, blok, fcblk,
530  dataL, dataDLt, cblk_getdataL( fcblk ),
531  work, lwork, &(solvmtx->lowrank) );
532  }
533  cpucblk_drelease_deps( PastixLCoef, solvmtx, cblk, fcblk );
534  }
535 
536  return nbpivots;
537 }
static void core_dsytf2sp(pastix_int_t n, double *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the sequential static pivoting factorization of the symmetric matrix n-by-n A such that A = L...
Definition: core_dsytrfsp.c:64
int core_dgemdm(pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K, double alpha, const double *A, int LDA, const double *B, int LDB, double beta, double *C, int LDC, const double *D, int incD, double *WORK, int LWORK)
Perform one of the following matrix-matrix operations.
Definition: core_dgemdm.c:139
void core_dsytrfsp(pastix_int_t n, double *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the block static pivoting factorization of the symmetric matrix n-by-n A such that A = L * D ...
void core_dsytrfsp1d_gemm(const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const double *L, double *C, double *work)
int cpucblk_dsytrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLt)
Compute the LDL^t factorization of one panel.
void cpucblk_dalloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
void cpucblk_dtrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
Definition: core_dtrsmsp.c:356
void cpucblk_dscalo(pastix_trans_t trans, SolverCblk *cblk, void *dataL, void *dataLD)
Copy the L term with scaling for the two-terms algorithm.
Definition: core_dscalo.c:170
int cpucblk_dsytrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, double *DLt, double *work, pastix_int_t lwork)
Perform the LDL^t factorization of a given panel and apply all its updates.
void cpucblk_dalloc_lrws(const SolverCblk *cblk, pastix_lrblock_t *lrblok, double *ws)
Initialize lrblock structure from a workspace from all blocks of the cblk associated.
Definition: cpucblk_dinit.c:96
pastix_fixdbl_t cpucblk_dgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, double *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
void cpucblk_drelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
int cpucblk_dsytrfsp1d_sytrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Computes the LDL^t factorization of the diagonal block in a panel.
The block low-rank structure to hold a matrix in low-rank form.
@ PastixLCoef
Definition: api.h:475
@ PastixUpper
Definition: api.h:463
@ PastixRight
Definition: api.h:493
@ PastixNonUnit
Definition: api.h:484
@ PastixNoTrans
Definition: api.h:444
@ PastixTrans
Definition: api.h:445
@ PASTIX_SUCCESS
Definition: api.h:366
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
Definition: solver.h:388
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:322
pastix_int_t fcblknm
Definition: solver.h:140
static int is_block_inside_fblock(const SolverBlok *blok, const SolverBlok *fblok)
Check if a block is included inside another one.
Definition: solver.h:501
pastix_int_t frownum
Definition: solver.h:142
static void * cblk_getdataU(const SolverCblk *cblk)
Get the pointer to the data associated to the upper part of the cblk.
Definition: solver.h:347
pastix_int_t coefind
Definition: solver.h:144
SolverBlok * fblokptr
Definition: solver.h:163
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
Definition: solver.h:335
int8_t inlast
Definition: solver.h:146
pastix_int_t stride
Definition: solver.h:164
int8_t cblktype
Definition: solver.h:159
pastix_int_t lcolnum
Definition: solver.h:162
void * lcoeftab
Definition: solver.h:171
pastix_int_t fcolnum
Definition: solver.h:161
Solver block structure.
Definition: solver.h:137
Solver column block structure.
Definition: solver.h:156