PaStiX Handbook  6.3.0
core_zhetrfsp.c
Go to the documentation of this file.
1 /**
2  *
3  * @file core_zhetrfsp.c
4  *
5  * PaStiX kernel routines for LDL^h factorization.
6  *
7  * @copyright 2011-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.0
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Xavier Lacoste
14  * @author Gregoire Pichon
15  * @date 2023-01-16
16  * @generated from /builds/solverstack/pastix/kernels/core_zhetrfsp.c, normal z -> z, Mon Aug 28 13:40:36 2023
17  *
18  **/
19 #include "common.h"
20 #include "cblas.h"
21 #include "blend/solver.h"
22 #include "pastix_zcores.h"
23 #include "kernels_trace.h"
24 
25 #include <lapacke.h>
26 
27 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 #define MAXSIZEOFBLOCKS 64 /* Edge length of the square blocks used by core_zhetrfsp() to tile the diagonal block */
29 static pastix_complex64_t zone = 1.0; /* Complex constant 1, passed by address as alpha/beta to the CBLAS calls */
30 static pastix_complex64_t mzone = -1.0; /* Complex constant -1, passed by address as alpha to cblas_zgemm() */
31 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
32 
33 /**
34  *******************************************************************************
35  *
36  * @ingroup kernel_blas_lapack_null
37  *
38  * @brief Compute the sequential static pivoting factorization of the hermitian
39  * matrix n-by-n A such that A = L * D * conj(L^t).
40  *
41  *******************************************************************************
42  *
43  * @param[in] n
44  * The number of rows and columns of the matrix A.
45  *
46  * @param[inout] A
47  * The matrix A to factorize with LDL^h factorization. The matrix
48  * is of size lda -by- n. On exit: strict lower part = L, diagonal = D, strict upper part = D * L^h.
49  *
50  * @param[in] lda
51  * The leading dimension of the matrix A.
52  *
53  * @param[inout] nbpivots
54  * Pointer to the number of pivoting operations made during
55  * factorization. It is updated during this call
56  *
57  * @param[in] criterion
58  * Threshold used for static pivoting. If the modulus of a diagonal value is
59  * under this threshold, the value is replaced by the threshold (carrying the
60  * sign of its real part) and the number of pivots is incremented.
61  *
62  *******************************************************************************/
63 static inline void
64 core_zhetf2sp( pastix_int_t n,
65  pastix_complex64_t *A,
66  pastix_int_t lda,
67  pastix_int_t *nbpivots,
68  double criterion )
69 {
70  pastix_int_t k, m, ret;
71  pastix_complex64_t *Akk = A; /* A [k ][k ] */
72  pastix_complex64_t *Amk = A+1; /* A [k+1][k ] */
73  pastix_complex64_t *Akm = A+lda; /* A [k ][k+1] */
74  pastix_complex64_t zalpha;
75  double dalpha;
76 
77  m = n-1; /* m = number of rows left below the current diagonal entry */
78  for (k=0; k<n; k++, m--){
79  if ( cabs(*Akk) < criterion ) { /* static pivoting: |A[k][k]| is below the threshold */
80  if ( creal(*Akk) < 0. ) {
81  *Akk = (pastix_complex64_t)(-criterion);
82  }
83  else {
84  *Akk = (pastix_complex64_t)criterion;
85  }
86  (*nbpivots)++;
87  }
88 
89  zalpha = 1.0 / (*Akk);
90 
91  cblas_zcopy( m, Amk, 1, Akm, lda ); /* row k of the upper part <- column k below the diagonal (not yet divided by D) */
92  ret = LAPACKE_zlacgv_work( m, Akm, lda ); /* conjugate that row: the increment must be lda, as in the copy above */
93  assert( ret == 0 );
94 
95  /* Scale the diagonal to compute L((k+1):n,k) */
96  cblas_zscal(m, CBLAS_SADDR( zalpha ), Amk, 1 );
97 
98  dalpha = -1.0 * creal(*Akk);
99 
100  /* Move to next Akk */
101  Akk += (lda+1);
102 
103  cblas_zher(CblasColMajor, CblasLower,
104  m, dalpha,
105  Amk, 1,
106  Akk, lda); /* rank-1 update of the trailing lower triangle with the freshly scaled column */
107 
108  /* Move to next Amk */
109  Amk = Akk+1;
110  Akm = Akk+lda;
111  }
112  (void)ret;
113 }
114 
115 /**
116  *******************************************************************************
117  *
118  * @brief Compute the block static pivoting factorization of the hermitian
119  * matrix n-by-n A such that A = L * D * conj(L^t).
120  *
121  *******************************************************************************
122  *
123  * @param[in] n
124  * The number of rows and columns of the matrix A.
125  *
126  * @param[inout] A
127  * The matrix A to factorize with LDL^h factorization. The matrix
128  * is of size lda -by- n.
129  *
130  * @param[in] lda
131  * The leading dimension of the matrix A.
132  *
133  * @param[inout] nbpivots
134  * Pointer to the number of pivoting operations made during
135  * factorization. It is updated during this call
136  *
137  * @param[in] criterion
138  * Threshold used for static pivoting. If a diagonal value is under this
139  * threshold, its value is replaced by the threshold and the number of
140  * pivots is incremented.
141  *
142  *******************************************************************************/
143 void
144 core_zhetrfsp( pastix_int_t n,
145  pastix_complex64_t *A,
146  pastix_int_t lda,
147  pastix_int_t *nbpivots,
148  double criterion )
149 {
150  pastix_int_t k, blocknbr, blocksize, matrixsize, col, ret = 0; /* ret is initialized: the branch assigning it is skipped when n <= MAXSIZEOFBLOCKS */
151  pastix_complex64_t *Akk, *Amk, *Akm, *Amm;
152  pastix_complex64_t alpha;
153 
154  /* diagonal supernode is divided into MAXSIZEOFBLOCKS-by-MAXSIZEOFBLOCKS blocks */
155  blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
156 
157  for (k=0; k<blocknbr; k++) {
158 
159  blocksize = pastix_imin(MAXSIZEOFBLOCKS, n-k*MAXSIZEOFBLOCKS);
160  Akk = A+(k*MAXSIZEOFBLOCKS)*(lda+1); /* Lk, k */
161  Amk = Akk + blocksize; /* Lk+1,k */
162  Akm = Akk + blocksize * lda; /* Lk, k+1 */
163  Amm = Amk + blocksize * lda; /* Lk+1,k+1 */
164 
165  /* Factorize the diagonal block Akk*/
166  core_zhetf2sp(blocksize, Akk, lda, nbpivots, criterion);
167 
168  if ((k*MAXSIZEOFBLOCKS+blocksize) < n) {
169 
170  matrixsize = n-(k*MAXSIZEOFBLOCKS+blocksize);
171 
172  /*
173  * Solve the lower rectangle below the diagonal block
174  * L(k+1:n,k) = (L(k,k) D(k,k))^{-1} A(k+1:n,k)
175  */
176  /* 1) Compute A(k+1:n,k) = A(k+1:n,k)L(k,k)^{-H} = L(k+1:n,k)D(k,k) */
177  /* input: L(k,k) in Akk, A(k+1:n,k) in Amk */
178  /* output: A(k+1:n,k) in Amk */
179  cblas_ztrsm(CblasColMajor,
180  CblasRight, CblasLower,
181  CblasConjTrans, CblasUnit,
182  matrixsize, blocksize,
183  CBLAS_SADDR(zone), Akk, lda,
184  Amk, lda);
185 
186  /* Compute L(k+1:n,k) = A(k+1:n,k)D(k,k)^{-1} */
187  for(col = 0; col < blocksize; col++) {
188  /* copy column col of Amk, still scaled by D(k+col,k+col), into row col of Akm and conjugate it */
189  cblas_zcopy(matrixsize, Amk + col*lda, 1,
190  Akm + col, lda);
191  ret = LAPACKE_zlacgv_work( matrixsize, Akm + col, lda );
192  assert( ret == 0 );
193 
194  /* compute L(k+1:n,k+col) = A(k+1:n,k+col)D(k+col,k+col)^{-1} */
195  alpha = 1.0 / *(Akk + col*(lda+1));
196  cblas_zscal( matrixsize, CBLAS_SADDR(alpha),
197  Amk + col*lda, 1 );
198  }
199 
200  /* Update A(k+1:n,k+1:n) = A(k+1:n,k+1:n) - L(k+1:n,k)*(D(k,k)*L(k+1:n,k)^H), the second factor being stored in Akm */
201  cblas_zgemm(CblasColMajor,
202  CblasNoTrans, CblasNoTrans,
203  matrixsize, matrixsize, blocksize,
204  CBLAS_SADDR(mzone), Amk, lda,
205  Akm, lda,
206  CBLAS_SADDR(zone), Amm, lda);
207  }
208  }
209  (void)ret;
210 }
211 
212 /**
213  *******************************************************************************
214  *
215  * @brief Computes the LDL^h factorization of the diagonal block in a panel.
216  *
217  *******************************************************************************
218  *
219  * @param[in] solvmtx
220  * Solver Matrix structure of the problem. Its diagthreshold field is read and its nbpivots counter is atomically incremented.
221  *
222  * @param[in] cblk
223  * Pointer to the structure representing the panel to factorize in the
224  * cblktab array. Next column block must be accessible through cblk[1].
225  *
226  * @param[inout] dataL
227  * The pointer to the correct representation of lower part of the data.
228  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
229  * - pastix_lr_block if the block is compressed.
230  *
231  *******************************************************************************
232  *
233  * @return The number of static pivoting performed during the diagonal block
234  * factorization.
235  *
236  *******************************************************************************/
237 int
238 cpucblk_zhetrfsp1d_hetrf( SolverMatrix *solvmtx,
239  SolverCblk *cblk,
240  void *dataL )
241 {
242  pastix_int_t ncols, stride;
243  pastix_int_t nbpivots = 0;
244  pastix_fixdbl_t time, flops;
245  pastix_complex64_t *L;
246  pastix_lrblock_t *lrL;
247  double criterion = solvmtx->diagthreshold; /* static pivoting threshold handed to core_zhetrfsp() */
248 
249  time = kernel_trace_start( PastixKernelHETRF );
250 
251  ncols = cblk->lcolnum - cblk->fcolnum + 1; /* number of columns of the panel */
252  stride = (cblk->cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->stride; /* leading dimension of the diagonal block */
253 
254  if ( cblk->cblktype & CBLK_COMPRESSED ) {
255  /* dataL is a LRblock */
256  lrL = (pastix_lrblock_t *)dataL;
257  assert( lrL->rk == -1 ); /* presumably rk == -1 marks a block kept in full-rank form -- TODO confirm */
258  L = lrL->u;
259  stride = ncols;
260 
261  assert( stride == lrL->rkmax );
262  } else {
263  L = (pastix_complex64_t *)dataL;
264  }
265 
266  /*
267  * Factorize diagonal block in L D L^h
268  *
269  * - lower part holds L
270  * - diagonal holds D
271  * - upper part holds (DL^h)
272  */
273  flops = FLOPS_ZHETRF( ncols );
274  kernel_trace_start_lvl2( PastixKernelLvl2HETRF );
275  core_zhetrfsp( ncols, L, stride, &nbpivots, criterion );
276  kernel_trace_stop_lvl2( flops );
277 
278  kernel_trace_stop( cblk->fblokptr->inlast, PastixKernelHETRF, ncols, 0, 0, flops, time );
279 
280  if ( nbpivots ) { /* touch the shared counter only when at least one pivot was replaced */
281  pastix_atomic_add_32b( &(solvmtx->nbpivots), nbpivots );
282  }
283  return nbpivots;
284 }
285 
286 /**
287  *******************************************************************************
288  *
289  * core_zhetrfsp1d_gemm - Apply to the facing panel fcblk the contributions
290  * generated by the block blok of cblk combined with blok and every block that follows it in cblk.
291  *
292  *******************************************************************************
293  *
294  * @param[in] cblk
295  * The pointer to the data structure that describes the panel from
296  * which we compute the contributions. Next column block must be
297  * accessible through cblk[1].
298  *
299  * @param[in] blok
300  * The pointer to the data structure that describes the blok from which
301  * we compute the contributions.
302  *
303  * @param[in] fcblk
304  * The pointer to the data structure that describes the panel on
305  * which we compute the contributions. Next column block must be
306  * accessible through fcblk[1].
307  *
308  * @param[inout] L
309  * The pointer to the matrix storing the coefficients of the
310  * panel. Must be of size cblk.stride -by- cblk.width
311  *
312  * @param[inout] C
313  * The pointer to the matrix storing the coefficients of the
314  * target.
315  * NOTE(review): listing lines 320 (start of the function header) and 390 (start of the update call, presumably core_zgemdm()) are missing from this extract.
316  * @param[inout] work
317  * Temporary buffer used in core_zgemdm().
318  *
319  *******************************************************************************/
321  const SolverBlok *blok,
322  SolverCblk *fcblk,
323  const pastix_complex64_t *L,
324  pastix_complex64_t *C,
325  pastix_complex64_t *work )
326 {
327  const SolverBlok *iterblok;
328  const SolverBlok *fblok;
329  const SolverBlok *lblok;
330  const pastix_complex64_t *blokA;
331  const pastix_complex64_t *blokB;
332  const pastix_complex64_t *blokD;
333  pastix_complex64_t *blokC;
334 
335  pastix_int_t M, N, K, lda, ldb, ldc, ldd;
336 
337  /* Get the panel update dimensions */
338  K = cblk_colnbr( cblk ); /* number of columns of the source panel */
339  N = blok_rownbr( blok ); /* number of rows of the block blok */
340 
341  /* Get info for diagonal, and the B block */
342  blokD = L; /* diagonal values are read from the start of L with increment ldd */
343  blokB = L + blok->coefind;
344  if ( cblk->cblktype & CBLK_LAYOUT_2D ) {
345  ldb = N;
346  ldd = K + 1;
347  }
348  else {
349  ldb = cblk->stride;
350  ldd = cblk->stride + 1;
351  }
352 
353  /*
354  * Add contribution to C in fcblk:
355  * Get the first facing block of the distant panel, and the last block of
356  * the current cblk
357  */
358  fblok = fcblk->fblokptr;
359  lblok = cblk[1].fblokptr; /* first block of the next panel: end marker for the loop below */
360 
361  for (iterblok=blok; iterblok<lblok; iterblok++) {
362 
363  /* Find facing blok */
364  while (!is_block_inside_fblock( iterblok, fblok ))
365  {
366  fblok++;
367  assert( fblok < fcblk[1].fblokptr );
368  }
369 
370  /* Get the A block and its dimensions */
371  M = blok_rownbr( iterblok );
372  blokA = L + iterblok->coefind;
373  lda = (cblk->cblktype & CBLK_LAYOUT_2D) ? M : cblk->stride;
374 
375  /* Get the C block */
376  ldc = (fcblk->cblktype & CBLK_LAYOUT_2D) ? blok_rownbr(fblok) : fcblk->stride;
377 
378  blokC = C + fblok->coefind
379  + iterblok->frownum - fblok->frownum
380  + (blok->frownum - fcblk->fcolnum) * ldc; /* row offset inside fblok, then column offset inside fcblk */
381 
382  {
383  pastix_int_t ldw;
384  int ret;
385 
386  /* Compute ldw which should never be larger than SOLVE_COEFMAX */
387  ldw = (M+1) * K;
388 
389  pastix_cblk_lock( fcblk ); /* the update of C is done while holding the lock of fcblk */
391  M, N, K,
392  -1.0, blokA, lda,
393  blokB, ldb,
394  1.0, blokC, ldc,
395  blokD, ldd,
396  work, ldw );
397  pastix_cblk_unlock( fcblk );
398  assert(ret == PASTIX_SUCCESS);
399  (void)ret;
400  }
401  }
402 }
403 
404 /**
405  *******************************************************************************
406  *
407  * @brief Compute the LDL^h factorization of one panel.
408  *
409  *******************************************************************************
410  *
411  * @param[in] solvmtx
412  * Solver Matrix structure of the problem
413  *
414  * @param[in] cblk
415  * Pointer to the structure representing the panel to factorize in the
416  * cblktab array. Next column block must be accessible through cblk[1].
417  *
418  * @param[inout] L
419  * The pointer to the correct representation of lower part of the data.
420  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
421  * - pastix_lr_block if the block is compressed.
422  *
423  * @param[inout] DLh
424  * The pointer to the correct representation of DLh matrix
425  * (stored in the upper part by default). May be NULL, in which case nothing is written to it.
426  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
427  * - pastix_lr_block if the block is compressed.
428  *
429  *******************************************************************************
430  *
431  * @return The number of static pivoting during factorization of the diagonal
432  * block.
433  *
434  *******************************************************************************/
435 int
436 cpucblk_zhetrfsp1d_panel( SolverMatrix *solvmtx,
437  SolverCblk *cblk,
438  void *L,
439  void *DLh )
440 {
441  pastix_int_t nbpivots;
442  nbpivots = cpucblk_zhetrfsp1d_hetrf( solvmtx, cblk, L ); /* factorize the diagonal block first */
443 
444  /*
445  * We exploit the fact that (DL^h) is stored in the upper triangle part of L. NOTE(review): the head of the call below (listing lines 447-448, presumably cpucblk_ztrsmsp) is missing from this extract.
446  */
449  cblk, L, L, &(solvmtx->lowrank) );
450 
451  if ( (DLh != NULL) && (cblk->cblktype & CBLK_LAYOUT_2D) ) {
452 
453  /* Copy L into the temporary buffer and multiply by D */
454  cpucblk_zscalo( PastixConjTrans, cblk, L, DLh );
455  }
456  return nbpivots;
457 }
458 
459 
460 /**
461  *******************************************************************************
462  *
463  * @brief Perform the LDL^h factorization of a given panel and apply all its
464  * updates.
465  *
466  *******************************************************************************
467  *
468  * @param[in] solvmtx
469  * Solver Matrix structure of the problem
470  *
471  * @param[in] cblk
472  * Pointer to the structure representing the panel to factorize in the
473  * cblktab array. Next column block must be accessible through cblk[1].
474  *
475  * @param[in] DLh
476  * Temporary memory buffer to store the conjugate transpose of DLh. Ignored when cblk is not in 2D layout.
477  *
478  * @param[in] work
479  * Temporary memory buffer.
480  *
481  * @param[in] lwork
482  * Temporary workspace dimension.
483  *
484  *******************************************************************************
485  *
486  * @return The number of static pivoting during factorization of the diagonal
487  * block.
488  *
489  *******************************************************************************/
490 int
491 cpucblk_zhetrfsp1d( SolverMatrix *solvmtx,
492  SolverCblk *cblk,
493  pastix_complex64_t *DLh,
494  pastix_complex64_t *work,
495  pastix_int_t lwork )
496 {
497  void *dataL = cblk_getdataL( cblk );
498  void *dataDLh = cblk_getdataU( cblk );
499  SolverCblk *fcblk;
500  SolverBlok *blok, *lblk;
501  pastix_int_t nbpivots;
502 
503  if ( !(cblk->cblktype & CBLK_LAYOUT_2D) ) {
504  DLh = NULL; /* DLh == NULL selects the core_zhetrfsp1d_gemm() update path in the loop below */
505  }
506  else {
507  if (cblk->cblktype & CBLK_COMPRESSED) {
508  cpucblk_zalloc_lrws( cblk, dataDLh, DLh ); /* initialize the low-rank blocks of dataDLh from the DLh workspace */
509  }
510  else {
511  assert( dataDLh == NULL );
512  dataDLh = DLh;
513  }
514  }
515 
516  /* Factorize the panel: diagonal block, then the off-diagonal blocks of the column */
517  nbpivots = cpucblk_zhetrfsp1d_panel( solvmtx, cblk, dataL, dataDLh );
518 
519  blok = cblk->fblokptr+1; /* first block following the diagonal block of this panel */
520  lblk = cblk[1].fblokptr; /* the next diagonal block */
521 
522  for( ; blok < lblk; blok++ )
523  {
524  fcblk = solvmtx->cblktab + blok->fcblknm; /* panel facing the current block */
525 
526  if ( fcblk->cblktype & CBLK_FANIN ) {
527  cpucblk_zalloc( PastixLCoef, fcblk );
528  }
529 
530  /* Update on L */
531  if (DLh == NULL) {
532  core_zhetrfsp1d_gemm( cblk, blok, fcblk,
533  dataL, fcblk->lcoeftab,
534  work );
535  }
536  else { /* NOTE(review): the head of the call below (listing line 537, presumably cpucblk_zgemmsp) is missing from this extract */
538  cblk, blok, fcblk,
539  dataL, dataDLh, cblk_getdataL( fcblk ),
540  work, lwork, &(solvmtx->lowrank) );
541  }
542  cpucblk_zrelease_deps( PastixLCoef, solvmtx, cblk, fcblk );
543  }
544 
545  return nbpivots;
546 }
static void core_zhetf2sp(pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the sequential static pivoting factorization of the hermitian matrix n-by-n A such that A = L...
Definition: core_zhetrfsp.c:64
int cpucblk_zhetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex64_t *DLh, pastix_complex64_t *work, pastix_int_t lwork)
Perform the LDL^h factorization of a given panel and apply all its updates.
int cpucblk_zhetrfsp1d_hetrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Computes the LDL^h factorization of the diagonal block in a panel.
int cpucblk_zhetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLh)
Compute the LDL^h factorization of one panel.
void core_zhetrfsp(pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the block static pivoting factorization of the hermitian matrix n-by-n A such that A = L * D ...
void core_zhetrfsp1d_gemm(const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const pastix_complex64_t *L, pastix_complex64_t *C, pastix_complex64_t *work)
int core_zgemdm(pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K, pastix_complex64_t alpha, const pastix_complex64_t *A, int LDA, const pastix_complex64_t *B, int LDB, pastix_complex64_t beta, pastix_complex64_t *C, int LDC, const pastix_complex64_t *D, int incD, pastix_complex64_t *WORK, int LWORK)
Perform one of the following matrix-matrix operations.
Definition: core_zgemdm.c:139
void cpucblk_ztrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
Definition: core_ztrsmsp.c:356
void cpucblk_zalloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
pastix_fixdbl_t cpucblk_zgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, pastix_complex64_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
void cpucblk_zrelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void cpucblk_zalloc_lrws(const SolverCblk *cblk, pastix_lrblock_t *lrblok, pastix_complex64_t *ws)
Initialize lrblock structure from a workspace from all blocks of the cblk associated.
Definition: cpucblk_zinit.c:96
void cpucblk_zscalo(pastix_trans_t trans, SolverCblk *cblk, void *dataL, void *dataLD)
Copy the L term with scaling for the two-terms algorithm.
Definition: core_zscalo.c:170
The block low-rank structure to hold a matrix in low-rank form.
@ PastixLCoef
Definition: api.h:478
@ PastixUpper
Definition: api.h:466
@ PastixRight
Definition: api.h:496
@ PastixNonUnit
Definition: api.h:487
@ PastixConjTrans
Definition: api.h:449
@ PastixNoTrans
Definition: api.h:447
@ PastixTrans
Definition: api.h:448
@ PASTIX_SUCCESS
Definition: api.h:369
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
Definition: solver.h:389
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:323
pastix_int_t fcblknm
Definition: solver.h:140
static int is_block_inside_fblock(const SolverBlok *blok, const SolverBlok *fblok)
Check if a block is included inside another one.
Definition: solver.h:498
pastix_int_t frownum
Definition: solver.h:142
static void * cblk_getdataU(const SolverCblk *cblk)
Get the pointer to the data associated to the upper part of the cblk.
Definition: solver.h:348
pastix_int_t coefind
Definition: solver.h:144
SolverBlok * fblokptr
Definition: solver.h:163
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
Definition: solver.h:336
int8_t inlast
Definition: solver.h:146
pastix_int_t stride
Definition: solver.h:164
int8_t cblktype
Definition: solver.h:159
pastix_int_t lcolnum
Definition: solver.h:162
void * lcoeftab
Definition: solver.h:171
pastix_int_t fcolnum
Definition: solver.h:161
Solver block structure.
Definition: solver.h:137
Solver column block structure.
Definition: solver.h:156