PaStiX Handbook  6.4.0
core_chetrfsp.c
Go to the documentation of this file.
1 /**
2  *
3  * @file core_chetrfsp.c
4  *
5  * PaStiX kernel routines for LDL^h factorization.
6  *
7  * @copyright 2011-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.4.0
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Xavier Lacoste
14  * @author Gregoire Pichon
15  * @author Alycia Lisito
16  * @author Nolan Bredel
17  * @date 2024-07-05
18  * @generated from /builds/solverstack/pastix/kernels/core_zhetrfsp.c, normal z -> c, Fri Jul 12 15:09:43 2024
19  *
20  **/
21 #include "common.h"
22 #include "cblas.h"
23 #include "blend/solver.h"
24 #include "pastix_ccores.h"
25 #include "kernels_trace.h"
26 
27 #include <lapacke.h>
28 
29 #ifndef DOXYGEN_SHOULD_SKIP_THIS
30 #define MAXSIZEOFBLOCKS 64
31 static pastix_complex32_t cone = 1.0;
32 static pastix_complex32_t mcone = -1.0;
33 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
34 
35 /**
36  *******************************************************************************
37  *
38  * @ingroup kernel_blas_lapack_null
39  *
40  * @brief Compute the sequential static pivoting factorization of the hermitian
41  * matrix n-by-n A such that A = L * D * conjf(L^t).
42  *
43  *******************************************************************************
44  *
45  * @param[in] n
46  * The number of rows and columns of the matrix A.
47  *
48  * @param[inout] A
49  * The matrix A to factorize with LDL^h factorization. The matrix
50  * is of size lda -by- n.
51  *
52  * @param[in] lda
53  * The leading dimension of the matrix A.
54  *
55  * @param[inout] nbpivots
56  * Pointer to the number of pivoting operations made during
57  * factorization. It is updated during this call.
58  *
59  * @param[in] criterion
60  * Threshold used for static pivoting. If the magnitude of a diagonal value
61  * is under this threshold, the value is replaced by the threshold (with the
62  * sign of its real part) and the number of pivots is incremented.
63  *
64  *******************************************************************************/
65 static inline void
66 core_chetf2sp( pastix_int_t n,
67  pastix_complex32_t *A,
68  pastix_int_t lda,
69  pastix_int_t *nbpivots,
70  float criterion )
71 {
72  pastix_int_t k, m, ret;
73  pastix_complex32_t *Akk = A; /* A [k ][k ]: current pivot */
74  pastix_complex32_t *Amk = A+1; /* A [k+1][k ]: column below the pivot, stride 1 */
75  pastix_complex32_t *Akm = A+lda; /* A [k ][k+1]: row right of the pivot, stride lda */
76  pastix_complex32_t zalpha;
77  float dalpha;
78 
79  m = n-1; /* m is the number of rows remaining below the pivot */
80  for (k=0; k<n; k++, m--){
81  if ( cabsf(*Akk) < criterion ) { /* static pivoting on a too small pivot */
82  if ( crealf(*Akk) < 0. ) {
83  *Akk = (pastix_complex32_t)(-criterion);
84  }
85  else {
86  *Akk = (pastix_complex32_t)criterion;
87  }
88  (*nbpivots)++;
89  }
90 
91  zalpha = 1.0 / (*Akk);
92 
93  /* Store the conjugate transpose of the unscaled column in row k of the upper part */
94  cblas_ccopy( m, Amk, 1, Akm, lda );
95  ret = LAPACKE_clacgv_work( m, Akm, lda ); /* row k is strided by lda, like the copy above */
96  assert( ret == 0 );
97 
98  /* Scale the column by the inverse pivot to compute L((k+1):n,k) */
99  cblas_cscal(m, CBLAS_SADDR( zalpha ), Amk, 1 );
100 
101  dalpha = -1.0 * crealf(*Akk); /* cher takes a real alpha: -D(k,k) */
102 
103  /* Move to next Akk */
104  Akk += (lda+1);
105 
106  cblas_cher(CblasColMajor, CblasLower, /* trailing lower part -= D(k,k) * l * l^h */
107  m, dalpha,
108  Amk, 1,
109  Akk, lda);
110 
111  /* Move to next Amk and Akm */
112  Amk = Akk+1;
113  Akm = Akk+lda;
114  }
115  (void)ret; /* ret is only read by assert() */
116 }
117 
118 /**
119  *******************************************************************************
120  *
121  * @brief Compute the block static pivoting factorization of the hermitian
122  * matrix n-by-n A such that A = L * D * conjf(L^t).
123  *
124  *******************************************************************************
125  *
126  * @param[in] n
127  * The number of rows and columns of the matrix A.
128  *
129  * @param[inout] A
130  * The matrix A to factorize with LDL^h factorization. The matrix
131  * is of size lda -by- n.
132  *
133  * @param[in] lda
134  * The leading dimension of the matrix A.
135  *
136  * @param[inout] nbpivots
137  * Pointer to the number of pivoting operations made during
138  * factorization. It is updated during this call.
139  *
140  * @param[in] criterion
141  * Threshold used for static pivoting. If the magnitude of a diagonal value
142  * is under this threshold, the value is replaced by the threshold (with the
143  * sign of its real part) and the number of pivots is incremented.
144  *
145  *******************************************************************************/
146 void
149  pastix_int_t lda,
150  pastix_int_t *nbpivots,
151  float criterion )
152 {
153  pastix_int_t k, blocknbr, blocksize, matrixsize, col, ret;
154  pastix_complex32_t *Akk, *Amk, *Akm, *Amm;
155  pastix_complex32_t alpha;
156 
157  /* diagonal supernode is divided into MAXSIZEOFBLOCKS-by-MAXSIZEOFBLOCKS blocks */
158  blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
159 
160  for (k=0; k<blocknbr; k++) {
161 
162  blocksize = pastix_imin(MAXSIZEOFBLOCKS, n-k*MAXSIZEOFBLOCKS); /* last block may be smaller */
163  Akk = A+(k*MAXSIZEOFBLOCKS)*(lda+1); /* Lk, k : diagonal block */
164  Amk = Akk + blocksize; /* Lk+1,k : rectangle below the diagonal block */
165  Akm = Akk + blocksize * lda; /* Lk, k+1 : rectangle right of the diagonal block */
166  Amm = Amk + blocksize * lda; /* Lk+1,k+1 : trailing submatrix */
167 
168  /* Factorize the diagonal block Akk with the unblocked kernel */
169  core_chetf2sp(blocksize, Akk, lda, nbpivots, criterion);
170 
171  if ((k*MAXSIZEOFBLOCKS+blocksize) < n) {
172 
173  matrixsize = n-(k*MAXSIZEOFBLOCKS+blocksize);
174 
175  /*
176  * Solve the lower rectangle below the diagonal block
177  * L(k+1:n,k) = (L(k,k) D(k,k))^{-1} A(k+1:n,k)
178  */
179  /* 1) Compute A(k+1:n,k) = A(k+1:n,k)L(k,k)^{-H} = L(k+1:n,k)D(k,k) */
180  /* input: L(k,k) in Akk (unit lower), A(k+1:n,k) in Amk */
181  /* output: A(k+1:n,k) overwritten in Amk */
182  cblas_ctrsm(CblasColMajor,
183  CblasRight, CblasLower,
184  CblasConjTrans, CblasUnit,
185  matrixsize, blocksize,
186  CBLAS_SADDR(cone), Akk, lda,
187  Amk, lda);
188 
189  /* 2) Compute L(k+1:n,k) = A(k+1:n,k)D(k,k)^{-1}, one column at a time */
190  for(col = 0; col < blocksize; col++) {
191  /* copy the conjugate of L(k+1:n,k+col)*D(k+col,k+col) into row col of Akm (upper part) */
192  cblas_ccopy(matrixsize, Amk + col*lda, 1,
193  Akm + col, lda);
194  ret = LAPACKE_clacgv_work( matrixsize, Akm + col, lda );
195  assert( ret == 0 );
196 
197  /* compute L(k+1:n,k+col) = A(k+1:n,k+col)D(k+col,k+col)^{-1} */
198  alpha = 1.0 / *(Akk + col*(lda+1));
199  cblas_cscal( matrixsize, CBLAS_SADDR(alpha),
200  Amk + col*lda, 1 );
201  }
202 
203  /* Update A(k+1:n,k+1:n) = A(k+1:n,k+1:n) - L(k+1:n,k)*(L(k+1:n,k)*D(k,k))^h, the second factor being Akm */
204  cblas_cgemm(CblasColMajor,
205  CblasNoTrans, CblasNoTrans,
206  matrixsize, matrixsize, blocksize,
207  CBLAS_SADDR(mcone), Amk, lda,
208  Akm, lda,
209  CBLAS_SADDR(cone), Amm, lda);
210  }
211  }
212  (void)ret; /* ret is only read by assert() */
213 }
214 
215 /**
216  *******************************************************************************
217  *
218  * @brief Computes the LDL^h factorization of the diagonal block in a panel.
219  *
220  *******************************************************************************
221  *
222  * @param[in] solvmtx
223  * Solver Matrix structure of the problem
224  *
225  * @param[in] cblk
226  * Pointer to the structure representing the panel to factorize in the
227  * cblktab array. Next column blok must be accessible through cblk[1].
228  *
229  * @param[inout] dataL
230  * The pointer to the correct representation of lower part of the data.
231  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
232  * - pastix_lr_block if the block is compressed.
233  *
234  *******************************************************************************
235  *
236  * @return The number of static pivoting performed during the diagonal block
237  * factorization.
238  *
239  *******************************************************************************/
240 int
242  SolverCblk *cblk,
243  void *dataL )
244 {
245  pastix_int_t ncols, stride;
246  pastix_int_t nbpivots = 0;
247  pastix_fixdbl_t time, flops;
249  pastix_lrblock_t *lrL;
250  float criterion = solvmtx->diagthreshold;
251 
253 
254  ncols = cblk->lcolnum - cblk->fcolnum + 1; /* number of columns of the panel */
255  stride = (cblk->cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->stride; /* leading dimension of the diagonal block */
256 
257  if ( cblk->cblktype & CBLK_COMPRESSED ) {
258  /* dataL is a low-rank block descriptor: the coefficients are read from its u field */
259  lrL = (pastix_lrblock_t *)dataL;
260  L = lrL->u;
261  stride = ncols;
262 
263  assert( lrL->rk == -1 );
264  assert( stride == lrL->rkmax );
265  } else {
266  L = (pastix_complex32_t *)dataL;
267  }
268 
269  /*
270  * Factorize diagonal block in L D L^h
271  *
272  * - lower part holds L
273  * - diagonal holds D
274  * - upper part holds (DL^h)
275  */
276  flops = FLOPS_CHETRF( ncols );
277  kernel_trace_start_lvl2( PastixKernelLvl2HETRF );
278  core_chetrfsp( ncols, L, stride, &nbpivots, criterion );
279  kernel_trace_stop_lvl2( flops );
280 
281  kernel_trace_stop( cblk->fblokptr->inlast, PastixKernelHETRF, ncols, 0, 0, flops, time );
282 
283  if ( nbpivots ) { /* accumulate the local count into the solver matrix counter */
284  pastix_atomic_add_32b( &(solvmtx->nbpivots), nbpivots );
285  }
286  return nbpivots;
287 }
288 
289 /**
290  *******************************************************************************
291  *
292  * core_chetrfsp1d_gemm - Apply the LDL^h update contributions of the blocks of
293  * a factorized panel, starting at blok, to the facing panel fcblk.
294  *
295  *******************************************************************************
296  *
297  * @param[in] cblk
298  * The pointer to the data structure that describes the panel from
299  * which we compute the contributions. Next column blok must be
300  * accessible through cblk[1].
301  *
302  * @param[in] blok
303  * The pointer to the data structure that describes the blok from which
304  * we compute the contributions.
305  *
306  * @param[in] fcblk
307  * The pointer to the data structure that describes the panel on
308  * which we compute the contributions. Next column blok must be
309  * accessible through fcblk[1].
310  *
311  * @param[inout] L
312  * The pointer to the matrix storing the coefficients of the
313  * panel. Must be of size cblk.stride -by- cblk.width
314  *
315  * @param[inout] C
316  * The pointer to the matrix storing the coefficients of the
317  * target.
318  *
319  * @param[inout] work
320  * Temporary buffer used in core_cgemdm().
321  *
322  *******************************************************************************/
324  const SolverBlok *blok,
325  SolverCblk *fcblk,
326  const pastix_complex32_t *L,
328  pastix_complex32_t *work )
329 {
330  const SolverBlok *iterblok;
331  const SolverBlok *fblok;
332  const SolverBlok *lblok;
333  const pastix_complex32_t *blokA;
334  const pastix_complex32_t *blokB;
335  const pastix_complex32_t *blokD;
336  pastix_complex32_t *blokC;
337 
338  pastix_int_t M, N, K, lda, ldb, ldc, ldd;
339 
340  /* Get the panel update dimensions */
341  K = cblk_colnbr( cblk );
342  N = blok_rownbr( blok );
343 
344  /* Get info for diagonal, and the B block: ldd is (leading dimension + 1), the step between two diagonal entries */
345  blokD = L;
346  blokB = L + blok->coefind;
347  if ( cblk->cblktype & CBLK_LAYOUT_2D ) {
348  ldb = N;
349  ldd = K + 1;
350  }
351  else {
352  ldb = cblk->stride;
353  ldd = cblk->stride + 1;
354  }
355 
356  /*
357  * Add contribution to C in fcblk:
358  * Get the first facing block of the distant panel, and the first block of
359  * the next cblk, which marks the end of the blocks of the current cblk
360  */
361  fblok = fcblk->fblokptr;
362  lblok = cblk[1].fblokptr;
363 
364  for (iterblok=blok; iterblok<lblok; iterblok++) {
365 
366  /* Find the facing block; fblok is never reset, it only moves forward */
367  while (!is_block_inside_fblock( iterblok, fblok ))
368  {
369  fblok++;
370  assert( fblok < fcblk[1].fblokptr );
371  }
372 
373  /* Get the A block and its dimensions */
374  M = blok_rownbr( iterblok );
375  blokA = L + iterblok->coefind;
376  lda = (cblk->cblktype & CBLK_LAYOUT_2D) ? M : cblk->stride;
377 
378  /* Get the C block: row offset of iterblok inside fblok, column offset of blok inside fcblk */
379  ldc = (fcblk->cblktype & CBLK_LAYOUT_2D) ? blok_rownbr(fblok) : fcblk->stride;
380 
381  blokC = C + fblok->coefind
382  + iterblok->frownum - fblok->frownum
383  + (blok->frownum - fcblk->fcolnum) * ldc;
384 
385  {
386  pastix_int_t ldw;
387  int ret;
388 
389  /* Compute ldw which should never be larger than SOLVE_COEFMAX */
390  ldw = (M+1) * K;
391 
392  pastix_cblk_lock( fcblk ); /* fcblk stays locked while C is updated */
394  M, N, K,
395  -1.0, blokA, lda,
396  blokB, ldb,
397  1.0, blokC, ldc,
398  blokD, ldd,
399  work, ldw );
400  pastix_cblk_unlock( fcblk );
401  assert(ret == PASTIX_SUCCESS);
402  (void)ret; /* ret is only read by assert() */
403  }
404  }
405 }
406 
407 /**
408  *******************************************************************************
409  *
410  * @brief Compute the LDL^h factorization of one panel.
411  *
412  *******************************************************************************
413  *
414  * @param[in] solvmtx
415  * Solver Matrix structure of the problem
416  *
417  * @param[in] cblk
418  * Pointer to the structure representing the panel to factorize in the
419  * cblktab array. Next column blok must be accessible through cblk[1].
420  *
421  * @param[inout] L
422  * The pointer to the correct representation of lower part of the data.
423  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
424  * - pastix_lr_block if the block is compressed.
425  *
426  * @param[inout] DLh
427  * The pointer to the correct representation of DLh matrix
428  * (stored in the upper part by default).
429  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
430  * - pastix_lr_block if the block is compressed.
431  *
432  *******************************************************************************
433  *
434  * @return The number of static pivoting during factorization of the diagonal
435  * block.
436  *
437  *******************************************************************************/
438 int
440  SolverCblk *cblk,
441  void *L,
442  void *DLh )
443 {
444  pastix_int_t nbpivots;
445  nbpivots = cpucblk_chetrfsp1d_hetrf( solvmtx, cblk, L ); /* factorize the diagonal block first */
446 
447  /*
448  * We exploit the fact that (DL^h) is stored in the upper triangle part of L:
449  * L is passed both as the triangular operand and as the panel to update
450  */
452  cblk, L, L, &(solvmtx->lowrank) );
453 
454  if ( (DLh != NULL) && (cblk->cblktype & CBLK_LAYOUT_2D) ) {
455 
456  /* Copy L into the DLh buffer and multiply by D */
457  cpucblk_cscalo( PastixConjTrans, cblk, L, DLh );
458  }
459  return nbpivots;
460 }
461 
462 
463 /**
464  *******************************************************************************
465  *
466  * @brief Perform the LDL^h factorization of a given panel and apply all its
467  * updates.
468  *
469  *******************************************************************************
470  *
471  * @param[in] solvmtx
472  * Solver Matrix structure of the problem
473  *
474  * @param[in] cblk
475  * Pointer to the structure representing the panel to factorize in the
476  * cblktab array. Next column blok must be accessible through cblk[1].
477  *
478  * @param[in] DLh
479  * Temporary memory buffer to store the conjugate transpose of DLh.
480  *
481  * @param[in] work
482  * Temporary memory buffer.
483  *
484  * @param[in] lwork
485  * Temporary workspace dimension.
486  *
487  *******************************************************************************
488  *
489  * @return The number of static pivoting during factorization of the diagonal
490  * block.
491  *
492  *******************************************************************************/
493 int
495  SolverCblk *cblk,
496  pastix_complex32_t *DLh,
497  pastix_complex32_t *work,
498  pastix_int_t lwork )
499 {
500  void *dataL = cblk_getdataL( cblk );
501  void *dataDLh = cblk_getdataU( cblk );
502  SolverCblk *fcblk;
503  SolverBlok *blok, *lblk;
504  pastix_int_t nbpivots;
505 
506  if ( !(cblk->cblktype & CBLK_LAYOUT_2D) ) { /* the DLh workspace is only used with the 2D layout */
507  DLh = NULL;
508  }
509  else {
510  if (cblk->cblktype & CBLK_COMPRESSED) {
511  cpucblk_calloc_lrws( cblk, dataDLh, DLh ); /* set up the low-rank blocks from the workspace */
512  }
513  else {
514  assert( dataDLh == NULL );
515  dataDLh = DLh;
516  }
517  }
518 
519  /* Factorize the panel: diagonal block, then the off-diagonal blocks */
520  nbpivots = cpucblk_chetrfsp1d_panel( solvmtx, cblk, dataL, dataDLh );
521 
522  blok = cblk->fblokptr+1; /* the block following the first block of this cblk */
523  lblk = cblk[1].fblokptr; /* the next diagonal block */
524 
525  for( ; blok < lblk; blok++ ) /* runs only if there are off-diagonal blocks in the column */
526  {
527  fcblk = solvmtx->cblktab + blok->fcblknm;
528 
529  if ( fcblk->cblktype & CBLK_FANIN ) {
530  cpucblk_calloc( PastixLCoef, fcblk );
531  }
532 
533  /* Update on L: without DLh the kernel reads D from L, otherwise the DLh copy is used */
534  if ( DLh == NULL ) {
535  core_chetrfsp1d_gemm( cblk, blok, fcblk,
536  dataL, cblk_getdataL( fcblk ),
537  work );
538  }
539  else {
541  cblk, blok, fcblk,
542  dataL, dataDLh, cblk_getdataL( fcblk ),
543  work, lwork, &(solvmtx->lowrank) );
544  }
545  cpucblk_crelease_deps( PastixLCoef, solvmtx, cblk, fcblk );
546  }
547 
548  return nbpivots;
549 }
550 
551 /**
552  *******************************************************************************
553  *
554  * @brief Perform the LDL^h factorization of a given panel and submit tasks
555  * for the subsequent updates.
556  *
557  *******************************************************************************
558  *
559  * @param[in] solvmtx
560  * Solver Matrix structure of the problem
561  *
562  * @param[in] cblk
563  * Pointer to the structure representing the panel to factorize in the
564  * cblktab array. Next column blok must be accessible through cblk[1].
565  *
566  *******************************************************************************
567  *
568  * @return The number of static pivoting during factorization of the diagonal
569  * block.
570  *
571  *******************************************************************************/
572 int
574  SolverCblk *cblk )
575 {
576  void *dataL = cblk_getdataL( cblk );
577  SolverBlok *blok, *lblk;
578  pastix_int_t i, nbpivots;
579  pastix_queue_t *queue = solvmtx->computeQueue[ cblk->threadid ];
580 
581  assert( cblk->cblktype & CBLK_TASKS_2D );
582  nbpivots = cpucblk_chetrfsp1d_panel( solvmtx, cblk, dataL, NULL ); /* no DLh buffer is passed here */
583 
584  blok = cblk->fblokptr + 1; /* the block following the first block of this cblk */
585  lblk = cblk[1].fblokptr; /* the next diagonal block */
586 
587  /* if there are off-diagonal blocks in the column, push one update task per facing cblk */
588  for( i=0; blok < lblk; i++, blok++ )
589  {
590  assert( !((solvmtx->cblktab + blok->fcblknm)->cblktype & CBLK_RECV) );
591  pqueuePush1( queue, - (blok - solvmtx->bloktab) - 1, cblk->priority + i ); /* pushed id is -(block index) - 1 */
592 
593  /* Skip blocks facing the same cblk */
594  while ( ( blok < lblk ) &&
595  ( blok[0].fcblknm == blok[1].fcblknm ) &&
596  ( blok[0].lcblknm == blok[1].lcblknm ) )
597  {
598  blok++;
599  }
600  }
601 
602  return nbpivots;
603 }
604 
605 /**
606  *******************************************************************************
607  *
608  * @brief Apply the updates of the LDL^h factorisation of a given panel.
609  *
610  *******************************************************************************
611  *
612  * @param[in] solvmtx
613  * Solver Matrix structure of the problem
614  *
615  * @param[in] blok
616  * Pointer to the blok where the update start.
617  *
618  * @param[in] work
619  * Temporary memory buffer.
620  *
621  * @param[in] lwork
622  * Temporary workspace dimension.
623  *
624  *******************************************************************************/
625 void
627  SolverBlok *blok,
628  pastix_complex32_t *work )
629 {
630  SolverCblk *cblk = solvmtx->cblktab + blok->lcblknm; /* cblk indexed by blok->lcblknm: source panel */
631  SolverCblk *fcbk = solvmtx->cblktab + blok->fcblknm; /* cblk indexed by blok->fcblknm: update target */
632  SolverBlok *lblk = cblk[1].fblokptr; /* the next diagonal block */
633  void *dataL = cblk_getdataL( cblk );
634 
635  if ( fcbk->cblktype & CBLK_FANIN ) {
636  cpucblk_calloc( PastixLCoef, fcbk );
637  }
638 
639  do
640  {
641  /* Update on L (3 terms) */
642  core_chetrfsp1d_gemm( cblk, blok, fcbk,
643  dataL, fcbk->lcoeftab,
644  work );
645 
646  cpucblk_crelease_deps( PastixLCoef, solvmtx, cblk, fcbk );
647  blok++;
648  }
649  while ( ( blok < lblk ) && /* continue while the next block has the same fcblknm and lcblknm */
650  ( blok[-1].fcblknm == blok[0].fcblknm ) &&
651  ( blok[-1].lcblknm == blok[0].lcblknm ) );
652 }
void core_chetrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting factorization of the hermitian matrix n-by-n A such that A = L * D ...
int cpucblk_chetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLh)
Compute the LDL^h factorization of one panel.
int cpucblk_chetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *DLh, pastix_complex32_t *work, pastix_int_t lwork)
Perform the LDL^h factorization of a given panel and apply all its updates.
static void core_chetf2sp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the sequential static pivoting factorization of the hermitian matrix n-by-n A such that A = L...
Definition: core_chetrfsp.c:66
void cpucblk_chetrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex32_t *work)
Apply the updates of the LDL^h factorisation of a given panel.
void core_chetrfsp1d_gemm(const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const pastix_complex32_t *L, pastix_complex32_t *C, pastix_complex32_t *work)
int cpucblk_chetrfsp1d_hetrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Computes the LDL^h factorization of the diagonal block in a panel.
int cpucblk_chetrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LDL^h factorization of a given panel and submit tasks for the subsequent updates.
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
float _Complex pastix_complex32_t
Definition: datatypes.h:76
double pastix_fixdbl_t
Definition: datatypes.h:65
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition: queue.h:64
Queue structure.
Definition: queue.h:38
static void kernel_trace_stop(int8_t inlast, pastix_ktype_t ktype, int m, int n, int k, double flops, double starttime)
Stop the trace of a single kernel.
static double kernel_trace_start(pastix_ktype_t ktype)
Start the trace of a single kernel.
Definition: kernels_trace.h:87
@ PastixKernelLvl2HETRF
Definition: kernels_enums.h:88
@ PastixKernelHETRF
Definition: kernels_enums.h:49
int core_cgemdm(pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K, pastix_complex32_t alpha, const pastix_complex32_t *A, int LDA, const pastix_complex32_t *B, int LDB, pastix_complex32_t beta, pastix_complex32_t *C, int LDC, const pastix_complex32_t *D, int incD, pastix_complex32_t *WORK, int LWORK)
Perform one of the following matrix-matrix operations.
Definition: core_cgemdm.c:139
pastix_fixdbl_t cpucblk_cgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, pastix_complex32_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
void cpucblk_cscalo(pastix_trans_t trans, const SolverCblk *cblk, void *dataL, void *dataLD)
Copy the L term with scaling for the two-terms algorithm.
Definition: core_cscalo.c:171
void cpucblk_crelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void cpucblk_ctrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
Definition: core_ctrsmsp.c:356
void cpucblk_calloc_lrws(const SolverCblk *cblk, pastix_lrblock_t *lrblok, pastix_complex32_t *ws)
Initialize lrblock structure from a workspace for all blocks of the cblk associated.
Definition: cpucblk_cinit.c:98
void cpucblk_calloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
The block low-rank structure to hold a matrix in low-rank form.
@ PastixLCoef
Definition: api.h:478
@ PastixUpper
Definition: api.h:466
@ PastixRight
Definition: api.h:496
@ PastixNonUnit
Definition: api.h:487
@ PastixConjTrans
Definition: api.h:447
@ PastixNoTrans
Definition: api.h:445
@ PastixTrans
Definition: api.h:446
@ PASTIX_SUCCESS
Definition: api.h:367
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
Definition: solver.h:395
pastix_lr_t lowrank
Definition: solver.h:236
pastix_int_t priority
Definition: solver.h:183
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:329
pastix_int_t fcblknm
Definition: solver.h:144
static int is_block_inside_fblock(const SolverBlok *blok, const SolverBlok *fblok)
Check if a block is included inside another one.
Definition: solver.h:504
SolverBlok *restrict bloktab
Definition: solver.h:229
pastix_int_t frownum
Definition: solver.h:147
static void * cblk_getdataU(const SolverCblk *cblk)
Get the pointer to the data associated to the upper part of the cblk.
Definition: solver.h:354
pastix_int_t coefind
Definition: solver.h:149
SolverBlok * fblokptr
Definition: solver.h:168
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
Definition: solver.h:342
pastix_int_t lcblknm
Definition: solver.h:143
int threadid
Definition: solver.h:182
int8_t inlast
Definition: solver.h:151
SolverCblk *restrict cblktab
Definition: solver.h:228
pastix_int_t stride
Definition: solver.h:169
int8_t cblktype
Definition: solver.h:164
pastix_int_t lcolnum
Definition: solver.h:167
void * lcoeftab
Definition: solver.h:177
double diagthreshold
Definition: solver.h:238
volatile int32_t nbpivots
Definition: solver.h:239
pastix_int_t fcolnum
Definition: solver.h:166
Solver block structure.
Definition: solver.h:141
Solver column block structure.
Definition: solver.h:161
Solver column block structure.
Definition: solver.h:203