PaStiX Handbook  6.4.0
pastix_ccores.h
Go to the documentation of this file.
1 /**
2  * @file pastix_ccores.h
3  *
4  * PaStiX kernel header.
5  *
6  * @copyright 2011-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
7  * Univ. Bordeaux. All rights reserved.
8  *
9  * @version 6.4.0
10  * @author Mathieu Faverge
11  * @author Pierre Ramet
12  * @author Xavier Lacoste
13  * @author Esragul Korkmaz
14  * @author Gregoire Pichon
15  * @author Tony Delarue
16  * @author Alycia Lisito
17  * @author Nolan Bredel
18  * @date 2024-07-05
19  * @generated from /builds/solverstack/pastix/kernels/pastix_zcores.h, normal z -> c, Tue Oct 8 14:17:09 2024
20  *
21  */
22 #ifndef _pastix_ccores_h_
23 #define _pastix_ccores_h_
24 
25 #ifndef DOXYGEN_SHOULD_SKIP_THIS
26 #define pastix_cblk_lock( cblk_ ) pastix_atomic_lock( &((cblk_)->lock) )
27 #define pastix_cblk_unlock( cblk_ ) pastix_atomic_unlock( &((cblk_)->lock) )
28 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
29 
30 /**
31  * @addtogroup kernel_blas_lapack
32  * @{
33  * This module contains all the BLAS and LAPACK-like kernels that are working
34  * on lapack layout matrices.
35  *
36  * @name PastixComplex32 BLAS kernels
37  * @{
38  */
39 void core_cplrnt( int m,
40  int n,
42  int lda,
43  int gM,
44  int m0,
45  int n0,
46  unsigned long long int seed );
47 void core_cgetmo( int m,
48  int n,
49  const pastix_complex32_t *A,
50  int lda,
52  int ldb );
53 int core_cgeadd( pastix_trans_t trans,
54  pastix_int_t M,
55  pastix_int_t N,
56  pastix_complex32_t alpha,
57  const pastix_complex32_t *A,
58  pastix_int_t LDA,
59  pastix_complex32_t beta,
61  pastix_int_t LDB );
62 int core_cgemdm( pastix_trans_t transA,
63  pastix_trans_t transB,
64  int M,
65  int N,
66  int K,
67  pastix_complex32_t alpha,
68  const pastix_complex32_t *A,
69  int LDA,
70  const pastix_complex32_t *B,
71  int LDB,
72  pastix_complex32_t beta,
74  int LDC,
75  const pastix_complex32_t *D,
76  int incD,
77  pastix_complex32_t *WORK,
78  int LWORK );
79 int core_cpqrcp( float tol,
80  pastix_int_t maxrank,
81  int full_update,
82  pastix_int_t nb,
83  pastix_int_t m,
84  pastix_int_t n,
86  pastix_int_t lda,
87  pastix_int_t *jpvt,
88  pastix_complex32_t *tau,
89  pastix_complex32_t *work,
90  pastix_int_t lwork,
91  float *rwork );
92 int core_crqrcp( float tol,
93  pastix_int_t maxrank,
94  int refine,
95  pastix_int_t nb,
96  pastix_int_t m,
97  pastix_int_t n,
99  pastix_int_t lda,
100  pastix_int_t *jpvt,
101  pastix_complex32_t *tau,
102  pastix_complex32_t *work,
103  pastix_int_t lwork,
104  float *rwork );
105 int core_crqrrt( float tol,
106  pastix_int_t maxrank,
107  pastix_int_t nb,
108  pastix_int_t m,
109  pastix_int_t n,
111  pastix_int_t lda,
112  pastix_complex32_t *tau,
114  pastix_int_t ldb,
115  pastix_complex32_t *tau_b,
116  pastix_complex32_t *work,
117  pastix_int_t lwork,
118  float normA );
119 int core_ctqrcp( float tol,
120  pastix_int_t maxrank,
121  int unused,
122  pastix_int_t nb,
123  pastix_int_t m,
124  pastix_int_t n,
126  pastix_int_t lda,
127  pastix_int_t *jpvt,
128  pastix_complex32_t *tau,
129  pastix_complex32_t *work,
130  pastix_int_t lwork,
131  float *rwork );
132 int core_ctradd( pastix_uplo_t uplo,
133  pastix_trans_t trans,
134  pastix_int_t M,
135  pastix_int_t N,
136  pastix_complex32_t alpha,
137  const pastix_complex32_t *A,
138  pastix_int_t LDA,
139  pastix_complex32_t beta,
141  pastix_int_t LDB);
142 int core_cscalo( pastix_trans_t trans,
143  pastix_int_t M,
144  pastix_int_t N,
145  const pastix_complex32_t *A,
146  pastix_int_t lda,
147  const pastix_complex32_t *D,
148  pastix_int_t ldd,
150  pastix_int_t ldb );
151 
152 /**
153  * @}
154  * @name PastixComplex32 Othogonalization kernels for low-rank updates
155  * @{
156  */
158  pastix_int_t N,
159  pastix_int_t rank,
161  pastix_int_t ldu,
163  pastix_int_t ldv );
165  pastix_int_t N,
166  pastix_int_t r1,
167  pastix_int_t *r2ptr,
168  pastix_int_t offx,
169  pastix_int_t offy,
171  pastix_int_t ldu,
173  pastix_int_t ldv );
175  pastix_int_t N1,
176  pastix_int_t M2,
177  pastix_int_t N2,
178  pastix_int_t r1,
179  pastix_int_t *r2ptr,
180  pastix_int_t offx,
181  pastix_int_t offy,
183  pastix_int_t ldu,
185  pastix_int_t ldv );
186 
187 /**
188  * @}
189  * @name PastixComplex32 LAPACK kernels
190  * @{
191  */
194  pastix_int_t lda,
195  pastix_int_t *nbpivots,
196  float criterion );
199  pastix_int_t lda,
200  pastix_int_t *nbpivots,
201  float criterion );
204  pastix_int_t lda,
205  pastix_int_t *nbpivots,
206  float criterion );
207 #if defined(PRECISION_z) || defined(PRECISION_c)
210  pastix_int_t lda,
211  pastix_int_t *nbpivots,
212  float criterion );
213 #endif
216  pastix_int_t lda,
217  pastix_int_t *nbpivots,
218  float criterion );
219 
220 /**
221  * @}
222  * @}
223  *
224  * @addtogroup kernel_fact
225  * @{
226  * This module contains all the kernel working at the solver matrix structure
227  * level for the numerical factorization step.
228  *
229  * @name PastixComplex32 cblk-BLAS CPU kernels
230  * @{
231  */
232 
233 int cpucblk_cgeaddsp1d( const SolverCblk *cblk1,
234  SolverCblk *cblk2,
235  const pastix_complex32_t *L1,
236  pastix_complex32_t *L2,
237  const pastix_complex32_t *U1,
238  pastix_complex32_t *U2 );
239 
241  pastix_trans_t trans,
242  const SolverCblk *cblk,
243  const SolverBlok *blok,
244  SolverCblk *fcblk,
245  const void *A,
246  const void *B,
247  void *C,
248  pastix_complex32_t *work,
249  pastix_int_t lwork,
250  const pastix_lr_t *lowrank );
251 void cpucblk_ctrsmsp( pastix_side_t side,
252  pastix_uplo_t uplo,
253  pastix_trans_t trans,
254  pastix_diag_t diag,
255  const SolverCblk *cblk,
256  const void *A,
257  void *C,
258  const pastix_lr_t *lowrank );
259 void cpucblk_cscalo ( pastix_trans_t trans,
260  const SolverCblk *cblk,
261  void *dataL,
262  void *dataLD );
263 
265  const SolverCblk *cblk,
266  SolverCblk *fcblk,
267  pastix_int_t blok_mk,
268  pastix_int_t blok_nk,
269  pastix_int_t blok_mn,
270  const void *A,
271  const void *B,
272  void *C,
273  const pastix_lr_t *lowrank );
275  pastix_uplo_t uplo,
276  pastix_trans_t trans,
277  pastix_diag_t diag,
278  const SolverCblk *cblk,
279  pastix_int_t blok_m,
280  const void *A,
281  void *C,
282  const pastix_lr_t *lowrank );
283 void cpublok_cscalo ( pastix_trans_t trans,
284  const SolverCblk *cblk,
285  pastix_int_t blok_m,
286  const void *A,
287  const void *dataD,
288  void *dataB );
289 
290 /**
291  * @}
292  * @name PastixComplex32 cblk LU kernels
293  * @{
294  */
296  SolverCblk *cblk,
297  void *L,
298  void *U );
300  SolverCblk *cblk,
301  void *L,
302  void *U );
303 int cpucblk_cgetrfsp1d ( SolverMatrix *solvmtx,
304  SolverCblk *cblk,
305  pastix_complex32_t *work,
306  pastix_int_t lwork );
307 
308 /**
309  * @}
310  * @name PastixComplex32 cblk Cholesky kernels
311  * @{
312  */
314  SolverCblk *cblk,
315  void *dataL );
317  SolverCblk *cblk,
318  void *L );
319 int cpucblk_cpotrfsp1d( SolverMatrix *solvmtx,
320  SolverCblk *cblk,
321  pastix_complex32_t *work,
322  pastix_int_t lwork );
323 
324 /**
325  * @}
326  */
327 
328 #if defined(PRECISION_z) || defined(PRECISION_c)
329  /**
330  * @name PastixComplex32 cblk LDL^h kernels
331  * @{
332  */
334  SolverCblk *cblk,
335  void *L );
337  SolverCblk *cblk,
338  void *L,
339  void *DLh );
340 int cpucblk_chetrfsp1d( SolverMatrix *solvmtx,
341  SolverCblk *cblk,
342  pastix_complex32_t *work1,
343  pastix_complex32_t *work2,
344  pastix_int_t lwork );
345 
346 /**
347  * @}
348  * @name PastixComplex32 cblk LL^t kernels
349  * @{
350  */
352  SolverCblk *cblk,
353  void *dataL );
355  SolverCblk *cblk,
356  void *dataL );
357 int cpucblk_cpxtrfsp1d( SolverMatrix *solvmtx,
358  SolverCblk *cblk,
359  pastix_complex32_t *work,
360  pastix_int_t lwork );
361 
362 /**
363  * @}
364  */
365 #endif
366 
367  /**
368  * @name PastixComplex32 cblk LDL^t kernels
369  * @{
370  */
372  SolverCblk *cblk,
373  void *dataL );
375  SolverCblk *cblk,
376  void *L,
377  void *DLt );
378 int cpucblk_csytrfsp1d( SolverMatrix *solvmtx,
379  SolverCblk *cblk,
380  pastix_complex32_t *Dlt,
381  pastix_complex32_t *work,
382  pastix_int_t lwork );
383 
384 /**
385  * @}
386  * @name PastixComplex32 initialization and additionnal routines
387  * @{
388  */
389 void cpucblk_calloc_lrws( const SolverCblk *cblk,
390  pastix_lrblock_t *lrblok,
391  pastix_complex32_t *ws );
393  SolverCblk *cblk,
394  int rkmax );
396  SolverCblk *cblk );
398  SolverCblk *cblk );
400  SolverCblk *cblk );
402  const SolverMatrix *solvmtx,
403  const pastix_bcsc_t *bcsc,
404  pastix_int_t itercblk );
406  const SolverMatrix *solvmtx,
407  const pastix_bcsc_t *bcsc,
408  pastix_int_t itercblk,
409  const char *directory );
410 void cpucblk_cgetschur( const SolverCblk *cblk,
411  int upper_part,
413  pastix_int_t lds );
415  const SolverCblk *cblk,
416  FILE *stream );
418  const SolverCblk *cblkA,
419  SolverCblk *cblkB );
421  const SolverCblk *cblkA,
422  SolverCblk *cblkB,
423  const void *A,
424  void *B,
425  pastix_complex32_t *work,
426  pastix_int_t lwork,
427  const pastix_lr_t *lowrank );
429  const SolverCblk *cblkA,
430  SolverCblk *cblkB,
431  pastix_int_t blokA_m,
432  pastix_int_t blokB_m,
433  const void *A,
434  void *B,
435  pastix_complex32_t *work,
436  pastix_int_t lwork,
437  const pastix_lr_t *lowrank );
438 
439 /**
440  * @}
441  * @name PastixComplex32 MPI routines
442  * @{
443  */
444 int cpucblk_cincoming_deps( int mt_flag,
445  pastix_coefside_t side,
446  SolverMatrix *solvmtx,
447  SolverCblk *cblk );
449  SolverMatrix *solvmtx,
450  const SolverCblk *cblk,
451  SolverCblk *fcbk );
453  pastix_int_t sched,
454  SolverMatrix *solvmtx );
455 void cpucblk_cupdate_reqtab( SolverMatrix *solvmtx );
456 #if defined( PASTIX_WITH_MPI )
457 void cpucblk_cmpi_progress( pastix_coefside_t side,
458  SolverMatrix *solvmtx,
459  int threadid );
460 void cpucblk_cisend_rhs_bwd( SolverMatrix *solvmtx,
461  pastix_rhs_t rhsb,
462  SolverCblk *cblk );
463 #endif
464 void cpucblk_cmpi_rhs_fwd_progress( const args_solve_t *enums,
465  SolverMatrix *solvmtx,
466  pastix_rhs_t rhsb,
467  int threadid );
469  SolverMatrix *solvmtx,
470  pastix_rhs_t rhsb,
471  const SolverCblk *cblk,
472  SolverCblk *fcbk );
473 int cpucblk_cincoming_rhs_fwd_deps( int rank,
474  const args_solve_t *enums,
475  SolverMatrix *solvmtx,
476  SolverCblk *cblk,
477  pastix_rhs_t rhsb );
479  pastix_int_t sched,
480  SolverMatrix *solvmtx,
481  pastix_rhs_t rhsb );
482 
483 void cpucblk_cmpi_rhs_bwd_progress( const args_solve_t *enums,
484  SolverMatrix *solvmtx,
485  pastix_rhs_t rhsb,
486  int threadid );
488  SolverMatrix *solvmtx,
489  pastix_rhs_t rhsb,
490  const SolverCblk *cblk,
491  SolverCblk *fcbk );
492 int cpucblk_cincoming_rhs_bwd_deps( int rank,
493  const args_solve_t *enums,
494  SolverMatrix *solvmtx,
495  SolverCblk *cblk,
496  pastix_rhs_t rhsb );
498  pastix_int_t sched,
499  SolverMatrix *solvmtx,
500  pastix_rhs_t rhsb );
501 void cpucblk_csend_rhs_forward( const SolverMatrix *solvmtx,
502  SolverCblk *cblk,
503  pastix_rhs_t b );
504 void cpucblk_crecv_rhs_forward( const SolverMatrix *solvmtx,
505  SolverCblk *cblk,
506  pastix_complex32_t *work,
507  pastix_rhs_t b );
508 void cpucblk_csend_rhs_backward( const SolverMatrix *solvmtx,
509  SolverCblk *cblk,
510  pastix_rhs_t b );
511 void cpucblk_crecv_rhs_backward( const SolverMatrix *solvmtx,
512  SolverCblk *cblk,
513  pastix_rhs_t b );
514 
515 /**
516  * @}
517  * @name PastixComplex32 compression/uncompression routines
518  * @{
519  */
521  pastix_int_t M,
522  pastix_int_t N,
523  pastix_lrblock_t *blok );
525  pastix_coefside_t side,
526  int max_ilulvl,
527  SolverCblk *cblk );
529  SolverCblk *cblk );
531  const SolverMatrix *solvmtx,
532  SolverCblk *cblk,
533  pastix_int_t *orig,
534  pastix_int_t *gain );
535 
536 /**
537  * @}
538  * @}
539  *
540  * @addtogroup kernel_solve
541  * @{
542  * This module contains all the kernel working on the solver matrix structure
543  * for the solve step.
544  *
545  */
546 
548  pastix_uplo_t uplo,
549  pastix_trans_t trans,
550  pastix_diag_t diag,
551  const SolverCblk *cblk,
552  int nrhs,
553  const void *dataA,
555  int ldb );
557  pastix_trans_t trans,
558  pastix_int_t nrhs,
559  const SolverCblk *cblk,
560  const SolverBlok *blok,
561  SolverCblk *fcbk,
562  const void *dataA,
563  const pastix_complex32_t *B,
564  pastix_int_t ldb,
566  pastix_int_t ldc );
567 
568 void solve_cblk_ctrsmsp_forward( const args_solve_t *enums,
569  SolverMatrix *datacode,
570  const SolverCblk *cblk,
571  pastix_rhs_t b );
572 void solve_cblk_ctrsmsp_backward( const args_solve_t *enums,
573  SolverMatrix *datacode,
574  SolverCblk *cblk,
575  pastix_rhs_t b );
576 
577 void solve_cblk_cdiag( const SolverCblk *cblk,
578  const void *dataA,
579  int nrhs,
581  int ldb,
582  pastix_complex32_t *work );
583 /**
584  * @}
585  *
586  * @addtogroup kernel_fact_null
587  * @{
588  * This module contains the three terms update functions for the LDL^t and
589  * LDL^h factorizations.
590  *
591  */
592 #if defined(PRECISION_z) || defined(PRECISION_c)
593 void core_chetrfsp1d_gemm( const SolverCblk *cblk,
594  const SolverBlok *blok,
595  SolverCblk *fcblk,
596  const pastix_complex32_t *L,
598  pastix_complex32_t *work );
599 #endif
600 void core_csytrfsp1d_gemm( const SolverCblk *cblk,
601  const SolverBlok *blok,
602  SolverCblk *fcblk,
603  const pastix_complex32_t *L,
605  pastix_complex32_t *work );
606 
607 int
609  SolverCblk *cblk );
610 void
612  SolverBlok *blok,
613  pastix_complex32_t *work,
614  pastix_int_t lwork );
615 int
617  SolverCblk *cblk );
618 void
620  SolverBlok *blok,
621  pastix_complex32_t *work );
622 int
624  SolverCblk *cblk );
625 void
627  SolverBlok *blok,
628  pastix_complex32_t *work,
629  pastix_int_t lwork );
630 #if defined(PRECISION_z) || defined(PRECISION_c)
631 int
633  SolverCblk *cblk );
634 void
636  SolverBlok *blok,
637  pastix_complex32_t *work,
638  pastix_int_t lwork );
639 int
641  SolverCblk *cblk );
642 void
644  SolverBlok *blok,
645  pastix_complex32_t *work );
646 #endif
647 
648 /**
649  * @}
650  */
651 
652 #endif /* _pastix_ccores_h_ */
void core_chetrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting factorization of the hermitian matrix n-by-n A such that A = L * D ...
int cpucblk_chetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLh)
Compute the LDL^h factorization of one panel.
int cpucblk_chetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *DLh, pastix_complex32_t *work, pastix_int_t lwork)
Perform the LDL^h factorization of a given panel and apply all its updates.
void cpucblk_chetrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex32_t *work)
Apply the updates of the LDL^h factorisation of a given panel.
void core_chetrfsp1d_gemm(const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const pastix_complex32_t *L, pastix_complex32_t *C, pastix_complex32_t *work)
int cpucblk_chetrfsp1d_hetrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Computes the LDL^h factorization of the diagonal block in a panel.
int cpucblk_chetrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LDL^h factorization of a given panel and submit tasks for the subsequent updates.
int cpucblk_cpxtrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Compute the LL^t factorization of one panel.
void cpucblk_cpxtrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex32_t *work, pastix_int_t lwork)
Apply the updates of the LL^t factorisation of a given panel.
int cpucblk_cpxtrfsp1d_pxtrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the LL^t factorization of the diagonal block in a panel.
int cpucblk_cpxtrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LL^t factorization of a given panel.
int cpucblk_cpxtrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_int_t lwork)
Perform the LL^t factorization of a given panel and apply all its updates.
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
float _Complex pastix_complex32_t
Definition: datatypes.h:76
double pastix_fixdbl_t
Definition: datatypes.h:65
int core_crqrcp(float tol, pastix_int_t maxrank, int refine, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *jpvt, pastix_complex32_t *tau, pastix_complex32_t *work, pastix_int_t lwork, float *rwork)
Compute a randomized QR factorization.
Definition: core_crqrcp.c:114
int core_ctradd(pastix_uplo_t uplo, pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex32_t alpha, const pastix_complex32_t *A, pastix_int_t LDA, pastix_complex32_t beta, pastix_complex32_t *B, pastix_int_t LDB)
Add two triangular matrices together as in PBLAS pctradd.
Definition: core_ctradd.c:79
pastix_fixdbl_t core_clrorthu_cgs(pastix_int_t M1, pastix_int_t N1, pastix_int_t M2, pastix_int_t N2, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, pastix_complex32_t *U, pastix_int_t ldu, pastix_complex32_t *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using CGS.
Definition: core_clrothu.c:430
void core_csytrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting factorization of the symmetric matrix n-by-n A such that A = L * D ...
int core_cgeadd(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex32_t alpha, const pastix_complex32_t *A, pastix_int_t LDA, pastix_complex32_t beta, pastix_complex32_t *B, pastix_int_t LDB)
Add two matrices together.
Definition: core_cgeadd.c:78
void core_cplrnt(int m, int n, pastix_complex32_t *A, int lda, int gM, int m0, int n0, unsigned long long int seed)
Generate a random tile.
Definition: core_cplrnt.c:91
int core_crqrrt(float tol, pastix_int_t maxrank, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_complex32_t *tau, pastix_complex32_t *B, pastix_int_t ldb, pastix_complex32_t *tau_b, pastix_complex32_t *work, pastix_int_t lwork, float normA)
Compute a randomized QR factorization with rotation technique.
Definition: core_crqrrt.c:126
int core_cscalo(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, const pastix_complex32_t *A, pastix_int_t lda, const pastix_complex32_t *D, pastix_int_t ldd, pastix_complex32_t *B, pastix_int_t ldb)
Scale a matrix by a diagonal out of place.
Definition: core_cscalo.c:74
int core_cgemdm(pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K, pastix_complex32_t alpha, const pastix_complex32_t *A, int LDA, const pastix_complex32_t *B, int LDB, pastix_complex32_t beta, pastix_complex32_t *C, int LDC, const pastix_complex32_t *D, int incD, pastix_complex32_t *WORK, int LWORK)
Perform one of the following matrix-matrix operations.
Definition: core_cgemdm.c:139
pastix_fixdbl_t core_clrorthu_fullqr(pastix_int_t M, pastix_int_t N, pastix_int_t rank, pastix_complex32_t *U, pastix_int_t ldu, pastix_complex32_t *V, pastix_int_t ldv)
Try to orthognalize the u part of the low-rank form, and update the v part accordingly using full QR.
Definition: core_clrothu.c:83
void core_cpotrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t .
pastix_fixdbl_t core_clrorthu_partialqr(pastix_int_t M, pastix_int_t N, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, pastix_complex32_t *U, pastix_int_t ldu, pastix_complex32_t *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using partial ...
Definition: core_clrothu.c:197
void core_cgetmo(int m, int n, const pastix_complex32_t *A, int lda, pastix_complex32_t *B, int ldb)
Transposes a m-by-n matrix out of place using an extra workspace of size m-by-n.
Definition: core_cgetmo.c:49
void core_cpxtrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting LL^t factorization of the matrix n-by-n A = L * L^t .
int core_ctqrcp(float tol, pastix_int_t maxrank, int unused, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *jpvt, pastix_complex32_t *tau, pastix_complex32_t *work, pastix_int_t lwork, float *rwork)
Compute a randomized QR factorization with truncated updates.
Definition: core_ctqrcp.c:99
void core_cgetrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting LU factorization of the matrix m-by-n A = L * U.
int core_cpqrcp(float tol, pastix_int_t maxrank, int full_update, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *jpvt, pastix_complex32_t *tau, pastix_complex32_t *work, pastix_int_t lwork, float *rwork)
Compute a rank-reavealing QR factorization.
Definition: core_cpqrcp.c:105
int cpucblk_cpotrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the Cholesky factorization of a given panel and submit tasks for the subsequent updates.
void cpucblk_cpotrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex32_t *work, pastix_int_t lwork)
Apply the updates of the cholesky factorisation of a given panel.
void cpucblk_csytrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex32_t *work)
Apply the updates of the LDL^t factorisation of a given panel.
void core_csytrfsp1d_gemm(const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const pastix_complex32_t *L, pastix_complex32_t *C, pastix_complex32_t *work)
void cpucblk_cgetrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex32_t *work, pastix_int_t lwork)
Apply the updates of the LU factorisation of a given panel.
int cpucblk_cgetrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LU factorization of a given panel and submit tasks for the subsequent updates.
int cpucblk_csytrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LDL^t factorization of a given panel and submit tasks for the subsequent updates.
void cpucblk_cfillin(pastix_coefside_t side, const SolverMatrix *solvmtx, const pastix_bcsc_t *bcsc, pastix_int_t itercblk)
Initialize the coeftab structure from the internal bcsc.
void cpucblk_crequest_cleanup(pastix_coefside_t side, pastix_int_t sched, SolverMatrix *solvmtx)
Waitall routine for current cblk request.
void cpucblk_crelease_rhs_fwd_deps(const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void cpucblk_calloc_lr(pastix_coefside_t side, SolverCblk *cblk, int rkmax)
Allocate the cblk structure to store the coefficient.
void cpucblk_cmemory(pastix_coefside_t side, const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_int_t *orig, pastix_int_t *gain)
Return the memory gain of the low-rank form over the full-rank form for a single column-block.
pastix_fixdbl_t cpucblk_cgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, pastix_complex32_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
pastix_fixdbl_t cpublok_ctrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, pastix_int_t blok_m, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
Definition: core_ctrsmsp.c:690
int cpucblk_csytrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *Dlt, pastix_complex32_t *work, pastix_int_t lwork)
Perform the LDL^t factorization of a given panel and apply all its updates.
int cpucblk_cpotrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_int_t lwork)
Perform the Cholesky factorization of a given panel and apply all its updates.
int cpucblk_cgetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_int_t lwork)
Perform the LU factorization of a given panel and apply all its updates.
void cpucblk_crequest_rhs_fwd_cleanup(const args_solve_t *enums, pastix_int_t sched, SolverMatrix *solvmtx, pastix_rhs_t rhsb)
Waitall routine for current cblk request.
int cpucblk_cgeaddsp1d(const SolverCblk *cblk1, SolverCblk *cblk2, const pastix_complex32_t *L1, pastix_complex32_t *L2, const pastix_complex32_t *U1, pastix_complex32_t *U2)
Add two column blocks together.
Definition: core_cgeadd.c:245
void cpucblk_cuncompress(pastix_coefside_t side, SolverCblk *cblk)
Uncompress a single column block from low-rank format to full-rank format.
int cpucblk_cincoming_rhs_fwd_deps(int rank, const args_solve_t *enums, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
void cpucblk_calloc_fr(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
int cpucblk_cincoming_rhs_bwd_deps(int rank, const args_solve_t *enums, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
void cpucblk_cscalo(pastix_trans_t trans, const SolverCblk *cblk, void *dataL, void *dataLD)
Copy the L term with scaling for the two-terms algorithm.
Definition: core_cscalo.c:171
void cpucblk_crecv_rhs_forward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_rhs_t b)
Receive the rhs associated to a cblk->lcolidx to the remote node.
void cpucblk_crelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
int cpucblk_cpotrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Compute the Cholesky factorization of one panel.
pastix_fixdbl_t cpucblk_cadd(pastix_complex32_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const void *A, void *B, pastix_complex32_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add two column bloks in full rank format.
Definition: cpucblk_cadd.c:391
void cpucblk_ctrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
Definition: core_ctrsmsp.c:356
void cpublok_cscalo(pastix_trans_t trans, const SolverCblk *cblk, pastix_int_t blok_m, const void *A, const void *dataD, void *dataB)
Copy the lower terms of the block with scaling for the two-terms algorithm.
Definition: core_cscalo.c:316
void cpucblk_cdump(pastix_coefside_t side, const SolverCblk *cblk, FILE *stream)
Dump a single column block into a FILE in a human readale format.
Definition: coeftab_cinit.c:54
void cpucblk_crequest_rhs_bwd_cleanup(const args_solve_t *enums, pastix_int_t sched, SolverMatrix *solvmtx, pastix_rhs_t rhsb)
Waitall routine for current cblk request.
void cpucblk_csend_rhs_backward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t b)
Send the rhs associated to a cblk->lcolidx to the remote node.
pastix_fixdbl_t cpublok_ccompress(const pastix_lr_t *lowrank, pastix_int_t M, pastix_int_t N, pastix_lrblock_t *blok)
Compress a single block from full-rank to low-rank format.
pastix_fixdbl_t cpublok_cadd(pastix_complex32_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB, pastix_int_t blokA_m, pastix_int_t blokB_m, const void *A, void *B, pastix_complex32_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add two bloks.
Definition: cpublok_cadd.c:431
void cpucblk_csend_rhs_forward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t b)
Send the rhs associated to a cblk->lcolidx to the remote node.
void cpucblk_crelease_rhs_bwd_deps(const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void cpucblk_cinit(pastix_coefside_t side, const SolverMatrix *solvmtx, const pastix_bcsc_t *bcsc, pastix_int_t itercblk, const char *directory)
Fully initialize a single cblk.
int cpucblk_cdiff(pastix_coefside_t side, const SolverCblk *cblkA, SolverCblk *cblkB)
Compare two column blocks in full-rank format.
Definition: cpucblk_cdiff.c:63
void cpucblk_calloc_lrws(const SolverCblk *cblk, pastix_lrblock_t *lrblok, pastix_complex32_t *ws)
Initialize lrblock structure from a workspace for all blocks of the cblk associated.
Definition: cpucblk_cinit.c:98
void cpucblk_calloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
int cpucblk_csytrfsp1d_sytrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Computes the LDL^t factorization of the diagonal block in a panel.
int cpucblk_csytrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLt)
Compute the LDL^t factorization of one panel.
void cpucblk_cfree(pastix_coefside_t side, SolverCblk *cblk)
Free the cblk structure that store the coefficient.
int cpucblk_cincoming_deps(int mt_flag, pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
int cpucblk_cgetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *U)
Compute the LU factorization of one panel.
int cpucblk_cpotrfsp1d_potrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the Cholesky factorization of the diagonal block in a panel.
void cpucblk_crecv_rhs_backward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t b)
Receive the rhs associated to a cblk->lcolidx to the remote node.
void cpucblk_cgetschur(const SolverCblk *cblk, int upper_part, pastix_complex32_t *S, pastix_int_t lds)
Extract a cblk panel of the Schur complement to a dense lapack form.
pastix_int_t cpucblk_ccompress(const SolverMatrix *solvmtx, pastix_coefside_t side, int max_ilulvl, SolverCblk *cblk)
Compress a single column block from full-rank to low-rank format.
int cpucblk_cgetrfsp1d_getrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *U)
Compute the LU factorization of the diagonal block in a panel.
pastix_fixdbl_t cpublok_cgemmsp(pastix_trans_t trans, const SolverCblk *cblk, SolverCblk *fcblk, pastix_int_t blok_mk, pastix_int_t blok_nk, pastix_int_t blok_mn, const void *A, const void *B, void *C, const pastix_lr_t *lowrank)
Compute the CPU gemm associated to a couple of off-diagonal blocks.
Structure to define the type of function to use for the low-rank kernels and their parameters.
The block low-rank structure to hold a matrix in low-rank form.
void solve_cblk_ctrsmsp_backward(const args_solve_t *enums, SolverMatrix *datacode, SolverCblk *cblk, pastix_rhs_t b)
Apply a backward solve related to one cblk to all the right hand side.
void solve_cblk_cdiag(const SolverCblk *cblk, const void *dataA, int nrhs, pastix_complex32_t *b, int ldb, pastix_complex32_t *work)
Apply the diagonal solve related to one cblk to all the right hand side.
void solve_blok_ctrsm(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, int nrhs, const void *dataA, pastix_complex32_t *b, int ldb)
Apply a solve trsm update related to a diagonal block of the matrix A.
Definition: solve_ctrsmsp.c:76
void solve_cblk_ctrsmsp_forward(const args_solve_t *enums, SolverMatrix *datacode, const SolverCblk *cblk, pastix_rhs_t b)
Apply a forward solve related to one cblk to all the right hand side.
void solve_blok_cgemm(pastix_side_t side, pastix_trans_t trans, pastix_int_t nrhs, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcbk, const void *dataA, const pastix_complex32_t *B, pastix_int_t ldb, pastix_complex32_t *C, pastix_int_t ldc)
Apply a solve gemm update related to a single block of the matrix A.
enum pastix_diag_e pastix_diag_t
Diagonal.
enum pastix_uplo_e pastix_uplo_t
Upper/Lower part.
enum pastix_side_e pastix_side_t
Side of the operation.
enum pastix_trans_e pastix_trans_t
Transpostion.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
Main PaStiX RHS structure.
Definition: pastixdata.h:155
Arguments for the solve.
Definition: solver.h:88
Solver block structure.
Definition: solver.h:141
Solver column block structure.
Definition: solver.h:161
Solver column block structure.
Definition: solver.h:203