PaStiX Handbook  6.2.1
pastix_ccores.h
Go to the documentation of this file.
1 /**
2  * @file pastix_ccores.h
3  *
4  * PaStiX kernel header.
5  *
6  * @copyright 2011-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
7  * Univ. Bordeaux. All rights reserved.
8  *
9  * @version 6.2.0
10  * @author Mathieu Faverge
11  * @author Pierre Ramet
12  * @author Xavier Lacoste
13  * @author Esragul Korkmaz
14  * @author Gregoire Pichon
15  * @author Tony Delarue
16  * @date 2021-03-30
17  * @generated from /builds/solverstack/pastix/kernels/pastix_zcores.h, normal z -> c, Tue Apr 12 09:38:26 2022
18  *
19  */
20 #ifndef _pastix_ccores_h_
21 #define _pastix_ccores_h_
22 
23 #ifndef DOXYGEN_SHOULD_SKIP_THIS
24 #define pastix_cblk_lock( cblk_ ) pastix_atomic_lock( &((cblk_)->lock) )
25 #define pastix_cblk_unlock( cblk_ ) pastix_atomic_unlock( &((cblk_)->lock) )
26 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
27 
28 /**
29  * @addtogroup kernel_blas_lapack
30  * @{
31  * This module contains all the BLAS and LAPACK-like kernels that are working
32  * on lapack layout matrices.
33  *
34  * @name PastixComplex32 BLAS kernels
35  * @{
36  */
37 void core_cplrnt( int m, int n, pastix_complex32_t *A, int lda,
38  int gM, int m0, int n0, unsigned long long int seed );
39 void core_cgetmo( int m, int n,
40  const pastix_complex32_t *A, int lda,
41  pastix_complex32_t *B, int ldb );
42 int core_cgeadd( pastix_trans_t trans, pastix_int_t M, pastix_int_t N,
43  pastix_complex32_t alpha, const pastix_complex32_t *A, pastix_int_t LDA,
44  pastix_complex32_t beta, pastix_complex32_t *B, pastix_int_t LDB );
45 int core_cgemdm( pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K,
46  pastix_complex32_t alpha, const pastix_complex32_t *A, int LDA,
47  const pastix_complex32_t *B, int LDB,
48  pastix_complex32_t beta, pastix_complex32_t *C, int LDC,
49  const pastix_complex32_t *D, int incD,
50  pastix_complex32_t *WORK, int LWORK );
51 int core_cpqrcp( float tol, pastix_int_t maxrank, int full_update, pastix_int_t nb,
52  pastix_int_t m, pastix_int_t n,
53  pastix_complex32_t *A, pastix_int_t lda,
54  pastix_int_t *jpvt, pastix_complex32_t *tau,
55  pastix_complex32_t *work, pastix_int_t lwork, float *rwork );
56 int core_crqrcp( float tol, pastix_int_t maxrank, int refine, pastix_int_t nb,
57  pastix_int_t m, pastix_int_t n,
58  pastix_complex32_t *A, pastix_int_t lda,
59  pastix_int_t *jpvt, pastix_complex32_t *tau,
60  pastix_complex32_t *work, pastix_int_t lwork, float *rwork );
61 int core_crqrrt( float tol, pastix_int_t maxrank, pastix_int_t nb,
62  pastix_int_t m, pastix_int_t n,
63  pastix_complex32_t *A, pastix_int_t lda, pastix_complex32_t *tau,
64  pastix_complex32_t *B, pastix_int_t ldb, pastix_complex32_t *tau_b,
65  pastix_complex32_t *work, pastix_int_t lwork, float normA );
66 int core_ctqrcp( float tol, pastix_int_t maxrank, int unused, pastix_int_t nb,
67  pastix_int_t m, pastix_int_t n,
68  pastix_complex32_t *A, pastix_int_t lda,
69  pastix_int_t *jpvt, pastix_complex32_t *tau,
70  pastix_complex32_t *work, pastix_int_t lwork, float *rwork );
71 int core_ctradd( pastix_uplo_t uplo, pastix_trans_t trans, pastix_int_t M, pastix_int_t N,
72  pastix_complex32_t alpha, const pastix_complex32_t *A, pastix_int_t LDA,
73  pastix_complex32_t beta, pastix_complex32_t *B, pastix_int_t LDB);
74 int core_cscalo( pastix_trans_t trans, pastix_int_t M, pastix_int_t N,
75  const pastix_complex32_t *A, pastix_int_t lda,
76  const pastix_complex32_t *D, pastix_int_t ldd,
77  pastix_complex32_t *B, pastix_int_t ldb );
78 
79 /**
80  * @}
81  * @name PastixComplex32 Othogonalization kernels for low-rank updates
82  * @{
83  */
84 pastix_fixdbl_t
85 core_clrorthu_fullqr( pastix_int_t M, pastix_int_t N, pastix_int_t rank,
86  pastix_complex32_t *U, pastix_int_t ldu,
87  pastix_complex32_t *V, pastix_int_t ldv );
88 pastix_fixdbl_t
89 core_clrorthu_partialqr( pastix_int_t M, pastix_int_t N,
90  pastix_int_t r1, pastix_int_t *r2ptr,
91  pastix_int_t offx, pastix_int_t offy,
92  pastix_complex32_t *U, pastix_int_t ldu,
93  pastix_complex32_t *V, pastix_int_t ldv );
94 pastix_fixdbl_t
95 core_clrorthu_cgs( pastix_int_t M1, pastix_int_t N1,
96  pastix_int_t M2, pastix_int_t N2,
97  pastix_int_t r1, pastix_int_t *r2ptr,
98  pastix_int_t offx, pastix_int_t offy,
99  pastix_complex32_t *U, pastix_int_t ldu,
100  pastix_complex32_t *V, pastix_int_t ldv );
101 
102 /**
103  * @}
104  * @name PastixComplex32 LAPACK kernels
105  * @{
106  */
107 void core_cpotrfsp( pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda,
108  pastix_int_t *nbpivot, float criterion );
109 void core_cpxtrfsp( pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda,
110  pastix_int_t *nbpivot, float criterion );
111 void core_cgetrfsp( pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda,
112  pastix_int_t *nbpivot, float criterion );
113 void core_chetrfsp( pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda,
114  pastix_int_t *nbpivot, float criterion );
115 void core_csytrfsp( pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda,
116  pastix_int_t *nbpivot, float criterion );
117 
118 /**
119  * @}
120  * @}
121  *
122  * @addtogroup kernel_fact
123  * @{
124  * This module contains all the kernel working at the solver matrix structure
125  * level for the numerical factorization step.
126  *
127  * @name PastixComplex32 cblk-BLAS CPU kernels
128  * @{
129  */
130 
131 int cpucblk_cgeaddsp1d( const SolverCblk *cblk1, SolverCblk *cblk2,
132  const pastix_complex32_t *L1, pastix_complex32_t *L2,
133  const pastix_complex32_t *U1, pastix_complex32_t *U2 );
134 
135 pastix_fixdbl_t cpucblk_cgemmsp( pastix_coefside_t sideA, pastix_trans_t trans,
136  const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk,
137  const void *A, const void *B, void *C,
138  pastix_complex32_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank );
140  pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk,
141  const void *A, void *C, const pastix_lr_t *lowrank );
142 void cpucblk_cscalo ( pastix_trans_t trans, SolverCblk *cblk, void* dataL, void* dataLD );
143 
144 pastix_fixdbl_t cpublok_cgemmsp( pastix_trans_t trans,
145  const SolverCblk *cblk, SolverCblk *fcblk,
146  pastix_int_t blok_mk, pastix_int_t blok_nk, pastix_int_t blok_mn,
147  const void *A, const void *B, void *C,
148  const pastix_lr_t *lowrank );
149 pastix_fixdbl_t cpublok_ctrsmsp( pastix_side_t side, pastix_uplo_t uplo,
150  pastix_trans_t trans, pastix_diag_t diag,
151  const SolverCblk *cblk, pastix_int_t blok_m,
152  const void *A, void *C,
153  const pastix_lr_t *lowrank );
154 void cpublok_cscalo ( pastix_trans_t trans,
155  SolverCblk *cblk, pastix_int_t blok_m,
156  const void *A, const void *dataD, void *dataB );
157 
158 /**
159  * @}
160  * @name PastixComplex32 cblk LU kernels
161  * @{
162  */
163 int cpucblk_cgetrfsp1d_getrf( SolverMatrix *solvmtx, SolverCblk *cblk,
164  void *L, void *U );
165 int cpucblk_cgetrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk,
166  void *L, void *U );
167 int cpucblk_cgetrfsp1d ( SolverMatrix *solvmtx, SolverCblk *cblk,
168  pastix_complex32_t *work, pastix_int_t lwork );
169 
170 /**
171  * @}
172  * @name PastixComplex32 cblk Cholesky kernels
173  * @{
174  */
175 int cpucblk_cpotrfsp1d_potrf( SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL );
176 int cpucblk_cpotrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL );
177 int cpucblk_cpotrfsp1d ( SolverMatrix *solvmtx, SolverCblk *cblk,
178  pastix_complex32_t *work, pastix_int_t lwork );
179 
180 /**
181  * @}
182  * @name PastixComplex32 cblk LDL^h kernels
183  * @{
184  */
185 int cpucblk_chetrfsp1d_hetrf( SolverMatrix *solvmtx, SolverCblk *cblk, void *L );
186 int cpucblk_chetrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk,
187  void *L, void *DLh );
188 int cpucblk_chetrfsp1d ( SolverMatrix *solvmtx, SolverCblk *cblk,
189  pastix_complex32_t *work1, pastix_complex32_t *work2, pastix_int_t lwork );
190 
191 /**
192  * @}
193  * @name PastixComplex32 cblk LL^t kernels
194  * @{
195  */
196 int cpucblk_cpxtrfsp1d_pxtrf( SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL );
197 int cpucblk_cpxtrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL );
198 int cpucblk_cpxtrfsp1d ( SolverMatrix *solvmtx, SolverCblk *cblk,
199  pastix_complex32_t *work, pastix_int_t lwork );
200 
201 /**
202  * @}
203  * @name PastixComplex32 cblk LDL^t kernels
204  * @{
205  */
206 int cpucblk_csytrfsp1d_sytrf( SolverMatrix *solvmtx, SolverCblk *cblk, void *L );
207 int cpucblk_csytrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk,
208  void *L, void *DLt );
209 int cpucblk_csytrfsp1d ( SolverMatrix *solvmtx, SolverCblk *cblk,
210  pastix_complex32_t *work1, pastix_complex32_t *work2, pastix_int_t lwork );
211 
212 /**
213  * @}
214  * @name PastixComplex32 initialization and additionnal routines
215  * @{
216  */
217 void cpucblk_calloc_lrws( const SolverCblk *cblk,
218  pastix_lrblock_t *lrblok,
219  pastix_complex32_t *ws );
221  SolverCblk *cblk,
222  int rkmax );
224  SolverCblk *cblk );
226  SolverCblk *cblk );
227 void cpucblk_cfree ( pastix_coefside_t side,
228  SolverCblk *cblk );
230  const SolverMatrix *solvmtx,
231  const pastix_bcsc_t *bcsc,
232  pastix_int_t itercblk );
233 void cpucblk_cinit ( pastix_coefside_t side,
234  const SolverMatrix *solvmtx,
235  const pastix_bcsc_t *bcsc,
236  pastix_int_t itercblk,
237  const char *directory );
238 void cpucblk_cgetschur( const SolverCblk *cblk,
239  int upper_part,
240  pastix_complex32_t *S,
241  pastix_int_t lds );
242 void cpucblk_cdump ( pastix_coefside_t side,
243  const SolverCblk *cblk,
244  FILE *stream );
246  const SolverCblk *cblkA,
247  SolverCblk *cblkB );
248 void cpucblk_cadd ( pastix_coefside_t side,
249  float alpha,
250  const SolverCblk *cblkA,
251  SolverCblk *cblkB,
252  const pastix_lr_t *lowrank );
253 void cpucblk_cadd_recv( pastix_coefside_t side,
254  float alpha,
255  SolverCblk *cblk );
256 /**
257  * @}
258  * @name PastixComplex32 MPI routines
259  * @{
260  */
261 int cpucblk_cincoming_deps( int mt_flag,
262  pastix_coefside_t side,
263  SolverMatrix *solvmtx,
264  SolverCblk *cblk );
266  SolverMatrix *solvmtx,
267  const SolverCblk *cblk,
268  SolverCblk *fcbk );
270  pastix_int_t sched,
271  SolverMatrix *solvmtx );
272 
273 void cpucblk_cmpi_progress( pastix_coefside_t side,
274  SolverMatrix *solvmtx,
275  int threadid );
276 
277 void cpucblk_csend_rhs_forward( const SolverMatrix *solvmtx,
278  SolverCblk *cblk,
279  pastix_complex32_t *b );
280 void cpucblk_crecv_rhs_forward( const SolverMatrix *solvmtx,
281  SolverCblk *cblk,
282  pastix_complex32_t *work,
283  pastix_int_t nrhs,
284  pastix_complex32_t *b,
285  pastix_int_t ldb );
286 void cpucblk_csend_rhs_backward( const SolverMatrix *solvmtx,
287  SolverCblk *cblk,
288  pastix_complex32_t *b );
289 void cpucblk_crecv_rhs_backward( const SolverMatrix *solvmtx,
290  SolverCblk *cblk,
291  pastix_complex32_t *b );
292 
293 /**
294  * @}
295  * @name PastixComplex32 compression/uncompression routines
296  * @{
297  */
298 pastix_fixdbl_t cpublok_ccompress( const pastix_lr_t *lowrank,
299  pastix_int_t M, pastix_int_t N,
300  pastix_lrblock_t *blok );
301 pastix_int_t cpucblk_ccompress( const SolverMatrix *solvmtx,
302  pastix_coefside_t side,
303  int max_ilulvl,
304  SolverCblk *cblk );
306  SolverCblk *cblk );
308  SolverMatrix *solvmtx,
309  SolverCblk *cblk,
310  pastix_int_t *orig,
311  pastix_int_t *gain );
312 
313 /**
314  * @}
315  * @}
316  *
317  * @addtogroup kernel_solve
318  * @{
319  * This module contains all the kernel working on the solver matrix structure
320  * for the solve step.
321  */
322 
324  pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk,
325  int nrhs, const void* dataA, pastix_complex32_t *b, int ldb );
327  pastix_int_t nrhs, const SolverCblk *cblk, const SolverBlok *blok,
328  SolverCblk *fcbk, const void* dataA, const pastix_complex32_t *B, pastix_int_t ldb,
329  pastix_complex32_t *C, pastix_int_t ldc );
330 
332  pastix_trans_t trans, pastix_diag_t diag,
333  const SolverMatrix *datacode, const SolverCblk *cblk,
334  int nrhs, pastix_complex32_t *b, int ldb );
336  pastix_trans_t trans, pastix_diag_t diag,
337  const SolverMatrix *datacode, SolverCblk *cblk,
338  int nrhs, pastix_complex32_t *b, int ldb );
339 
340 void solve_cblk_cdiag( const SolverCblk *cblk,
341  int nrhs,
342  pastix_complex32_t *b,
343  int ldb,
344  pastix_complex32_t *work );
345 /**
346  * @}
347  */
348 
349 
350 /**
351  * @addtogroup kernel_fact_null
352  * @{
353  * This module contains the three terms update functions for the LDL^t and
354  * LDL^h factorizations.
355  */
356 void core_chetrfsp1d_gemm( const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk,
357  const pastix_complex32_t *L, pastix_complex32_t *C,
358  pastix_complex32_t *work );
359 void core_csytrfsp1d_gemm( const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk,
360  const pastix_complex32_t *L, pastix_complex32_t *C,
361  pastix_complex32_t *work );
362 
363 /**
364  * @}
365  */
366 
367 #endif /* _pastix_ccores_h_ */
core_cgemdm
int core_cgemdm(pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K, pastix_complex32_t alpha, const pastix_complex32_t *A, int LDA, const pastix_complex32_t *B, int LDB, pastix_complex32_t beta, pastix_complex32_t *C, int LDC, const pastix_complex32_t *D, int incD, pastix_complex32_t *WORK, int LWORK)
Perform one of the following matrix-matrix operations.
Definition: core_cgemdm.c:139
pastix_uplo_t
enum pastix_uplo_e pastix_uplo_t
Upper/Lower part.
cpucblk_chetrfsp1d_hetrf
int cpucblk_chetrfsp1d_hetrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Computes the LDL^h factorization of the diagonal block in a panel.
Definition: core_chetrfsp.c:234
pastix_lr_s
Structure to define the type of function to use for the low-rank kernels and their parameters.
Definition: pastix_lowrank.h:147
cpublok_ccompress
pastix_fixdbl_t cpublok_ccompress(const pastix_lr_t *lowrank, pastix_int_t M, pastix_int_t N, pastix_lrblock_t *blok)
Compress a single block from full-rank to low-rank format.
Definition: cpucblk_ccompress.c:57
core_crqrrt
int core_crqrrt(float tol, pastix_int_t maxrank, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_complex32_t *tau, pastix_complex32_t *B, pastix_int_t ldb, pastix_complex32_t *tau_b, pastix_complex32_t *work, pastix_int_t lwork, float normA)
Compute a randomized QR factorization with rotation technique.
Definition: core_crqrrt.c:126
cpucblk_csytrfsp1d_panel
int cpucblk_csytrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLt)
Compute the LDL^t factorization of one panel.
Definition: core_csytrfsp.c:432
pastix_bcsc_s
Internal column block distributed CSC matrix.
Definition: bcsc.h:37
cpucblk_cgemmsp
pastix_fixdbl_t cpucblk_cgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, pastix_complex32_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
Definition: core_cgemmsp.c:1512
cpucblk_cuncompress
void cpucblk_cuncompress(pastix_coefside_t side, SolverCblk *cblk)
Uncompress a single column block from low-rank format to full-rank format.
Definition: cpucblk_ccompress.c:197
pastix_solv_mode_t
enum pastix_solv_mode_e pastix_solv_mode_t
Solve Schur modes.
cpucblk_cgetrfsp1d
int cpucblk_cgetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_int_t lwork)
Perform the LU factorization of a given panel and apply all its updates.
Definition: core_cgetrfsp.c:355
core_cpotrfsp
void core_cpotrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivot, float criterion)
Compute the block static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t .
Definition: core_cpotrfsp.c:144
cpucblk_cpxtrfsp1d_pxtrf
int cpucblk_cpxtrfsp1d_pxtrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the LL^t factorization of the diagonal block in a panel.
Definition: core_cpxtrfsp.c:212
cpucblk_crelease_deps
void cpucblk_crelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
Definition: cpucblk_cmpi_coeftab.c:558
solver_cblk_s
Solver column block structure.
Definition: solver.h:127
cpublok_cgemmsp
pastix_fixdbl_t cpublok_cgemmsp(pastix_trans_t trans, const SolverCblk *cblk, SolverCblk *fcblk, pastix_int_t blok_mk, pastix_int_t blok_nk, pastix_int_t blok_mn, const void *A, const void *B, void *C, const pastix_lr_t *lowrank)
Compute the CPU gemm associated to a couple of off-diagonal blocks.
Definition: core_cgemmsp.c:1655
pastix_coefside_t
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
core_ctqrcp
int core_ctqrcp(float tol, pastix_int_t maxrank, int unused, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *jpvt, pastix_complex32_t *tau, pastix_complex32_t *work, pastix_int_t lwork, float *rwork)
Compute a randomized QR factorization with truncated updates.
Definition: core_ctqrcp.c:99
cpucblk_calloc
void cpucblk_calloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
Definition: cpucblk_cinit.c:264
pastix_trans_t
enum pastix_trans_e pastix_trans_t
Transpostion.
solver_blok_s
Solver block structure.
Definition: solver.h:107
cpucblk_crecv_rhs_backward
void cpucblk_crecv_rhs_backward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *b)
Receive the rhs associated to a cblk->lcolidx to the remote node.
Definition: cpucblk_cmpi_rhs.c:136
cpucblk_cpxtrfsp1d
int cpucblk_cpxtrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_int_t lwork)
Perform the LL^t factorization of a given panel and apply all its updates.
Definition: core_cpxtrfsp.c:326
pastix_lrblock_s
The block low-rank structure to hold a matrix in low-rank form.
Definition: pastix_lowrank.h:112
cpucblk_cscalo
void cpucblk_cscalo(pastix_trans_t trans, SolverCblk *cblk, void *dataL, void *dataLD)
Copy the L term with scaling for the two-terms algorithm.
Definition: core_cscalo.c:170
cpucblk_chetrfsp1d_panel
int cpucblk_chetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLh)
Compute the LDL^h factorization of one panel.
Definition: core_chetrfsp.c:437
cpucblk_cadd
void cpucblk_cadd(pastix_coefside_t side, float alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const pastix_lr_t *lowrank)
Add two column bloks in full rank format.
Definition: cpucblk_cadd.c:358
cpucblk_chetrfsp1d
int cpucblk_chetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work1, pastix_complex32_t *work2, pastix_int_t lwork)
Perform the LDL^h factorization of a given panel and apply all its updates.
Definition: core_chetrfsp.c:492
core_clrorthu_partialqr
pastix_fixdbl_t core_clrorthu_partialqr(pastix_int_t M, pastix_int_t N, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, pastix_complex32_t *U, pastix_int_t ldu, pastix_complex32_t *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using partial ...
Definition: core_clrothu.c:193
cpucblk_csytrfsp1d
int cpucblk_csytrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work1, pastix_complex32_t *work2, pastix_int_t lwork)
Perform the LDL^t factorization of a given panel and apply all its updates.
Definition: core_csytrfsp.c:487
solve_cblk_cdiag
void solve_cblk_cdiag(const SolverCblk *cblk, int nrhs, pastix_complex32_t *b, int ldb, pastix_complex32_t *work)
Apply the diagonal solve related to one cblk to all the right hand side.
Definition: solve_ctrsmsp.c:616
cpucblk_cdump
void cpucblk_cdump(pastix_coefside_t side, const SolverCblk *cblk, FILE *stream)
Dump a single column block into a FILE in a human readale format.
Definition: coeftab_cinit.c:51
cpucblk_cgetschur
void cpucblk_cgetschur(const SolverCblk *cblk, int upper_part, pastix_complex32_t *S, pastix_int_t lds)
Extract a cblk panel of the Schur complement to a dense lapack form.
Definition: cpucblk_cschur.c:178
core_clrorthu_fullqr
pastix_fixdbl_t core_clrorthu_fullqr(pastix_int_t M, pastix_int_t N, pastix_int_t rank, pastix_complex32_t *U, pastix_int_t ldu, pastix_complex32_t *V, pastix_int_t ldv)
Try to orthognalize the u part of the low-rank form, and update the v part accordingly using full QR.
Definition: core_clrothu.c:83
cpucblk_cpxtrfsp1d_panel
int cpucblk_cpxtrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the LL^t factorization of one panel.
Definition: core_cpxtrfsp.c:284
core_crqrcp
int core_crqrcp(float tol, pastix_int_t maxrank, int refine, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *jpvt, pastix_complex32_t *tau, pastix_complex32_t *work, pastix_int_t lwork, float *rwork)
Compute a randomized QR factorization.
Definition: core_crqrcp.c:114
cpucblk_ctrsmsp
void cpucblk_ctrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
Definition: core_ctrsmsp.c:356
core_cgetrfsp
void core_cgetrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivot, float criterion)
Compute the block static pivoting LU factorization of the matrix m-by-n A = L * U.
Definition: core_cgetrfsp.c:138
cpucblk_cgetrfsp1d_panel
int cpucblk_cgetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *U)
Compute the LU factorization of one panel.
Definition: core_cgetrfsp.c:305
core_cpxtrfsp
void core_cpxtrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivot, float criterion)
Compute the block static pivoting LL^t factorization of the matrix n-by-n A = L * L^t .
Definition: core_cpxtrfsp.c:136
cpucblk_crequest_cleanup
void cpucblk_crequest_cleanup(pastix_coefside_t side, pastix_int_t sched, SolverMatrix *solvmtx)
Waitall routine for current cblk request.
Definition: cpucblk_cmpi_coeftab.c:609
solve_cblk_ctrsmsp_backward
void solve_cblk_ctrsmsp_backward(pastix_solv_mode_t mode, pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverMatrix *datacode, SolverCblk *cblk, int nrhs, pastix_complex32_t *b, int ldb)
Apply a backward solve related to one cblk to all the right hand side.
Definition: solve_ctrsmsp.c:471
cpucblk_calloc_lrws
void cpucblk_calloc_lrws(const SolverCblk *cblk, pastix_lrblock_t *lrblok, pastix_complex32_t *ws)
Initialize lrblock structure from a workspace from all blocks of the cblk associated.
Definition: cpucblk_cinit.c:96
cpucblk_ccompress
pastix_int_t cpucblk_ccompress(const SolverMatrix *solvmtx, pastix_coefside_t side, int max_ilulvl, SolverCblk *cblk)
Compress a single column block from full-rank to low-rank format.
Definition: cpucblk_ccompress.c:110
core_csytrfsp1d_gemm
void core_csytrfsp1d_gemm(const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const pastix_complex32_t *L, pastix_complex32_t *C, pastix_complex32_t *work)
Definition: core_csytrfsp.c:319
cpucblk_cdiff
int cpucblk_cdiff(pastix_coefside_t side, const SolverCblk *cblkA, SolverCblk *cblkB)
Compare two column blocks in full-rank format.
Definition: cpucblk_cdiff.c:63
core_clrorthu_cgs
pastix_fixdbl_t core_clrorthu_cgs(pastix_int_t M1, pastix_int_t N1, pastix_int_t M2, pastix_int_t N2, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, pastix_complex32_t *U, pastix_int_t ldu, pastix_complex32_t *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using CGS.
Definition: core_clrothu.c:419
cpucblk_cpotrfsp1d_potrf
int cpucblk_cpotrfsp1d_potrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the Cholesky factorization of the diagonal block in a panel.
Definition: core_cpotrfsp.c:220
cpucblk_cinit
void cpucblk_cinit(pastix_coefside_t side, const SolverMatrix *solvmtx, const pastix_bcsc_t *bcsc, pastix_int_t itercblk, const char *directory)
Fully initialize a single cblk.
Definition: coeftab_cinit.c:251
core_csytrfsp
void core_csytrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivot, float criterion)
Compute the block static pivoting factorization of the symmetric matrix n-by-n A such that A = L * D ...
Definition: core_csytrfsp.c:141
cpucblk_csytrfsp1d_sytrf
int cpucblk_csytrfsp1d_sytrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Computes the LDL^t factorization of the diagonal block in a panel.
Definition: core_csytrfsp.c:232
cpucblk_cincoming_deps
int cpucblk_cincoming_deps(int mt_flag, pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
Definition: cpucblk_cmpi_coeftab.c:499
cpucblk_calloc_lr
void cpucblk_calloc_lr(pastix_coefside_t side, SolverCblk *cblk, int rkmax)
Allocate the cblk structure to store the coefficient.
Definition: cpucblk_cinit.c:143
pastix_diag_t
enum pastix_diag_e pastix_diag_t
Diagonal.
solve_blok_cgemm
void solve_blok_cgemm(pastix_side_t side, pastix_trans_t trans, pastix_int_t nrhs, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcbk, const void *dataA, const pastix_complex32_t *B, pastix_int_t ldb, pastix_complex32_t *C, pastix_int_t ldc)
Apply a solve gemm update related to a single block of the matrix A.
Definition: solve_ctrsmsp.c:158
cpublok_cscalo
void cpublok_cscalo(pastix_trans_t trans, SolverCblk *cblk, pastix_int_t blok_m, const void *A, const void *dataD, void *dataB)
Copy the lower terms of the block with scaling for the two-terms algorithm.
Definition: core_cscalo.c:315
cpublok_ctrsmsp
pastix_fixdbl_t cpublok_ctrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, pastix_int_t blok_m, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
Definition: core_ctrsmsp.c:678
cpucblk_cgetrfsp1d_getrf
int cpucblk_cgetrfsp1d_getrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *U)
Compute the LU factorization of the diagonal block in a panel.
Definition: core_cgetrfsp.c:217
core_cgetmo
void core_cgetmo(int m, int n, const pastix_complex32_t *A, int lda, pastix_complex32_t *B, int ldb)
Transposes a m-by-n matrix out of place using an extra workspace of size m-by-n.
Definition: core_cgetmo.c:49
cpucblk_calloc_fr
void cpucblk_calloc_fr(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
Definition: cpucblk_cinit.c:217
core_chetrfsp
void core_chetrfsp(pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *nbpivot, float criterion)
Compute the block static pivoting factorization of the hermitian matrix n-by-n A such that A = L * D ...
Definition: core_chetrfsp.c:142
cpucblk_cpotrfsp1d
int cpucblk_cpotrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_int_t lwork)
Perform the Cholesky factorization of a given panel and apply all its updates.
Definition: core_cpotrfsp.c:334
cpucblk_cfillin
void cpucblk_cfillin(pastix_coefside_t side, const SolverMatrix *solvmtx, const pastix_bcsc_t *bcsc, pastix_int_t itercblk)
Initialize the coeftab structure from the internal bcsc.
Definition: cpucblk_cinit.c:604
core_cplrnt
void core_cplrnt(int m, int n, pastix_complex32_t *A, int lda, int gM, int m0, int n0, unsigned long long int seed)
Generate a random tile.
Definition: core_cplrnt.c:91
cpucblk_cmemory
void cpucblk_cmemory(pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_int_t *orig, pastix_int_t *gain)
Return the memory gain of the low-rank form over the full-rank form for a single column-block.
Definition: cpucblk_ccompress.c:277
core_chetrfsp1d_gemm
void core_chetrfsp1d_gemm(const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const pastix_complex32_t *L, pastix_complex32_t *C, pastix_complex32_t *work)
Definition: core_chetrfsp.c:321
cpucblk_csend_rhs_forward
void cpucblk_csend_rhs_forward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *b)
Send the rhs associated to a cblk->lcolidx to the remote node.
Definition: cpucblk_cmpi_rhs.c:42
cpucblk_cfree
void cpucblk_cfree(pastix_coefside_t side, SolverCblk *cblk)
Free the cblk structure that store the coefficient.
Definition: cpucblk_cinit.c:317
solve_cblk_ctrsmsp_forward
void solve_cblk_ctrsmsp_forward(pastix_solv_mode_t mode, pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverMatrix *datacode, const SolverCblk *cblk, int nrhs, pastix_complex32_t *b, int ldb)
Apply a forward solve related to one cblk to all the right hand side.
Definition: solve_ctrsmsp.c:323
cpucblk_cpotrfsp1d_panel
int cpucblk_cpotrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the Cholesky factorization of one panel.
Definition: core_cpotrfsp.c:292
core_cpqrcp
int core_cpqrcp(float tol, pastix_int_t maxrank, int full_update, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *jpvt, pastix_complex32_t *tau, pastix_complex32_t *work, pastix_int_t lwork, float *rwork)
Compute a rank-reavealing QR factorization.
Definition: core_cpqrcp.c:105
core_cgeadd
int core_cgeadd(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex32_t alpha, const pastix_complex32_t *A, pastix_int_t LDA, pastix_complex32_t beta, pastix_complex32_t *B, pastix_int_t LDB)
Add two matrices together.
Definition: core_cgeadd.c:78
core_ctradd
int core_ctradd(pastix_uplo_t uplo, pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex32_t alpha, const pastix_complex32_t *A, pastix_int_t LDA, pastix_complex32_t beta, pastix_complex32_t *B, pastix_int_t LDB)
Add two triangular matrices together as in PBLAS pctradd.
Definition: core_ctradd.c:79
cpucblk_crecv_rhs_forward
void cpucblk_crecv_rhs_forward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *work, pastix_int_t nrhs, pastix_complex32_t *b, pastix_int_t ldb)
Receive the rhs associated to a cblk->lcolidx to the remote node.
Definition: cpucblk_cmpi_rhs.c:195
pastix_side_t
enum pastix_side_e pastix_side_t
Side of the operation.
solve_blok_ctrsm
void solve_blok_ctrsm(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, int nrhs, const void *dataA, pastix_complex32_t *b, int ldb)
Apply a solve trsm update related to a diagonal block of the matrix A.
Definition: solve_ctrsmsp.c:70
cpucblk_csend_rhs_backward
void cpucblk_csend_rhs_backward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex32_t *b)
Send the rhs associated to a cblk->lcolidx to the remote node.
Definition: cpucblk_cmpi_rhs.c:89
core_cscalo
int core_cscalo(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, const pastix_complex32_t *A, pastix_int_t lda, const pastix_complex32_t *D, pastix_int_t ldd, pastix_complex32_t *B, pastix_int_t ldb)
Scale a matrix by a diagonal out of place.
Definition: core_cscalo.c:73
cpucblk_cgeaddsp1d
int cpucblk_cgeaddsp1d(const SolverCblk *cblk1, SolverCblk *cblk2, const pastix_complex32_t *L1, pastix_complex32_t *L2, const pastix_complex32_t *U1, pastix_complex32_t *U2)
Add two column blocks together.
Definition: core_cgeadd.c:245