PaStiX Handbook  6.2.1
pastix_dcores.h
Go to the documentation of this file.
1 /**
2  * @file pastix_dcores.h
3  *
4  * PaStiX kernel header.
5  *
6  * @copyright 2011-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
7  * Univ. Bordeaux. All rights reserved.
8  *
9  * @version 6.2.0
10  * @author Mathieu Faverge
11  * @author Pierre Ramet
12  * @author Xavier Lacoste
13  * @author Esragul Korkmaz
14  * @author Gregoire Pichon
15  * @author Tony Delarue
16  * @date 2021-03-30
17  * @generated from /builds/solverstack/pastix/kernels/pastix_zcores.h, normal z -> d, Tue Apr 12 09:38:26 2022
18  *
19  */
20 #ifndef _pastix_dcores_h_
21 #define _pastix_dcores_h_
22 
23 #ifndef DOXYGEN_SHOULD_SKIP_THIS
24 #define pastix_cblk_lock( cblk_ ) pastix_atomic_lock( &((cblk_)->lock) )
25 #define pastix_cblk_unlock( cblk_ ) pastix_atomic_unlock( &((cblk_)->lock) )
26 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
27 
28 /**
29  * @addtogroup kernel_blas_lapack
30  * @{
31  * This module contains all the BLAS and LAPACK-like kernels that are working
32  * on lapack layout matrices.
33  *
34  * @name PastixDouble BLAS kernels
35  * @{
36  */
37 void core_dplrnt( int m, int n, double *A, int lda,
38  int gM, int m0, int n0, unsigned long long int seed );
39 void core_dgetmo( int m, int n,
40  const double *A, int lda,
41  double *B, int ldb );
42 int core_dgeadd( pastix_trans_t trans, pastix_int_t M, pastix_int_t N,
43  double alpha, const double *A, pastix_int_t LDA,
44  double beta, double *B, pastix_int_t LDB );
45 int core_dgemdm( pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K,
46  double alpha, const double *A, int LDA,
47  const double *B, int LDB,
48  double beta, double *C, int LDC,
49  const double *D, int incD,
50  double *WORK, int LWORK );
51 int core_dpqrcp( double tol, pastix_int_t maxrank, int full_update, pastix_int_t nb,
52  pastix_int_t m, pastix_int_t n,
53  double *A, pastix_int_t lda,
54  pastix_int_t *jpvt, double *tau,
55  double *work, pastix_int_t lwork, double *rwork );
56 int core_drqrcp( double tol, pastix_int_t maxrank, int refine, pastix_int_t nb,
57  pastix_int_t m, pastix_int_t n,
58  double *A, pastix_int_t lda,
59  pastix_int_t *jpvt, double *tau,
60  double *work, pastix_int_t lwork, double *rwork );
61 int core_drqrrt( double tol, pastix_int_t maxrank, pastix_int_t nb,
62  pastix_int_t m, pastix_int_t n,
63  double *A, pastix_int_t lda, double *tau,
64  double *B, pastix_int_t ldb, double *tau_b,
65  double *work, pastix_int_t lwork, double normA );
66 int core_dtqrcp( double tol, pastix_int_t maxrank, int unused, pastix_int_t nb,
67  pastix_int_t m, pastix_int_t n,
68  double *A, pastix_int_t lda,
69  pastix_int_t *jpvt, double *tau,
70  double *work, pastix_int_t lwork, double *rwork );
71 int core_dtradd( pastix_uplo_t uplo, pastix_trans_t trans, pastix_int_t M, pastix_int_t N,
72  double alpha, const double *A, pastix_int_t LDA,
73  double beta, double *B, pastix_int_t LDB);
74 int core_dscalo( pastix_trans_t trans, pastix_int_t M, pastix_int_t N,
75  const double *A, pastix_int_t lda,
76  const double *D, pastix_int_t ldd,
77  double *B, pastix_int_t ldb );
78 
79 /**
80  * @}
81  * @name PastixDouble Othogonalization kernels for low-rank updates
82  * @{
83  */
84 pastix_fixdbl_t
85 core_dlrorthu_fullqr( pastix_int_t M, pastix_int_t N, pastix_int_t rank,
86  double *U, pastix_int_t ldu,
87  double *V, pastix_int_t ldv );
88 pastix_fixdbl_t
89 core_dlrorthu_partialqr( pastix_int_t M, pastix_int_t N,
90  pastix_int_t r1, pastix_int_t *r2ptr,
91  pastix_int_t offx, pastix_int_t offy,
92  double *U, pastix_int_t ldu,
93  double *V, pastix_int_t ldv );
94 pastix_fixdbl_t
95 core_dlrorthu_cgs( pastix_int_t M1, pastix_int_t N1,
96  pastix_int_t M2, pastix_int_t N2,
97  pastix_int_t r1, pastix_int_t *r2ptr,
98  pastix_int_t offx, pastix_int_t offy,
99  double *U, pastix_int_t ldu,
100  double *V, pastix_int_t ldv );
101 
102 /**
103  * @}
104  * @name PastixDouble LAPACK kernels
105  * @{
106  */
107 void core_dpotrfsp( pastix_int_t n, double *A, pastix_int_t lda,
108  pastix_int_t *nbpivot, double criterion );
109 void core_dpotrfsp( pastix_int_t n, double *A, pastix_int_t lda,
110  pastix_int_t *nbpivot, double criterion );
111 void core_dgetrfsp( pastix_int_t n, double *A, pastix_int_t lda,
112  pastix_int_t *nbpivot, double criterion );
113 void core_dsytrfsp( pastix_int_t n, double *A, pastix_int_t lda,
114  pastix_int_t *nbpivot, double criterion );
115 void core_dsytrfsp( pastix_int_t n, double *A, pastix_int_t lda,
116  pastix_int_t *nbpivot, double criterion );
117 
118 /**
119  * @}
120  * @}
121  *
122  * @addtogroup kernel_fact
123  * @{
124  * This module contains all the kernel working at the solver matrix structure
125  * level for the numerical factorization step.
126  *
127  * @name PastixDouble cblk-BLAS CPU kernels
128  * @{
129  */
130 
131 int cpucblk_dgeaddsp1d( const SolverCblk *cblk1, SolverCblk *cblk2,
132  const double *L1, double *L2,
133  const double *U1, double *U2 );
134 
135 pastix_fixdbl_t cpucblk_dgemmsp( pastix_coefside_t sideA, pastix_trans_t trans,
136  const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk,
137  const void *A, const void *B, void *C,
138  double *work, pastix_int_t lwork, const pastix_lr_t *lowrank );
140  pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk,
141  const void *A, void *C, const pastix_lr_t *lowrank );
142 void cpucblk_dscalo ( pastix_trans_t trans, SolverCblk *cblk, void* dataL, void* dataLD );
143 
144 pastix_fixdbl_t cpublok_dgemmsp( pastix_trans_t trans,
145  const SolverCblk *cblk, SolverCblk *fcblk,
146  pastix_int_t blok_mk, pastix_int_t blok_nk, pastix_int_t blok_mn,
147  const void *A, const void *B, void *C,
148  const pastix_lr_t *lowrank );
149 pastix_fixdbl_t cpublok_dtrsmsp( pastix_side_t side, pastix_uplo_t uplo,
150  pastix_trans_t trans, pastix_diag_t diag,
151  const SolverCblk *cblk, pastix_int_t blok_m,
152  const void *A, void *C,
153  const pastix_lr_t *lowrank );
154 void cpublok_dscalo ( pastix_trans_t trans,
155  SolverCblk *cblk, pastix_int_t blok_m,
156  const void *A, const void *dataD, void *dataB );
157 
158 /**
159  * @}
160  * @name PastixDouble cblk LU kernels
161  * @{
162  */
163 int cpucblk_dgetrfsp1d_getrf( SolverMatrix *solvmtx, SolverCblk *cblk,
164  void *L, void *U );
165 int cpucblk_dgetrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk,
166  void *L, void *U );
167 int cpucblk_dgetrfsp1d ( SolverMatrix *solvmtx, SolverCblk *cblk,
168  double *work, pastix_int_t lwork );
169 
170 /**
171  * @}
172  * @name PastixDouble cblk Cholesky kernels
173  * @{
174  */
175 int cpucblk_dpotrfsp1d_potrf( SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL );
176 int cpucblk_dpotrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL );
177 int cpucblk_dpotrfsp1d ( SolverMatrix *solvmtx, SolverCblk *cblk,
178  double *work, pastix_int_t lwork );
179 
180 /**
181  * @}
182  * @name PastixDouble cblk LDL^h kernels
183  * @{
184  */
185 int cpucblk_dsytrfsp1d_sytrf( SolverMatrix *solvmtx, SolverCblk *cblk, void *L );
186 int cpucblk_dsytrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk,
187  void *L, void *DLh );
188 int cpucblk_dsytrfsp1d ( SolverMatrix *solvmtx, SolverCblk *cblk,
189  double *work1, double *work2, pastix_int_t lwork );
190 
191 /**
192  * @}
193  * @name PastixDouble cblk LL^t kernels
194  * @{
195  */
196 int cpucblk_dpotrfsp1d_pxtrf( SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL );
197 int cpucblk_dpotrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL );
198 int cpucblk_dpotrfsp1d ( SolverMatrix *solvmtx, SolverCblk *cblk,
199  double *work, pastix_int_t lwork );
200 
201 /**
202  * @}
203  * @name PastixDouble cblk LDL^t kernels
204  * @{
205  */
206 int cpucblk_dsytrfsp1d_sytrf( SolverMatrix *solvmtx, SolverCblk *cblk, void *L );
207 int cpucblk_dsytrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk,
208  void *L, void *DLt );
209 int cpucblk_dsytrfsp1d ( SolverMatrix *solvmtx, SolverCblk *cblk,
210  double *work1, double *work2, pastix_int_t lwork );
211 
212 /**
213  * @}
214  * @name PastixDouble initialization and additionnal routines
215  * @{
216  */
217 void cpucblk_dalloc_lrws( const SolverCblk *cblk,
218  pastix_lrblock_t *lrblok,
219  double *ws );
221  SolverCblk *cblk,
222  int rkmax );
224  SolverCblk *cblk );
226  SolverCblk *cblk );
227 void cpucblk_dfree ( pastix_coefside_t side,
228  SolverCblk *cblk );
230  const SolverMatrix *solvmtx,
231  const pastix_bcsc_t *bcsc,
232  pastix_int_t itercblk );
233 void cpucblk_dinit ( pastix_coefside_t side,
234  const SolverMatrix *solvmtx,
235  const pastix_bcsc_t *bcsc,
236  pastix_int_t itercblk,
237  const char *directory );
238 void cpucblk_dgetschur( const SolverCblk *cblk,
239  int upper_part,
240  double *S,
241  pastix_int_t lds );
242 void cpucblk_ddump ( pastix_coefside_t side,
243  const SolverCblk *cblk,
244  FILE *stream );
246  const SolverCblk *cblkA,
247  SolverCblk *cblkB );
248 void cpucblk_dadd ( pastix_coefside_t side,
249  double alpha,
250  const SolverCblk *cblkA,
251  SolverCblk *cblkB,
252  const pastix_lr_t *lowrank );
253 void cpucblk_dadd_recv( pastix_coefside_t side,
254  double alpha,
255  SolverCblk *cblk );
256 /**
257  * @}
258  * @name PastixDouble MPI routines
259  * @{
260  */
261 int cpucblk_dincoming_deps( int mt_flag,
262  pastix_coefside_t side,
263  SolverMatrix *solvmtx,
264  SolverCblk *cblk );
266  SolverMatrix *solvmtx,
267  const SolverCblk *cblk,
268  SolverCblk *fcbk );
270  pastix_int_t sched,
271  SolverMatrix *solvmtx );
272 
273 void cpucblk_dmpi_progress( pastix_coefside_t side,
274  SolverMatrix *solvmtx,
275  int threadid );
276 
277 void cpucblk_dsend_rhs_forward( const SolverMatrix *solvmtx,
278  SolverCblk *cblk,
279  double *b );
280 void cpucblk_drecv_rhs_forward( const SolverMatrix *solvmtx,
281  SolverCblk *cblk,
282  double *work,
283  pastix_int_t nrhs,
284  double *b,
285  pastix_int_t ldb );
286 void cpucblk_dsend_rhs_backward( const SolverMatrix *solvmtx,
287  SolverCblk *cblk,
288  double *b );
289 void cpucblk_drecv_rhs_backward( const SolverMatrix *solvmtx,
290  SolverCblk *cblk,
291  double *b );
292 
293 /**
294  * @}
295  * @name PastixDouble compression/uncompression routines
296  * @{
297  */
298 pastix_fixdbl_t cpublok_dcompress( const pastix_lr_t *lowrank,
299  pastix_int_t M, pastix_int_t N,
300  pastix_lrblock_t *blok );
301 pastix_int_t cpucblk_dcompress( const SolverMatrix *solvmtx,
302  pastix_coefside_t side,
303  int max_ilulvl,
304  SolverCblk *cblk );
306  SolverCblk *cblk );
308  SolverMatrix *solvmtx,
309  SolverCblk *cblk,
310  pastix_int_t *orig,
311  pastix_int_t *gain );
312 
313 /**
314  * @}
315  * @}
316  *
317  * @addtogroup kernel_solve
318  * @{
319  * This module contains all the kernel working on the solver matrix structure
320  * for the solve step.
321  */
322 
324  pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk,
325  int nrhs, const void* dataA, double *b, int ldb );
327  pastix_int_t nrhs, const SolverCblk *cblk, const SolverBlok *blok,
328  SolverCblk *fcbk, const void* dataA, const double *B, pastix_int_t ldb,
329  double *C, pastix_int_t ldc );
330 
332  pastix_trans_t trans, pastix_diag_t diag,
333  const SolverMatrix *datacode, const SolverCblk *cblk,
334  int nrhs, double *b, int ldb );
336  pastix_trans_t trans, pastix_diag_t diag,
337  const SolverMatrix *datacode, SolverCblk *cblk,
338  int nrhs, double *b, int ldb );
339 
340 void solve_cblk_ddiag( const SolverCblk *cblk,
341  int nrhs,
342  double *b,
343  int ldb,
344  double *work );
345 /**
346  * @}
347  */
348 
349 
350 /**
351  * @addtogroup kernel_fact_null
352  * @{
353  * This module contains the three terms update functions for the LDL^t and
354  * LDL^h factorizations.
355  */
356 void core_dsytrfsp1d_gemm( const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk,
357  const double *L, double *C,
358  double *work );
359 void core_dsytrfsp1d_gemm( const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk,
360  const double *L, double *C,
361  double *work );
362 
363 /**
364  * @}
365  */
366 
367 #endif /* _pastix_dcores_h_ */
cpucblk_dmemory
void cpucblk_dmemory(pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_int_t *orig, pastix_int_t *gain)
Return the memory gain of the low-rank form over the full-rank form for a single column-block.
Definition: cpucblk_dcompress.c:277
pastix_uplo_t
enum pastix_uplo_e pastix_uplo_t
Upper/Lower part.
cpucblk_ddump
void cpucblk_ddump(pastix_coefside_t side, const SolverCblk *cblk, FILE *stream)
Dump a single column block into a FILE in a human readale format.
Definition: coeftab_dinit.c:51
pastix_lr_s
Structure to define the type of function to use for the low-rank kernels and their parameters.
Definition: pastix_lowrank.h:147
cpucblk_dgetrfsp1d_getrf
int cpucblk_dgetrfsp1d_getrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *U)
Compute the LU factorization of the diagonal block in a panel.
Definition: core_dgetrfsp.c:217
cpucblk_dcompress
pastix_int_t cpucblk_dcompress(const SolverMatrix *solvmtx, pastix_coefside_t side, int max_ilulvl, SolverCblk *cblk)
Compress a single column block from full-rank to low-rank format.
Definition: cpucblk_dcompress.c:110
core_dgemdm
int core_dgemdm(pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K, double alpha, const double *A, int LDA, const double *B, int LDB, double beta, double *C, int LDC, const double *D, int incD, double *WORK, int LWORK)
Perform one of the following matrix-matrix operations.
Definition: core_dgemdm.c:139
cpucblk_dpotrfsp1d_potrf
int cpucblk_dpotrfsp1d_potrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the Cholesky factorization of the diagonal block in a panel.
Definition: core_dpotrfsp.c:220
solve_cblk_dtrsmsp_forward
void solve_cblk_dtrsmsp_forward(pastix_solv_mode_t mode, pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverMatrix *datacode, const SolverCblk *cblk, int nrhs, double *b, int ldb)
Apply a forward solve related to one cblk to all the right hand side.
Definition: solve_dtrsmsp.c:323
cpucblk_duncompress
void cpucblk_duncompress(pastix_coefside_t side, SolverCblk *cblk)
Uncompress a single column block from low-rank format to full-rank format.
Definition: cpucblk_dcompress.c:197
cpucblk_dsend_rhs_backward
void cpucblk_dsend_rhs_backward(const SolverMatrix *solvmtx, SolverCblk *cblk, double *b)
Send the rhs associated to a cblk->lcolidx to the remote node.
Definition: cpucblk_dmpi_rhs.c:89
pastix_bcsc_s
Internal column block distributed CSC matrix.
Definition: bcsc.h:37
cpucblk_dfillin
void cpucblk_dfillin(pastix_coefside_t side, const SolverMatrix *solvmtx, const pastix_bcsc_t *bcsc, pastix_int_t itercblk)
Initialize the coeftab structure from the internal bcsc.
Definition: cpucblk_dinit.c:604
pastix_solv_mode_t
enum pastix_solv_mode_e pastix_solv_mode_t
Solve Schur modes.
cpucblk_dgetrfsp1d_panel
int cpucblk_dgetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *U)
Compute the LU factorization of one panel.
Definition: core_dgetrfsp.c:305
solver_cblk_s
Solver column block structure.
Definition: solver.h:127
pastix_coefside_t
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
cpucblk_dfree
void cpucblk_dfree(pastix_coefside_t side, SolverCblk *cblk)
Free the cblk structure that store the coefficient.
Definition: cpucblk_dinit.c:317
cpublok_dtrsmsp
pastix_fixdbl_t cpublok_dtrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, pastix_int_t blok_m, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
Definition: core_dtrsmsp.c:678
pastix_trans_t
enum pastix_trans_e pastix_trans_t
Transpostion.
cpucblk_dgetschur
void cpucblk_dgetschur(const SolverCblk *cblk, int upper_part, double *S, pastix_int_t lds)
Extract a cblk panel of the Schur complement to a dense lapack form.
Definition: cpucblk_dschur.c:178
solver_blok_s
Solver block structure.
Definition: solver.h:107
cpucblk_dalloc_lr
void cpucblk_dalloc_lr(pastix_coefside_t side, SolverCblk *cblk, int rkmax)
Allocate the cblk structure to store the coefficient.
Definition: cpucblk_dinit.c:143
cpublok_dscalo
void cpublok_dscalo(pastix_trans_t trans, SolverCblk *cblk, pastix_int_t blok_m, const void *A, const void *dataD, void *dataB)
Copy the lower terms of the block with scaling for the two-terms algorithm.
Definition: core_dscalo.c:315
cpucblk_dsend_rhs_forward
void cpucblk_dsend_rhs_forward(const SolverMatrix *solvmtx, SolverCblk *cblk, double *b)
Send the rhs associated to a cblk->lcolidx to the remote node.
Definition: cpucblk_dmpi_rhs.c:42
cpucblk_dinit
void cpucblk_dinit(pastix_coefside_t side, const SolverMatrix *solvmtx, const pastix_bcsc_t *bcsc, pastix_int_t itercblk, const char *directory)
Fully initialize a single cblk.
Definition: coeftab_dinit.c:251
pastix_lrblock_s
The block low-rank structure to hold a matrix in low-rank form.
Definition: pastix_lowrank.h:112
core_dpotrfsp
void core_dpotrfsp(pastix_int_t n, double *A, pastix_int_t lda, pastix_int_t *nbpivot, double criterion)
Compute the block static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t .
Definition: core_dpotrfsp.c:144
cpucblk_dgetrfsp1d
int cpucblk_dgetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, double *work, pastix_int_t lwork)
Perform the LU factorization of a given panel and apply all its updates.
Definition: core_dgetrfsp.c:355
core_dlrorthu_partialqr
pastix_fixdbl_t core_dlrorthu_partialqr(pastix_int_t M, pastix_int_t N, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, double *U, pastix_int_t ldu, double *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using partial ...
Definition: core_dlrothu.c:193
core_dgetmo
void core_dgetmo(int m, int n, const double *A, int lda, double *B, int ldb)
Transposes a m-by-n matrix out of place using an extra workspace of size m-by-n.
Definition: core_dgetmo.c:49
core_drqrcp
int core_drqrcp(double tol, pastix_int_t maxrank, int refine, pastix_int_t nb, pastix_int_t m, pastix_int_t n, double *A, pastix_int_t lda, pastix_int_t *jpvt, double *tau, double *work, pastix_int_t lwork, double *rwork)
Compute a randomized QR factorization.
Definition: core_drqrcp.c:114
cpucblk_dalloc
void cpucblk_dalloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
Definition: cpucblk_dinit.c:264
cpucblk_dgemmsp
pastix_fixdbl_t cpucblk_dgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, double *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
Definition: core_dgemmsp.c:1512
cpucblk_dtrsmsp
void cpucblk_dtrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
Definition: core_dtrsmsp.c:356
core_drqrrt
int core_drqrrt(double tol, pastix_int_t maxrank, pastix_int_t nb, pastix_int_t m, pastix_int_t n, double *A, pastix_int_t lda, double *tau, double *B, pastix_int_t ldb, double *tau_b, double *work, pastix_int_t lwork, double normA)
Compute a randomized QR factorization with rotation technique.
Definition: core_drqrrt.c:126
cpublok_dgemmsp
pastix_fixdbl_t cpublok_dgemmsp(pastix_trans_t trans, const SolverCblk *cblk, SolverCblk *fcblk, pastix_int_t blok_mk, pastix_int_t blok_nk, pastix_int_t blok_mn, const void *A, const void *B, void *C, const pastix_lr_t *lowrank)
Compute the CPU gemm associated to a couple of off-diagonal blocks.
Definition: core_dgemmsp.c:1655
cpucblk_dgeaddsp1d
int cpucblk_dgeaddsp1d(const SolverCblk *cblk1, SolverCblk *cblk2, const double *L1, double *L2, const double *U1, double *U2)
Add two column blocks together.
Definition: core_dgeadd.c:245
cpucblk_dsytrfsp1d_sytrf
int cpucblk_dsytrfsp1d_sytrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Computes the LDL^t factorization of the diagonal block in a panel.
Definition: core_dsytrfsp.c:232
solve_cblk_ddiag
void solve_cblk_ddiag(const SolverCblk *cblk, int nrhs, double *b, int ldb, double *work)
Apply the diagonal solve related to one cblk to all the right hand side.
Definition: solve_dtrsmsp.c:616
cpucblk_drecv_rhs_backward
void cpucblk_drecv_rhs_backward(const SolverMatrix *solvmtx, SolverCblk *cblk, double *b)
Receive the rhs associated to a cblk->lcolidx to the remote node.
Definition: cpucblk_dmpi_rhs.c:136
core_dgetrfsp
void core_dgetrfsp(pastix_int_t n, double *A, pastix_int_t lda, pastix_int_t *nbpivot, double criterion)
Compute the block static pivoting LU factorization of the matrix m-by-n A = L * U.
Definition: core_dgetrfsp.c:138
core_dtqrcp
int core_dtqrcp(double tol, pastix_int_t maxrank, int unused, pastix_int_t nb, pastix_int_t m, pastix_int_t n, double *A, pastix_int_t lda, pastix_int_t *jpvt, double *tau, double *work, pastix_int_t lwork, double *rwork)
Compute a randomized QR factorization with truncated updates.
Definition: core_dtqrcp.c:99
cpucblk_dalloc_lrws
void cpucblk_dalloc_lrws(const SolverCblk *cblk, pastix_lrblock_t *lrblok, double *ws)
Initialize lrblock structure from a workspace from all blocks of the cblk associated.
Definition: cpucblk_dinit.c:96
core_dsytrfsp
void core_dsytrfsp(pastix_int_t n, double *A, pastix_int_t lda, pastix_int_t *nbpivot, double criterion)
Compute the block static pivoting factorization of the symmetric matrix n-by-n A such that A = L * D ...
Definition: core_dsytrfsp.c:141
solve_blok_dtrsm
void solve_blok_dtrsm(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, int nrhs, const void *dataA, double *b, int ldb)
Apply a solve trsm update related to a diagonal block of the matrix A.
Definition: solve_dtrsmsp.c:70
cpucblk_dalloc_fr
void cpucblk_dalloc_fr(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
Definition: cpucblk_dinit.c:217
core_dsytrfsp1d_gemm
void core_dsytrfsp1d_gemm(const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const double *L, double *C, double *work)
Definition: core_dsytrfsp.c:319
core_dlrorthu_cgs
pastix_fixdbl_t core_dlrorthu_cgs(pastix_int_t M1, pastix_int_t N1, pastix_int_t M2, pastix_int_t N2, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, double *U, pastix_int_t ldu, double *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using CGS.
Definition: core_dlrothu.c:419
cpucblk_drecv_rhs_forward
void cpucblk_drecv_rhs_forward(const SolverMatrix *solvmtx, SolverCblk *cblk, double *work, pastix_int_t nrhs, double *b, pastix_int_t ldb)
Receive the rhs associated to a cblk->lcolidx to the remote node.
Definition: cpucblk_dmpi_rhs.c:195
solve_blok_dgemm
void solve_blok_dgemm(pastix_side_t side, pastix_trans_t trans, pastix_int_t nrhs, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcbk, const void *dataA, const double *B, pastix_int_t ldb, double *C, pastix_int_t ldc)
Apply a solve gemm update related to a single block of the matrix A.
Definition: solve_dtrsmsp.c:158
core_dlrorthu_fullqr
pastix_fixdbl_t core_dlrorthu_fullqr(pastix_int_t M, pastix_int_t N, pastix_int_t rank, double *U, pastix_int_t ldu, double *V, pastix_int_t ldv)
Try to orthognalize the u part of the low-rank form, and update the v part accordingly using full QR.
Definition: core_dlrothu.c:83
pastix_diag_t
enum pastix_diag_e pastix_diag_t
Diagonal.
cpucblk_drelease_deps
void cpucblk_drelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
Definition: cpucblk_dmpi_coeftab.c:558
cpucblk_dsytrfsp1d_panel
int cpucblk_dsytrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLh)
Compute the LDL^t factorization of one panel.
Definition: core_dsytrfsp.c:432
cpucblk_dscalo
void cpucblk_dscalo(pastix_trans_t trans, SolverCblk *cblk, void *dataL, void *dataLD)
Copy the L term with scaling for the two-terms algorithm.
Definition: core_dscalo.c:170
cpucblk_drequest_cleanup
void cpucblk_drequest_cleanup(pastix_coefside_t side, pastix_int_t sched, SolverMatrix *solvmtx)
Waitall routine for current cblk request.
Definition: cpucblk_dmpi_coeftab.c:609
core_dpqrcp
int core_dpqrcp(double tol, pastix_int_t maxrank, int full_update, pastix_int_t nb, pastix_int_t m, pastix_int_t n, double *A, pastix_int_t lda, pastix_int_t *jpvt, double *tau, double *work, pastix_int_t lwork, double *rwork)
Compute a rank-reavealing QR factorization.
Definition: core_dpqrcp.c:105
solve_cblk_dtrsmsp_backward
void solve_cblk_dtrsmsp_backward(pastix_solv_mode_t mode, pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverMatrix *datacode, SolverCblk *cblk, int nrhs, double *b, int ldb)
Apply a backward solve related to one cblk to all the right hand side.
Definition: solve_dtrsmsp.c:471
cpucblk_dsytrfsp1d
int cpucblk_dsytrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, double *work1, double *work2, pastix_int_t lwork)
Perform the LDL^t factorization of a given panel and apply all its updates.
Definition: core_dsytrfsp.c:487
core_dplrnt
void core_dplrnt(int m, int n, double *A, int lda, int gM, int m0, int n0, unsigned long long int seed)
Generate a random tile.
Definition: core_dplrnt.c:91
cpucblk_dincoming_deps
int cpucblk_dincoming_deps(int mt_flag, pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
Definition: cpucblk_dmpi_coeftab.c:499
cpucblk_dpotrfsp1d_panel
int cpucblk_dpotrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the Cholesky factorization of one panel.
Definition: core_dpotrfsp.c:292
cpucblk_dadd
void cpucblk_dadd(pastix_coefside_t side, double alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const pastix_lr_t *lowrank)
Add two column bloks in full rank format.
Definition: cpucblk_dadd.c:358
core_dscalo
int core_dscalo(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, const double *A, pastix_int_t lda, const double *D, pastix_int_t ldd, double *B, pastix_int_t ldb)
Scale a matrix by a diagonal out of place.
Definition: core_dscalo.c:73
cpublok_dcompress
pastix_fixdbl_t cpublok_dcompress(const pastix_lr_t *lowrank, pastix_int_t M, pastix_int_t N, pastix_lrblock_t *blok)
Compress a single block from full-rank to low-rank format.
Definition: cpucblk_dcompress.c:57
pastix_side_t
enum pastix_side_e pastix_side_t
Side of the operation.
core_dtradd
int core_dtradd(pastix_uplo_t uplo, pastix_trans_t trans, pastix_int_t M, pastix_int_t N, double alpha, const double *A, pastix_int_t LDA, double beta, double *B, pastix_int_t LDB)
Add two triangular matrices together as in PBLAS pdtradd.
Definition: core_dtradd.c:79
core_dgeadd
int core_dgeadd(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, double alpha, const double *A, pastix_int_t LDA, double beta, double *B, pastix_int_t LDB)
Add two matrices together.
Definition: core_dgeadd.c:78
cpucblk_ddiff
int cpucblk_ddiff(pastix_coefside_t side, const SolverCblk *cblkA, SolverCblk *cblkB)
Compare two column blocks in full-rank format.
Definition: cpucblk_ddiff.c:63
cpucblk_dpotrfsp1d
int cpucblk_dpotrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, double *work, pastix_int_t lwork)
Perform the Cholesky factorization of a given panel and apply all its updates.
Definition: core_dpotrfsp.c:334