PaStiX Handbook  6.2.1
pastix_scores.h
Go to the documentation of this file.
1 /**
2  * @file pastix_scores.h
3  *
4  * PaStiX kernel header.
5  *
6  * @copyright 2011-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
7  * Univ. Bordeaux. All rights reserved.
8  *
9  * @version 6.2.0
10  * @author Mathieu Faverge
11  * @author Pierre Ramet
12  * @author Xavier Lacoste
13  * @author Esragul Korkmaz
14  * @author Gregoire Pichon
15  * @author Tony Delarue
16  * @date 2021-03-30
17  * @generated from /builds/solverstack/pastix/kernels/pastix_zcores.h, normal z -> s, Tue Apr 12 09:38:26 2022
18  *
19  */
20 #ifndef _pastix_scores_h_
21 #define _pastix_scores_h_
22 
23 #ifndef DOXYGEN_SHOULD_SKIP_THIS
24 #define pastix_cblk_lock( cblk_ ) pastix_atomic_lock( &((cblk_)->lock) )
25 #define pastix_cblk_unlock( cblk_ ) pastix_atomic_unlock( &((cblk_)->lock) )
26 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
27 
28 /**
29  * @addtogroup kernel_blas_lapack
30  * @{
31  * This module contains all the BLAS and LAPACK-like kernels that are working
32  * on lapack layout matrices.
33  *
34  * @name PastixFloat BLAS kernels
35  * @{
36  */
37 void core_splrnt( int m, int n, float *A, int lda,
38  int gM, int m0, int n0, unsigned long long int seed );
39 void core_sgetmo( int m, int n,
40  const float *A, int lda,
41  float *B, int ldb );
42 int core_sgeadd( pastix_trans_t trans, pastix_int_t M, pastix_int_t N,
43  float alpha, const float *A, pastix_int_t LDA,
44  float beta, float *B, pastix_int_t LDB );
45 int core_sgemdm( pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K,
46  float alpha, const float *A, int LDA,
47  const float *B, int LDB,
48  float beta, float *C, int LDC,
49  const float *D, int incD,
50  float *WORK, int LWORK );
51 int core_spqrcp( float tol, pastix_int_t maxrank, int full_update, pastix_int_t nb,
52  pastix_int_t m, pastix_int_t n,
53  float *A, pastix_int_t lda,
54  pastix_int_t *jpvt, float *tau,
55  float *work, pastix_int_t lwork, float *rwork );
56 int core_srqrcp( float tol, pastix_int_t maxrank, int refine, pastix_int_t nb,
57  pastix_int_t m, pastix_int_t n,
58  float *A, pastix_int_t lda,
59  pastix_int_t *jpvt, float *tau,
60  float *work, pastix_int_t lwork, float *rwork );
61 int core_srqrrt( float tol, pastix_int_t maxrank, pastix_int_t nb,
62  pastix_int_t m, pastix_int_t n,
63  float *A, pastix_int_t lda, float *tau,
64  float *B, pastix_int_t ldb, float *tau_b,
65  float *work, pastix_int_t lwork, float normA );
66 int core_stqrcp( float tol, pastix_int_t maxrank, int unused, pastix_int_t nb,
67  pastix_int_t m, pastix_int_t n,
68  float *A, pastix_int_t lda,
69  pastix_int_t *jpvt, float *tau,
70  float *work, pastix_int_t lwork, float *rwork );
71 int core_stradd( pastix_uplo_t uplo, pastix_trans_t trans, pastix_int_t M, pastix_int_t N,
72  float alpha, const float *A, pastix_int_t LDA,
73  float beta, float *B, pastix_int_t LDB);
74 int core_sscalo( pastix_trans_t trans, pastix_int_t M, pastix_int_t N,
75  const float *A, pastix_int_t lda,
76  const float *D, pastix_int_t ldd,
77  float *B, pastix_int_t ldb );
78 
79 /**
80  * @}
81  * @name PastixFloat Othogonalization kernels for low-rank updates
82  * @{
83  */
84 pastix_fixdbl_t
85 core_slrorthu_fullqr( pastix_int_t M, pastix_int_t N, pastix_int_t rank,
86  float *U, pastix_int_t ldu,
87  float *V, pastix_int_t ldv );
88 pastix_fixdbl_t
89 core_slrorthu_partialqr( pastix_int_t M, pastix_int_t N,
90  pastix_int_t r1, pastix_int_t *r2ptr,
91  pastix_int_t offx, pastix_int_t offy,
92  float *U, pastix_int_t ldu,
93  float *V, pastix_int_t ldv );
94 pastix_fixdbl_t
95 core_slrorthu_cgs( pastix_int_t M1, pastix_int_t N1,
96  pastix_int_t M2, pastix_int_t N2,
97  pastix_int_t r1, pastix_int_t *r2ptr,
98  pastix_int_t offx, pastix_int_t offy,
99  float *U, pastix_int_t ldu,
100  float *V, pastix_int_t ldv );
101 
102 /**
103  * @}
104  * @name PastixFloat LAPACK kernels
105  * @{
106  */
107 void core_spotrfsp( pastix_int_t n, float *A, pastix_int_t lda,
108  pastix_int_t *nbpivot, float criterion );
109 void core_spotrfsp( pastix_int_t n, float *A, pastix_int_t lda,
110  pastix_int_t *nbpivot, float criterion );
111 void core_sgetrfsp( pastix_int_t n, float *A, pastix_int_t lda,
112  pastix_int_t *nbpivot, float criterion );
113 void core_ssytrfsp( pastix_int_t n, float *A, pastix_int_t lda,
114  pastix_int_t *nbpivot, float criterion );
115 void core_ssytrfsp( pastix_int_t n, float *A, pastix_int_t lda,
116  pastix_int_t *nbpivot, float criterion );
117 
118 /**
119  * @}
120  * @}
121  *
122  * @addtogroup kernel_fact
123  * @{
124  * This module contains all the kernel working at the solver matrix structure
125  * level for the numerical factorization step.
126  *
127  * @name PastixFloat cblk-BLAS CPU kernels
128  * @{
129  */
130 
131 int cpucblk_sgeaddsp1d( const SolverCblk *cblk1, SolverCblk *cblk2,
132  const float *L1, float *L2,
133  const float *U1, float *U2 );
134 
135 pastix_fixdbl_t cpucblk_sgemmsp( pastix_coefside_t sideA, pastix_trans_t trans,
136  const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk,
137  const void *A, const void *B, void *C,
138  float *work, pastix_int_t lwork, const pastix_lr_t *lowrank );
140  pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk,
141  const void *A, void *C, const pastix_lr_t *lowrank );
142 void cpucblk_sscalo ( pastix_trans_t trans, SolverCblk *cblk, void* dataL, void* dataLD );
143 
144 pastix_fixdbl_t cpublok_sgemmsp( pastix_trans_t trans,
145  const SolverCblk *cblk, SolverCblk *fcblk,
146  pastix_int_t blok_mk, pastix_int_t blok_nk, pastix_int_t blok_mn,
147  const void *A, const void *B, void *C,
148  const pastix_lr_t *lowrank );
149 pastix_fixdbl_t cpublok_strsmsp( pastix_side_t side, pastix_uplo_t uplo,
150  pastix_trans_t trans, pastix_diag_t diag,
151  const SolverCblk *cblk, pastix_int_t blok_m,
152  const void *A, void *C,
153  const pastix_lr_t *lowrank );
154 void cpublok_sscalo ( pastix_trans_t trans,
155  SolverCblk *cblk, pastix_int_t blok_m,
156  const void *A, const void *dataD, void *dataB );
157 
158 /**
159  * @}
160  * @name PastixFloat cblk LU kernels
161  * @{
162  */
163 int cpucblk_sgetrfsp1d_getrf( SolverMatrix *solvmtx, SolverCblk *cblk,
164  void *L, void *U );
165 int cpucblk_sgetrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk,
166  void *L, void *U );
167 int cpucblk_sgetrfsp1d ( SolverMatrix *solvmtx, SolverCblk *cblk,
168  float *work, pastix_int_t lwork );
169 
170 /**
171  * @}
172  * @name PastixFloat cblk Cholesky kernels
173  * @{
174  */
175 int cpucblk_spotrfsp1d_potrf( SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL );
176 int cpucblk_spotrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL );
177 int cpucblk_spotrfsp1d ( SolverMatrix *solvmtx, SolverCblk *cblk,
178  float *work, pastix_int_t lwork );
179 
180 /**
181  * @}
182  * @name PastixFloat cblk LDL^h kernels
183  * @{
184  */
185 int cpucblk_ssytrfsp1d_sytrf( SolverMatrix *solvmtx, SolverCblk *cblk, void *L );
186 int cpucblk_ssytrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk,
187  void *L, void *DLh );
188 int cpucblk_ssytrfsp1d ( SolverMatrix *solvmtx, SolverCblk *cblk,
189  float *work1, float *work2, pastix_int_t lwork );
190 
191 /**
192  * @}
193  * @name PastixFloat cblk LL^t kernels
194  * @{
195  */
196 int cpucblk_spotrfsp1d_pxtrf( SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL );
197 int cpucblk_spotrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL );
198 int cpucblk_spotrfsp1d ( SolverMatrix *solvmtx, SolverCblk *cblk,
199  float *work, pastix_int_t lwork );
200 
201 /**
202  * @}
203  * @name PastixFloat cblk LDL^t kernels
204  * @{
205  */
206 int cpucblk_ssytrfsp1d_sytrf( SolverMatrix *solvmtx, SolverCblk *cblk, void *L );
207 int cpucblk_ssytrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk,
208  void *L, void *DLt );
209 int cpucblk_ssytrfsp1d ( SolverMatrix *solvmtx, SolverCblk *cblk,
210  float *work1, float *work2, pastix_int_t lwork );
211 
212 /**
213  * @}
214  * @name PastixFloat initialization and additionnal routines
215  * @{
216  */
217 void cpucblk_salloc_lrws( const SolverCblk *cblk,
218  pastix_lrblock_t *lrblok,
219  float *ws );
221  SolverCblk *cblk,
222  int rkmax );
224  SolverCblk *cblk );
226  SolverCblk *cblk );
227 void cpucblk_sfree ( pastix_coefside_t side,
228  SolverCblk *cblk );
230  const SolverMatrix *solvmtx,
231  const pastix_bcsc_t *bcsc,
232  pastix_int_t itercblk );
233 void cpucblk_sinit ( pastix_coefside_t side,
234  const SolverMatrix *solvmtx,
235  const pastix_bcsc_t *bcsc,
236  pastix_int_t itercblk,
237  const char *directory );
238 void cpucblk_sgetschur( const SolverCblk *cblk,
239  int upper_part,
240  float *S,
241  pastix_int_t lds );
242 void cpucblk_sdump ( pastix_coefside_t side,
243  const SolverCblk *cblk,
244  FILE *stream );
246  const SolverCblk *cblkA,
247  SolverCblk *cblkB );
248 void cpucblk_sadd ( pastix_coefside_t side,
249  float alpha,
250  const SolverCblk *cblkA,
251  SolverCblk *cblkB,
252  const pastix_lr_t *lowrank );
253 void cpucblk_sadd_recv( pastix_coefside_t side,
254  float alpha,
255  SolverCblk *cblk );
256 /**
257  * @}
258  * @name PastixFloat MPI routines
259  * @{
260  */
261 int cpucblk_sincoming_deps( int mt_flag,
262  pastix_coefside_t side,
263  SolverMatrix *solvmtx,
264  SolverCblk *cblk );
266  SolverMatrix *solvmtx,
267  const SolverCblk *cblk,
268  SolverCblk *fcbk );
270  pastix_int_t sched,
271  SolverMatrix *solvmtx );
272 
273 void cpucblk_smpi_progress( pastix_coefside_t side,
274  SolverMatrix *solvmtx,
275  int threadid );
276 
277 void cpucblk_ssend_rhs_forward( const SolverMatrix *solvmtx,
278  SolverCblk *cblk,
279  float *b );
280 void cpucblk_srecv_rhs_forward( const SolverMatrix *solvmtx,
281  SolverCblk *cblk,
282  float *work,
283  pastix_int_t nrhs,
284  float *b,
285  pastix_int_t ldb );
286 void cpucblk_ssend_rhs_backward( const SolverMatrix *solvmtx,
287  SolverCblk *cblk,
288  float *b );
289 void cpucblk_srecv_rhs_backward( const SolverMatrix *solvmtx,
290  SolverCblk *cblk,
291  float *b );
292 
293 /**
294  * @}
295  * @name PastixFloat compression/uncompression routines
296  * @{
297  */
298 pastix_fixdbl_t cpublok_scompress( const pastix_lr_t *lowrank,
299  pastix_int_t M, pastix_int_t N,
300  pastix_lrblock_t *blok );
301 pastix_int_t cpucblk_scompress( const SolverMatrix *solvmtx,
302  pastix_coefside_t side,
303  int max_ilulvl,
304  SolverCblk *cblk );
306  SolverCblk *cblk );
308  SolverMatrix *solvmtx,
309  SolverCblk *cblk,
310  pastix_int_t *orig,
311  pastix_int_t *gain );
312 
313 /**
314  * @}
315  * @}
316  *
317  * @addtogroup kernel_solve
318  * @{
319  * This module contains all the kernel working on the solver matrix structure
320  * for the solve step.
321  */
322 
324  pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk,
325  int nrhs, const void* dataA, float *b, int ldb );
327  pastix_int_t nrhs, const SolverCblk *cblk, const SolverBlok *blok,
328  SolverCblk *fcbk, const void* dataA, const float *B, pastix_int_t ldb,
329  float *C, pastix_int_t ldc );
330 
332  pastix_trans_t trans, pastix_diag_t diag,
333  const SolverMatrix *datacode, const SolverCblk *cblk,
334  int nrhs, float *b, int ldb );
336  pastix_trans_t trans, pastix_diag_t diag,
337  const SolverMatrix *datacode, SolverCblk *cblk,
338  int nrhs, float *b, int ldb );
339 
340 void solve_cblk_sdiag( const SolverCblk *cblk,
341  int nrhs,
342  float *b,
343  int ldb,
344  float *work );
345 /**
346  * @}
347  */
348 
349 
350 /**
351  * @addtogroup kernel_fact_null
352  * @{
353  * This module contains the three terms update functions for the LDL^t and
354  * LDL^h factorizations.
355  */
356 void core_ssytrfsp1d_gemm( const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk,
357  const float *L, float *C,
358  float *work );
359 void core_ssytrfsp1d_gemm( const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk,
360  const float *L, float *C,
361  float *work );
362 
363 /**
364  * @}
365  */
366 
367 #endif /* _pastix_scores_h_ */
core_slrorthu_cgs
pastix_fixdbl_t core_slrorthu_cgs(pastix_int_t M1, pastix_int_t N1, pastix_int_t M2, pastix_int_t N2, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, float *U, pastix_int_t ldu, float *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using CGS.
Definition: core_slrothu.c:419
pastix_uplo_t
enum pastix_uplo_e pastix_uplo_t
Upper/Lower part.
cpucblk_salloc_fr
void cpucblk_salloc_fr(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
Definition: cpucblk_sinit.c:217
core_splrnt
void core_splrnt(int m, int n, float *A, int lda, int gM, int m0, int n0, unsigned long long int seed)
Generate a random tile.
Definition: core_splrnt.c:91
pastix_lr_s
Structure to define the type of function to use for the low-rank kernels and their parameters.
Definition: pastix_lowrank.h:147
cpucblk_spotrfsp1d_panel
int cpucblk_spotrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the Cholesky factorization of one panel.
Definition: core_spotrfsp.c:292
cpucblk_sfree
void cpucblk_sfree(pastix_coefside_t side, SolverCblk *cblk)
Free the cblk structure that store the coefficient.
Definition: cpucblk_sinit.c:317
core_ssytrfsp1d_gemm
void core_ssytrfsp1d_gemm(const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const float *L, float *C, float *work)
Definition: core_ssytrfsp.c:319
cpublok_strsmsp
pastix_fixdbl_t cpublok_strsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, pastix_int_t blok_m, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
Definition: core_strsmsp.c:678
cpucblk_sadd
void cpucblk_sadd(pastix_coefside_t side, float alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const pastix_lr_t *lowrank)
Add two column bloks in full rank format.
Definition: cpucblk_sadd.c:358
pastix_bcsc_s
Internal column block distributed CSC matrix.
Definition: bcsc.h:37
core_sgetrfsp
void core_sgetrfsp(pastix_int_t n, float *A, pastix_int_t lda, pastix_int_t *nbpivot, float criterion)
Compute the block static pivoting LU factorization of the matrix m-by-n A = L * U.
Definition: core_sgetrfsp.c:138
cpucblk_sgetrfsp1d_getrf
int cpucblk_sgetrfsp1d_getrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *U)
Compute the LU factorization of the diagonal block in a panel.
Definition: core_sgetrfsp.c:217
pastix_solv_mode_t
enum pastix_solv_mode_e pastix_solv_mode_t
Solve Schur modes.
cpublok_scompress
pastix_fixdbl_t cpublok_scompress(const pastix_lr_t *lowrank, pastix_int_t M, pastix_int_t N, pastix_lrblock_t *blok)
Compress a single block from full-rank to low-rank format.
Definition: cpucblk_scompress.c:57
solve_blok_strsm
void solve_blok_strsm(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, int nrhs, const void *dataA, float *b, int ldb)
Apply a solve trsm update related to a diagonal block of the matrix A.
Definition: solve_strsmsp.c:70
solver_cblk_s
Solver column block structure.
Definition: solver.h:127
cpucblk_ssytrfsp1d
int cpucblk_ssytrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, float *work1, float *work2, pastix_int_t lwork)
Perform the LDL^t factorization of a given panel and apply all its updates.
Definition: core_ssytrfsp.c:487
pastix_coefside_t
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
cpucblk_sgetrfsp1d_panel
int cpucblk_sgetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *U)
Compute the LU factorization of one panel.
Definition: core_sgetrfsp.c:305
pastix_trans_t
enum pastix_trans_e pastix_trans_t
Transpostion.
cpucblk_spotrfsp1d_potrf
int cpucblk_spotrfsp1d_potrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the Cholesky factorization of the diagonal block in a panel.
Definition: core_spotrfsp.c:220
solver_blok_s
Solver block structure.
Definition: solver.h:107
cpucblk_suncompress
void cpucblk_suncompress(pastix_coefside_t side, SolverCblk *cblk)
Uncompress a single column block from low-rank format to full-rank format.
Definition: cpucblk_scompress.c:197
cpucblk_salloc_lr
void cpucblk_salloc_lr(pastix_coefside_t side, SolverCblk *cblk, int rkmax)
Allocate the cblk structure to store the coefficient.
Definition: cpucblk_sinit.c:143
cpucblk_salloc_lrws
void cpucblk_salloc_lrws(const SolverCblk *cblk, pastix_lrblock_t *lrblok, float *ws)
Initialize lrblock structure from a workspace from all blocks of the cblk associated.
Definition: cpucblk_sinit.c:96
cpucblk_sdump
void cpucblk_sdump(pastix_coefside_t side, const SolverCblk *cblk, FILE *stream)
Dump a single column block into a FILE in a human readale format.
Definition: coeftab_sinit.c:51
pastix_lrblock_s
The block low-rank structure to hold a matrix in low-rank form.
Definition: pastix_lowrank.h:112
core_sscalo
int core_sscalo(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, const float *A, pastix_int_t lda, const float *D, pastix_int_t ldd, float *B, pastix_int_t ldb)
Scale a matrix by a diagonal out of place.
Definition: core_sscalo.c:73
solve_blok_sgemm
void solve_blok_sgemm(pastix_side_t side, pastix_trans_t trans, pastix_int_t nrhs, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcbk, const void *dataA, const float *B, pastix_int_t ldb, float *C, pastix_int_t ldc)
Apply a solve gemm update related to a single block of the matrix A.
Definition: solve_strsmsp.c:158
cpucblk_sgeaddsp1d
int cpucblk_sgeaddsp1d(const SolverCblk *cblk1, SolverCblk *cblk2, const float *L1, float *L2, const float *U1, float *U2)
Add two column blocks together.
Definition: core_sgeadd.c:245
cpucblk_sincoming_deps
int cpucblk_sincoming_deps(int mt_flag, pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
Definition: cpucblk_smpi_coeftab.c:499
cpublok_sgemmsp
pastix_fixdbl_t cpublok_sgemmsp(pastix_trans_t trans, const SolverCblk *cblk, SolverCblk *fcblk, pastix_int_t blok_mk, pastix_int_t blok_nk, pastix_int_t blok_mn, const void *A, const void *B, void *C, const pastix_lr_t *lowrank)
Compute the CPU gemm associated to a couple of off-diagonal blocks.
Definition: core_sgemmsp.c:1655
solve_cblk_sdiag
void solve_cblk_sdiag(const SolverCblk *cblk, int nrhs, float *b, int ldb, float *work)
Apply the diagonal solve related to one cblk to all the right hand side.
Definition: solve_strsmsp.c:616
cpucblk_sgetrfsp1d
int cpucblk_sgetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, float *work, pastix_int_t lwork)
Perform the LU factorization of a given panel and apply all its updates.
Definition: core_sgetrfsp.c:355
core_stqrcp
int core_stqrcp(float tol, pastix_int_t maxrank, int unused, pastix_int_t nb, pastix_int_t m, pastix_int_t n, float *A, pastix_int_t lda, pastix_int_t *jpvt, float *tau, float *work, pastix_int_t lwork, float *rwork)
Compute a randomized QR factorization with truncated updates.
Definition: core_stqrcp.c:99
cpucblk_sgetschur
void cpucblk_sgetschur(const SolverCblk *cblk, int upper_part, float *S, pastix_int_t lds)
Extract a cblk panel of the Schur complement to a dense lapack form.
Definition: cpucblk_sschur.c:178
cpucblk_srequest_cleanup
void cpucblk_srequest_cleanup(pastix_coefside_t side, pastix_int_t sched, SolverMatrix *solvmtx)
Waitall routine for current cblk request.
Definition: cpucblk_smpi_coeftab.c:609
solve_cblk_strsmsp_backward
void solve_cblk_strsmsp_backward(pastix_solv_mode_t mode, pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverMatrix *datacode, SolverCblk *cblk, int nrhs, float *b, int ldb)
Apply a backward solve related to one cblk to all the right hand side.
Definition: solve_strsmsp.c:471
cpucblk_srelease_deps
void cpucblk_srelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
Definition: cpucblk_smpi_coeftab.c:558
cpucblk_ssend_rhs_backward
void cpucblk_ssend_rhs_backward(const SolverMatrix *solvmtx, SolverCblk *cblk, float *b)
Send the rhs associated to a cblk->lcolidx to the remote node.
Definition: cpucblk_smpi_rhs.c:89
core_srqrcp
int core_srqrcp(float tol, pastix_int_t maxrank, int refine, pastix_int_t nb, pastix_int_t m, pastix_int_t n, float *A, pastix_int_t lda, pastix_int_t *jpvt, float *tau, float *work, pastix_int_t lwork, float *rwork)
Compute a randomized QR factorization.
Definition: core_srqrcp.c:114
cpucblk_ssend_rhs_forward
void cpucblk_ssend_rhs_forward(const SolverMatrix *solvmtx, SolverCblk *cblk, float *b)
Send the rhs associated to a cblk->lcolidx to the remote node.
Definition: cpucblk_smpi_rhs.c:42
cpucblk_scompress
pastix_int_t cpucblk_scompress(const SolverMatrix *solvmtx, pastix_coefside_t side, int max_ilulvl, SolverCblk *cblk)
Compress a single column block from full-rank to low-rank format.
Definition: cpucblk_scompress.c:110
cpucblk_srecv_rhs_backward
void cpucblk_srecv_rhs_backward(const SolverMatrix *solvmtx, SolverCblk *cblk, float *b)
Receive the rhs associated to a cblk->lcolidx to the remote node.
Definition: cpucblk_smpi_rhs.c:136
core_ssytrfsp
void core_ssytrfsp(pastix_int_t n, float *A, pastix_int_t lda, pastix_int_t *nbpivot, float criterion)
Compute the block static pivoting factorization of the symmetric matrix n-by-n A such that A = L * D ...
Definition: core_ssytrfsp.c:141
cpucblk_ssytrfsp1d_sytrf
int cpucblk_ssytrfsp1d_sytrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Computes the LDL^t factorization of the diagonal block in a panel.
Definition: core_ssytrfsp.c:232
cpucblk_salloc
void cpucblk_salloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
Definition: cpucblk_sinit.c:264
cpublok_sscalo
void cpublok_sscalo(pastix_trans_t trans, SolverCblk *cblk, pastix_int_t blok_m, const void *A, const void *dataD, void *dataB)
Copy the lower terms of the block with scaling for the two-terms algorithm.
Definition: core_sscalo.c:315
cpucblk_srecv_rhs_forward
void cpucblk_srecv_rhs_forward(const SolverMatrix *solvmtx, SolverCblk *cblk, float *work, pastix_int_t nrhs, float *b, pastix_int_t ldb)
Receive the rhs associated to a cblk->lcolidx to the remote node.
Definition: cpucblk_smpi_rhs.c:195
cpucblk_sinit
void cpucblk_sinit(pastix_coefside_t side, const SolverMatrix *solvmtx, const pastix_bcsc_t *bcsc, pastix_int_t itercblk, const char *directory)
Fully initialize a single cblk.
Definition: coeftab_sinit.c:251
core_slrorthu_fullqr
pastix_fixdbl_t core_slrorthu_fullqr(pastix_int_t M, pastix_int_t N, pastix_int_t rank, float *U, pastix_int_t ldu, float *V, pastix_int_t ldv)
Try to orthognalize the u part of the low-rank form, and update the v part accordingly using full QR.
Definition: core_slrothu.c:83
core_spqrcp
int core_spqrcp(float tol, pastix_int_t maxrank, int full_update, pastix_int_t nb, pastix_int_t m, pastix_int_t n, float *A, pastix_int_t lda, pastix_int_t *jpvt, float *tau, float *work, pastix_int_t lwork, float *rwork)
Compute a rank-reavealing QR factorization.
Definition: core_spqrcp.c:105
core_srqrrt
int core_srqrrt(float tol, pastix_int_t maxrank, pastix_int_t nb, pastix_int_t m, pastix_int_t n, float *A, pastix_int_t lda, float *tau, float *B, pastix_int_t ldb, float *tau_b, float *work, pastix_int_t lwork, float normA)
Compute a randomized QR factorization with rotation technique.
Definition: core_srqrrt.c:126
core_slrorthu_partialqr
pastix_fixdbl_t core_slrorthu_partialqr(pastix_int_t M, pastix_int_t N, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, float *U, pastix_int_t ldu, float *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using partial ...
Definition: core_slrothu.c:193
pastix_diag_t
enum pastix_diag_e pastix_diag_t
Diagonal.
cpucblk_strsmsp
void cpucblk_strsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
Definition: core_strsmsp.c:356
core_stradd
int core_stradd(pastix_uplo_t uplo, pastix_trans_t trans, pastix_int_t M, pastix_int_t N, float alpha, const float *A, pastix_int_t LDA, float beta, float *B, pastix_int_t LDB)
Add two triangular matrices together as in PBLAS pstradd.
Definition: core_stradd.c:79
core_sgeadd
int core_sgeadd(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, float alpha, const float *A, pastix_int_t LDA, float beta, float *B, pastix_int_t LDB)
Add two matrices together.
Definition: core_sgeadd.c:78
core_sgetmo
void core_sgetmo(int m, int n, const float *A, int lda, float *B, int ldb)
Transposes a m-by-n matrix out of place using an extra workspace of size m-by-n.
Definition: core_sgetmo.c:49
cpucblk_spotrfsp1d
int cpucblk_spotrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, float *work, pastix_int_t lwork)
Perform the Cholesky factorization of a given panel and apply all its updates.
Definition: core_spotrfsp.c:334
cpucblk_ssytrfsp1d_panel
int cpucblk_ssytrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLh)
Compute the LDL^t factorization of one panel.
Definition: core_ssytrfsp.c:432
core_spotrfsp
void core_spotrfsp(pastix_int_t n, float *A, pastix_int_t lda, pastix_int_t *nbpivot, float criterion)
Compute the block static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t .
Definition: core_spotrfsp.c:144
cpucblk_sdiff
int cpucblk_sdiff(pastix_coefside_t side, const SolverCblk *cblkA, SolverCblk *cblkB)
Compare two column blocks in full-rank format.
Definition: cpucblk_sdiff.c:63
cpucblk_smemory
void cpucblk_smemory(pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_int_t *orig, pastix_int_t *gain)
Return the memory gain of the low-rank form over the full-rank form for a single column-block.
Definition: cpucblk_scompress.c:277
core_sgemdm
int core_sgemdm(pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K, float alpha, const float *A, int LDA, const float *B, int LDB, float beta, float *C, int LDC, const float *D, int incD, float *WORK, int LWORK)
Perform one of the following matrix-matrix operations.
Definition: core_sgemdm.c:139
cpucblk_sgemmsp
pastix_fixdbl_t cpucblk_sgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, float *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
Definition: core_sgemmsp.c:1512
cpucblk_sscalo
void cpucblk_sscalo(pastix_trans_t trans, SolverCblk *cblk, void *dataL, void *dataLD)
Copy the L term with scaling for the two-terms algorithm.
Definition: core_sscalo.c:170
solve_cblk_strsmsp_forward
void solve_cblk_strsmsp_forward(pastix_solv_mode_t mode, pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverMatrix *datacode, const SolverCblk *cblk, int nrhs, float *b, int ldb)
Apply a forward solve related to one cblk to all the right hand side.
Definition: solve_strsmsp.c:323
pastix_side_t
enum pastix_side_e pastix_side_t
Side of the operation.
cpucblk_sfillin
void cpucblk_sfillin(pastix_coefside_t side, const SolverMatrix *solvmtx, const pastix_bcsc_t *bcsc, pastix_int_t itercblk)
Initialize the coeftab structure from the internal bcsc.
Definition: cpucblk_sinit.c:604