PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
pastix_zcores.h
Go to the documentation of this file.
1/**
2 * @file pastix_zcores.h
3 *
4 * PaStiX kernel header.
5 *
6 * @copyright 2011-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
7 * Univ. Bordeaux. All rights reserved.
8 *
9 * @version 6.4.0
10 * @author Mathieu Faverge
11 * @author Pierre Ramet
12 * @author Xavier Lacoste
13 * @author Esragul Korkmaz
14 * @author Gregoire Pichon
15 * @author Tony Delarue
16 * @author Alycia Lisito
17 * @author Nolan Bredel
18 * @date 2024-07-05
19 * @generated from /builds/2mk6rsew/0/solverstack/pastix/kernels/pastix_zcores.h, normal z -> z, Tue Feb 25 14:34:34 2025
20 *
21 */
22#ifndef _pastix_zcores_h_
23#define _pastix_zcores_h_
24
25#ifndef DOXYGEN_SHOULD_SKIP_THIS
26#define pastix_cblk_lock( cblk_ ) pastix_atomic_lock( &((cblk_)->lock) )
27#define pastix_cblk_unlock( cblk_ ) pastix_atomic_unlock( &((cblk_)->lock) )
28#endif /* DOXYGEN_SHOULD_SKIP_THIS */
29
30/**
31 * @addtogroup kernel_blas_lapack
32 * @{
33 * This module contains all the BLAS and LAPACK-like kernels that are working
34 * on lapack layout matrices.
35 *
36 * @name PastixComplex64 BLAS kernels
37 * @{
38 */
39void core_zplrnt( int m,
40 int n,
41 pastix_complex64_t *A,
42 int lda,
43 int gM,
44 int m0,
45 int n0,
46 unsigned long long int seed );
47void core_zgetmo( int m,
48 int n,
49 const pastix_complex64_t *A,
50 int lda,
51 pastix_complex64_t *B,
52 int ldb );
56 pastix_complex64_t alpha,
57 const pastix_complex64_t *A,
58 pastix_int_t LDA,
59 pastix_complex64_t beta,
60 pastix_complex64_t *B,
61 pastix_int_t LDB );
62int core_zgemdm( pastix_trans_t transA,
63 pastix_trans_t transB,
64 int M,
65 int N,
66 int K,
67 pastix_complex64_t alpha,
68 const pastix_complex64_t *A,
69 int LDA,
70 const pastix_complex64_t *B,
71 int LDB,
72 pastix_complex64_t beta,
73 pastix_complex64_t *C,
74 int LDC,
75 const pastix_complex64_t *D,
76 int incD,
77 pastix_complex64_t *WORK,
78 int LWORK );
79int core_zpqrcp( double tol,
80 pastix_int_t maxrank,
81 int full_update,
82 pastix_int_t nb,
85 pastix_complex64_t *A,
86 pastix_int_t lda,
87 pastix_int_t *jpvt,
88 pastix_complex64_t *tau,
89 pastix_complex64_t *work,
90 pastix_int_t lwork,
91 double *rwork );
92int core_zrqrcp( double tol,
93 pastix_int_t maxrank,
94 int refine,
95 pastix_int_t nb,
98 pastix_complex64_t *A,
99 pastix_int_t lda,
100 pastix_int_t *jpvt,
101 pastix_complex64_t *tau,
102 pastix_complex64_t *work,
103 pastix_int_t lwork,
104 double *rwork );
105int core_zrqrrt( double tol,
106 pastix_int_t maxrank,
107 pastix_int_t nb,
108 pastix_int_t m,
109 pastix_int_t n,
110 pastix_complex64_t *A,
111 pastix_int_t lda,
112 pastix_complex64_t *tau,
113 pastix_complex64_t *B,
114 pastix_int_t ldb,
115 pastix_complex64_t *tau_b,
116 pastix_complex64_t *work,
117 pastix_int_t lwork,
118 double normA );
119int core_ztqrcp( double tol,
120 pastix_int_t maxrank,
121 int unused,
122 pastix_int_t nb,
123 pastix_int_t m,
124 pastix_int_t n,
125 pastix_complex64_t *A,
126 pastix_int_t lda,
127 pastix_int_t *jpvt,
128 pastix_complex64_t *tau,
129 pastix_complex64_t *work,
130 pastix_int_t lwork,
131 double *rwork );
132int core_ztradd( pastix_uplo_t uplo,
133 pastix_trans_t trans,
134 pastix_int_t M,
135 pastix_int_t N,
136 pastix_complex64_t alpha,
137 const pastix_complex64_t *A,
138 pastix_int_t LDA,
139 pastix_complex64_t beta,
140 pastix_complex64_t *B,
141 pastix_int_t LDB);
142int core_zscalo( pastix_trans_t trans,
143 pastix_int_t M,
144 pastix_int_t N,
145 const pastix_complex64_t *A,
146 pastix_int_t lda,
147 const pastix_complex64_t *D,
148 pastix_int_t ldd,
149 pastix_complex64_t *B,
150 pastix_int_t ldb );
151
152/**
153 * @}
154 * @name PastixComplex64 Othogonalization kernels for low-rank updates
155 * @{
156 */
158 pastix_int_t N,
159 pastix_int_t rank,
160 pastix_complex64_t *U,
161 pastix_int_t ldu,
162 pastix_complex64_t *V,
163 pastix_int_t ldv );
165 pastix_int_t N,
166 pastix_int_t r1,
167 pastix_int_t *r2ptr,
168 pastix_int_t offx,
169 pastix_int_t offy,
170 pastix_complex64_t *U,
171 pastix_int_t ldu,
172 pastix_complex64_t *V,
173 pastix_int_t ldv );
175 pastix_int_t N1,
176 pastix_int_t M2,
177 pastix_int_t N2,
178 pastix_int_t r1,
179 pastix_int_t *r2ptr,
180 pastix_int_t offx,
181 pastix_int_t offy,
182 pastix_complex64_t *U,
183 pastix_int_t ldu,
184 pastix_complex64_t *V,
185 pastix_int_t ldv );
186
187/**
188 * @}
189 * @name PastixComplex64 LAPACK kernels
190 * @{
191 */
193 pastix_complex64_t *A,
194 pastix_int_t lda,
195 pastix_int_t *nbpivots,
196 double criterion );
198 pastix_complex64_t *A,
199 pastix_int_t lda,
200 pastix_int_t *nbpivots,
201 double criterion );
203 pastix_complex64_t *A,
204 pastix_int_t lda,
205 pastix_int_t *nbpivots,
206 double criterion );
207#if defined(PRECISION_z) || defined(PRECISION_c)
209 pastix_complex64_t *A,
210 pastix_int_t lda,
211 pastix_int_t *nbpivots,
212 double criterion );
213#endif
215 pastix_complex64_t *A,
216 pastix_int_t lda,
217 pastix_int_t *nbpivots,
218 double criterion );
219
220/**
221 * @}
222 * @}
223 *
224 * @addtogroup kernel_fact
225 * @{
226 * This module contains all the kernel working at the solver matrix structure
227 * level for the numerical factorization step.
228 *
229 * @name PastixComplex64 cblk-BLAS CPU kernels
230 * @{
231 */
232
233int cpucblk_zgeaddsp1d( const SolverCblk *cblk1,
234 SolverCblk *cblk2,
235 const pastix_complex64_t *L1,
236 pastix_complex64_t *L2,
237 const pastix_complex64_t *U1,
238 pastix_complex64_t *U2 );
239
241 pastix_trans_t trans,
242 const SolverCblk *cblk,
243 const SolverBlok *blok,
244 SolverCblk *fcblk,
245 const void *A,
246 const void *B,
247 void *C,
248 pastix_complex64_t *work,
249 pastix_int_t lwork,
250 const pastix_lr_t *lowrank );
252 pastix_uplo_t uplo,
253 pastix_trans_t trans,
254 pastix_diag_t diag,
255 const SolverCblk *cblk,
256 const void *A,
257 void *C,
258 const pastix_lr_t *lowrank );
259void cpucblk_zscalo ( pastix_trans_t trans,
260 const SolverCblk *cblk,
261 void *dataL,
262 void *dataLD );
263
265 const SolverCblk *cblk,
266 SolverCblk *fcblk,
267 pastix_int_t blok_mk,
268 pastix_int_t blok_nk,
269 pastix_int_t blok_mn,
270 const void *A,
271 const void *B,
272 void *C,
273 const pastix_lr_t *lowrank );
275 pastix_uplo_t uplo,
276 pastix_trans_t trans,
277 pastix_diag_t diag,
278 const SolverCblk *cblk,
279 pastix_int_t blok_m,
280 const void *A,
281 void *C,
282 const pastix_lr_t *lowrank );
283void cpublok_zscalo ( pastix_trans_t trans,
284 const SolverCblk *cblk,
285 pastix_int_t blok_m,
286 const void *A,
287 const void *dataD,
288 void *dataB );
289
290/**
291 * @}
292 * @name PastixComplex64 cblk LU kernels
293 * @{
294 */
296 SolverCblk *cblk,
297 void *L,
298 void *U );
300 SolverCblk *cblk,
301 void *L,
302 void *U );
303int cpucblk_zgetrfsp1d ( SolverMatrix *solvmtx,
304 SolverCblk *cblk,
305 pastix_complex64_t *work,
306 pastix_int_t lwork );
307
308/**
309 * @}
310 * @name PastixComplex64 cblk Cholesky kernels
311 * @{
312 */
314 SolverCblk *cblk,
315 void *dataL );
317 SolverCblk *cblk,
318 void *L );
319int cpucblk_zpotrfsp1d( SolverMatrix *solvmtx,
320 SolverCblk *cblk,
321 pastix_complex64_t *work,
322 pastix_int_t lwork );
323
324/**
325 * @}
326 */
327
328#if defined(PRECISION_z) || defined(PRECISION_c)
329 /**
330 * @name PastixComplex64 cblk LDL^h kernels
331 * @{
332 */
334 SolverCblk *cblk,
335 void *L );
337 SolverCblk *cblk,
338 void *L,
339 void *DLh );
340int cpucblk_zhetrfsp1d( SolverMatrix *solvmtx,
341 SolverCblk *cblk,
342 pastix_complex64_t *work1,
343 pastix_complex64_t *work2,
344 pastix_int_t lwork );
345
346/**
347 * @}
348 * @name PastixComplex64 cblk LL^t kernels
349 * @{
350 */
352 SolverCblk *cblk,
353 void *dataL );
355 SolverCblk *cblk,
356 void *dataL );
357int cpucblk_zpxtrfsp1d( SolverMatrix *solvmtx,
358 SolverCblk *cblk,
359 pastix_complex64_t *work,
360 pastix_int_t lwork );
361
362/**
363 * @}
364 */
365#endif
366
367 /**
368 * @name PastixComplex64 cblk LDL^t kernels
369 * @{
370 */
372 SolverCblk *cblk,
373 void *dataL );
375 SolverCblk *cblk,
376 void *L,
377 void *DLt );
378int cpucblk_zsytrfsp1d( SolverMatrix *solvmtx,
379 SolverCblk *cblk,
380 pastix_complex64_t *Dlt,
381 pastix_complex64_t *work,
382 pastix_int_t lwork );
383
384/**
385 * @}
386 * @name PastixComplex64 initialization and additionnal routines
387 * @{
388 */
389void cpucblk_zalloc_lrws( const SolverCblk *cblk,
390 pastix_lrblock_t *lrblok,
391 pastix_complex64_t *ws );
393 SolverCblk *cblk,
394 int rkmax );
396 SolverCblk *cblk );
398 SolverCblk *cblk );
400 SolverCblk *cblk );
402 const SolverMatrix *solvmtx,
403 const pastix_bcsc_t *bcsc,
404 pastix_int_t itercblk );
406 const SolverMatrix *solvmtx,
407 const pastix_bcsc_t *bcsc,
408 pastix_int_t itercblk,
409 const char *directory );
410void cpucblk_zgetschur( const SolverCblk *cblk,
411 int upper_part,
412 pastix_complex64_t *S,
413 pastix_int_t lds );
415 const SolverCblk *cblk,
416 FILE *stream );
418 const SolverCblk *cblkA,
419 SolverCblk *cblkB );
420pastix_fixdbl_t cpucblk_zadd( pastix_complex64_t alpha,
421 const SolverCblk *cblkA,
422 SolverCblk *cblkB,
423 const void *A,
424 void *B,
425 pastix_complex64_t *work,
426 pastix_int_t lwork,
427 const pastix_lr_t *lowrank );
428pastix_fixdbl_t cpublok_zadd( pastix_complex64_t alpha,
429 const SolverCblk *cblkA,
430 SolverCblk *cblkB,
431 pastix_int_t blokA_m,
432 pastix_int_t blokB_m,
433 const void *A,
434 void *B,
435 pastix_complex64_t *work,
436 pastix_int_t lwork,
437 const pastix_lr_t *lowrank );
438
439/**
440 * @}
441 * @name PastixComplex64 MPI routines
442 * @{
443 */
444int cpucblk_zincoming_deps( int mt_flag,
446 SolverMatrix *solvmtx,
447 SolverCblk *cblk );
449 SolverMatrix *solvmtx,
450 const SolverCblk *cblk,
451 SolverCblk *fcbk );
453 pastix_int_t sched,
454 SolverMatrix *solvmtx );
455void cpucblk_zupdate_reqtab( SolverMatrix *solvmtx );
456#if defined( PASTIX_WITH_MPI )
457void cpucblk_zmpi_progress( pastix_coefside_t side,
458 SolverMatrix *solvmtx,
459 int threadid );
460void cpucblk_zisend_rhs_bwd( SolverMatrix *solvmtx,
461 pastix_rhs_t rhsb,
462 SolverCblk *cblk );
463#endif
464void cpucblk_zmpi_rhs_fwd_progress( const args_solve_t *enums,
465 SolverMatrix *solvmtx,
466 pastix_rhs_t rhsb,
467 int threadid );
469 SolverMatrix *solvmtx,
470 pastix_rhs_t rhsb,
471 const SolverCblk *cblk,
472 SolverCblk *fcbk );
474 const args_solve_t *enums,
475 SolverMatrix *solvmtx,
476 SolverCblk *cblk,
477 pastix_rhs_t rhsb );
479 pastix_int_t sched,
480 SolverMatrix *solvmtx,
481 pastix_rhs_t rhsb );
482
483void cpucblk_zmpi_rhs_bwd_progress( const args_solve_t *enums,
484 SolverMatrix *solvmtx,
485 pastix_rhs_t rhsb,
486 int threadid );
488 SolverMatrix *solvmtx,
489 pastix_rhs_t rhsb,
490 const SolverCblk *cblk,
491 SolverCblk *fcbk );
493 const args_solve_t *enums,
494 SolverMatrix *solvmtx,
495 SolverCblk *cblk,
496 pastix_rhs_t rhsb );
498 pastix_int_t sched,
499 SolverMatrix *solvmtx,
500 pastix_rhs_t rhsb );
501void cpucblk_zsend_rhs_forward( const SolverMatrix *solvmtx,
502 SolverCblk *cblk,
503 pastix_rhs_t b );
504void cpucblk_zrecv_rhs_forward( const SolverMatrix *solvmtx,
505 SolverCblk *cblk,
506 pastix_complex64_t *work,
507 pastix_rhs_t b );
508void cpucblk_zsend_rhs_backward( const SolverMatrix *solvmtx,
509 SolverCblk *cblk,
510 pastix_rhs_t b );
511void cpucblk_zrecv_rhs_backward( const SolverMatrix *solvmtx,
512 SolverCblk *cblk,
513 pastix_rhs_t b );
514
515/**
516 * @}
517 * @name PastixComplex64 compression/uncompression routines
518 * @{
519 */
521 pastix_int_t M,
522 pastix_int_t N,
523 pastix_lrblock_t *blok );
526 int max_ilulvl,
527 SolverCblk *cblk );
529 SolverCblk *cblk );
531 const SolverMatrix *solvmtx,
532 SolverCblk *cblk,
533 pastix_int_t *orig,
534 pastix_int_t *gain );
535
536/**
537 * @}
538 * @}
539 *
540 * @addtogroup kernel_solve
541 * @{
542 * This module contains all the kernel working on the solver matrix structure
543 * for the solve step.
544 *
545 */
546
548 pastix_uplo_t uplo,
549 pastix_trans_t trans,
550 pastix_diag_t diag,
551 const SolverCblk *cblk,
552 int nrhs,
553 const void *dataA,
554 pastix_complex64_t *b,
555 int ldb );
557 pastix_trans_t trans,
558 pastix_int_t nrhs,
559 const SolverCblk *cblk,
560 const SolverBlok *blok,
561 SolverCblk *fcbk,
562 const void *dataA,
563 const pastix_complex64_t *B,
564 pastix_int_t ldb,
565 pastix_complex64_t *C,
566 pastix_int_t ldc );
567
569 SolverMatrix *datacode,
570 const SolverCblk *cblk,
571 pastix_rhs_t b );
573 SolverMatrix *datacode,
574 SolverCblk *cblk,
575 pastix_rhs_t b );
576
577void solve_cblk_zdiag( const SolverCblk *cblk,
578 const void *dataA,
579 int nrhs,
580 pastix_complex64_t *b,
581 int ldb,
582 pastix_complex64_t *work );
583/**
584 * @}
585 *
586 * @addtogroup kernel_fact_null
587 * @{
588 * This module contains the three terms update functions for the LDL^t and
589 * LDL^h factorizations.
590 *
591 */
592#if defined(PRECISION_z) || defined(PRECISION_c)
593void core_zhetrfsp1d_gemm( const SolverCblk *cblk,
594 const SolverBlok *blok,
595 SolverCblk *fcblk,
596 const pastix_complex64_t *L,
597 pastix_complex64_t *C,
598 pastix_complex64_t *work );
599#endif
600void core_zsytrfsp1d_gemm( const SolverCblk *cblk,
601 const SolverBlok *blok,
602 SolverCblk *fcblk,
603 const pastix_complex64_t *L,
604 pastix_complex64_t *C,
605 pastix_complex64_t *work );
606
607int
609 SolverCblk *cblk );
610void
612 SolverBlok *blok,
613 pastix_complex64_t *work,
614 pastix_int_t lwork );
615int
617 SolverCblk *cblk );
618void
620 SolverBlok *blok,
621 pastix_complex64_t *work );
622int
624 SolverCblk *cblk );
625void
627 SolverBlok *blok,
628 pastix_complex64_t *work,
629 pastix_int_t lwork );
630#if defined(PRECISION_z) || defined(PRECISION_c)
631int
633 SolverCblk *cblk );
634void
636 SolverBlok *blok,
637 pastix_complex64_t *work,
638 pastix_int_t lwork );
639int
641 SolverCblk *cblk );
642void
644 SolverBlok *blok,
645 pastix_complex64_t *work );
646#endif
647
648/**
649 * @}
650 */
651
652#endif /* _pastix_zcores_h_ */
int cpucblk_zhetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex64_t *DLh, pastix_complex64_t *work, pastix_int_t lwork)
Perform the LDL^h factorization of a given panel and apply all its updates.
int cpucblk_zhetrfsp1d_hetrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Computes the LDL^h factorization of the diagonal block in a panel.
int cpucblk_zhetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLh)
Compute the LDL^h factorization of one panel.
void cpucblk_zhetrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex64_t *work)
Apply the updates of the LDL^h factorisation of a given panel.
void core_zhetrfsp(pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the block static pivoting factorization of the hermitian matrix n-by-n A such that A = L * D ...
void core_zhetrfsp1d_gemm(const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const pastix_complex64_t *L, pastix_complex64_t *C, pastix_complex64_t *work)
int cpucblk_zhetrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LDL^h factorization of a given panel and submit tasks for the subsequent updates.
void cpucblk_zpxtrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex64_t *work, pastix_int_t lwork)
Apply the updates of the LL^t factorisation of a given panel.
int cpucblk_zpxtrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Compute the LL^t factorization of one panel.
int cpucblk_zpxtrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex64_t *work, pastix_int_t lwork)
Perform the LL^t factorization of a given panel and apply all its updates.
int cpucblk_zpxtrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LL^t factorization of a given panel.
int cpucblk_zpxtrfsp1d_pxtrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the LL^t factorization of the diagonal block in a panel.
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
double pastix_fixdbl_t
Definition datatypes.h:65
int core_zrqrcp(double tol, pastix_int_t maxrank, int refine, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *jpvt, pastix_complex64_t *tau, pastix_complex64_t *work, pastix_int_t lwork, double *rwork)
Compute a randomized QR factorization.
void core_zgetrfsp(pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the block static pivoting LU factorization of the matrix m-by-n A = L * U.
pastix_fixdbl_t core_zlrorthu_fullqr(pastix_int_t M, pastix_int_t N, pastix_int_t rank, pastix_complex64_t *U, pastix_int_t ldu, pastix_complex64_t *V, pastix_int_t ldv)
Try to orthognalize the u part of the low-rank form, and update the v part accordingly using full QR.
void core_zgetmo(int m, int n, const pastix_complex64_t *A, int lda, pastix_complex64_t *B, int ldb)
Transposes a m-by-n matrix out of place using an extra workspace of size m-by-n.
Definition core_zgetmo.c:49
int core_ztqrcp(double tol, pastix_int_t maxrank, int unused, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *jpvt, pastix_complex64_t *tau, pastix_complex64_t *work, pastix_int_t lwork, double *rwork)
Compute a randomized QR factorization with truncated updates.
Definition core_ztqrcp.c:99
int core_zgeadd(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex64_t alpha, const pastix_complex64_t *A, pastix_int_t LDA, pastix_complex64_t beta, pastix_complex64_t *B, pastix_int_t LDB)
Add two matrices together.
Definition core_zgeadd.c:78
void core_zsytrfsp(pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the block static pivoting factorization of the symmetric matrix n-by-n A such that A = L * D ...
void core_zplrnt(int m, int n, pastix_complex64_t *A, int lda, int gM, int m0, int n0, unsigned long long int seed)
Generate a random tile.
Definition core_zplrnt.c:91
pastix_fixdbl_t core_zlrorthu_cgs(pastix_int_t M1, pastix_int_t N1, pastix_int_t M2, pastix_int_t N2, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, pastix_complex64_t *U, pastix_int_t ldu, pastix_complex64_t *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using CGS.
int core_zpqrcp(double tol, pastix_int_t maxrank, int full_update, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *jpvt, pastix_complex64_t *tau, pastix_complex64_t *work, pastix_int_t lwork, double *rwork)
Compute a rank-reavealing QR factorization.
int core_zrqrrt(double tol, pastix_int_t maxrank, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_complex64_t *tau, pastix_complex64_t *B, pastix_int_t ldb, pastix_complex64_t *tau_b, pastix_complex64_t *work, pastix_int_t lwork, double normA)
Compute a randomized QR factorization with rotation technique.
int core_zscalo(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, const pastix_complex64_t *A, pastix_int_t lda, const pastix_complex64_t *D, pastix_int_t ldd, pastix_complex64_t *B, pastix_int_t ldb)
Scale a matrix by a diagonal out of place.
Definition core_zscalo.c:74
int core_ztradd(pastix_uplo_t uplo, pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex64_t alpha, const pastix_complex64_t *A, pastix_int_t LDA, pastix_complex64_t beta, pastix_complex64_t *B, pastix_int_t LDB)
Add two triangular matrices together as in PBLAS pztradd.
Definition core_ztradd.c:79
int core_zgemdm(pastix_trans_t transA, pastix_trans_t transB, int M, int N, int K, pastix_complex64_t alpha, const pastix_complex64_t *A, int LDA, const pastix_complex64_t *B, int LDB, pastix_complex64_t beta, pastix_complex64_t *C, int LDC, const pastix_complex64_t *D, int incD, pastix_complex64_t *WORK, int LWORK)
Perform one of the following matrix-matrix operations.
pastix_fixdbl_t core_zlrorthu_partialqr(pastix_int_t M, pastix_int_t N, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, pastix_complex64_t *U, pastix_int_t ldu, pastix_complex64_t *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using partial ...
void core_zpotrfsp(pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the block static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t .
void core_zpxtrfsp(pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the block static pivoting LL^t factorization of the matrix n-by-n A = L * L^t .
void core_zsytrfsp1d_gemm(const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const pastix_complex64_t *L, pastix_complex64_t *C, pastix_complex64_t *work)
void cpucblk_zsytrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex64_t *work)
Apply the updates of the LDL^t factorisation of a given panel.
int cpucblk_zgetrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LU factorization of a given panel and submit tasks for the subsequent updates.
void cpucblk_zpotrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex64_t *work, pastix_int_t lwork)
Apply the updates of the cholesky factorisation of a given panel.
void cpucblk_zgetrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, pastix_complex64_t *work, pastix_int_t lwork)
Apply the updates of the LU factorisation of a given panel.
int cpucblk_zpotrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the Cholesky factorization of a given panel and submit tasks for the subsequent updates.
int cpucblk_zsytrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LDL^t factorization of a given panel and submit tasks for the subsequent updates.
void cpucblk_zrelease_rhs_bwd_deps(const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void cpublok_zscalo(pastix_trans_t trans, const SolverCblk *cblk, pastix_int_t blok_m, const void *A, const void *dataD, void *dataB)
Copy the lower terms of the block with scaling for the two-terms algorithm.
void cpucblk_ztrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
void cpucblk_zrequest_rhs_bwd_cleanup(const args_solve_t *enums, pastix_int_t sched, SolverMatrix *solvmtx, pastix_rhs_t rhsb)
Waitall routine for current cblk request.
void cpucblk_zsend_rhs_backward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t b)
Send the rhs associated to a cblk->lcolidx to the remote node.
void cpucblk_zinit(pastix_coefside_t side, const SolverMatrix *solvmtx, const pastix_bcsc_t *bcsc, pastix_int_t itercblk, const char *directory)
Fully initialize a single cblk.
void cpucblk_zuncompress(pastix_coefside_t side, SolverCblk *cblk)
Uncompress a single column block from low-rank format to full-rank format.
int cpucblk_zincoming_rhs_fwd_deps(int rank, const args_solve_t *enums, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
int cpucblk_zpotrfsp1d_potrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the Cholesky factorization of the diagonal block in a panel.
int cpucblk_zgetrfsp1d_getrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *U)
Compute the LU factorization of the diagonal block in a panel.
int cpucblk_zsytrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLt)
Compute the LDL^t factorization of one panel.
void cpucblk_zalloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
void cpucblk_zgetschur(const SolverCblk *cblk, int upper_part, pastix_complex64_t *S, pastix_int_t lds)
Extract a cblk panel of the Schur complement to a dense lapack form.
int cpucblk_zdiff(pastix_coefside_t side, const SolverCblk *cblkA, SolverCblk *cblkB)
Compare two column blocks in full-rank format.
void cpucblk_zalloc_fr(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
void cpucblk_zalloc_lr(pastix_coefside_t side, SolverCblk *cblk, int rkmax)
Allocate the cblk structure to store the coefficient.
pastix_fixdbl_t cpucblk_zgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, pastix_complex64_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
int cpucblk_zincoming_deps(int mt_flag, pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
void cpucblk_zdump(pastix_coefside_t side, const SolverCblk *cblk, FILE *stream)
Dump a single column block into a FILE in a human readale format.
void cpucblk_zrequest_rhs_fwd_cleanup(const args_solve_t *enums, pastix_int_t sched, SolverMatrix *solvmtx, pastix_rhs_t rhsb)
Waitall routine for current cblk request.
int cpucblk_zgeaddsp1d(const SolverCblk *cblk1, SolverCblk *cblk2, const pastix_complex64_t *L1, pastix_complex64_t *L2, const pastix_complex64_t *U1, pastix_complex64_t *U2)
Add two column blocks together.
pastix_fixdbl_t cpublok_zadd(pastix_complex64_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB, pastix_int_t blokA_m, pastix_int_t blokB_m, const void *A, void *B, pastix_complex64_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add two bloks.
int cpucblk_zgetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex64_t *work, pastix_int_t lwork)
Perform the LU factorization of a given panel and apply all its updates.
void cpucblk_zsend_rhs_forward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t b)
Send the rhs associated to a cblk->lcolidx to the remote node.
pastix_fixdbl_t cpublok_zcompress(const pastix_lr_t *lowrank, pastix_int_t M, pastix_int_t N, pastix_lrblock_t *blok)
Compress a single block from full-rank to low-rank format.
void cpucblk_zrelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
int cpucblk_zincoming_rhs_bwd_deps(int rank, const args_solve_t *enums, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
int cpucblk_zpotrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex64_t *work, pastix_int_t lwork)
Perform the Cholesky factorization of a given panel and apply all its updates.
void cpucblk_zscalo(pastix_trans_t trans, const SolverCblk *cblk, void *dataL, void *dataLD)
Copy the L term with scaling for the two-terms algorithm.
int cpucblk_zpotrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Compute the Cholesky factorization of one panel.
void cpucblk_zfree(pastix_coefside_t side, SolverCblk *cblk)
Free the cblk structure that store the coefficient.
pastix_fixdbl_t cpublok_zgemmsp(pastix_trans_t trans, const SolverCblk *cblk, SolverCblk *fcblk, pastix_int_t blok_mk, pastix_int_t blok_nk, pastix_int_t blok_mn, const void *A, const void *B, void *C, const pastix_lr_t *lowrank)
Compute the CPU gemm associated to a couple of off-diagonal blocks.
pastix_int_t cpucblk_zcompress(const SolverMatrix *solvmtx, pastix_coefside_t side, int max_ilulvl, SolverCblk *cblk)
Compress a single column block from full-rank to low-rank format.
void cpucblk_zrecv_rhs_backward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t b)
Receive the rhs associated to a cblk->lcolidx to the remote node.
pastix_fixdbl_t cpublok_ztrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, pastix_int_t blok_m, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
void cpucblk_zmemory(pastix_coefside_t side, const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_int_t *orig, pastix_int_t *gain)
Return the memory gain of the low-rank form over the full-rank form for a single column-block.
int cpucblk_zgetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *U)
Compute the LU factorization of one panel.
void cpucblk_zrequest_cleanup(pastix_coefside_t side, pastix_int_t sched, SolverMatrix *solvmtx)
Waitall routine for current cblk request.
void cpucblk_zrelease_rhs_fwd_deps(const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
int cpucblk_zsytrfsp1d_sytrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Computes the LDL^t factorization of the diagonal block in a panel.
int cpucblk_zsytrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex64_t *Dlt, pastix_complex64_t *work, pastix_int_t lwork)
Perform the LDL^t factorization of a given panel and apply all its updates.
void cpucblk_zalloc_lrws(const SolverCblk *cblk, pastix_lrblock_t *lrblok, pastix_complex64_t *ws)
Initialize lrblock structure from a workspace for all blocks of the cblk associated.
void cpucblk_zrecv_rhs_forward(const SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex64_t *work, pastix_rhs_t b)
Receive the rhs associated to a cblk->lcolidx to the remote node.
pastix_fixdbl_t cpucblk_zadd(pastix_complex64_t alpha, const SolverCblk *cblkA, SolverCblk *cblkB, const void *A, void *B, pastix_complex64_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Add two column bloks in full rank format.
void cpucblk_zfillin(pastix_coefside_t side, const SolverMatrix *solvmtx, const pastix_bcsc_t *bcsc, pastix_int_t itercblk)
Initialize the coeftab structure from the internal bcsc.
Structure to define the type of function to use for the low-rank kernels and their parameters.
The block low-rank structure to hold a matrix in low-rank form.
void solve_blok_zgemm(pastix_side_t side, pastix_trans_t trans, pastix_int_t nrhs, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcbk, const void *dataA, const pastix_complex64_t *B, pastix_int_t ldb, pastix_complex64_t *C, pastix_int_t ldc)
Apply a solve gemm update related to a single block of the matrix A.
void solve_cblk_zdiag(const SolverCblk *cblk, const void *dataA, int nrhs, pastix_complex64_t *b, int ldb, pastix_complex64_t *work)
Apply the diagonal solve related to one cblk to all the right hand side.
void solve_cblk_ztrsmsp_forward(const args_solve_t *enums, SolverMatrix *datacode, const SolverCblk *cblk, pastix_rhs_t b)
Apply a forward solve related to one cblk to all the right hand side.
void solve_blok_ztrsm(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, int nrhs, const void *dataA, pastix_complex64_t *b, int ldb)
Apply a solve trsm update related to a diagonal block of the matrix A.
void solve_cblk_ztrsmsp_backward(const args_solve_t *enums, SolverMatrix *datacode, SolverCblk *cblk, pastix_rhs_t b)
Apply a backward solve related to one cblk to all the right hand side.
enum pastix_diag_e pastix_diag_t
Diagonal.
enum pastix_uplo_e pastix_uplo_t
Upper/Lower part.
enum pastix_side_e pastix_side_t
Side of the operation.
enum pastix_trans_e pastix_trans_t
Transpostion.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
Main PaStiX RHS structure.
Definition pastixdata.h:155
Arguments for the solve.
Definition solver.h:88
Solver block structure.
Definition solver.h:141
Solver column block structure.
Definition solver.h:161
Solver column block structure.
Definition solver.h:203