30 #ifndef DOXYGEN_SHOULD_SKIP_THIS
105 ret = LAPACKE_cgeqrf_work( LAPACK_COL_MAJOR, M, rank,
106 U, ldu, tau, work, lwork );
108 flops += FLOPS_CGEQRF( M, rank );
111 cblas_ctrmm( CblasColMajor,
112 CblasLeft, CblasUpper,
113 CblasNoTrans, CblasNonUnit,
114 rank, N, CBLAS_SADDR(cone),
119 ret = LAPACKE_cungqr_work( LAPACK_COL_MAJOR, M, rank, rank,
120 U, ldu, tau, work, lwork );
122 flops += FLOPS_CUNGQR( M, rank, rank );
210 pastix_int_t ldwork = pastix_imax( r1 * r2, M * 32 + minMN );
225 eps = LAPACKE_slamch_work(
'e');
228 for (i=0; i<r2; i++, u2 += ldu, v2++) {
229 norm = cblas_scnrm2( M, u2, 1 );
230 if ( norm > (M * eps) ) {
231 cblas_csscal( M, 1. / norm, u2, 1 );
232 cblas_csscal( N, norm, v2, ldv );
236 cblas_cswap( M, u2, 1, U + (r1+r2-1) * ldu, 1 );
239 cblas_cswap( N, v2, ldv, V + (r1+r2-1), ldv );
240 ret = LAPACKE_claset_work( LAPACK_COL_MAJOR,
'A', 1, N,
241 0., 0., V + (r1+r2-1), ldv );
250 ret = LAPACKE_claset_work( LAPACK_COL_MAJOR,
'A', 1, N,
268 cblas_cgemm( CblasColMajor, CblasConjTrans, CblasNoTrans,
270 CBLAS_SADDR(cone), u1, ldu,
272 CBLAS_SADDR(czero), W, r1 );
273 flops += FLOPS_CGEMM( r1, r2, M );
276 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
278 CBLAS_SADDR(mcone), u1, ldu,
280 CBLAS_SADDR(cone), u2, ldu );
281 flops += FLOPS_CGEMM( M, r2, r1 );
284 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
286 CBLAS_SADDR(cone), W, r1,
288 CBLAS_SADDR(cone), v1, ldv );
289 flops += FLOPS_CGEMM( r1, N, r2 );
291 #if !defined(PASTIX_LR_CGS1)
293 cblas_cgemm( CblasColMajor, CblasConjTrans, CblasNoTrans,
295 CBLAS_SADDR(cone), u1, ldu,
297 CBLAS_SADDR(czero), W, r1 );
298 flops += FLOPS_CGEMM( r1, r2, M );
301 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
303 CBLAS_SADDR(mcone), u1, ldu,
305 CBLAS_SADDR(cone), u2, ldu );
306 flops += FLOPS_CGEMM( M, r2, r1 );
309 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
311 CBLAS_SADDR(cone), W, r1,
313 CBLAS_SADDR(cone), v1, ldv );
314 flops += FLOPS_CGEMM( r1, N, r2 );
317 #if defined(PASTIX_DEBUG_LR)
319 fprintf(stderr,
"partialQR: u2 not correctly projected with u1\n" );
324 ret = LAPACKE_cgeqrf_work( LAPACK_COL_MAJOR, M, r2,
325 u2, ldu, tau, work, ldwork );
327 flops += FLOPS_CGEQRF( M, r2 );
330 cblas_ctrmm( CblasColMajor,
331 CblasLeft, CblasUpper,
332 CblasNoTrans, CblasNonUnit,
333 r2, N, CBLAS_SADDR(cone),
338 ret = LAPACKE_cungqr_work( LAPACK_COL_MAJOR, M, r2, r2,
339 u2, ldu, tau, work, ldwork );
341 flops += FLOPS_CUNGQR( M, r2, r2 );
343 #if defined(PASTIX_DEBUG_LR)
345 fprintf(stderr,
"partialQR: Final u2 not orthogonal to u1\n" );
454 float norm_before, alpha;
456 assert( M1 >= (M2 + offx) );
457 assert( N1 >= (N2 + offy) );
460 eps = LAPACKE_slamch_work(
'e' );
461 alpha = 1. / sqrtf(2);
464 for (i=r1; i<rank; i++, u2 += ldu, v2++) {
466 norm = cblas_scnrm2( M2, u2 + offx, 1 );
467 if ( norm > ( M2 * eps ) ) {
468 cblas_csscal( M2, 1. / norm, u2 + offx, 1 );
469 cblas_csscal( N2, norm, v2 + offy * ldv, ldv );
474 cblas_cswap( M2, u2 + offx, 1, U + rank * ldu + offx, 1 );
479 cblas_cswap( N2, v2 + offy * ldv, ldv, V + offy * ldv + rank, ldv );
482 ret = LAPACKE_claset_work( LAPACK_COL_MAJOR,
'A', 1, N1,
483 0., 0., V + rank, ldv );
493 ret = LAPACKE_claset_work( LAPACK_COL_MAJOR,
'A', 1, N1,
502 cblas_cgemv( CblasColMajor, CblasConjTrans,
504 CBLAS_SADDR(cone), u1+offx, ldu,
506 CBLAS_SADDR(czero), W, 1 );
507 flops += FLOPS_CGEMM( M2, i, 1 );
510 cblas_cgemv( CblasColMajor, CblasNoTrans,
512 CBLAS_SADDR(mcone), u1, ldu,
514 CBLAS_SADDR(cone), u2, 1 );
515 flops += FLOPS_CGEMM( M1, i, 1 );
518 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
520 CBLAS_SADDR(cone), W, i,
522 CBLAS_SADDR(cone), v1, ldv );
523 flops += FLOPS_CGEMM( i, N1, 1 );
525 norm_before = cblas_scnrm2( i, W, 1 );
526 norm = cblas_scnrm2( M1, u2, 1 );
528 #if !defined(PASTIX_LR_CGS1)
529 if ( norm <= (alpha * norm_before) ){
531 cblas_cgemv( CblasColMajor, CblasConjTrans,
533 CBLAS_SADDR(cone), u1, ldu,
535 CBLAS_SADDR(czero), W, 1 );
536 flops += FLOPS_CGEMM( M1, i, 1 );
539 cblas_cgemv( CblasColMajor, CblasNoTrans,
541 CBLAS_SADDR(mcone), u1, ldu,
543 CBLAS_SADDR(cone), u2, 1 );
544 flops += FLOPS_CGEMM( M1, i, 1 );
547 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
549 CBLAS_SADDR(cone), W, i,
551 CBLAS_SADDR(cone), v1, ldv );
552 flops += FLOPS_CGEMM( i, N1, 1 );
554 norm = cblas_scnrm2( M1, u2, 1 );
558 if ( norm > M1 * eps ) {
559 cblas_csscal( M1, 1. / norm, u2, 1 );
560 cblas_csscal( N1, norm, v2, ldv );
565 cblas_cswap( M1, u2, 1, U + rank * ldu, 1 );
568 cblas_cswap( N1, v2, ldv, V + rank, ldv );
569 ret = LAPACKE_claset_work( LAPACK_COL_MAJOR,
'A', 1, N1,
570 0., 0., V + rank, ldv );
578 ret = LAPACKE_claset_work( LAPACK_COL_MAJOR,
'A', 1, N1,
586 #if defined(PASTIX_DEBUG_LR)
590 fprintf(stderr,
"cgs: Final u2 not orthogonal to u1\n" );
Manage nancheck for lowrank kernels. This header describes all the LAPACKE functions used for low-ran...
BEGIN_C_DECLS typedef int pastix_int_t
float _Complex pastix_complex32_t
pastix_fixdbl_t core_clrorthu_cgs(pastix_int_t M1, pastix_int_t N1, pastix_int_t M2, pastix_int_t N2, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, pastix_complex32_t *U, pastix_int_t ldu, pastix_complex32_t *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using CGS.
pastix_fixdbl_t core_clrorthu_fullqr(pastix_int_t M, pastix_int_t N, pastix_int_t rank, pastix_complex32_t *U, pastix_int_t ldu, pastix_complex32_t *V, pastix_int_t ldv)
Try to orthognalize the u part of the low-rank form, and update the v part accordingly using full QR.
pastix_fixdbl_t core_clrorthu_partialqr(pastix_int_t M, pastix_int_t N, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, pastix_complex32_t *U, pastix_int_t ldu, pastix_complex32_t *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using partial ...
int core_clrdbg_check_orthogonality_AB(pastix_int_t M, pastix_int_t NA, pastix_int_t NB, const pastix_complex32_t *A, pastix_int_t lda, const pastix_complex32_t *B, pastix_int_t ldb)
Check the orthogonality of the matrix A relatively to the matrix B.