30 #ifndef DOXYGEN_SHOULD_SKIP_THIS
31 static double mdone = -1.0;
32 static double done = 1.0;
33 static double dzero = 0.0;
94 double *W = malloc( lwork *
sizeof(
double) );
105 ret = LAPACKE_dgeqrf_work( LAPACK_COL_MAJOR, M, rank,
106 U, ldu, tau, work, lwork );
108 flops += FLOPS_DGEQRF( M, rank );
111 cblas_dtrmm( CblasColMajor,
112 CblasLeft, CblasUpper,
113 CblasNoTrans, CblasNonUnit,
119 ret = LAPACKE_dorgqr_work( LAPACK_COL_MAJOR, M, rank, rank,
120 U, ldu, tau, work, lwork );
122 flops += FLOPS_DORGQR( M, rank, rank );
210 pastix_int_t ldwork = pastix_imax( r1 * r2, M * 32 + minMN );
213 double *u2 = U + r1 * ldu;
216 double *W = malloc( ldwork *
sizeof(
double) );
225 eps = LAPACKE_dlamch_work(
'e');
228 for (i=0; i<r2; i++, u2 += ldu, v2++) {
229 norm = cblas_dnrm2( M, u2, 1 );
230 if ( norm > (M * eps) ) {
231 cblas_dscal( M, 1. / norm, u2, 1 );
232 cblas_dscal( N, norm, v2, ldv );
236 cblas_dswap( M, u2, 1, U + (r1+r2-1) * ldu, 1 );
237 memset( U + (r1+r2-1) * ldu, 0, M *
sizeof(
double) );
239 cblas_dswap( N, v2, ldv, V + (r1+r2-1), ldv );
240 ret = LAPACKE_dlaset_work( LAPACK_COL_MAJOR,
'A', 1, N,
241 0., 0., V + (r1+r2-1), ldv );
249 memset( u2, 0, M *
sizeof(
double) );
250 ret = LAPACKE_dlaset_work( LAPACK_COL_MAJOR,
'A', 1, N,
268 cblas_dgemm( CblasColMajor, CblasTrans, CblasNoTrans,
273 flops += FLOPS_DGEMM( r1, r2, M );
276 cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
281 flops += FLOPS_DGEMM( M, r2, r1 );
284 cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
289 flops += FLOPS_DGEMM( r1, N, r2 );
291 #if !defined(PASTIX_LR_CGS1)
293 cblas_dgemm( CblasColMajor, CblasTrans, CblasNoTrans,
298 flops += FLOPS_DGEMM( r1, r2, M );
301 cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
306 flops += FLOPS_DGEMM( M, r2, r1 );
309 cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
314 flops += FLOPS_DGEMM( r1, N, r2 );
317 #if defined(PASTIX_DEBUG_LR)
319 fprintf(stderr,
"partialQR: u2 not correctly projected with u1\n" );
324 ret = LAPACKE_dgeqrf_work( LAPACK_COL_MAJOR, M, r2,
325 u2, ldu, tau, work, ldwork );
327 flops += FLOPS_DGEQRF( M, r2 );
330 cblas_dtrmm( CblasColMajor,
331 CblasLeft, CblasUpper,
332 CblasNoTrans, CblasNonUnit,
338 ret = LAPACKE_dorgqr_work( LAPACK_COL_MAJOR, M, r2, r2,
339 u2, ldu, tau, work, ldwork );
341 flops += FLOPS_DORGQR( M, r2, r2 );
343 #if defined(PASTIX_DEBUG_LR)
345 fprintf(stderr,
"partialQR: Final u2 not orthogonal to u1\n" );
445 double *u2 = U + r1 * ldu;
454 double norm_before, alpha;
456 assert( M1 >= (M2 + offx) );
457 assert( N1 >= (N2 + offy) );
459 W = malloc(ldwork *
sizeof(
double));
460 eps = LAPACKE_dlamch_work(
'e' );
461 alpha = 1. / sqrt(2);
464 for (i=r1; i<rank; i++, u2 += ldu, v2++) {
466 norm = cblas_dnrm2( M2, u2 + offx, 1 );
467 if ( norm > ( M2 * eps ) ) {
468 cblas_dscal( M2, 1. / norm, u2 + offx, 1 );
469 cblas_dscal( N2, norm, v2 + offy * ldv, ldv );
474 cblas_dswap( M2, u2 + offx, 1, U + rank * ldu + offx, 1 );
476 memset( U + rank * ldu, 0, M1 *
sizeof(
double) );
479 cblas_dswap( N2, v2 + offy * ldv, ldv, V + offy * ldv + rank, ldv );
482 ret = LAPACKE_dlaset_work( LAPACK_COL_MAJOR,
'A', 1, N1,
483 0., 0., V + rank, ldv );
492 memset( u2, 0, M1 *
sizeof(
double) );
493 ret = LAPACKE_dlaset_work( LAPACK_COL_MAJOR,
'A', 1, N1,
502 cblas_dgemv( CblasColMajor, CblasTrans,
504 (done), u1+offx, ldu,
507 flops += FLOPS_DGEMM( M2, i, 1 );
510 cblas_dgemv( CblasColMajor, CblasNoTrans,
515 flops += FLOPS_DGEMM( M1, i, 1 );
518 cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
523 flops += FLOPS_DGEMM( i, N1, 1 );
525 norm_before = cblas_dnrm2( i, W, 1 );
526 norm = cblas_dnrm2( M1, u2, 1 );
528 #if !defined(PASTIX_LR_CGS1)
529 if ( norm <= (alpha * norm_before) ){
531 cblas_dgemv( CblasColMajor, CblasTrans,
536 flops += FLOPS_DGEMM( M1, i, 1 );
539 cblas_dgemv( CblasColMajor, CblasNoTrans,
544 flops += FLOPS_DGEMM( M1, i, 1 );
547 cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
552 flops += FLOPS_DGEMM( i, N1, 1 );
554 norm = cblas_dnrm2( M1, u2, 1 );
558 if ( norm > M1 * eps ) {
559 cblas_dscal( M1, 1. / norm, u2, 1 );
560 cblas_dscal( N1, norm, v2, ldv );
565 cblas_dswap( M1, u2, 1, U + rank * ldu, 1 );
566 memset( U + rank * ldu, 0, M1 *
sizeof(
double) );
568 cblas_dswap( N1, v2, ldv, V + rank, ldv );
569 ret = LAPACKE_dlaset_work( LAPACK_COL_MAJOR,
'A', 1, N1,
570 0., 0., V + rank, ldv );
577 memset( u2, 0, M1 *
sizeof(
double) );
578 ret = LAPACKE_dlaset_work( LAPACK_COL_MAJOR,
'A', 1, N1,
586 #if defined(PASTIX_DEBUG_LR)
590 fprintf(stderr,
"cgs: Final u2 not orthogonal to u1\n" );
Manage nancheck for lowrank kernels. This header describes all the LAPACKE functions used for low-ran...
BEGIN_C_DECLS typedef int pastix_int_t
pastix_fixdbl_t core_dlrorthu_fullqr(pastix_int_t M, pastix_int_t N, pastix_int_t rank, double *U, pastix_int_t ldu, double *V, pastix_int_t ldv)
Try to orthognalize the u part of the low-rank form, and update the v part accordingly using full QR.
pastix_fixdbl_t core_dlrorthu_partialqr(pastix_int_t M, pastix_int_t N, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, double *U, pastix_int_t ldu, double *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using partial ...
pastix_fixdbl_t core_dlrorthu_cgs(pastix_int_t M1, pastix_int_t N1, pastix_int_t M2, pastix_int_t N2, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, double *U, pastix_int_t ldu, double *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using CGS.
int core_dlrdbg_check_orthogonality_AB(pastix_int_t M, pastix_int_t NA, pastix_int_t NB, const double *A, pastix_int_t lda, const double *B, pastix_int_t ldb)
Check the orthogonality of the matrix A relatively to the matrix B.