30 #ifndef DOXYGEN_SHOULD_SKIP_THIS
31 static float msone = -1.0;
32 static float sone = 1.0;
33 static float szero = 0.0;
94 float *W = malloc( lwork *
sizeof(
float) );
105 ret = LAPACKE_sgeqrf_work( LAPACK_COL_MAJOR, M, rank,
106 U, ldu, tau, work, lwork );
108 flops += FLOPS_SGEQRF( M, rank );
111 cblas_strmm( CblasColMajor,
112 CblasLeft, CblasUpper,
113 CblasNoTrans, CblasNonUnit,
119 ret = LAPACKE_sorgqr_work( LAPACK_COL_MAJOR, M, rank, rank,
120 U, ldu, tau, work, lwork );
122 flops += FLOPS_SORGQR( M, rank, rank );
210 pastix_int_t ldwork = pastix_imax( r1 * r2, M * 32 + minMN );
213 float *u2 = U + r1 * ldu;
216 float *W = malloc( ldwork *
sizeof(
float) );
225 eps = LAPACKE_slamch_work(
'e');
228 for (i=0; i<r2; i++, u2 += ldu, v2++) {
229 norm = cblas_snrm2( M, u2, 1 );
230 if ( norm > (M * eps) ) {
231 cblas_sscal( M, 1. / norm, u2, 1 );
232 cblas_sscal( N, norm, v2, ldv );
236 cblas_sswap( M, u2, 1, U + (r1+r2-1) * ldu, 1 );
237 memset( U + (r1+r2-1) * ldu, 0, M *
sizeof(
float) );
239 cblas_sswap( N, v2, ldv, V + (r1+r2-1), ldv );
240 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'A', 1, N,
241 0., 0., V + (r1+r2-1), ldv );
249 memset( u2, 0, M *
sizeof(
float) );
250 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'A', 1, N,
268 cblas_sgemm( CblasColMajor, CblasTrans, CblasNoTrans,
273 flops += FLOPS_SGEMM( r1, r2, M );
276 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
281 flops += FLOPS_SGEMM( M, r2, r1 );
284 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
289 flops += FLOPS_SGEMM( r1, N, r2 );
291 #if !defined(PASTIX_LR_CGS1)
293 cblas_sgemm( CblasColMajor, CblasTrans, CblasNoTrans,
298 flops += FLOPS_SGEMM( r1, r2, M );
301 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
306 flops += FLOPS_SGEMM( M, r2, r1 );
309 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
314 flops += FLOPS_SGEMM( r1, N, r2 );
317 #if defined(PASTIX_DEBUG_LR)
319 fprintf(stderr,
"partialQR: u2 not correctly projected with u1\n" );
324 ret = LAPACKE_sgeqrf_work( LAPACK_COL_MAJOR, M, r2,
325 u2, ldu, tau, work, ldwork );
327 flops += FLOPS_SGEQRF( M, r2 );
330 cblas_strmm( CblasColMajor,
331 CblasLeft, CblasUpper,
332 CblasNoTrans, CblasNonUnit,
338 ret = LAPACKE_sorgqr_work( LAPACK_COL_MAJOR, M, r2, r2,
339 u2, ldu, tau, work, ldwork );
341 flops += FLOPS_SORGQR( M, r2, r2 );
343 #if defined(PASTIX_DEBUG_LR)
345 fprintf(stderr,
"partialQR: Final u2 not orthogonal to u1\n" );
445 float *u2 = U + r1 * ldu;
454 float norm_before, alpha;
456 assert( M1 >= (M2 + offx) );
457 assert( N1 >= (N2 + offy) );
459 W = malloc(ldwork *
sizeof(
float));
460 eps = LAPACKE_slamch_work(
'e' );
461 alpha = 1. / sqrtf(2);
464 for (i=r1; i<rank; i++, u2 += ldu, v2++) {
466 norm = cblas_snrm2( M2, u2 + offx, 1 );
467 if ( norm > ( M2 * eps ) ) {
468 cblas_sscal( M2, 1. / norm, u2 + offx, 1 );
469 cblas_sscal( N2, norm, v2 + offy * ldv, ldv );
474 cblas_sswap( M2, u2 + offx, 1, U + rank * ldu + offx, 1 );
476 memset( U + rank * ldu, 0, M1 *
sizeof(
float) );
479 cblas_sswap( N2, v2 + offy * ldv, ldv, V + offy * ldv + rank, ldv );
482 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'A', 1, N1,
483 0., 0., V + rank, ldv );
492 memset( u2, 0, M1 *
sizeof(
float) );
493 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'A', 1, N1,
502 cblas_sgemv( CblasColMajor, CblasTrans,
504 (sone), u1+offx, ldu,
507 flops += FLOPS_SGEMM( M2, i, 1 );
510 cblas_sgemv( CblasColMajor, CblasNoTrans,
515 flops += FLOPS_SGEMM( M1, i, 1 );
518 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
523 flops += FLOPS_SGEMM( i, N1, 1 );
525 norm_before = cblas_snrm2( i, W, 1 );
526 norm = cblas_snrm2( M1, u2, 1 );
528 #if !defined(PASTIX_LR_CGS1)
529 if ( norm <= (alpha * norm_before) ){
531 cblas_sgemv( CblasColMajor, CblasTrans,
536 flops += FLOPS_SGEMM( M1, i, 1 );
539 cblas_sgemv( CblasColMajor, CblasNoTrans,
544 flops += FLOPS_SGEMM( M1, i, 1 );
547 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
552 flops += FLOPS_SGEMM( i, N1, 1 );
554 norm = cblas_snrm2( M1, u2, 1 );
558 if ( norm > M1 * eps ) {
559 cblas_sscal( M1, 1. / norm, u2, 1 );
560 cblas_sscal( N1, norm, v2, ldv );
565 cblas_sswap( M1, u2, 1, U + rank * ldu, 1 );
566 memset( U + rank * ldu, 0, M1 *
sizeof(
float) );
568 cblas_sswap( N1, v2, ldv, V + rank, ldv );
569 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'A', 1, N1,
570 0., 0., V + rank, ldv );
577 memset( u2, 0, M1 *
sizeof(
float) );
578 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'A', 1, N1,
586 #if defined(PASTIX_DEBUG_LR)
590 fprintf(stderr,
"cgs: Final u2 not orthogonal to u1\n" );
BEGIN_C_DECLS typedef int pastix_int_t
pastix_fixdbl_t core_slrorthu_fullqr(pastix_int_t M, pastix_int_t N, pastix_int_t rank, float *U, pastix_int_t ldu, float *V, pastix_int_t ldv)
Try to orthognalize the u part of the low-rank form, and update the v part accordingly using full QR.
pastix_fixdbl_t core_slrorthu_cgs(pastix_int_t M1, pastix_int_t N1, pastix_int_t M2, pastix_int_t N2, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, float *U, pastix_int_t ldu, float *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using CGS.
pastix_fixdbl_t core_slrorthu_partialqr(pastix_int_t M, pastix_int_t N, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, float *U, pastix_int_t ldu, float *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using partial ...
int core_slrdbg_check_orthogonality_AB(pastix_int_t M, pastix_int_t NA, pastix_int_t NB, const float *A, pastix_int_t lda, const float *B, pastix_int_t ldb)
Check the orthogonality of the matrix A relatively to the matrix B.
Manage nancheck for lowrank kernels. This header describes all the LAPACKE functions used for low-ran...