137 const double *A = (
const double*)Avoid;
139 double *u, *v, *zwork, *Acpy, ws;
159 norm = LAPACKE_dlange_work( LAPACK_COL_MAJOR,
'f', m, n,
163 if ( (norm == 0.) && (tol >= 0.) ) {
172 else if ( use_reltol ) {
177 minMN = pastix_imin(m, n);
178 rklimit = pastix_imin( minMN, rklimit );
185 if ( rklimit == 0 ) {
186 if ( (tol < 0.) || (norm < tol) ) {
193 ret = LAPACKE_dlacpy_work( LAPACK_COL_MAJOR,
'A', m, n,
194 A, lda, Alr->
u, Alr->
rkmax );
211#if defined(PASTIX_DEBUG_LR_NANCHECK)
220 ret = MYLAPACKE_dgesvd_work( LAPACK_COL_MAJOR,
'S',
'S',
222 NULL, NULL, ldu, NULL, ldv,
223 &ws, lwork, &rwork );
232#if defined(PRECISION_z) || defined(PRECISION_c)
236 zwork = malloc( zsize *
sizeof(
double) + rsize *
sizeof(
double) );
237 rwork = (
double*)(zwork + zsize);
239 Acpy = zwork + lwork;
245 ret = LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,
'A', m, n,
249 ret = MYLAPACKE_dgesvd_work( LAPACK_COL_MAJOR,
'S',
'S',
252 zwork, lwork, rwork + minMN );
254 pastix_print_error(
"SVD Failed\n" );
258 imax = pastix_imin( minMN, rklimit+1 );
259 for (i=0; i<imax; i++, v+=1) {
272#if defined(PASTIX_SVD_2NORM)
275 frob_norm =
core_dlassq( minMN-i, s + minMN - 1, -1 );
278 if (frob_norm < tol) {
281 cblas_dscal(n, s[i], v, ldv);
293 ret = LAPACKE_dlacpy_work( LAPACK_COL_MAJOR,
'A', m, n,
294 A, lda, Alr->
u, Alr->
rkmax );
358 const void *alphaptr,
371 double *u1u2, *v1v2, *R, *u, *v;
372 double *tmp, *zbuf, *tauU, *tauV;
374 double alpha = *((
double*)alphaptr);
376 size_t wzsize, wdsize;
380 rank = (A->
rk == -1) ? pastix_imin(M1, N1) : A->
rk;
382 M = pastix_imax(M2, M1);
383 N = pastix_imax(N2, N1);
384 minU = pastix_imin(M, rank);
385 minV = pastix_imin(N, rank);
387 assert(M2 == M && N2 == N);
393 if ( ((M1 + offx) > M2) ||
396 pastix_print_error(
"Dimensions are not correct" );
408 ldau = (A->
rk == -1) ? A->
rkmax : M1;
419 M1, N1, A, M2, N2, B,
427 if ( rank > pastix_imin( M, N ) ) {
435 wzsize = (M+N) * rank;
437 wzsize += minU + minV;
440 wzsize += 3 * rank * rank;
443 lwork = pastix_imax( M, N ) * 32;
446#if defined(PASTIX_DEBUG_LR_NANCHECK)
455 ret = MYLAPACKE_dgesvd_work( LAPACK_COL_MAJOR,
'S',
'S',
456 rank, rank, NULL, rank,
457 NULL, NULL, rank, NULL, rank,
458 &querysize, -1, &rwork );
462 lwork = pastix_imax( lwork, querysize );
466#if defined(PRECISION_z) || defined(PRECISION_c)
470 zbuf = malloc( wzsize *
sizeof(
double) + wdsize *
sizeof(
double) );
471 s = (
double*)(zbuf + wzsize);
474 tauU = u1u2 + M * rank;
476 tauV = v1v2 + N * rank;
493 ret = LAPACKE_dgeqrf_work( LAPACK_COL_MAJOR, M, rank,
494 u1u2, M, tauU, zbuf, lwork );
496 total_flops += FLOPS_DGEQRF( M, rank );
511 ret = LAPACKE_dgelqf_work( LAPACK_COL_MAJOR, rank, N,
512 v1v2, rank, tauV, zbuf, lwork );
514 total_flops += FLOPS_DGELQF( rank, N );
522 memset(R, 0, rank * rank *
sizeof(
double));
524 ret = LAPACKE_dlacpy_work( LAPACK_COL_MAJOR,
'U', rank, rank,
528 cblas_dtrmm(CblasColMajor,
529 CblasRight, CblasLower,
530 CblasNoTrans, CblasNonUnit,
532 v1v2, rank, R, rank);
533 total_flops += FLOPS_DTRMM(
PastixRight, rank, rank );
561 tol = tol * ( fabs(alpha) * normA + normB );
568 flops = FLOPS_DGEQRF( rank, rank ) + FLOPS_DGELQF( rank, (rank-1) );
570 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_rradd_recompression );
571 ret = MYLAPACKE_dgesvd_work( LAPACK_COL_MAJOR,
'S',
'S',
574 zbuf, lwork, s + rank );
576 pastix_print_error(
"LAPACKE_dgesvd FAILED" );
584 for (i=0; i<rank; i++, tmp+=1){
597#if defined(PASTIX_SVD_2NORM)
600 frob_norm =
core_dlassq( rank-i, s + rank - 1, -1 );
603 if (frob_norm < tol) {
606 cblas_dscal(rank, s[i], tmp, rank);
609 kernel_trace_stop_lvl2_rank( flops, new_rank );
610 total_flops += flops;
622 flops = FLOPS_DGEMM( M, N, Bbackup.
rk );
623 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_uncompress );
624 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
626 (done), Bbackup.
u, ldbu,
629 kernel_trace_stop_lvl2( flops );
630 total_flops += flops;
635 kernel_trace_start_lvl2( PastixKernelLvl2_FR_GEMM );
638 done, u + offy * M + offx, M);
639 kernel_trace_stop_lvl2( flops );
642 flops = FLOPS_DGEMM( M1, N1, A->
rk );
643 kernel_trace_start_lvl2( PastixKernelLvl2_FR_GEMM );
644 cblas_dgemm(CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transA1,
648 (done), u + offy * M + offx, M);
649 kernel_trace_stop_lvl2( flops );
651 total_flops += flops;
656 else if ( new_rank == 0 ) {
668 assert( B->
rkmax >= new_rank );
677#if defined(PASTIX_DEBUG_LR_NANCHECK)
678 ret = LAPACKE_dlaset_work( LAPACK_COL_MAJOR,
'A', M, new_rank,
679 0.0, 0.0, B->
u, ldbu );
681 ret = LAPACKE_dlacpy_work( LAPACK_COL_MAJOR,
'A', rank, new_rank,
682 u, rank, B->
u, ldbu );
686 for (i=0; i<new_rank; i++, tmp+=ldbu, u+=rank) {
687 memcpy(tmp, u, rank *
sizeof(
double));
688 memset(tmp + rank, 0, (M - rank) *
sizeof(
double));
692 flops = FLOPS_DORMQR( M, new_rank, minU,
PastixLeft )
694 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_rradd_computeNewU );
695 ret = LAPACKE_dormqr_work(LAPACK_COL_MAJOR,
'L',
'N',
706 ret = LAPACKE_dlacpy_work( LAPACK_COL_MAJOR,
'A', new_rank, rank,
707 v, rank, B->
v, ldbv );
710 ret = LAPACKE_dlaset_work( LAPACK_COL_MAJOR,
'A', new_rank, N-rank,
711 0.0, 0.0, tmp + ldbv * rank, ldbv );
714 ret = LAPACKE_dormlq_work(LAPACK_COL_MAJOR,
'R',
'N',
720 kernel_trace_stop_lvl2( flops );
721 total_flops += flops;