159 norm = LAPACKE_clange_work( LAPACK_COL_MAJOR,
'f', m, n,
163 if ( (norm == 0.) && (tol >= 0.) ) {
172 else if ( use_reltol ) {
177 minMN = pastix_imin(m, n);
178 rklimit = pastix_imin( minMN, rklimit );
185 if ( rklimit == 0 ) {
186 if ( (tol < 0.) || (norm < tol) ) {
193 ret = LAPACKE_clacpy_work( LAPACK_COL_MAJOR,
'A', m, n,
194 A, lda, Alr->
u, Alr->
rkmax );
211#if defined(PASTIX_DEBUG_LR_NANCHECK)
220 ret = MYLAPACKE_cgesvd_work( LAPACK_COL_MAJOR,
'S',
'S',
222 NULL, NULL, ldu, NULL, ldv,
223 &ws, lwork, &rwork );
232#if defined(PRECISION_z) || defined(PRECISION_c)
237 rwork = (
float*)(zwork + zsize);
239 Acpy = zwork + lwork;
245 ret = LAPACKE_clacpy_work(LAPACK_COL_MAJOR,
'A', m, n,
249 ret = MYLAPACKE_cgesvd_work( LAPACK_COL_MAJOR,
'S',
'S',
252 zwork, lwork, rwork + minMN );
254 pastix_print_error(
"SVD Failed\n" );
258 imax = pastix_imin( minMN, rklimit+1 );
259 for (i=0; i<imax; i++, v+=1) {
272#if defined(PASTIX_SVD_2NORM)
275 frob_norm =
core_slassq( minMN-i, s + minMN - 1, -1 );
278 if (frob_norm < tol) {
281 cblas_csscal(n, s[i], v, ldv);
293 ret = LAPACKE_clacpy_work( LAPACK_COL_MAJOR,
'A', m, n,
294 A, lda, Alr->
u, Alr->
rkmax );
358 const void *alphaptr,
376 size_t wzsize, wdsize;
380 rank = (A->
rk == -1) ? pastix_imin(M1, N1) : A->
rk;
382 M = pastix_imax(M2, M1);
383 N = pastix_imax(N2, N1);
384 minU = pastix_imin(M, rank);
385 minV = pastix_imin(N, rank);
387 assert(M2 == M && N2 == N);
393 if ( ((M1 + offx) > M2) ||
396 pastix_print_error(
"Dimensions are not correct" );
408 ldau = (A->
rk == -1) ? A->
rkmax : M1;
419 M1, N1, A, M2, N2, B,
427 if ( rank > pastix_imin( M, N ) ) {
435 wzsize = (M+N) * rank;
437 wzsize += minU + minV;
440 wzsize += 3 * rank * rank;
443 lwork = pastix_imax( M, N ) * 32;
446#if defined(PASTIX_DEBUG_LR_NANCHECK)
455 ret = MYLAPACKE_cgesvd_work( LAPACK_COL_MAJOR,
'S',
'S',
456 rank, rank, NULL, rank,
457 NULL, NULL, rank, NULL, rank,
458 &querysize, -1, &rwork );
462 lwork = pastix_imax( lwork, querysize );
466#if defined(PRECISION_z) || defined(PRECISION_c)
471 s = (
float*)(zbuf + wzsize);
474 tauU = u1u2 + M * rank;
476 tauV = v1v2 + N * rank;
493 ret = LAPACKE_cgeqrf_work( LAPACK_COL_MAJOR, M, rank,
494 u1u2, M, tauU, zbuf, lwork );
496 total_flops += FLOPS_CGEQRF( M, rank );
511 ret = LAPACKE_cgelqf_work( LAPACK_COL_MAJOR, rank, N,
512 v1v2, rank, tauV, zbuf, lwork );
514 total_flops += FLOPS_CGELQF( rank, N );
524 ret = LAPACKE_clacpy_work( LAPACK_COL_MAJOR,
'U', rank, rank,
528 cblas_ctrmm(CblasColMajor,
529 CblasRight, CblasLower,
530 CblasNoTrans, CblasNonUnit,
531 rank, rank, CBLAS_SADDR(cone),
532 v1v2, rank, R, rank);
533 total_flops += FLOPS_CTRMM(
PastixRight, rank, rank );
561 tol = tol * ( cabsf(alpha) * normA + normB );
568 flops = FLOPS_CGEQRF( rank, rank ) + FLOPS_CGELQF( rank, (rank-1) );
570 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_rradd_recompression );
571 ret = MYLAPACKE_cgesvd_work( LAPACK_COL_MAJOR,
'S',
'S',
574 zbuf, lwork, s + rank );
576 pastix_print_error(
"LAPACKE_cgesvd FAILED" );
584 for (i=0; i<rank; i++, tmp+=1){
597#if defined(PASTIX_SVD_2NORM)
600 frob_norm =
core_slassq( rank-i, s + rank - 1, -1 );
603 if (frob_norm < tol) {
606 cblas_csscal(rank, s[i], tmp, rank);
609 kernel_trace_stop_lvl2_rank( flops, new_rank );
610 total_flops += flops;
622 flops = FLOPS_CGEMM( M, N, Bbackup.
rk );
623 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_uncompress );
624 cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
626 CBLAS_SADDR(cone), Bbackup.
u, ldbu,
628 CBLAS_SADDR(czero), u, M );
629 kernel_trace_stop_lvl2( flops );
630 total_flops += flops;
635 kernel_trace_start_lvl2( PastixKernelLvl2_FR_GEMM );
638 cone, u + offy * M + offx, M);
639 kernel_trace_stop_lvl2( flops );
642 flops = FLOPS_CGEMM( M1, N1, A->
rk );
643 kernel_trace_start_lvl2( PastixKernelLvl2_FR_GEMM );
644 cblas_cgemm(CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transA1,
646 CBLAS_SADDR(alpha), A->
u, ldau,
648 CBLAS_SADDR(cone), u + offy * M + offx, M);
649 kernel_trace_stop_lvl2( flops );
651 total_flops += flops;
656 else if ( new_rank == 0 ) {
668 assert( B->
rkmax >= new_rank );
677#if defined(PASTIX_DEBUG_LR_NANCHECK)
678 ret = LAPACKE_claset_work( LAPACK_COL_MAJOR,
'A', M, new_rank,
679 0.0, 0.0, B->
u, ldbu );
681 ret = LAPACKE_clacpy_work( LAPACK_COL_MAJOR,
'A', rank, new_rank,
682 u, rank, B->
u, ldbu );
686 for (i=0; i<new_rank; i++, tmp+=ldbu, u+=rank) {
692 flops = FLOPS_CUNMQR( M, new_rank, minU,
PastixLeft )
694 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_rradd_computeNewU );
695 ret = LAPACKE_cunmqr_work(LAPACK_COL_MAJOR,
'L',
'N',
706 ret = LAPACKE_clacpy_work( LAPACK_COL_MAJOR,
'A', new_rank, rank,
707 v, rank, B->
v, ldbv );
710 ret = LAPACKE_claset_work( LAPACK_COL_MAJOR,
'A', new_rank, N-rank,
711 0.0, 0.0, tmp + ldbv * rank, ldbv );
714 ret = LAPACKE_cunmlq_work(LAPACK_COL_MAJOR,
'R',
'N',
720 kernel_trace_stop_lvl2( flops );
721 total_flops += flops;