28 #ifndef DOXYGEN_SHOULD_SKIP_THIS
121 float temp, temp2, machine_prec, residual;
137 lwkopt = n * nb + pastix_imax(m, n);
149 if (lda < pastix_imax(1, m)) {
152 if( lwork < lwkopt ) {
157 minMN = pastix_imin(m, n);
161 maxrank = pastix_imin( minMN, maxrank );
168 if ( maxrank == 0 ) {
173 norm = LAPACKE_clange_work( LAPACK_COL_MAJOR,
'f', m, n,
185 f = work + pastix_imax(m, n);
193 VN1[j] = cblas_scnrm2(m, A + j * lda, 1);
198 machine_prec = sqrtf(LAPACKE_slamch_work(
'e'));
201 while ( rk < maxrank ) {
203 jb = pastix_imin(nb, maxrank-offset);
207 for ( k=0; k<jb; k++ ) {
211 assert( rk < maxrank );
213 pvt = rk + cblas_isamax( n-rk, VN1 + rk, 1 );
219 if ( (VN1[pvt] == 0.) || (VN1[pvt] < tol) ) {
220 residual = cblas_snrm2( n-rk, VN1 + rk, 1 );
221 if ( (residual == 0.) || (residual < tol) ) {
222 assert( rk < maxrank );
232 cblas_cswap( m, A + pvt * lda, 1,
234 cblas_cswap( k, f + (pvt-offset), ldf,
238 jpvt[pvt] = jpvt[rk];
249 assert( (rk < n) && (rk < m) );
251 #if defined(PRECISION_c) || defined(PRECISION_z)
252 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasConjTrans, m-rk, 1, k,
253 CBLAS_SADDR(mcone), A + offset * lda + rk, lda,
255 CBLAS_SADDR(cone), A + rk * lda + rk, lda );
257 cblas_cgemv( CblasColMajor, CblasNoTrans, m-rk, k,
258 CBLAS_SADDR(mcone), A + offset * lda + rk, lda,
260 CBLAS_SADDR(cone), A + rk * lda + rk, 1 );
268 ret = LAPACKE_clarfg_work(m-rk, A + rk * lda + rk, A + rk * lda + (rk+1), 1, tau + rk);
272 ret = LAPACKE_clarfg_work(1, A + rk * lda + rk, A + rk * lda + rk, 1, tau + rk);
276 akk = A[rk * lda + rk];
277 A[rk * lda + rk] = cone;
285 cblas_cgemv( CblasColMajor, CblasConjTrans, m-rk, n-rk-1,
286 CBLAS_SADDR(alpha), A + (rk+1) * lda + rk, lda,
287 A + rk * lda + rk, 1,
288 CBLAS_SADDR(czero), f + k * ldf + k + 1, 1 );
302 cblas_cgemv( CblasColMajor, CblasConjTrans, m-rk, k,
303 CBLAS_SADDR(alpha), A + offset * lda + rk, lda,
304 A + rk * lda + rk, 1,
305 CBLAS_SADDR(czero), auxv, 1 );
307 cblas_cgemv( CblasColMajor, CblasNoTrans, n-offset, k,
308 CBLAS_SADDR(cone), f, ldf,
310 CBLAS_SADDR(cone), f + k * ldf, 1);
318 #if defined(PRECISION_c) || defined(PRECISION_z)
319 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasConjTrans,
321 CBLAS_SADDR(mcone), A + (offset) * lda + rk, lda,
323 CBLAS_SADDR(cone), A + (rk + 1) * lda + rk, lda );
325 cblas_cgemv( CblasColMajor, CblasNoTrans, n-rk-1, k+1,
326 CBLAS_SADDR(mcone), f + (k+1), ldf,
327 A + (offset) * lda + rk, lda,
328 CBLAS_SADDR(cone), A + (rk + 1) * lda + rk, lda );
335 for (j=rk+1; j<n; j++) {
341 temp = cabsf( A[j * lda + rk] ) / VN1[j];
342 temp2 = (1.0 + temp) * (1.0 - temp);
343 temp = (temp2 > 0.0) ? temp2 : 0.0;
345 temp2 = temp * ((VN1[j] / VN2[j]) * ( VN1[j] / VN2[j]));
346 if (temp2 < machine_prec){
347 VN2[j] = (float)lsticc;
351 VN1[j] = VN1[j] * sqrtf(temp);
356 A[rk * lda + rk] = akk;
374 cblas_cgemm( CblasColMajor, CblasNoTrans, CblasConjTrans,
376 CBLAS_SADDR(mcone), A + offset * lda + rk, lda,
378 CBLAS_SADDR(cone), A + rk * lda + rk, lda );
386 VN1[lsticc] = cblas_scnrm2(m-rk, A + lsticc * lda + rk, 1 );
393 VN2[lsticc] = VN1[lsticc];
404 residual = cblas_snrm2( n-rk, VN1 + rk, 1 );
405 if ( (tol < 0.) || ( (residual == 0.) || (residual < tol) ) ) {
406 assert( rk == maxrank );
525 const void *alphaptr,
536 M1, N1, A, M2, N2, B, offx, offy );
Manage nancheck for lowrank kernels. This header describes all the LAPACKE functions used for low-ran...
BEGIN_C_DECLS typedef int pastix_int_t
float _Complex pastix_complex32_t
int core_cpqrcp(float tol, pastix_int_t maxrank, int full_update, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex32_t *A, pastix_int_t lda, pastix_int_t *jpvt, pastix_complex32_t *tau, pastix_complex32_t *work, pastix_int_t lwork, float *rwork)
Compute a rank-reavealing QR factorization.
Structure to define the type of function to use for the low-rank kernels and their parameters.
The block low-rank structure to hold a matrix in low-rank form.
pastix_fixdbl_t core_cge2lr_pqrcp(int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit, pastix_int_t m, pastix_int_t n, const void *A, pastix_int_t lda, pastix_lrblock_t *Alr)
Convert a full rank matrix in a low rank matrix, using PQRCP.
pastix_fixdbl_t core_crradd_pqrcp(const pastix_lr_t *lowrank, pastix_trans_t transA1, const void *alphaptr, pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A, pastix_int_t M2, pastix_int_t N2, pastix_lrblock_t *B, pastix_int_t offx, pastix_int_t offy)
Add two LR structures A=(-u1) v1^T and B=u2 v2^T into u2 v2^T.
pastix_fixdbl_t core_crradd_qr(core_crrqr_cp_t rrqrfct, const pastix_lr_t *lowrank, pastix_trans_t transA1, const void *alphaptr, pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A, pastix_int_t M2, pastix_int_t N2, pastix_lrblock_t *B, pastix_int_t offx, pastix_int_t offy)
Template to perform the addition of two low-rank structures with compression kernel based on QR decom...
pastix_fixdbl_t core_cge2lr_qrcp(core_crrqr_cp_t rrqrfct, int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit, pastix_int_t m, pastix_int_t n, const void *Avoid, pastix_int_t lda, pastix_lrblock_t *Alr)
Template to convert a full rank matrix into a low rank matrix through QR decompositions.
enum pastix_trans_e pastix_trans_t
Transpostion.