28 #ifndef DOXYGEN_SHOULD_SKIP_THIS
29 static float msone = -1.0;
30 static float sone = 1.0;
31 static float szero = 0.0;
121 float temp, temp2, machine_prec, residual;
122 float akk, *auxv, *f;
137 lwkopt = n * nb + pastix_imax(m, n);
139 work[0] = (float)lwkopt;
149 if (lda < pastix_imax(1, m)) {
152 if( lwork < lwkopt ) {
157 minMN = pastix_imin(m, n);
161 maxrank = pastix_imin( minMN, maxrank );
168 if ( maxrank == 0 ) {
173 norm = LAPACKE_slange_work( LAPACK_COL_MAJOR,
'f', m, n,
185 f = work + pastix_imax(m, n);
193 VN1[j] = cblas_snrm2(m, A + j * lda, 1);
198 machine_prec = sqrtf(LAPACKE_slamch_work(
'e'));
201 while ( rk < maxrank ) {
203 jb = pastix_imin(nb, maxrank-offset);
207 for ( k=0; k<jb; k++ ) {
211 assert( rk < maxrank );
213 pvt = rk + cblas_isamax( n-rk, VN1 + rk, 1 );
219 if ( (VN1[pvt] == 0.) || (VN1[pvt] < tol) ) {
220 residual = cblas_snrm2( n-rk, VN1 + rk, 1 );
221 if ( (residual == 0.) || (residual < tol) ) {
222 assert( rk < maxrank );
232 cblas_sswap( m, A + pvt * lda, 1,
234 cblas_sswap( k, f + (pvt-offset), ldf,
238 jpvt[pvt] = jpvt[rk];
249 assert( (rk < n) && (rk < m) );
251 #if defined(PRECISION_c) || defined(PRECISION_z)
252 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasTrans, m-rk, 1, k,
253 (msone), A + offset * lda + rk, lda,
255 (sone), A + rk * lda + rk, lda );
257 cblas_sgemv( CblasColMajor, CblasNoTrans, m-rk, k,
258 (msone), A + offset * lda + rk, lda,
260 (sone), A + rk * lda + rk, 1 );
268 ret = LAPACKE_slarfg_work(m-rk, A + rk * lda + rk, A + rk * lda + (rk+1), 1, tau + rk);
272 ret = LAPACKE_slarfg_work(1, A + rk * lda + rk, A + rk * lda + rk, 1, tau + rk);
276 akk = A[rk * lda + rk];
277 A[rk * lda + rk] = sone;
284 float alpha = tau[rk];
285 cblas_sgemv( CblasColMajor, CblasTrans, m-rk, n-rk-1,
286 (alpha), A + (rk+1) * lda + rk, lda,
287 A + rk * lda + rk, 1,
288 (szero), f + k * ldf + k + 1, 1 );
294 memset( f + k * ldf, 0, k *
sizeof(
float ) );
301 float alpha = -tau[rk];
302 cblas_sgemv( CblasColMajor, CblasTrans, m-rk, k,
303 (alpha), A + offset * lda + rk, lda,
304 A + rk * lda + rk, 1,
307 cblas_sgemv( CblasColMajor, CblasNoTrans, n-offset, k,
310 (sone), f + k * ldf, 1);
318 #if defined(PRECISION_c) || defined(PRECISION_z)
319 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasTrans,
321 (msone), A + (offset) * lda + rk, lda,
323 (sone), A + (rk + 1) * lda + rk, lda );
325 cblas_sgemv( CblasColMajor, CblasNoTrans, n-rk-1, k+1,
326 (msone), f + (k+1), ldf,
327 A + (offset) * lda + rk, lda,
328 (sone), A + (rk + 1) * lda + rk, lda );
335 for (j=rk+1; j<n; j++) {
341 temp = fabsf( A[j * lda + rk] ) / VN1[j];
342 temp2 = (1.0 + temp) * (1.0 - temp);
343 temp = (temp2 > 0.0) ? temp2 : 0.0;
345 temp2 = temp * ((VN1[j] / VN2[j]) * ( VN1[j] / VN2[j]));
346 if (temp2 < machine_prec){
347 VN2[j] = (float)lsticc;
351 VN1[j] = VN1[j] * sqrtf(temp);
356 A[rk * lda + rk] = akk;
374 cblas_sgemm( CblasColMajor, CblasNoTrans, CblasTrans,
376 (msone), A + offset * lda + rk, lda,
378 (sone), A + rk * lda + rk, lda );
386 VN1[lsticc] = cblas_snrm2(m-rk, A + lsticc * lda + rk, 1 );
393 VN2[lsticc] = VN1[lsticc];
404 residual = cblas_snrm2( n-rk, VN1 + rk, 1 );
405 if ( (tol < 0.) || ( (residual == 0.) || (residual < tol) ) ) {
406 assert( rk == maxrank );
525 const void *alphaptr,
536 M1, N1, A, M2, N2, B, offx, offy );
BEGIN_C_DECLS typedef int pastix_int_t
int core_spqrcp(float tol, pastix_int_t maxrank, int full_update, pastix_int_t nb, pastix_int_t m, pastix_int_t n, float *A, pastix_int_t lda, pastix_int_t *jpvt, float *tau, float *work, pastix_int_t lwork, float *rwork)
Compute a rank-revealing QR factorization.
Structure to define the type of function to use for the low-rank kernels and their parameters.
The block low-rank structure to hold a matrix in low-rank form.
pastix_fixdbl_t core_sge2lr_pqrcp(int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit, pastix_int_t m, pastix_int_t n, const void *A, pastix_int_t lda, pastix_lrblock_t *Alr)
Convert a full-rank matrix into a low-rank matrix, using PQRCP.
pastix_fixdbl_t core_srradd_pqrcp(const pastix_lr_t *lowrank, pastix_trans_t transA1, const void *alphaptr, pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A, pastix_int_t M2, pastix_int_t N2, pastix_lrblock_t *B, pastix_int_t offx, pastix_int_t offy)
Add two LR structures A=(-u1) v1^T and B=u2 v2^T into u2 v2^T.
pastix_fixdbl_t core_sge2lr_qrcp(core_srrqr_cp_t rrqrfct, int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit, pastix_int_t m, pastix_int_t n, const void *Avoid, pastix_int_t lda, pastix_lrblock_t *Alr)
Template to convert a full rank matrix into a low rank matrix through QR decompositions.
pastix_fixdbl_t core_srradd_qr(core_srrqr_cp_t rrqrfct, const pastix_lr_t *lowrank, pastix_trans_t transA1, const void *alphaptr, pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A, pastix_int_t M2, pastix_int_t N2, pastix_lrblock_t *B, pastix_int_t offx, pastix_int_t offy)
Template to perform the addition of two low-rank structures with compression kernel based on QR decom...
enum pastix_trans_e pastix_trans_t
Transposition.
Manage nancheck for lowrank kernels. This header describes all the LAPACKE functions used for low-ran...