#ifndef DOXYGEN_SHOULD_SKIP_THIS
static pastix_complex64_t mzone = -1.0; /* Scalar -1, passed by address through CBLAS_SADDR */
static pastix_complex64_t zone  =  1.0; /* Scalar  1                                        */
static pastix_complex64_t zzero =  0.0; /* Scalar  0                                        */
pastix_complex64_t *A,
pastix_complex64_t *tau,
pastix_complex64_t *work,
double temp, temp2, machine_prec, residual;
pastix_complex64_t akk, *auxv, *f;
lwkopt  = n * nb + pastix_imax( m, n );
work[0] = (pastix_complex64_t)lwkopt;
if ( lda < pastix_imax( 1, m ) ) {
if ( lwork < lwkopt ) {
minMN   = pastix_imin( m, n );
maxrank = pastix_imin( minMN, maxrank );
if ( maxrank == 0 ) {
norm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'f', m, n,
f = work + pastix_imax( m, n );
VN1[j] = cblas_dznrm2( m, A + j * lda, 1 );
machine_prec = sqrt( LAPACKE_dlamch_work('e') );
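/*
 * Editor's note (not from the sources): the setup above appears to follow the
 * LAPACK zlaqps convention.  auxv is assumed to live at the start of work, f
 * (the matrix F of block-reflector coefficients) starts at work + max(m,n),
 * VN1[j] holds the current (downdated) norm of column j, VN2[j] the norm at
 * its last exact recomputation, and machine_prec = sqrt(eps) is the threshold
 * used later to decide when a downdated norm can no longer be trusted.
 */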
while ( rk < maxrank ) {
jb = pastix_imin( nb, maxrank-offset );
for ( k=0; k<jb; k++ ) {
assert( rk < maxrank );
pvt = rk + cblas_idamax( n-rk, VN1 + rk, 1 );
if ( (VN1[pvt] == 0.) || (VN1[pvt] < tol) ) {
residual = cblas_dnrm2( n-rk, VN1 + rk, 1 );
if ( (residual == 0.) || (residual < tol) ) {
assert( rk < maxrank );
cblas_zswap( m, A + pvt * lda, 1,
cblas_zswap( k, f + (pvt-offset), ldf,
jpvt[pvt] = jpvt[rk];
assert( (rk < n) && (rk < m) );
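/*
 * Editor's note: the pivot column is the one with the largest remaining
 * partial norm (cblas_idamax over VN1[rk:n]).  If that norm, or the 2-norm of
 * all remaining partial norms, already falls below tol, the factorization can
 * stop early and the current rk is the numerical rank.  Otherwise the selected
 * column is swapped into position rk in both A and F, and the permutation is
 * recorded in jpvt.
 */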
#if defined(PRECISION_c) || defined(PRECISION_z)
cblas_zgemm( CblasColMajor, CblasNoTrans, CblasConjTrans, m-rk, 1, k,
             CBLAS_SADDR(mzone), A + offset * lda + rk, lda,
             CBLAS_SADDR(zone), A + rk * lda + rk, lda );
cblas_zgemv( CblasColMajor, CblasNoTrans, m-rk, k,
             CBLAS_SADDR(mzone), A + offset * lda + rk, lda,
             CBLAS_SADDR(zone), A + rk * lda + rk, 1 );
ret = LAPACKE_zlarfg_work( m-rk, A + rk * lda + rk, A + rk * lda + (rk+1), 1, tau + rk );
ret = LAPACKE_zlarfg_work( 1,    A + rk * lda + rk, A + rk * lda + rk,     1, tau + rk );
akk = A[rk * lda + rk];
A[rk * lda + rk] = zone;
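/*
 * Editor's note: LAPACKE_zlarfg_work builds the Householder reflector that
 * annihilates column rk below the diagonal (the second call is the degenerate
 * one-row case).  The diagonal entry is saved in akk and overwritten with 1 so
 * that A(rk:m,rk) can be used directly as the reflector vector in the gemv
 * calls that follow; it is restored once the column update is done.
 */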
pastix_complex64_t alpha = tau[rk];
cblas_zgemv( CblasColMajor, CblasConjTrans, m-rk, n-rk-1,
             CBLAS_SADDR(alpha), A + (rk+1) * lda + rk, lda,
             A + rk * lda + rk, 1,
             CBLAS_SADDR(zzero), f + k * ldf + k + 1, 1 );
memset( f + k * ldf, 0, k * sizeof( pastix_complex64_t ) );
pastix_complex64_t alpha = -tau[rk];
cblas_zgemv( CblasColMajor, CblasConjTrans, m-rk, k,
             CBLAS_SADDR(alpha), A + offset * lda + rk, lda,
             A + rk * lda + rk, 1,
             CBLAS_SADDR(zzero), auxv, 1 );
cblas_zgemv( CblasColMajor, CblasNoTrans, n-offset, k,
             CBLAS_SADDR(zone), f, ldf,
             CBLAS_SADDR(zone), f + k * ldf, 1 );
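/*
 * Editor's note: these two gemv calls maintain F so that the k+1 reflectors
 * accumulated so far can later be applied to the trailing matrix as a single
 * blocked update, as in zlaqps.  The first computes the correction vector
 * auxv = -tau[rk] * A(rk:m, offset:offset+k)^H * v, the second folds auxv
 * into column k of F.
 */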
#if defined(PRECISION_c) || defined(PRECISION_z)
cblas_zgemm( CblasColMajor, CblasNoTrans, CblasConjTrans,
             CBLAS_SADDR(mzone), A + (offset) * lda + rk, lda,
             CBLAS_SADDR(zone), A + (rk + 1) * lda + rk, lda );
cblas_zgemv( CblasColMajor, CblasNoTrans, n-rk-1, k+1,
             CBLAS_SADDR(mzone), f + (k+1), ldf,
             A + (offset) * lda + rk, lda,
             CBLAS_SADDR(zone), A + (rk + 1) * lda + rk, lda );
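/*
 * Editor's note: update of the pivot row.  Row rk of the trailing columns is
 * updated with the reflectors accumulated so far (the contribution stored in
 * F is subtracted from A(rk, rk+1:n-1)), so that row rk of R is final before
 * the next pivot is chosen.  The complex case goes through gemm with
 * CblasConjTrans, while the real case reduces to a plain gemv.
 */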
for (j=rk+1; j<n; j++) {
temp  = cabs( A[j * lda + rk] ) / VN1[j];
temp2 = (1.0 + temp) * (1.0 - temp);
temp  = (temp2 > 0.0) ? temp2 : 0.0;
temp2 = temp * ((VN1[j] / VN2[j]) * (VN1[j] / VN2[j]));
if (temp2 < machine_prec) {
VN2[j] = (double)lsticc;
VN1[j] = VN1[j] * sqrt(temp);
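/*
 * Editor's note: classical QP3 norm downdating.  After column rk is
 * eliminated, each remaining partial norm satisfies
 *   VN1[j] <- VN1[j] * sqrt( 1 - (|A(rk,j)| / VN1[j])^2 ),
 * which is what the temp/temp2 arithmetic computes while clamping negative
 * round-off to zero.  When the downdated value has lost too much accuracy
 * relative to the last exact norm VN2[j] (the test against sqrt(eps)), the
 * column is chained through lsticc/VN2 so that its norm is recomputed exactly
 * with cblas_dznrm2 at the end of the panel.
 */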
A[rk * lda + rk] = akk;
cblas_zgemm( CblasColMajor, CblasNoTrans, CblasConjTrans,
             CBLAS_SADDR(mzone), A + offset * lda + rk, lda,
             CBLAS_SADDR(zone), A + rk * lda + rk, lda );
VN1[lsticc] = cblas_dznrm2( m-rk, A + lsticc * lda + rk, 1 );
VN2[lsticc] = VN1[lsticc];
residual = cblas_dnrm2( n-rk, VN1 + rk, 1 );
if ( (tol < 0.) || ( (residual == 0.) || (residual < tol) ) ) {
assert( rk == maxrank );
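/*
 * Editor's note: the outer stopping test mirrors the per-column one.  The
 * 2-norm of the remaining partial norms VN1[rk:n] estimates the norm of the
 * not-yet-factorized trailing block, so the loop ends either when that
 * estimate drops below tol or when maxrank columns have been factorized
 * (a negative tol appears to disable the norm-based test entirely).
 */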
const void *alphaptr,
M1, N1, A, M2, N2, B, offx, offy );
typedef int pastix_int_t
int core_zpqrcp(double tol, pastix_int_t maxrank, int full_update, pastix_int_t nb, pastix_int_t m, pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *jpvt, pastix_complex64_t *tau, pastix_complex64_t *work, pastix_int_t lwork, double *rwork)
Compute a rank-revealing QR factorization.
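A minimal calling sketch (editor's illustration, not taken from the PaStiX sources): the workspace size follows the lwkopt formula visible in the listing above, while the assumptions that rwork must provide room for the two column-norm arrays (2*n doubles) and that the return value is the reached rank should be checked against the real documentation. The wrapper name compress_block and all sizes are hypothetical.

#include <stdlib.h>
/* The PaStiX headers providing core_zpqrcp(), pastix_int_t, etc. are assumed. */

static int
compress_block( pastix_complex64_t *A, pastix_int_t m, pastix_int_t n,
                pastix_int_t lda, double tol )
{
    pastix_int_t nb      = 32;
    pastix_int_t maxrank = pastix_imin( m, n ) / 2;
    pastix_int_t lwork   = n * nb + pastix_imax( m, n );  /* lwkopt from the listing */

    pastix_int_t       *jpvt  = malloc( n * sizeof(pastix_int_t) );
    pastix_complex64_t *tau   = malloc( pastix_imin( m, n ) * sizeof(pastix_complex64_t) );
    pastix_complex64_t *work  = malloc( lwork * sizeof(pastix_complex64_t) );
    double             *rwork = malloc( 2 * n * sizeof(double) );  /* assumed: VN1 and VN2 */

    /* Presumably returns the numerical rank that was reached (at most maxrank). */
    int rk = core_zpqrcp( tol, maxrank, /* full_update */ 1, nb,
                          m, n, A, lda, jpvt, tau, work, lwork, rwork );

    free( jpvt ); free( tau ); free( work ); free( rwork );
    return rk;
}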
pastix_lr_t: Structure to define the type of functions to use for the low-rank kernels and their parameters.
pastix_lrblock_t: The block low-rank structure to hold a matrix in low-rank form.
pastix_fixdbl_t core_zrradd_pqrcp(const pastix_lr_t *lowrank, pastix_trans_t transA1, const void *alphaptr, pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A, pastix_int_t M2, pastix_int_t N2, pastix_lrblock_t *B, pastix_int_t offx, pastix_int_t offy)
Add two LR structures A=(-u1) v1^T and B=u2 v2^T into u2 v2^T.
pastix_fixdbl_t core_zge2lr_pqrcp(int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit, pastix_int_t m, pastix_int_t n, const void *A, pastix_int_t lda, pastix_lrblock_t *Alr)
Convert a full-rank matrix into a low-rank matrix, using PQRCP.
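A hedged example of compressing a dense block with core_zge2lr_pqrcp into a pastix_lrblock_t; the tolerance and rank-limit values are illustrative, the meaning attached to use_reltol and to the returned pastix_fixdbl_t (an operation count) are assumptions, and m, n, A, lda are taken to be already defined.

pastix_lrblock_t Alr;
pastix_fixdbl_t  flops;

/* use_reltol = 1 is assumed to make tol relative to the norm of A;     */
/* rklimit caps the rank the compression kernel is allowed to return.  */
flops = core_zge2lr_pqrcp( 1, 1e-8, pastix_imin( m, n ) / 2,
                           m, n, A, lda, &Alr );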
pastix_fixdbl_t core_zge2lr_qrcp(core_zrrqr_cp_t rrqrfct, int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit, pastix_int_t m, pastix_int_t n, const void *Avoid, pastix_int_t lda, pastix_lrblock_t *Alr)
Template to convert a full-rank matrix into a low-rank matrix through QR decompositions.
pastix_fixdbl_t core_zrradd_qr(core_zrrqr_cp_t rrqrfct, const pastix_lr_t *lowrank, pastix_trans_t transA1, const void *alphaptr, pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A, pastix_int_t M2, pastix_int_t N2, pastix_lrblock_t *B, pastix_int_t offx, pastix_int_t offy)
Template to perform the addition of two low-rank structures with a compression kernel based on QR decompositions.
enum pastix_trans_e pastix_trans_t
Transposition.
Manage nancheck for low-rank kernels. This header describes all the LAPACKE functions used for low-rank kernels.