111 pastix_complex64_t *A,
114 pastix_complex64_t *tau,
115 pastix_complex64_t *work,
121 double temp, temp2, machine_prec, residual;
122 pastix_complex64_t akk, *auxv, *f;
137 lwkopt = n * nb + pastix_imax(m, n);
139 work[0] = (pastix_complex64_t)lwkopt;
149 if (lda < pastix_imax(1, m)) {
152 if( lwork < lwkopt ) {
157 minMN = pastix_imin(m, n);
161 maxrank = pastix_imin( minMN, maxrank );
168 if ( maxrank == 0 ) {
173 norm = LAPACKE_zlange_work( LAPACK_COL_MAJOR,
'f', m, n,
185 f = work + pastix_imax(m, n);
193 VN1[j] = cblas_dznrm2(m, A + j * lda, 1);
198 machine_prec = sqrt(LAPACKE_dlamch_work(
'e'));
201 while ( rk < maxrank ) {
203 jb = pastix_imin(nb, maxrank-offset);
207 for ( k=0; k<jb; k++ ) {
211 assert( rk < maxrank );
213 pvt = rk + cblas_idamax( n-rk, VN1 + rk, 1 );
219 if ( (VN1[pvt] == 0.) || (VN1[pvt] < tol) ) {
220 residual = cblas_dnrm2( n-rk, VN1 + rk, 1 );
221 if ( (residual == 0.) || (residual < tol) ) {
222 assert( rk < maxrank );
232 cblas_zswap( m, A + pvt * lda, 1,
234 cblas_zswap( k, f + (pvt-offset), ldf,
238 jpvt[pvt] = jpvt[rk];
249 assert( (rk < n) && (rk < m) );
251#if defined(PRECISION_c) || defined(PRECISION_z)
252 cblas_zgemm( CblasColMajor, CblasNoTrans, CblasConjTrans, m-rk, 1, k,
253 CBLAS_SADDR(mzone), A + offset * lda + rk, lda,
255 CBLAS_SADDR(zone), A + rk * lda + rk, lda );
257 cblas_zgemv( CblasColMajor, CblasNoTrans, m-rk, k,
258 CBLAS_SADDR(mzone), A + offset * lda + rk, lda,
260 CBLAS_SADDR(zone), A + rk * lda + rk, 1 );
268 ret = LAPACKE_zlarfg_work(m-rk, A + rk * lda + rk, A + rk * lda + (rk+1), 1, tau + rk);
272 ret = LAPACKE_zlarfg_work(1, A + rk * lda + rk, A + rk * lda + rk, 1, tau + rk);
276 akk = A[rk * lda + rk];
277 A[rk * lda + rk] = zone;
284 pastix_complex64_t alpha = tau[rk];
285 cblas_zgemv( CblasColMajor, CblasConjTrans, m-rk, n-rk-1,
286 CBLAS_SADDR(alpha), A + (rk+1) * lda + rk, lda,
287 A + rk * lda + rk, 1,
288 CBLAS_SADDR(zzero), f + k * ldf + k + 1, 1 );
294 memset( f + k * ldf, 0, k *
sizeof( pastix_complex64_t ) );
301 pastix_complex64_t alpha = -tau[rk];
302 cblas_zgemv( CblasColMajor, CblasConjTrans, m-rk, k,
303 CBLAS_SADDR(alpha), A + offset * lda + rk, lda,
304 A + rk * lda + rk, 1,
305 CBLAS_SADDR(zzero), auxv, 1 );
307 cblas_zgemv( CblasColMajor, CblasNoTrans, n-offset, k,
308 CBLAS_SADDR(zone), f, ldf,
310 CBLAS_SADDR(zone), f + k * ldf, 1);
318#if defined(PRECISION_c) || defined(PRECISION_z)
319 cblas_zgemm( CblasColMajor, CblasNoTrans, CblasConjTrans,
321 CBLAS_SADDR(mzone), A + (offset) * lda + rk, lda,
323 CBLAS_SADDR(zone), A + (rk + 1) * lda + rk, lda );
325 cblas_zgemv( CblasColMajor, CblasNoTrans, n-rk-1, k+1,
326 CBLAS_SADDR(mzone), f + (k+1), ldf,
327 A + (offset) * lda + rk, lda,
328 CBLAS_SADDR(zone), A + (rk + 1) * lda + rk, lda );
335 for (j=rk+1; j<n; j++) {
341 temp = cabs( A[j * lda + rk] ) / VN1[j];
342 temp2 = (1.0 + temp) * (1.0 - temp);
343 temp = (temp2 > 0.0) ? temp2 : 0.0;
345 temp2 = temp * ((VN1[j] / VN2[j]) * ( VN1[j] / VN2[j]));
346 if (temp2 < machine_prec){
347 VN2[j] = (double)lsticc;
351 VN1[j] = VN1[j] * sqrt(temp);
356 A[rk * lda + rk] = akk;
374 cblas_zgemm( CblasColMajor, CblasNoTrans, CblasConjTrans,
376 CBLAS_SADDR(mzone), A + offset * lda + rk, lda,
378 CBLAS_SADDR(zone), A + rk * lda + rk, lda );
386 VN1[lsticc] = cblas_dznrm2(m-rk, A + lsticc * lda + rk, 1 );
393 VN2[lsticc] = VN1[lsticc];
404 residual = cblas_dnrm2( n-rk, VN1 + rk, 1 );
405 if ( (tol < 0.) || ( (residual == 0.) || (residual < tol) ) ) {
406 assert( rk == maxrank );