51 pastix_complex64_t *Cptr;
59 Cptr += ldcu * offy + offx;
61 pastix_atomic_lock( lock );
62 assert( C->rk == -1 );
67 cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
69 CBLAS_SADDR(alpha), A->u, ldau,
71 CBLAS_SADDR(beta), Cptr, ldcu );
72 flops = FLOPS_ZGEMM( M, N, K );
74 pastix_atomic_unlock( lock );
104 pastix_complex64_t *Cptr;
106 pastix_fixdbl_t flops1 = FLOPS_ZGEMM( M, B->rk, K ) + FLOPS_ZGEMM( M, N, B->rk );
107 pastix_fixdbl_t flops2 = FLOPS_ZGEMM( K, N, B->rk ) + FLOPS_ZGEMM( M, N, K );
114 ldbv = ( B->rk == -1 ) ? -1 : B->rkmax;
118 Cptr += ldcu * offy + offx;
123 if ( flops1 <= flops2 ) {
125 work = malloc( M * B->rk *
sizeof(pastix_complex64_t) );
132 cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
134 CBLAS_SADDR(zone), A->u, ldau,
136 CBLAS_SADDR(zzero), work, M );
138 pastix_atomic_lock( lock );
139 assert( C->rk == -1 );
140 cblas_zgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
142 CBLAS_SADDR(alpha), work, M,
144 CBLAS_SADDR(beta), Cptr, ldcu );
146 pastix_atomic_unlock( lock );
150 work = malloc( K * N *
sizeof(pastix_complex64_t) );
157 cblas_zgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
159 CBLAS_SADDR(zone), B->u, ldbu,
161 CBLAS_SADDR(zzero), work, K );
163 pastix_atomic_lock( lock );
164 assert( C->rk == -1 );
165 cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
167 CBLAS_SADDR(alpha), A->u, ldau,
169 CBLAS_SADDR(beta), Cptr, ldcu );
172 pastix_atomic_unlock( lock );
205 pastix_complex64_t *Cptr;
207 pastix_fixdbl_t flops1 = FLOPS_ZGEMM( A->rk, N, K ) + FLOPS_ZGEMM( M, N, A->rk );
208 pastix_fixdbl_t flops2 = FLOPS_ZGEMM( M, K, A->rk ) + FLOPS_ZGEMM( M, N, K );
214 ldav = ( A->rk == -1 ) ? -1 : A->rkmax;
219 Cptr += ldcu * offy + offx;
224 if ( flops1 <= flops2 ) {
226 work = malloc( A->rk * N *
sizeof(pastix_complex64_t) );
233 cblas_zgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transB,
235 CBLAS_SADDR(zone), A->v, ldav,
237 CBLAS_SADDR(zzero), work, A->rk );
239 pastix_atomic_lock( lock );
240 assert( C->rk == -1 );
241 cblas_zgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
243 CBLAS_SADDR(alpha), A->u, ldau,
245 CBLAS_SADDR(beta), Cptr, ldcu );
248 pastix_atomic_unlock( lock );
252 work = malloc( M * K *
sizeof(pastix_complex64_t) );
259 cblas_zgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
261 CBLAS_SADDR(zone), A->u, ldau,
263 CBLAS_SADDR(zzero), work, M );
265 pastix_atomic_lock( lock );
266 assert( C->rk == -1 );
267 cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
269 CBLAS_SADDR(alpha), work, M,
271 CBLAS_SADDR(beta), Cptr, ldcu );
274 pastix_atomic_unlock( lock );
307 pastix_complex64_t *Cptr;
316 Cptr += ldcu * offy + offx;
319 assert( AB.
rk != -1 );
320 assert( AB.
rkmax != -1 );
329 pastix_atomic_lock( lock );
330 assert( C->rk == -1 );
332 cblas_zgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)trans,
334 CBLAS_SADDR(alpha), AB.
u, M,
336 CBLAS_SADDR(beta), Cptr, ldcu );
337 flops = FLOPS_ZGEMM( M, N, AB.
rk );
338 pastix_atomic_unlock( lock );
pastix_fixdbl_t core_zlrlr2lr( core_zlrmm_t *params, pastix_lrblock_t *AB, int *infomask )
Perform the operation AB = op(A) * op(B), where A, B, and the resulting AB are all stored in low-rank form.