25 #ifndef DOXYGEN_SHOULD_SKIP_THIS
26 static pastix_complex64_t zone = 1.0;
27 static pastix_complex64_t zzero = 0.0;
68 pastix_complex64_t *Cfr = C->u;
69 Cfr += Cm * offy + offx;
71 assert( C->rk == -1 );
76 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_updateCfr );
80 kernel_trace_stop_lvl2( flops );
83 flops = FLOPS_ZGEMM( M, N, AB->
rk );
84 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_updateCfr );
85 cblas_zgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transV,
87 CBLAS_SADDR(alpha), AB->
u, ldabu,
89 CBLAS_SADDR(beta), Cfr, Cm );
90 kernel_trace_stop_lvl2( flops );
139 assert( (C->rk >= 0) && (C->rk <= C->rkmax) );
144 if ( (C->rk + rAB) > rklimit )
146 pastix_complex64_t *Cfr, *Coff;
149 Cfr = malloc( Cm * Cn *
sizeof(pastix_complex64_t) );
152 Coff = Cfr + Cm * offy + offx;
154 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_uncompress );
155 cblas_zgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
157 CBLAS_SADDR(zone), C->u, Cm,
159 CBLAS_SADDR(zzero), Cfr, Cm );
160 flops = FLOPS_ZGEMM( Cm, Cn, C->rk );
163 if ( AB->
rk == -1 ) {
167 flops += (2. * M * N);
170 cblas_zgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transV,
172 CBLAS_SADDR(alpha), AB->
u, ldabu,
174 CBLAS_SADDR(beta), Coff, Cm );
175 flops += FLOPS_ZGEMM( M, N, AB->
rk );
177 kernel_trace_stop_lvl2( flops );
178 total_flops += flops;
181 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_recompress );
183 flops = lowrank->core_ge2lr( lowrank->use_reltol, lowrank->tolerance, -1, Cm, Cn, Cfr, Cm, C );
184 kernel_trace_stop_lvl2_rank( flops, C->rk );
185 total_flops += flops;
195 total_flops += lowrank->core_rradd( lowrank, transV, &alpha,
254 assert( C->rk == 0 );
256 if ( AB->
rk > rklimit ) {
257 pastix_complex64_t *Cfr, *Coff;
259 Cfr = malloc( Cm * Cn *
sizeof(pastix_complex64_t) );
262 Coff = Cfr + Cm * offy + offx;
265 if ( (M != Cm) || (N != Cn) ) {
266 memset( Cfr, 0, Cm * Cn *
sizeof(pastix_complex64_t) );
270 flops = FLOPS_ZGEMM( M, N, AB->
rk );
271 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_uncompress );
272 cblas_zgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transV,
274 CBLAS_SADDR(alpha), AB->
u, ldabu,
276 CBLAS_SADDR(beta), Coff, Cm );
277 kernel_trace_stop_lvl2( flops );
278 total_flops += flops;
281 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_recompress );
282 flops = lowrank->core_ge2lr( lowrank->use_reltol, lowrank->tolerance, -1, Cm, Cn, Cfr, Cm, C );
283 kernel_trace_stop_lvl2_rank( flops, C->rk );
284 total_flops += flops;
296 pastix_complex64_t *ABfr;
300 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_orthou );
304 ABfr = malloc( M * N *
sizeof(pastix_complex64_t) );
308 cblas_zgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transV,
310 CBLAS_SADDR(zone), AB->
u, ldabu,
312 CBLAS_SADDR(zzero), ABfr, M );
313 flops = FLOPS_ZGEMM( M, N, AB->
rk );
320 flops += lowrank->core_ge2lr( lowrank->use_reltol, lowrank->tolerance, rklimit,
321 M, N, ABfr, M, &backup );
324 M, N, &backup, Cm, Cn, C,
327 kernel_trace_stop_lvl2( flops );
329 total_flops += flops;
392 pastix_atomic_lock( params->
lock );
415 pastix_atomic_unlock( params->
lock );
static pastix_fixdbl_t core_zlr2fr(core_zlrmm_t *params, const pastix_lrblock_t *AB, pastix_trans_t transV)
Perform the addition of the low-rank matrix AB and the full-rank matrix C.
static pastix_fixdbl_t core_zlr2null(core_zlrmm_t *params, const pastix_lrblock_t *AB, pastix_trans_t transV, int infomask)
Perform the addition of the low-rank matrix AB into the null matrix C.
static pastix_fixdbl_t core_zlr2lr(core_zlrmm_t *params, const pastix_lrblock_t *AB, pastix_trans_t transV)
Perform the addition of the low-rank matrix AB and the low-rank matrix C.
BEGIN_C_DECLS typedef int pastix_int_t
int core_zgeadd(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex64_t alpha, const pastix_complex64_t *A, pastix_int_t LDA, pastix_complex64_t beta, pastix_complex64_t *B, pastix_int_t LDB)
Add two matrices together.
pastix_atomic_lock_t * lock
#define PASTE_CORE_ZLRMM_PARAMS(_a_)
Initialize all the parameters of the core_zlrmm family functions to ease the access.
static pastix_complex64_t * core_zlrmm_getws(core_zlrmm_t *params, ssize_t newsize)
Function to get a workspace pointer if space is available in the one provided.
#define PASTE_CORE_ZLRMM_VOID
Void all the parameters of the core_zlrmm family functions to silence warnings.
pastix_fixdbl_t core_zlradd(core_zlrmm_t *params, const pastix_lrblock_t *A, pastix_trans_t transV, int infomask)
Perform the addition of two low-rank matrices.
Structure to store all the parameters of the core_zlrmm family functions.
pastix_int_t(* core_get_rklimit)(pastix_int_t, pastix_int_t)
Compute the maximal rank accepted for a given matrix size. The pointer is set according to the low-ra...
#define PASTIX_LRM3_ORTHOU
Macro to specify if the U part of a low-rank matrix is orthogonal or not (Used in LRMM functions).
The block low-rank structure to hold a matrix in low-rank form.
void core_zlrcpy(const pastix_lr_t *lowrank, pastix_trans_t transAv, pastix_complex64_t alpha, pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A, pastix_int_t M2, pastix_int_t N2, pastix_lrblock_t *B, pastix_int_t offx, pastix_int_t offy)
Copy a small low-rank structure into a large one.
void core_zlrfree(pastix_lrblock_t *A)
Free a low-rank matrix.
enum pastix_trans_e pastix_trans_t
Transposition.