PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
core_clrmm.c
Go to the documentation of this file.
1/**
2 *
3 * @file core_clrmm.c
4 *
5 * PaStiX low-rank kernel routines to compute a matrix-matrix product in either
6 * low-rank or full-rank form.
7 *
8 * @copyright 2016-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
9 * Univ. Bordeaux. All rights reserved.
10 *
11 * @version 6.4.0
12 * @author Mathieu Faverge
13 * @author Gregoire Pichon
14 * @author Pierre Ramet
15 * @date 2024-07-05
16 * @generated from /builds/2mk6rsew/0/solverstack/pastix/kernels/core_zlrmm.c, normal z -> c, Tue Feb 25 14:34:50 2025
17 *
18 **/
19#include "common.h"
20#include "kernels_trace.h"
21#include "pastix_clrcores.h"
22
23/**
24 *******************************************************************************
25 *
26 * @brief Compute the matrix matrix product when applied to a full rank matrix.
27 *
28 * This function considers that the C matrix is full rank, and A and B are
29 * either full-rank or low-rank. The result of the product is directly applied
30 * to the C matrix.
31 *
32 *******************************************************************************
33 *
34 * @param[inout] params
35 * The LRMM structure that stores all the parameters used in the LRMM
36 * functions family.
37 * @sa core_clrmm_t
38 *
39 *******************************************************************************
40 *
41 * @return The number of flops required to perform the operation.
42 *
43 *******************************************************************************/
44static inline pastix_fixdbl_t
/* NOTE(review): listing line 45 (function name and parameter list) is absent
 * from this extract. The cross-reference index at the end of the page gives
 * it as core_clrmm_Cfr(core_clrmm_t *params) -- confirm against the real file. */
46{
47 const pastix_lrblock_t *A = params->A;
48 const pastix_lrblock_t *B = params->B;
49 pastix_fixdbl_t flops = 0.0;
50
/* Preconditions: A is not transposed, B carries a transposition, neither A
 * nor B has rank 0, and C has rk == -1 (the full-rank case this routine is
 * documented for). */
51 assert( params->transA == PastixNoTrans );
52 assert( params->transB != PastixNoTrans );
53 assert( A->rk <= A->rkmax && A->rk != 0 );
54 assert( B->rk <= B->rkmax && B->rk != 0 );
55 assert( params->C->rk == -1 );
56
/* Four-way dispatch on the ranks of A and B: rk == -1 selects the "fr"
 * kernel name for that operand, any other rank the "lr" one. Every kernel
 * call is bracketed by a level-2 trace start/stop pair, and the flop count
 * returned by the kernel is handed to kernel_trace_stop_lvl2(). */
57 if ( A->rk == -1 ) {
58 if ( B->rk == -1 ) {
59 kernel_trace_start_lvl2( PastixKernelLvl2_LR_FRFR2FR );
60 flops = core_cfrfr2fr( params );
61 kernel_trace_stop_lvl2( flops );
62 }
63 else {
64 kernel_trace_start_lvl2( PastixKernelLvl2_LR_FRLR2FR );
65 flops = core_cfrlr2fr( params );
66 kernel_trace_stop_lvl2( flops );
67 }
68 }
69 else {
70 if ( B->rk == -1 ) {
71 kernel_trace_start_lvl2( PastixKernelLvl2_LR_LRFR2FR );
72 flops = core_clrfr2fr( params );
73 kernel_trace_stop_lvl2( flops );
74 }
75 else {
76 kernel_trace_start_lvl2( PastixKernelLvl2_LR_LRLR2FR );
77 flops = core_clrlr2fr( params );
78 kernel_trace_stop_lvl2( flops );
79 }
80 }
81
/* Postcondition: the update must not have changed the rk == -1 state of C. */
82 assert( params->C->rk == -1 );
83
84 return flops;
85}
86
87/**
88 *******************************************************************************
89 *
90 * @brief Compute the matrix matrix product when applied to a null matrix.
91 *
92 * This function considers that the C matrix is null, and A and B are either
93 * full-rank or low-rank. The result of the product is directly applied to the
94 * C matrix.
95 *
96 *******************************************************************************
97 *
98 * @param[inout] params
99 * The LRMM structure that stores all the parameters used in the LRMM
100 * functions family.
101 * @sa core_clrmm_t
102 *
103 *******************************************************************************
104 *
105 * @return The number of flops required to perform the operation.
106 *
107 *******************************************************************************/
108static inline pastix_fixdbl_t
/* NOTE(review): listing line 109 (function name and parameter list) is absent
 * from this extract. The cross-reference index at the end of the page gives
 * it as core_clrmm_Cnull(core_clrmm_t *params) -- confirm against the real file. */
110{
/* The macro is described in the index as initializing all parameters of the
 * core_clrmm family "to ease the access". Presumably it provides the locals
 * used below (transA, transB, A, B, M, N, Cm, Cn) -- confirm. */
111 PASTE_CORE_CLRMM_PARAMS( params );
/* NOTE(review): listing lines 112-113 are absent from this extract. AB and
 * transV are used below without a visible declaration, so they are
 * presumably declared there -- confirm. */
114 int infomask = 0;
115 pastix_fixdbl_t flops = 0.0;
116
/* Preconditions: A is not transposed, B carries a transposition, and neither
 * A nor B has rank 0. */
117 assert(transA == PastixNoTrans);
118 assert(transB != PastixNoTrans);
119 assert( A->rk <= A->rkmax && A->rk != 0 );
120 assert( B->rk <= B->rkmax && B->rk != 0 );
121
/* Step 1: build the product op(A) * op(B) into the temporary block AB. The
 * kernel is picked from the ranks of A and B (rk == -1 selects the "fr"
 * name). In the three variants that take a trailing rank argument, that
 * argument is additionally capped by core_get_rklimit( Cm, Cn ), which the
 * index describes as the maximal rank accepted for a given matrix size.
 * Each kernel reports through infomask how AB must be handled afterwards. */
122 if ( A->rk == -1 ) {
123 if ( B->rk == -1 ) {
124 kernel_trace_start_lvl2( PastixKernelLvl2_LR_FRFR2null );
125 flops = core_cfrfr2lr( params, &AB, &infomask,
126 pastix_imin( pastix_imin( M, N ),
127 core_get_rklimit( Cm, Cn ) ) );
128 kernel_trace_stop_lvl2( flops );
129 }
130 else {
131 kernel_trace_start_lvl2( PastixKernelLvl2_LR_FRLR2null );
132 flops = core_cfrlr2lr( params, &AB, &infomask,
133 pastix_imin( M, core_get_rklimit( Cm, Cn ) ) );
134 kernel_trace_stop_lvl2( flops );
135 }
136 }
137 else {
138 if ( B->rk == -1 ) {
139 kernel_trace_start_lvl2( PastixKernelLvl2_LR_LRFR2null );
140 flops = core_clrfr2lr( params, &AB, &infomask,
141 pastix_imin( N, core_get_rklimit( Cm, Cn ) ) );
142 kernel_trace_stop_lvl2( flops );
143 }
144 else {
145 kernel_trace_start_lvl2( PastixKernelLvl2_LR_LRLR2null );
146 flops = core_clrlr2lr( params, &AB, &infomask );
147 kernel_trace_stop_lvl2( flops );
148
/* The product of two low-rank blocks is expected to come back with a
 * rank and a maximal rank that are both different from -1. */
149 assert( AB.rk != -1 );
150 assert( AB.rkmax != -1 );
151 }
152 }
153
/* PASTIX_LRM3_TRANSB set: per the index, the operator on B still has to be
 * applied to the V part of AB, so transB is forwarded to the addition. */
154 if ( infomask & PASTIX_LRM3_TRANSB ) {
155 transV = transB;
156 }
157
/* Step 2: add AB into C through core_clradd() and accumulate its flops. */
158 flops += core_clradd( params, &AB, transV, infomask );
159
/* Step 3: release the U and V buffers of AB, but only those that the
 * product kernel flagged as allocated (ALLOCU / ALLOCV bits). */
160 /* Free memory from zlrm3 */
161 if ( infomask & PASTIX_LRM3_ALLOCU ) {
162 free(AB.u);
163 }
164 if ( infomask & PASTIX_LRM3_ALLOCV ) {
165 free(AB.v);
166 }
167
/* NOTE(review): listing line 168 is absent from this extract. */
169
170 return flops;
171}
172
173/**
174 *******************************************************************************
175 *
176 * @brief Compute the matrix matrix product when applied to a low rank matrix.
177 *
178 * This function considers that the C matrix is low rank, and A and B are
179 * either full-rank or low-rank. The result of the product is directly applied
180 * to the C matrix.
181 *
182 *******************************************************************************
183 *
184 * @param[inout] params
185 * The LRMM structure that stores all the parameters used in the LRMM
186 * functions family.
187 * @sa core_clrmm_t
188 *
189 *******************************************************************************
190 *
191 * @return The number of flops required to perform the operation.
192 *
193 *******************************************************************************/
194static inline pastix_fixdbl_t
/* NOTE(review): listing line 195 (function name and parameter list) is absent
 * from this extract. The cross-reference index at the end of the page gives
 * it as core_clrmm_Clr(core_clrmm_t *params) -- confirm against the real file. */
196{
/* The macro is described in the index as initializing all parameters of the
 * core_clrmm family "to ease the access". Presumably it provides the locals
 * used below (transA, transB, A, B, M, N) -- confirm. */
197 PASTE_CORE_CLRMM_PARAMS( params );
/* NOTE(review): listing lines 198-199 are absent from this extract. AB and
 * transV are used below without a visible declaration, so they are
 * presumably declared there -- confirm. */
200 int infomask = 0;
201 pastix_fixdbl_t flops = 0.0;
202
/* Preconditions: A is not transposed, B carries a transposition, and neither
 * A nor B has rank 0. */
203 assert(transA == PastixNoTrans);
204 assert(transB != PastixNoTrans);
205 assert( A->rk <= A->rkmax && A->rk != 0 );
206 assert( B->rk <= B->rkmax && B->rk != 0 );
207
/* Step 1: build the product op(A) * op(B) into the temporary block AB. The
 * kernel is picked from the ranks of A and B (rk == -1 selects the "fr"
 * name). Here the trailing rank argument is min(M, N), M or N, with no
 * core_get_rklimit() cap. Each kernel reports through infomask how AB must
 * be handled afterwards. */
208 if ( A->rk == -1 ) {
209 if ( B->rk == -1 ) {
210 kernel_trace_start_lvl2( PastixKernelLvl2_LR_FRFR2LR );
211 flops = core_cfrfr2lr( params, &AB, &infomask,
212 pastix_imin( M, N ) );
213 kernel_trace_stop_lvl2( flops );
214 }
215 else {
216 kernel_trace_start_lvl2( PastixKernelLvl2_LR_FRLR2LR );
217 flops = core_cfrlr2lr( params, &AB, &infomask, M );
218 kernel_trace_stop_lvl2( flops );
219 }
220 }
221 else {
222 if ( B->rk == -1 ) {
223 kernel_trace_start_lvl2( PastixKernelLvl2_LR_LRFR2LR );
224 flops = core_clrfr2lr( params, &AB, &infomask, N );
225 kernel_trace_stop_lvl2( flops );
226 }
227 else {
228 kernel_trace_start_lvl2( PastixKernelLvl2_LR_LRLR2LR );
229 flops = core_clrlr2lr( params, &AB, &infomask );
230 kernel_trace_stop_lvl2( flops );
231
/* The product of two low-rank blocks is expected to come back with a
 * rank and a maximal rank that are both different from -1. */
232 assert( AB.rk != -1 );
233 assert( AB.rkmax != -1 );
234 }
235 }
236
/* PASTIX_LRM3_TRANSB set: per the index, the operator on B still has to be
 * applied to the V part of AB, so transB is forwarded to the addition. */
237 if ( infomask & PASTIX_LRM3_TRANSB ) {
238 transV = transB;
239 }
240
/* Step 2: add AB into C through core_clradd() and accumulate its flops. */
241 flops += core_clradd( params, &AB, transV, infomask );
242
/* Step 3: release the U and V buffers of AB, but only those that the
 * product kernel flagged as allocated (ALLOCU / ALLOCV bits). */
243 /* Free memory from zlrm3 */
244 if ( infomask & PASTIX_LRM3_ALLOCU ) {
245 free(AB.u);
246 }
247 if ( infomask & PASTIX_LRM3_ALLOCV ) {
248 free(AB.v);
249 }
250
/* NOTE(review): listing line 251 is absent from this extract. */
252
253 return flops;
254}
255
256/**
257 *******************************************************************************
258 *
259 * @brief Compute the matrix matrix product when involved matrices are stored in
260 * a low-rank structure.
261 *
262 * This function considers the generic matrix matrix product added to a third
263 * matrix C. All matrices are either null, low-rank or full-rank.
264 *
265 *******************************************************************************
266 *
267 * @param[inout] params
268 * The LRMM structure that stores all the parameters used in the LRMM
269 * functions family.
270 * @sa core_clrmm_t
271 *
272 *******************************************************************************
273 *
274 * @return The number of flops required to perform the operation.
275 *
276 *******************************************************************************/
279{
/* NOTE(review): listing lines 277-278 (return type, function name and
 * parameter list) are absent from this extract. The cross-reference index at
 * the end of the page gives the signature as
 * pastix_fixdbl_t core_clrmm(core_clrmm_t *params) -- confirm. */
/* The macro is described in the index as initializing all parameters of the
 * core_clrmm family "to ease the access". Presumably it provides the locals
 * used below (transA, transB, A, B, C, work, lwork, Cm, lock, lowrank) --
 * confirm. */
280 PASTE_CORE_CLRMM_PARAMS( params );
281 pastix_fixdbl_t flops;
282
/* Preconditions: A is not transposed, B carries a transposition, and the
 * rank of every block stays within its rkmax. */
283 assert( transA == PastixNoTrans );
284 assert( transB != PastixNoTrans );
285 assert( A->rk <= A->rkmax);
286 assert( B->rk <= B->rkmax);
287 assert( C->rk <= C->rkmax);
288
/* A rank-0 operand makes the product null: C is left untouched and the
 * function returns before params->lwused is reset. */
289 /* Quick return if multiplication by 0 */
290 if ( A->rk == 0 || B->rk == 0 ) {
291 return 0.0;
292 }
293
/* Reset the lwused counter before dispatching (presumably the amount of
 * workspace consumed by the kernels -- confirm). */
294 params->lwused = 0;
295
/* With lwork == 0, the workspace pointer is forced to NULL both in params
 * and in the local copy, so that the assertion below holds. */
296 /* TODO: this is a temporary fix */
297 if ( lwork == 0 ) {
298 params->work = work = NULL;
299 }
/* Consistency check: a workspace pointer is set if and only if lwork > 0. */
300 assert( ((work != NULL) && (lwork > 0)) ||
301 ((work == NULL) && (lwork <= 0)) );
302
/* Dispatch on the rank of C: 0 -> Cnull variant, -1 -> Cfr variant, any
 * other value -> Clr variant. */
303 if ( C->rk == 0 ) {
304 flops = core_clrmm_Cnull( params );
305 }
306 else if ( C->rk == -1 ) {
307 flops = core_clrmm_Cfr( params );
308 }
309 else {
310 flops = core_clrmm_Clr( params );
311 }
312
/* Debug-only check, compiled in with PASTIX_DEBUG_LR: while holding the
 * lock, when C has a strictly positive rank and the compression method is
 * not SVD, the Cm-by-C->rk matrix C->u (leading dimension Cm) is passed to
 * the orthogonality checker, and a message is printed on stderr when it
 * returns 1. */
313#if defined(PASTIX_DEBUG_LR)
314 pastix_atomic_lock( lock );
315 if ( (C->rk > 0) && (lowrank->compress_method != PastixCompressMethodSVD) ) {
316 int rc = core_clrdbg_check_orthogonality( Cm, C->rk, (pastix_complex32_t*)C->u, Cm );
317 if (rc == 1) {
318 fprintf(stderr, "Failed to have u orthogonal in exit of lrmm\n" );
319 }
320 }
321 pastix_atomic_unlock( lock );
322#endif
323
/* NOTE(review): listing line 324 is absent from this extract. */
325 return flops;
326}
static pastix_fixdbl_t core_clrmm_Cnull(core_clrmm_t *params)
Compute the matrix matrix product when applied to a null matrix.
Definition core_clrmm.c:109
static pastix_fixdbl_t core_clrmm_Cfr(core_clrmm_t *params)
Compute the matrix matrix product when applied to a full rank matrix.
Definition core_clrmm.c:45
static pastix_fixdbl_t core_clrmm_Clr(core_clrmm_t *params)
Compute the matrix matrix product when applied to a low rank matrix.
Definition core_clrmm.c:195
float _Complex pastix_complex32_t
Definition datatypes.h:76
double pastix_fixdbl_t
Definition datatypes.h:65
int core_clrdbg_check_orthogonality(pastix_int_t M, pastix_int_t N, const pastix_complex32_t *A, pastix_int_t lda)
Check the orthogonality of the matrix A.
pastix_lrblock_t * C
pastix_trans_t transA
pastix_int_t lwused
const pastix_lrblock_t * B
const pastix_lrblock_t * A
pastix_trans_t transB
pastix_complex32_t * work
pastix_fixdbl_t core_cfrfr2fr(core_clrmm_t *params)
Perform the full-rank operation C = alpha * op(A) * op(B) + beta C.
Definition core_cxx2fr.c:48
#define PASTE_CORE_CLRMM_PARAMS(_a_)
Initialize all the parameters of the core_clrmm family functions to ease the access.
pastix_fixdbl_t core_clradd(core_clrmm_t *params, const pastix_lrblock_t *A, pastix_trans_t transV, int infomask)
Perform the addition of two low-rank matrices.
pastix_fixdbl_t core_clrfr2lr(core_clrmm_t *params, pastix_lrblock_t *AB, int *infomask, pastix_int_t Arkmin)
Perform the operation AB = op(A) * op(B), with B full-rank and A and AB low-rank.
pastix_fixdbl_t core_clrlr2fr(core_clrmm_t *params)
Perform the operation C = alpha * op(A) * op(B) + beta C, with A and B low-rank and C full-rank.
#define PASTE_CORE_CLRMM_VOID
Void all the parameters of the core_clrmm family functions to silence warnings.
pastix_fixdbl_t core_clrmm(core_clrmm_t *params)
Compute the matrix matrix product when involved matrices are stored in a low-rank structure.
Definition core_clrmm.c:278
pastix_fixdbl_t core_clrfr2fr(core_clrmm_t *params)
Perform the operation C = alpha * op(A) * op(B) + beta C, with B and C full-rank and A low-rank.
pastix_fixdbl_t core_cfrlr2lr(core_clrmm_t *params, pastix_lrblock_t *AB, int *infomask, pastix_int_t Brkmin)
Perform the operation AB = op(A) * op(B), with A full-rank and B and AB low-rank.
pastix_fixdbl_t core_cfrfr2lr(core_clrmm_t *params, pastix_lrblock_t *AB, int *infomask, pastix_int_t Kmax)
Perform the operation AB = op(A) * op(B), with A and B full-rank and AB low-rank.
Definition core_cxx2lr.c:66
pastix_fixdbl_t core_clrlr2lr(core_clrmm_t *params, pastix_lrblock_t *AB, int *infomask)
Perform the operation AB = op(A) * op(B), with A, B, and AB low-rank.
pastix_fixdbl_t core_cfrlr2fr(core_clrmm_t *params)
Perform the operation C = alpha * op(A) * op(B) + beta C, with A and C full-rank and B low-rank.
Structure to store all the parameters of the core_clrmm family functions.
#define PASTIX_LRM3_ALLOCV
Macro to specify if the V part of a low-rank matrix has been allocated and needs to be freed or not (U...
#define PASTIX_LRM3_TRANSB
Macro to specify if the operator on B still needs to be applied to the V part of the low-rank ma...
pastix_int_t(* core_get_rklimit)(pastix_int_t, pastix_int_t)
Compute the maximal rank accepted for a given matrix size. The pointer is set according to the low-ra...
#define PASTIX_LRM3_ALLOCU
Macro to specify if the U part of a low-rank matrix has been allocated and needs to be freed or not (U...
The block low-rank structure to hold a matrix in low-rank form.
enum pastix_trans_e pastix_trans_t
Transposition.
@ PastixNoTrans
Definition api.h:445
@ PastixCompressMethodSVD
Definition api.h:395