PaStiX Handbook  6.4.0
core_clrmm.c
Go to the documentation of this file.
1 /**
2  *
3  * @file core_clrmm.c
4  *
5  * PaStiX low-rank kernel routines to compute a matrix-matrix product in either
6  * low-rank or full-rank form.
7  *
8  * @copyright 2016-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
9  * Univ. Bordeaux. All rights reserved.
10  *
11  * @version 6.4.0
12  * @author Mathieu Faverge
13  * @author Gregoire Pichon
14  * @author Pierre Ramet
15  * @date 2024-07-05
16  * @generated from /builds/solverstack/pastix/kernels/core_zlrmm.c, normal z -> c, Thu Aug 29 14:20:16 2024
17  *
18  **/
19 #include "common.h"
20 #include "kernels_trace.h"
21 #include "pastix_clrcores.h"
22 
23 /**
24  *******************************************************************************
25  *
26  * @brief Compute the matrix matrix product when applied to a full rank matrix.
27  *
28  * This function considers that the C matrix is full rank, and A and B are
29  * either full-rank or low-rank. The result of the product is directly applied
30  * to the C matrix.
31  *
32  *******************************************************************************
33  *
34  * @param[inout] params
35  * The LRMM structure that stores all the parameters used in the LRMM
36  * functions family.
37  * @sa core_clrmm_t
38  *
39  *******************************************************************************
40  *
41  * @return The number of flops required to perform the operation.
42  *
43  *******************************************************************************/
44 static inline pastix_fixdbl_t /* core_clrmm_Cfr( core_clrmm_t *params ): the declarator (file line 45) is absent from this listing */
46 {
47  const pastix_lrblock_t *A = params->A;
48  const pastix_lrblock_t *B = params->B;
49  pastix_fixdbl_t flops = 0.0; /* flop count returned by the selected kernel */
50 
51  assert( params->transA == PastixNoTrans ); /* A must be used untransposed */
52  assert( params->transB != PastixNoTrans ); /* transB must be something other than PastixNoTrans */
53  assert( A->rk <= A->rkmax && A->rk != 0 ); /* A is never a null (rank 0) block here */
54  assert( B->rk <= B->rkmax && B->rk != 0 ); /* same requirement on B */
55  assert( params->C->rk == -1 ); /* rk == -1 marks a full-rank block: C must be one */
56 
57  if ( A->rk == -1 ) { /* A is full-rank */
58  if ( B->rk == -1 ) { /* B is full-rank */
59  kernel_trace_start_lvl2( PastixKernelLvl2_LR_FRFR2FR );
60  flops = core_cfrfr2fr( params );
61  kernel_trace_stop_lvl2( flops );
62  }
63  else { /* B is low-rank */
64  kernel_trace_start_lvl2( PastixKernelLvl2_LR_FRLR2FR );
65  flops = core_cfrlr2fr( params );
66  kernel_trace_stop_lvl2( flops );
67  }
68  }
69  else { /* A is low-rank */
70  if ( B->rk == -1 ) { /* B is full-rank */
71  kernel_trace_start_lvl2( PastixKernelLvl2_LR_LRFR2FR );
72  flops = core_clrfr2fr( params );
73  kernel_trace_stop_lvl2( flops );
74  }
75  else { /* B is low-rank */
76  kernel_trace_start_lvl2( PastixKernelLvl2_LR_LRLR2FR );
77  flops = core_clrlr2fr( params );
78  kernel_trace_stop_lvl2( flops );
79  }
80  }
81 
82  assert( params->C->rk == -1 ); /* the kernels must leave C in full-rank form */
83 
84  return flops;
85 }
86 
87 /**
88  *******************************************************************************
89  *
90  * @brief Compute the matrix matrix product when applied to a null matrix.
91  *
92  * This function considers that the C matrix is null, and A and B are either
93  * full-rank or low-rank. The result of the product is directly applied to the
94  * C matrix.
95  *
96  *******************************************************************************
97  *
98  * @param[inout] params
99  * The LRMM structure that stores all the parameters used in the LRMM
100  * functions family.
101  * @sa core_clrmm_t
102  *
103  *******************************************************************************
104  *
105  * @return The number of flops required to perform the operation.
106  *
107  *******************************************************************************/
108 static inline pastix_fixdbl_t /* core_clrmm_Cnull( core_clrmm_t *params ): the declarator (file line 109) is absent from this listing */
110 {
111  PASTE_CORE_CLRMM_PARAMS( params ); /* presumably declares the locals used below (A, B, M, N, Cm, Cn, transA, transB, ...) from params */
112  pastix_lrblock_t AB; /* receives the product op(A) * op(B) before it is added */
113  pastix_trans_t transV = PastixNoTrans; /* operator handed to core_clradd for the V part of AB */
114  int infomask = 0; /* PASTIX_LRM3_* flags filled in by the product kernel */
115  pastix_fixdbl_t flops = 0.0;
116 
117  assert(transA == PastixNoTrans);
118  assert(transB != PastixNoTrans);
119  assert( A->rk <= A->rkmax && A->rk != 0 ); /* A is never a null (rank 0) block here */
120  assert( B->rk <= B->rkmax && B->rk != 0 ); /* same requirement on B */
121 
122  if ( A->rk == -1 ) { /* A is full-rank (rk == -1) */
123  if ( B->rk == -1 ) { /* B is full-rank */
124  kernel_trace_start_lvl2( PastixKernelLvl2_LR_FRFR2null );
125  flops = core_cfrfr2lr( params, &AB, &infomask,
126  pastix_imin( pastix_imin( M, N ), /* bound passed as Kmax: min( M, N, rank limit of a Cm-by-Cn block ) */
127  core_get_rklimit( Cm, Cn ) ) );
128  kernel_trace_stop_lvl2( flops );
129  }
130  else { /* B is low-rank */
131  kernel_trace_start_lvl2( PastixKernelLvl2_LR_FRLR2null );
132  flops = core_cfrlr2lr( params, &AB, &infomask,
133  pastix_imin( M, core_get_rklimit( Cm, Cn ) ) ); /* bound passed as Brkmin */
134  kernel_trace_stop_lvl2( flops );
135  }
136  }
137  else { /* A is low-rank */
138  if ( B->rk == -1 ) { /* B is full-rank */
139  kernel_trace_start_lvl2( PastixKernelLvl2_LR_LRFR2null );
140  flops = core_clrfr2lr( params, &AB, &infomask,
141  pastix_imin( N, core_get_rklimit( Cm, Cn ) ) ); /* bound passed as Arkmin */
142  kernel_trace_stop_lvl2( flops );
143  }
144  else { /* B is low-rank */
145  kernel_trace_start_lvl2( PastixKernelLvl2_LR_LRLR2null );
146  flops = core_clrlr2lr( params, &AB, &infomask );
147  kernel_trace_stop_lvl2( flops );
148 
149  assert( AB.rk != -1 ); /* a low-rank by low-rank product must not come back full-rank */
150  assert( AB.rkmax != -1 );
151  }
152  }
153 
154  if ( infomask & PASTIX_LRM3_TRANSB ) { /* the operator on B still has to be applied to the V part of AB */
155  transV = transB;
156  }
157 
158  flops += core_clradd( params, &AB, transV, infomask ); /* low-rank addition of AB; its flops are accumulated */
159 
160  /* Free the U/V buffers of AB that the product kernel allocated (flagged in infomask) */
161  if ( infomask & PASTIX_LRM3_ALLOCU ) {
162  free(AB.u);
163  }
164  if ( infomask & PASTIX_LRM3_ALLOCV ) {
165  free(AB.v);
166  }
167 
169 
170  return flops;
171 }
172 
173 /**
174  *******************************************************************************
175  *
176  * @brief Compute the matrix matrix product when applied to a low rank matrix.
177  *
178  * This function considers that the C matrix is low rank, and A and B are
179  * either full-rank or low-rank. The result of the product is directly applied
180  * to the C matrix.
181  *
182  *******************************************************************************
183  *
184  * @param[inout] params
185  * The LRMM structure that stores all the parameters used in the LRMM
186  * functions family.
187  * @sa core_clrmm_t
188  *
189  *******************************************************************************
190  *
191  * @return The number of flops required to perform the operation.
192  *
193  *******************************************************************************/
194 static inline pastix_fixdbl_t /* core_clrmm_Clr( core_clrmm_t *params ): the declarator (file line 195) is absent from this listing */
196 {
197  PASTE_CORE_CLRMM_PARAMS( params ); /* presumably declares the locals used below (A, B, M, N, transA, transB, ...) from params */
198  pastix_lrblock_t AB; /* receives the product op(A) * op(B) before it is added */
199  pastix_trans_t transV = PastixNoTrans; /* operator handed to core_clradd for the V part of AB */
200  int infomask = 0; /* PASTIX_LRM3_* flags filled in by the product kernel */
201  pastix_fixdbl_t flops = 0.0;
202 
203  assert(transA == PastixNoTrans);
204  assert(transB != PastixNoTrans);
205  assert( A->rk <= A->rkmax && A->rk != 0 ); /* A is never a null (rank 0) block here */
206  assert( B->rk <= B->rkmax && B->rk != 0 ); /* same requirement on B */
207 
208  if ( A->rk == -1 ) { /* A is full-rank (rk == -1) */
209  if ( B->rk == -1 ) { /* B is full-rank */
210  kernel_trace_start_lvl2( PastixKernelLvl2_LR_FRFR2LR );
211  flops = core_cfrfr2lr( params, &AB, &infomask,
212  pastix_imin( M, N ) ); /* bound passed as Kmax; no core_get_rklimit cap on this path */
213  kernel_trace_stop_lvl2( flops );
214  }
215  else { /* B is low-rank */
216  kernel_trace_start_lvl2( PastixKernelLvl2_LR_FRLR2LR );
217  flops = core_cfrlr2lr( params, &AB, &infomask, M ); /* M is passed as Brkmin */
218  kernel_trace_stop_lvl2( flops );
219  }
220  }
221  else { /* A is low-rank */
222  if ( B->rk == -1 ) { /* B is full-rank */
223  kernel_trace_start_lvl2( PastixKernelLvl2_LR_LRFR2LR );
224  flops = core_clrfr2lr( params, &AB, &infomask, N ); /* N is passed as Arkmin */
225  kernel_trace_stop_lvl2( flops );
226  }
227  else { /* B is low-rank */
228  kernel_trace_start_lvl2( PastixKernelLvl2_LR_LRLR2LR );
229  flops = core_clrlr2lr( params, &AB, &infomask );
230  kernel_trace_stop_lvl2( flops );
231 
232  assert( AB.rk != -1 ); /* a low-rank by low-rank product must not come back full-rank */
233  assert( AB.rkmax != -1 );
234  }
235  }
236 
237  if ( infomask & PASTIX_LRM3_TRANSB ) { /* the operator on B still has to be applied to the V part of AB */
238  transV = transB;
239  }
240 
241  flops += core_clradd( params, &AB, transV, infomask ); /* low-rank addition of AB; its flops are accumulated */
242 
243  /* Free the U/V buffers of AB that the product kernel allocated (flagged in infomask) */
244  if ( infomask & PASTIX_LRM3_ALLOCU ) {
245  free(AB.u);
246  }
247  if ( infomask & PASTIX_LRM3_ALLOCV ) {
248  free(AB.v);
249  }
250 
252 
253  return flops;
254 }
255 
256 /**
257  *******************************************************************************
258  *
259  * @brief Compute the matrix matrix product when involved matrices are stored in
260  * a low-rank structure.
261  *
262  * This function considers the generic matrix matrix product added to a third
263  * matrix C. All matrices are either null, low-rank or full-rank.
264  *
265  *******************************************************************************
266  *
267  * @param[inout] params
268  * The LRMM structure that stores all the parameters used in the LRMM
269  * functions family.
270  * @sa core_clrmm_t
271  *
272  *******************************************************************************
273  *
274  * @return The number of flops required to perform the operation.
275  *
276  *******************************************************************************/
279 { /* body of core_clrmm( core_clrmm_t *params ); its declarator (file lines 277-278) is absent from this listing */
280  PASTE_CORE_CLRMM_PARAMS( params ); /* presumably declares the locals used below (A, B, C, work, lwork, ...) from params */
281  pastix_fixdbl_t flops;
282 
283  assert( transA == PastixNoTrans );
284  assert( transB != PastixNoTrans );
285  assert( A->rk <= A->rkmax);
286  assert( B->rk <= B->rkmax);
287  assert( C->rk <= C->rkmax);
288 
289  /* Quick return: a null (rank 0) A or B means nothing is added to C, and no flops are counted */
290  if ( A->rk == 0 || B->rk == 0 ) {
291  return 0.0;
292  }
293 
294  params->lwused = 0;
295 
296  /* TODO: this is a temporary fix: with a zero lwork the workspace pointer is dropped, both locally and in params */
297  if ( lwork == 0 ) {
298  params->work = work = NULL;
299  }
300  assert( ((work != NULL) && (lwork > 0)) || /* a workspace pointer and a positive size must come together */
301  ((work == NULL) && (lwork <= 0)) );
302 
303  if ( C->rk == 0 ) { /* C is a null block */
304  flops = core_clrmm_Cnull( params );
305  }
306  else if ( C->rk == -1 ) { /* C is full-rank */
307  flops = core_clrmm_Cfr( params );
308  }
309  else { /* C is low-rank */
310  flops = core_clrmm_Clr( params );
311  }
312 
313 #if defined(PASTIX_DEBUG_LR)
314  pastix_atomic_lock( lock ); /* PASTIX_DEBUG_LR builds only: the check below runs under the lock */
315  if ( (C->rk > 0) && (lowrank->compress_method != PastixCompressMethodSVD) ) { /* low-rank result, any compression method but SVD */
316  int rc = core_clrdbg_check_orthogonality( Cm, C->rk, (pastix_complex32_t*)C->u, Cm ); /* C->u is checked as a Cm-by-rk matrix with leading dimension Cm */
317  if (rc == 1) {
318  fprintf(stderr, "Failed to have u orthogonal in exit of lrmm\n" );
319  }
320  }
321  pastix_atomic_unlock( lock );
322 #endif
323 
325  return flops;
326 }
static pastix_fixdbl_t core_clrmm_Cnull(core_clrmm_t *params)
Compute the matrix matrix product when applied to a null matrix.
Definition: core_clrmm.c:109
static pastix_fixdbl_t core_clrmm_Cfr(core_clrmm_t *params)
Compute the matrix matrix product when applied to a full rank matrix.
Definition: core_clrmm.c:45
static pastix_fixdbl_t core_clrmm_Clr(core_clrmm_t *params)
Compute the matrix matrix product when applied to a low rank matrix.
Definition: core_clrmm.c:195
float _Complex pastix_complex32_t
Definition: datatypes.h:76
double pastix_fixdbl_t
Definition: datatypes.h:65
int core_clrdbg_check_orthogonality(pastix_int_t M, pastix_int_t N, const pastix_complex32_t *A, pastix_int_t lda)
Check the orthogonality of the matrix A.
Definition: core_clrdbg.c:101
pastix_lrblock_t * C
pastix_trans_t transA
pastix_int_t lwused
const pastix_lrblock_t * B
const pastix_lrblock_t * A
pastix_trans_t transB
pastix_complex32_t * work
pastix_fixdbl_t core_cfrfr2fr(core_clrmm_t *params)
Perform the full-rank operation C = alpha * op(A) * op(B) + beta C.
Definition: core_cxx2fr.c:48
#define PASTE_CORE_CLRMM_PARAMS(_a_)
Initialize all the parameters of the core_clrmm family functions to ease the access.
pastix_fixdbl_t core_clradd(core_clrmm_t *params, const pastix_lrblock_t *A, pastix_trans_t transV, int infomask)
Perform the addition of two low-rank matrices.
Definition: core_clr2xx.c:383
pastix_fixdbl_t core_clrfr2lr(core_clrmm_t *params, pastix_lrblock_t *AB, int *infomask, pastix_int_t Arkmin)
Perform the operation AB = op(A) * op(B), with B full-rank and A and AB low-rank.
Definition: core_cxx2lr.c:298
pastix_fixdbl_t core_clrlr2fr(core_clrmm_t *params)
Perform the operation C = alpha * op(A) * op(B) + beta C, with A and B low-rank and C full-rank.
Definition: core_cxx2fr.c:304
#define PASTE_CORE_CLRMM_VOID
Void all the parameters of the core_clrmm family functions to silence warnings.
pastix_fixdbl_t core_clrmm(core_clrmm_t *params)
Compute the matrix matrix product when involved matrices are stored in a low-rank structure.
Definition: core_clrmm.c:278
pastix_fixdbl_t core_clrfr2fr(core_clrmm_t *params)
Perform the operation C = alpha * op(A) * op(B) + beta C, with B and C full-rank and A low-rank.
Definition: core_cxx2fr.c:202
pastix_fixdbl_t core_cfrlr2lr(core_clrmm_t *params, pastix_lrblock_t *AB, int *infomask, pastix_int_t Brkmin)
Perform the operation AB = op(A) * op(B), with A full-rank and B and AB low-rank.
Definition: core_cxx2lr.c:152
pastix_fixdbl_t core_cfrfr2lr(core_clrmm_t *params, pastix_lrblock_t *AB, int *infomask, pastix_int_t Kmax)
Perform the operation AB = op(A) * op(B), with A and B full-rank and AB low-rank.
Definition: core_cxx2lr.c:66
pastix_fixdbl_t core_clrlr2lr(core_clrmm_t *params, pastix_lrblock_t *AB, int *infomask)
Perform the operation AB = op(A) * op(B), with A, B, and AB low-rank.
Definition: core_cxx2lr.c:442
pastix_fixdbl_t core_cfrlr2fr(core_clrmm_t *params)
Perform the operation C = alpha * op(A) * op(B) + beta C, with A and C full-rank and B low-rank.
Definition: core_cxx2fr.c:101
Structure to store all the parameters of the core_clrmm family functions.
#define PASTIX_LRM3_ALLOCV
Macro to specify if the V part of a low-rank matrix has been allocated and needs to be freed or not (U...
#define PASTIX_LRM3_TRANSB
Macro to specify if the operator on B still needs to be applied to the V part of the low-rank ma...
pastix_int_t(* core_get_rklimit)(pastix_int_t, pastix_int_t)
Compute the maximal rank accepted for a given matrix size. The pointer is set according to the low-ra...
Definition: kernels_trace.c:46
#define PASTIX_LRM3_ALLOCU
Macro to specify if the U part of a low-rank matrix has been allocated and needs to be freed or not (U...
The block low-rank structure to hold a matrix in low-rank form.
enum pastix_trans_e pastix_trans_t
Transposition.
@ PastixNoTrans
Definition: api.h:445
@ PastixCompressMethodSVD
Definition: api.h:395