PaStiX Handbook  6.2.1
coeftab_s.c
Go to the documentation of this file.
1 /**
2  *
3  * @file coeftab_s.c
4  *
5  * Precision dependent sequential routines to apply operation of the full matrix.
6  *
7  * @copyright 2015-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.2.0
11  * @author Pierre Ramet
12  * @author Xavier Lacoste
13  * @author Gregoire Pichon
14  * @author Mathieu Faverge
15  * @author Esragul Korkmaz
16  * @author Tony Delarue
17  * @date 2021-04-02
18  *
19  * @generated from /builds/solverstack/pastix/sopalin/coeftab_z.c, normal z -> s, Tue Jul 27 19:17:34 2021
20  *
21  **/
22 #include "common.h"
23 #include "blend/solver.h"
24 #include "lapacke.h"
25 #include "sopalin/coeftab_s.h"
26 #include "pastix_scores.h"
27 
28 /**
29  *******************************************************************************
30  *
31  * @brief Dump the solver matrix coefficients into a file in human readable
32  * format.
33  *
34  * All non-zeroes coefficients are dumped in the format:
35  * i j val
36  * with one value per row.
37  *
38  *******************************************************************************
39  *
40  * @param[inout] pastix_data
41  * The pastix_data instance to access the unique directory id in which
42  * output the files.
43  *
44  * @param[in] solvmtx
45  * The solver matrix to print.
46  *
47  * @param[in] filename
48  * The filename where to store the output matrix.
49  *
50  *******************************************************************************/
51 void
52 coeftab_sdump( pastix_data_t *pastix_data,
53  const SolverMatrix *solvmtx,
54  const char *prefix )
55 {
56  SolverCblk *cblk = solvmtx->cblktab;
57  pastix_int_t itercblk;
58  char filename[256];
59  FILE *stream = NULL;
60 
61  pastix_gendirectories( pastix_data );
62 
63  /*
64  * TODO: there is a problem right here for now, because there are no
65  * distinctions between L and U coeffcients in the final file
66  */
67  for (itercblk=0; itercblk<solvmtx->cblknbr; itercblk++, cblk++)
68  {
69  if ( cblk->cblktype & (CBLK_FANIN|CBLK_RECV) ) {
70  continue;
71  }
72  if ( solvmtx->clustnum != cblk->ownerid ) {
73  continue;
74  }
75 
76  sprintf( filename, "%s_%ld.txt", prefix, (long)cblk->gcblknum );
77  stream = pastix_fopenw( pastix_data->dir_global, filename, "w" );
78  if ( stream == NULL ){
79  continue;
80  }
81 
82  cpucblk_sdump( PastixLCoef, cblk, stream );
83  if ( NULL != cblk->ucoeftab ) {
84  cpucblk_sdump( PastixUCoef, cblk, stream );
85  }
86 
87  fclose( stream );
88  }
89 }
90 
91 /**
92  *******************************************************************************
93  *
94  * @brief Compare two solver matrices in full-rank format with the same data
95  * distribution.
96  *
97  * The second solver matrix is overwritten by the difference of the two
98  * matrices. The frobenius norm of the difference of each column block is
99  * computed and the functions returns 0 if the result for all the column blocks
100  * of:
101  * || B_k - A_k || / ( || A_k || * eps )
102  *
103  * is below 10. Otherwise, an error message is printed and 1 is returned.
104  *
105  *******************************************************************************
106  *
107  * @param[in] side
108  * Define which side of the cblk must be tested.
109  * @arg PastixLCoef if lower part only
110  * @arg PastixUCoef if upper part only
111  * @arg PastixLUCoef if both sides.
112  *
113  * @param[in] solvA
114  * The solver matrix A.
115  *
116  * @param[inout] solvB
117  * The solver matrix B.
118  * On exit, B coefficient arrays are overwritten by the result of
119  * (B-A).
120  *
121  *******************************************************************************
122  *
123  * @return 0 if the test is passed, >= 0 otherwise.
124  *
125  *******************************************************************************/
126 int
128  const SolverMatrix *solvA,
129  SolverMatrix *solvB )
130 {
131  SolverCblk *cblkA = solvA->cblktab;
132  SolverCblk *cblkB = solvB->cblktab;
133  pastix_int_t cblknum;
134  int rc = 0;
135  int saved_rc = 0;
136 
137  for(cblknum=0; cblknum<solvA->cblknbr; cblknum++, cblkA++, cblkB++) {
138  rc += cpucblk_sdiff( side, cblkA, cblkB );
139  if ( rc != saved_rc ){
140  fprintf(stderr, "CBLK %ld was not correctly compressed\n", (long)cblknum);
141  saved_rc = rc;
142  }
143  }
144 
145  return rc;
146 }
147 
148 /**
149  *******************************************************************************
150  *
151  * @brief Compress all the cblks marked as valid for low-rank format.
152  *
153  * All the cblk in the top levels of the elimination tree marked as candidates
154  * for compression are compressed if there is a gain to compress them. The
155  * compression to low-rank format is parameterized by the input information
156  * stored in the lowrank structure. On exit, all the cblks marked for
157  * compression are stored through the low-rank structure, even if they are kept
158  * in their full-rank form.
159  *
160  * @remark This routine is sequential
161  *
162  *******************************************************************************
163  *
164  * @param[inout] solvmtx
165  * The solver matrix of the problem to compress.
166  *
167  *******************************************************************************
168  *
169  * @return The memory gain resulting from the compression to low-rank format in
170  * Bytes.
171  *
172  *******************************************************************************/
173 pastix_int_t
174 coeftab_scompress( SolverMatrix *solvmtx )
175 {
176  SolverCblk *cblk = solvmtx->cblktab;
177  pastix_coefside_t side = (solvmtx->factotype == PastixFactLU) ? PastixLUCoef : PastixLCoef;
178  int ilu_lvl;
179  pastix_int_t cblknum, gain = 0;
180 
181  ilu_lvl = solvmtx->lowrank.compress_preselect ? -1 : solvmtx->lowrank.ilu_lvl;
182 
183  for(cblknum=0; cblknum<solvmtx->cblknbr; cblknum++, cblk++) {
184  if ( cblk->cblktype & CBLK_COMPRESSED ) {
185  gain += cpucblk_scompress( solvmtx, side, ilu_lvl, cblk );
186  }
187  }
188  return gain;
189 }
190 
191 /**
192  *******************************************************************************
193  *
194  * @brief Uncompress all column block in low-rank format into full-rank format
195  *
196  *******************************************************************************
197  *
198  * @param[inout] solvmtx
199  * The solver matrix of the problem.
200  *
201  *******************************************************************************/
202 void
203 coeftab_suncompress( SolverMatrix *solvmtx )
204 {
205  SolverCblk *cblk = solvmtx->cblktab;
206  pastix_int_t cblknum;
207  pastix_coefside_t side = (solvmtx->factotype == PastixFactLU) ? PastixLUCoef : PastixLCoef;
208 
209  for(cblknum=0; cblknum<solvmtx->cblknbr; cblknum++, cblk++) {
210  if (cblk->cblktype & CBLK_COMPRESSED) {
211  cpucblk_suncompress( side, cblk );
212  }
213  }
214 }
215 
216 /**
217  *******************************************************************************
218  *
219  * @brief Compute the memory gain of the low-rank form over the full-rank form
220  * for the entire matrix.
221  *
222  * This function returns the memory gain in bytes for the full matrix when
223  * column blocks are stored in low-rank format compared to a full rank storage.
224  *
225  *******************************************************************************
226  *
227  * @param[in] solvmtx
228  * The solver matrix of the problem.
229  *
230  *******************************************************************************/
231 void
232 coeftab_smemory( SolverMatrix *solvmtx )
233 {
234  pastix_coefside_t side = (solvmtx->factotype == PastixFactLU) ? PastixLUCoef : PastixLCoef;
235  SolverCblk *cblk = solvmtx->cblktab;
236  SolverBlok *blok;
237  pastix_int_t i, cblknum;
238  pastix_int_t gain[MEMORY_STATS_SIZE] = { 0 };
239  pastix_int_t orig[MEMORY_STATS_SIZE] = { 0 };
240  pastix_fixdbl_t memlr[MEMORY_STATS_SIZE] = { 0. };
241  pastix_fixdbl_t memfr[MEMORY_STATS_SIZE] = { 0. };
242  pastix_fixdbl_t totlr, totfr;
243 
244 #if defined(PASTIX_SUPERNODE_STATS)
245  pastix_int_t last[3] = { 0 };
246  pastix_fixdbl_t memlast[4];
247  SolverBlok *solvblok = solvmtx->bloktab;
248 
249  for(i=0; i<solvmtx->bloknbr; i++, solvblok++ ) {
250  SolverCblk *lcblk = solvmtx->cblktab + solvblok->lcblknm;
251  pastix_int_t ncols = cblk_colnbr( lcblk );
252  pastix_int_t nrows = blok_rownbr( solvblok );
253  pastix_int_t size = ncols * nrows;
254 
255  /* Let's skip recv and fanin for now */
256  if ( lcblk->cblktype & (CBLK_RECV|CBLK_FANIN) ) {
257  continue;
258  }
259 
260  if ( !(lcblk->cblktype & CBLK_COMPRESSED) ) {
261  if ( side != PastixLCoef ) {
262  last[solvblok->inlast] += 2 * size;
263  }
264  else{
265  last[solvblok->inlast] += size;
266  }
267  }
268  else{
269  if ( side != PastixUCoef ) {
270  if ( solvblok->LRblock[0].rk >= 0 ) {
271  assert( solvblok->LRblock[0].rk <= core_get_rklimit( nrows, ncols ) );
272  assert( ((nrows+ncols) * solvblok->LRblock[0].rkmax) <= size );
273  last[solvblok->inlast] += ((nrows+ncols) * solvblok->LRblock[0].rkmax);
274  }
275  else {
276  last[solvblok->inlast] += size;
277  }
278  }
279 
280  if ( side != PastixLCoef ) {
281  if ( solvblok->LRblock[1].rk >= 0 ) {
282  assert( solvblok->LRblock[1].rk <= core_get_rklimit( nrows, ncols ) );
283  assert( ((nrows+ncols) * solvblok->LRblock[1].rkmax) <= size );
284  last[solvblok->inlast] += ((nrows+ncols) * solvblok->LRblock[1].rkmax);
285  }
286  else {
287  last[solvblok->inlast] += size;
288  }
289  }
290  }
291  }
292  for (i=0; i<3; i++) {
293  memlast[i] = last[i] * pastix_size_of( PastixFloat );
294  }
295  memlast[3] = memlast[0] + memlast[1] + memlast[2];
296 
297  pastix_print( solvmtx->clustnum, 0,
298  " Compression on LAST\n"
299  " ------------------------------------------------\n"
300  " A11 %8.3g %co\n"
301  " A12 %8.3g %co\n"
302  " A22 %8.3g %co\n"
303  " SUM %8.3g %co\n",
304  pastix_print_value(memlast[0]), pastix_print_unit(memlast[0]),
305  pastix_print_value(memlast[1]), pastix_print_unit(memlast[1]),
306  pastix_print_value(memlast[2]), pastix_print_unit(memlast[2]),
307  pastix_print_value(memlast[3]), pastix_print_unit(memlast[3]));
308 #endif
309 
310  for(cblknum=0; cblknum<solvmtx->cblknbr; cblknum++, cblk++) {
311  pastix_int_t colnbr = cblk_colnbr( cblk );
312 
313  /* Let's skip recv and fanin for now */
314  if ( cblk->cblktype & (CBLK_RECV|CBLK_FANIN) ) {
315  continue;
316  }
317 
318  if ( !(cblk->cblktype & CBLK_COMPRESSED) )
319  {
320  pastix_int_t in_height = 0;
321  pastix_int_t off_height = cblk->stride;
322 
323  /* Compute the size of the original supernode diagonal block */
324  blok = cblk->fblokptr;
325  while( (blok < cblk[1].fblokptr) &&
326  ((solvmtx->cblktab + blok->fcblknm)->sndeidx == cblk->sndeidx) )
327  {
328  in_height += blok_rownbr( blok );
329  blok++;
330  }
331 
332  /* Size of the cblk outside the diagonal block */
333  off_height -= in_height;
334 
335  orig[FR_InDiag] += colnbr * in_height;
336  orig[FR_OffDiag] += colnbr * off_height;
337  }
338  else {
339  /* The gain for the diagonal block is always 0 */
340  orig[LR_DInD] += colnbr * colnbr;
341  cpucblk_smemory( side, solvmtx, cblk, orig, gain );
342  }
343  }
344 
345  if ( side == PastixLUCoef ) {
346  orig[FR_InDiag] *= 2;
347  orig[FR_OffDiag] *= 2;
348  orig[LR_InDiag] *= 2;
349  orig[LR_OffDiag] *= 2;
350  orig[LR_InSele] *= 2;
351  }
352 
353  totlr = 0.;
354  totfr = 0.;
355 
356  for (i=0; i<MEMORY_STATS_SIZE; i++) {
357  memlr[i] = (orig[i] - gain[i]) * pastix_size_of( PastixFloat );
358  memfr[i] = orig[i] * pastix_size_of( PastixFloat );
359  totlr += memlr[i];
360  totfr += memfr[i];
361  }
362 
363  pastix_print( solvmtx->clustnum, 0,
364  " Compression:\n"
365  " ------------------------------------------------\n"
366  " Full-rank supernodes\n"
367  " Inside %8.3g %co\n"
368  " Outside %8.3g %co\n"
369  " Low-rank supernodes\n"
370  " Diag in diag %8.3g %co\n"
371  " Inside not selected %8.3g %co / %8.3g %co\n"
372  " Inside selected %8.3g %co / %8.3g %co\n"
373  " Outside %8.3g %co / %8.3g %co\n"
374  " ------------------------------------------------\n"
375  " Total %8.3g %co / %8.3g %co\n",
376  pastix_print_value(memfr[FR_InDiag] ), pastix_print_unit(memfr[FR_InDiag] ),
377  pastix_print_value(memfr[FR_OffDiag]), pastix_print_unit(memfr[FR_OffDiag]),
378 
379  pastix_print_value(memfr[LR_DInD]), pastix_print_unit(memfr[LR_DInD]),
380 
381  pastix_print_value(memlr[LR_InDiag] ), pastix_print_unit(memlr[LR_InDiag] ),
382  pastix_print_value(memfr[LR_InDiag] ), pastix_print_unit(memfr[LR_InDiag] ),
383 
384  pastix_print_value(memlr[LR_InSele] ), pastix_print_unit(memlr[LR_InSele]),
385  pastix_print_value(memfr[LR_InSele] ), pastix_print_unit(memfr[LR_InSele]),
386 
387  pastix_print_value(memlr[LR_OffDiag]), pastix_print_unit(memlr[LR_OffDiag]),
388  pastix_print_value(memfr[LR_OffDiag]), pastix_print_unit(memfr[LR_OffDiag]),
389 
390  pastix_print_value(totlr), pastix_print_unit(totlr),
391  pastix_print_value(totfr), pastix_print_unit(totfr) );
392 
393  return;
394 }
395 
396 /**
397  *******************************************************************************
398  *
399  * @brief Extract the Schur complement
400  *
401  * This routine is sequential and returns the full Schur complement
402  * uncommpressed in Lapack format.
403  *
404  *******************************************************************************
405  *
406  * @param[in] solvmtx
407  * The solver matrix structure describing the problem.
408  *
409  * @param[inout] S
410  * The pointer to the allocated matrix array that will store the Schur
411  * complement.
412  *
413  * @param[in] lds
414  * The leading dimension of the S array.
415  *
416  *******************************************************************************/
417 void
418 coeftab_sgetschur( const SolverMatrix *solvmtx,
419  float *S, pastix_int_t lds )
420 {
421  SolverCblk *cblk = solvmtx->cblktab + solvmtx->cblkschur;
422  float *localS;
423  pastix_int_t itercblk, fcolnum, nbcol;
424  int upper_part = (solvmtx->factotype == PastixFactLU);
425  fcolnum = cblk->fcolnum;
426 
427  nbcol = solvmtx->nodenbr - fcolnum;
428  assert( nbcol <= lds );
429 
430  /* Initialize the array to 0 */
431  LAPACKE_slaset_work( LAPACK_COL_MAJOR, 'A', nbcol, nbcol, 0., 0., S, lds );
432 
433  for (itercblk=solvmtx->cblkschur; itercblk<solvmtx->cblknbr; itercblk++, cblk++)
434  {
435  assert( cblk->cblktype & CBLK_IN_SCHUR );
436  assert( lds >= cblk->stride );
437 
438  localS = S + (cblk->fcolnum - fcolnum) * lds + (cblk->fcolnum - fcolnum);
439 
440  cpucblk_sgetschur( cblk, upper_part, localS, lds );
441  }
442 }
443 
444 /**
445  *******************************************************************************
446  *
447  * @brief Extract the diagonal
448  *
449  * This routine is sequential and returns the full diagonal in the vector D,
450  * such that:
451  * D[incD*i]= A(i, i)
452  *
453  *******************************************************************************
454  *
455  * @param[in] solvmtx
456  * The solver matrix structure describing the problem.
457  *
458  * @param[inout] D
459  * The pointer to the allocated vector array that will store the diagonal.
460  * D must be of size solvmtx->nodenbr * incD.
461  *
462  * @param[in] incD
463  * The increment bewteen two elements of D. incD > 0.
464  *
465  *******************************************************************************/
466 void
467 coeftab_sgetdiag( const SolverMatrix *solvmtx,
468  float *D, pastix_int_t incD )
469 {
470  SolverCblk *cblk = solvmtx->cblktab;
471  float *A;
472  pastix_int_t lda, itercblk, nbcol, i;
473 
474  for (itercblk=0; itercblk<solvmtx->cblknbr; itercblk++, cblk++)
475  {
476  nbcol = cblk_colnbr( cblk );
477  if ( cblk->cblktype & CBLK_COMPRESSED ) {
478  assert( cblk->fblokptr->LRblock[0].rk == -1 );
479  A = cblk->fblokptr->LRblock[0].u;
480  lda = cblk_colnbr( cblk ) + 1;
481  }
482  else {
483  A = cblk->lcoeftab;
484 
485  if ( cblk->cblktype & CBLK_LAYOUT_2D ) {
486  lda = cblk_colnbr( cblk ) + 1;
487  }
488  else {
489  lda = cblk->stride + 1;
490  }
491  }
492 
493  for (i=0; i<nbcol; i++, D += incD, A += lda ) {
494  *D = *A;
495  }
496  }
497 }
coeftab_sdump
void coeftab_sdump(pastix_data_t *pastix_data, const SolverMatrix *solvmtx, const char *prefix)
Dump the solver matrix coefficients into a file in human readable format.
Definition: coeftab_s.c:52
solver_cblk_s::ownerid
int ownerid
Definition: solver.h:146
FR_InDiag
@ FR_InDiag
Definition: pastix_lowrank.h:164
solver.h
blok_rownbr
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
Definition: solver.h:271
cblk_colnbr
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:247
core_get_rklimit
pastix_int_t(* core_get_rklimit)(pastix_int_t, pastix_int_t)
Compute the maximal rank accepted for a given matrix size. The pointer is set according to the low-ra...
Definition: kernels_trace.c:30
coeftab_sgetschur
void coeftab_sgetschur(const SolverMatrix *solvmtx, float *S, pastix_int_t lds)
Extract the Schur complement.
Definition: coeftab_s.c:418
solver_blok_s::LRblock
pastix_lrblock_t * LRblock
Definition: solver.h:121
solver_cblk_s::fblokptr
SolverBlok * fblokptr
Definition: solver.h:134
solver_blok_s::lcblknm
pastix_int_t lcblknm
Definition: solver.h:109
solver_cblk_s::stride
pastix_int_t stride
Definition: solver.h:135
pastix_lrblock_s::u
void * u
Definition: pastix_lowrank.h:115
solver_cblk_s
Solver column block structure.
Definition: solver.h:127
LR_OffDiag
@ LR_OffDiag
Definition: pastix_lowrank.h:168
pastix_coefside_t
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
pastix_gendirectories
void pastix_gendirectories(pastix_data_t *pastix_data)
Generate a unique temporary directory to store output files.
Definition: api.c:69
solver_blok_s
Solver block structure.
Definition: solver.h:107
cpucblk_suncompress
void cpucblk_suncompress(pastix_coefside_t side, SolverCblk *cblk)
Uncompress a single column block from low-rank format to full-rank format.
Definition: cpucblk_scompress.c:203
cpucblk_sdump
void cpucblk_sdump(pastix_coefside_t side, const SolverCblk *cblk, FILE *stream)
Dump a single column block into a FILE in a human readale format.
Definition: coeftab_sinit.c:51
FR_OffDiag
@ FR_OffDiag
Definition: pastix_lowrank.h:165
solver_cblk_s::ucoeftab
void * ucoeftab
Definition: solver.h:143
cpucblk_sgetschur
void cpucblk_sgetschur(const SolverCblk *cblk, int upper_part, float *S, pastix_int_t lds)
Extract a cblk panel of the Schur complement to a dense lapack form.
Definition: cpucblk_sschur.c:178
solver_cblk_s::sndeidx
pastix_int_t sndeidx
Definition: solver.h:139
solver_cblk_s::lcoeftab
void * lcoeftab
Definition: solver.h:142
pastix_scores.h
PastixUCoef
@ PastixUCoef
Definition: api.h:455
cpucblk_scompress
pastix_int_t cpucblk_scompress(const SolverMatrix *solvmtx, pastix_coefside_t side, int max_ilulvl, SolverCblk *cblk)
Compress a single column block from full-rank to low-rank format.
Definition: cpucblk_scompress.c:116
pastix_fopenw
FILE * pastix_fopenw(const char *dirname, const char *filename, const char *mode)
Open a file in the unique directory of the pastix instance.
Definition: api.c:232
coeftab_scompress
pastix_int_t coeftab_scompress(SolverMatrix *solvmtx)
Compress all the cblks marked as valid for low-rank format.
Definition: coeftab_s.c:174
LR_InSele
@ LR_InSele
Definition: pastix_lowrank.h:167
PastixLCoef
@ PastixLCoef
Definition: api.h:454
solver_cblk_s::gcblknum
pastix_int_t gcblknum
Definition: solver.h:140
pastix_lrblock_s::rk
int rk
Definition: pastix_lowrank.h:113
LR_InDiag
@ LR_InDiag
Definition: pastix_lowrank.h:166
solver_cblk_s::cblktype
int8_t cblktype
Definition: solver.h:130
PastixLUCoef
@ PastixLUCoef
Definition: api.h:456
solver_blok_s::inlast
int8_t inlast
Definition: solver.h:117
solver_cblk_s::fcolnum
pastix_int_t fcolnum
Definition: solver.h:132
PastixFactLU
@ PastixFactLU
Definition: api.h:302
LR_DInD
@ LR_DInD
Definition: pastix_lowrank.h:169
coeftab_sgetdiag
void coeftab_sgetdiag(const SolverMatrix *solvmtx, float *D, pastix_int_t incD)
Extract the diagonal.
Definition: coeftab_s.c:467
coeftab_s.h
coeftab_suncompress
void coeftab_suncompress(SolverMatrix *solvmtx)
Uncompress all column block in low-rank format into full-rank format.
Definition: coeftab_s.c:203
pastix_lrblock_s::rkmax
int rkmax
Definition: pastix_lowrank.h:114
coeftab_sdiff
int coeftab_sdiff(pastix_coefside_t side, const SolverMatrix *solvA, SolverMatrix *solvB)
Compare two solver matrices in full-rank format with the same data distribution.
Definition: coeftab_s.c:127
cpucblk_sdiff
int cpucblk_sdiff(pastix_coefside_t side, const SolverCblk *cblkA, SolverCblk *cblkB)
Compare two column blocks in full-rank format.
Definition: cpucblk_sdiff.c:63
cpucblk_smemory
void cpucblk_smemory(pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_int_t *orig, pastix_int_t *gain)
Return the memory gain of the low-rank form over the full-rank form for a single column-block.
Definition: cpucblk_scompress.c:283
coeftab_smemory
void coeftab_smemory(SolverMatrix *solvmtx)
Compute the memory gain of the low-rank form over the full-rank form for the entire matrix.
Definition: coeftab_s.c:232
solver_blok_s::fcblknm
pastix_int_t fcblknm
Definition: solver.h:110