PaStiX Handbook  6.2.1
cpucblk_scompress.c
Go to the documentation of this file.
1 /**
2  *
3  * @file cpucblk_scompress.c
4  *
5  * Precision dependent function to compress/uncompress the coefficients
6  *
7  * @copyright 2015-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.2.0
11  * @author Gregoire Pichon
12  * @author Mathieu Faverge
13  * @author Esragul Korkmaz
14  * @date 2021-04-02
15  *
16  * @generated from /builds/solverstack/pastix/kernels/cpucblk_zcompress.c, normal z -> s, Tue Apr 12 09:38:40 2022
17  *
18  **/
19 #include "common/common.h"
20 #include "blend/solver.h"
21 #include <lapacke.h>
22 #include "kernels_trace.h"
23 #include "pastix_scores.h"
24 #include "pastix_slrcores.h"
25 
26 /**
27  *******************************************************************************
28  *
29  * @brief Compress a single block from full-rank to low-rank format
30  *
31  * The compression to low-rank format is parameterized by the input information
32  * stored in the low-rank structure.
33  *
34  *******************************************************************************
35  *
36  * @param[in] lowrank
37  * The pointer to the low-rank structure describing the lo-rank
38  * compression parameters.
39  *
40  * @param[in] M
41  * The number of rows in the block
42  *
43  * @param[in] N
44  * The number of columns in the block
45  *
46  * @param[inout] lrA
47  * The block to compress. On input, it points to a full-rank matrix. On
48  * output, if possible the matrix is compressed in block low-rank
49  * format.
50  *
51  *******************************************************************************
52  *
53  * @return The number of flops used to compress the block.
54  *
55  *******************************************************************************/
56 pastix_fixdbl_t
58  pastix_int_t M, pastix_int_t N,
59  pastix_lrblock_t *lrA )
60 {
61  pastix_fixdbl_t flops;
62  float *A = lrA->u;
63 
64  if ( lrA->rk != -1 ) {
65  return 0.;
66  }
67  assert( lrA->u != NULL );
68  assert( lrA->v == NULL );
69 
70  kernel_trace_start_lvl2( PastixKernelLvl2_LR_init_compress );
71  flops = lowrank->core_ge2lr( lowrank->use_reltol, lowrank->tolerance, -1,
72  M, N, A, M, lrA );
73  kernel_trace_stop_lvl2_rank( flops, lrA->rk );
74 
75  assert( A != lrA->u );
76  free( A );
77 
78  return flops;
79 }
80 
81 /**
82  *******************************************************************************
83  *
84  * @brief Compress a single column block from full-rank to low-rank format
85  *
86  * The compression to low-rank format is parameterized by the input information
87  * stored in the low-rank structure.
88  *
89  *******************************************************************************
90  *
91  * @param[in] solvmtx
92  * The pointer to the solver structure.
93  *
94  * @param[in] side
95  * Define which side of the matrix must be initialized.
96  * @arg PastixLCoef if lower part only
97  * @arg PastixUCoef if upper part only
98  * @arg PastixLUCoef if both sides.
99  *
100  * @param[inout] cblk
101  * The column block to compress.
102  *
103  *******************************************************************************
104  *
105  * @return The memory gain resulting from the compression to low-rank format in
106  * number of elements.
107  *
108  *******************************************************************************/
109 pastix_int_t
110 cpucblk_scompress( const SolverMatrix *solvmtx,
111  pastix_coefside_t side,
112  int max_ilulvl,
113  SolverCblk *cblk )
114 {
115  pastix_lrblock_t *lrA;
116  SolverBlok *blok = cblk[0].fblokptr + 1;
117  SolverBlok *lblok = cblk[1].fblokptr;
118  pastix_int_t ncols = cblk_colnbr( cblk );
119  pastix_int_t gain;
120  pastix_int_t gainL = 0;
121  pastix_int_t gainU = 0;
122  const pastix_lr_t *lowrank = &(solvmtx->lowrank);
123 
124  assert( cblk->cblktype & CBLK_LAYOUT_2D );
125  assert( cblk->cblktype & CBLK_COMPRESSED );
126 
127  if ( ncols < lowrank->compress_min_width ) {
128  return 0;
129  }
130 
131  for (; blok<lblok; blok++)
132  {
133  pastix_int_t nrows = blok_rownbr( blok );
134  int is_preselected = ( blok->iluklvl <= max_ilulvl );
135 
136  /* Skip uncompressible blocks */
137  if ( nrows < lowrank->compress_min_height ) {
138  continue;
139  }
140 
141  if ( is_preselected ) {
142  continue;
143  }
144 
145  gain = nrows * ncols;
146 
147  /* Lower part */
148  if ( side != PastixUCoef ) {
149  lrA = blok->LRblock[0];
150 
151  /* Try to compress non selected blocks */
152  if ( lrA->rk == -1 ) {
153  cpublok_scompress( lowrank, nrows, ncols, lrA );
154  }
155 
156  if ( lrA->rk != -1 ) {
157  gainL += gain - ((nrows+ncols) * lrA->rk);
158  }
159  }
160 
161  /* Upper part */
162  if ( side != PastixLCoef ) {
163  lrA = blok->LRblock[1];
164 
165  if ( lrA->rk == -1 ) {
166  cpublok_scompress( lowrank, nrows, ncols, lrA );
167  }
168 
169  if ( lrA->rk != -1 ) {
170  gainU += gain - ((nrows+ncols) * lrA->rk);
171  }
172  }
173  }
174 
175  return gainL + gainU;
176 }
177 
178 /**
179  *******************************************************************************
180  *
181  * @brief Uncompress a single column block from low-rank format to full-rank
182  * format.
183  *
184  *******************************************************************************
185  *
186  * @param[in] side
187  * Define which side of the matrix must be initialized.
188  * @arg PastixLCoef if lower part only
189  * @arg PastixUCoef if upper part only
190  * @arg PastixLUCoef if both sides.
191  *
192  * @param[inout] cblk
193  * The column block to uncompress.
194  *
195  *******************************************************************************/
196 void
198  SolverCblk *cblk )
199 {
200  SolverBlok *blok, *lblok;
201  pastix_int_t ncols = cblk_colnbr( cblk );
202  int ret;
203 
204  if ( side != PastixUCoef ) {
205  blok = cblk[0].fblokptr;
206  lblok = cblk[1].fblokptr;
207  for (; blok<lblok; blok++)
208  {
209  pastix_lrblock_t lrtmp;
210  pastix_int_t nrows = blok_rownbr( blok );
211 
212  memcpy( &lrtmp, blok->LRblock[0], sizeof(pastix_lrblock_t) );
213 
214  core_slralloc( nrows, ncols, -1, blok->LRblock[0] );
215  ret = core_slr2ge( PastixNoTrans, nrows, ncols,
216  &lrtmp,
217  blok->LRblock[0]->u, nrows );
218  assert( ret == 0 );
219  core_slrfree( &lrtmp );
220  }
221  }
222 
223  if ( side != PastixLCoef ) {
224  blok = cblk[0].fblokptr;
225  lblok = cblk[1].fblokptr;
226  for (; blok<lblok; blok++)
227  {
228  pastix_lrblock_t lrtmp;
229  pastix_int_t nrows = blok_rownbr( blok );
230 
231  memcpy( &lrtmp, blok->LRblock[1], sizeof(pastix_lrblock_t) );
232 
233  core_slralloc( nrows, ncols, -1, blok->LRblock[1] );
234  ret = core_slr2ge( PastixNoTrans, nrows, ncols,
235  &lrtmp,
236  blok->LRblock[1]->u, nrows );
237  assert( ret == 0 );
238  core_slrfree( &lrtmp );
239  }
240  }
241 
242  (void)ret;
243 }
244 
245 /**
246  *******************************************************************************
247  *
248  * @brief Return the memory gain of the low-rank form over the full-rank form
249  * for a single column-block.
250  *
251  * This function returns the memory gain in number of elements for a single
252  * column block when it is stored in low-rank format compared to a full rank
253  * storage.
254  *
255  *******************************************************************************
256  *
257  * @param[in] side
258  * Define which side of the matrix must be initialized.
259  * @arg PastixLCoef if lower part only
260  * @arg PastixUCoef if upper part only
261  * @arg PastixLUCoef if both sides.
262  *
263  * @param[in] solvmtx
264  * The pointer to the solver structure.
265  *
266  * @param[in] cblk
267  * The column block to study.
268  *
269  * @param[in,out] orig
270  * The structure that counts the original cost of the blocks.
271  *
272  * @param[in,out] gain
273  * The structure that counts gain on each type of the blocks.
274  *
275  *******************************************************************************/
276 void
278  SolverMatrix *solvmtx,
279  SolverCblk *cblk,
280  pastix_int_t *orig,
281  pastix_int_t *gain )
282 {
283  SolverBlok *blok = cblk[0].fblokptr + 1;
284  SolverBlok *lblok = cblk[1].fblokptr;
285 
286  pastix_int_t ncols = cblk_colnbr( cblk );
287  pastix_int_t size;
288  pastix_int_t origblok;
289  pastix_int_t gainblok, gaintmp;
290 
291  assert( cblk->ownerid == solvmtx->clustnum );
292 
293  /* Compute potential gains if blocks where not compressed */
294  if ( cblk->cblktype & CBLK_COMPRESSED ) {
295  int ilu_lvl = solvmtx->lowrank.compress_preselect ? -1 : solvmtx->lowrank.ilu_lvl;
296  cpucblk_scompress( solvmtx, side, ilu_lvl, cblk );
297  }
298 
299  for (; blok<lblok; blok++)
300  {
301  const SolverCblk *fcblk = solvmtx->cblktab + blok->fcblknm;
302  pastix_int_t nrows = blok_rownbr( blok );
303  size = nrows * ncols;
304  gainblok = 0;
305  origblok = size;
306 
307  /* Lower part */
308  if ( (side != PastixUCoef) &&
309  (blok->LRblock[0]->rk >= 0) )
310  {
311  gaintmp = (size - ((nrows+ncols) * blok->LRblock[0]->rkmax));
312  assert( gaintmp >= 0 );
313  gainblok += gaintmp;
314  }
315 
316  /* Upper part */
317  if ( (side != PastixLCoef) &&
318  (blok->LRblock[1]->rk >= 0) )
319  {
320  gaintmp = (size - ((nrows+ncols) * blok->LRblock[1]->rkmax));
321  assert( gaintmp >= 0 );
322  gainblok += gaintmp;
323  }
324 
325  if ( blok_is_preselected( cblk, blok, fcblk ) )
326  {
327  /* Selected block should always be inside supernode diagonal blocks */
328  assert( fcblk->sndeidx == cblk->sndeidx );
329  gain[LR_InSele] += gainblok;
330  orig[LR_InSele] += origblok;
331  }
332  else{
333  if ( fcblk->sndeidx == cblk->sndeidx ) {
334  gain[LR_InDiag] += gainblok;
335  orig[LR_InDiag] += origblok;
336  }
337  else {
338  gain[LR_OffDiag] += gainblok;
339  orig[LR_OffDiag] += origblok;
340  }
341  }
342  }
343  return;
344 }
solver_cblk_s::ownerid
int ownerid
Definition: solver.h:146
solver.h
blok_rownbr
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
Definition: solver.h:313
cblk_colnbr
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:247
pastix_lr_s
Structure to define the type of function to use for the low-rank kernels and their parameters.
Definition: pastix_lowrank.h:147
solver_cblk_s::fblokptr
SolverBlok * fblokptr
Definition: solver.h:134
pastix_lrblock_s::v
void * v
Definition: pastix_lowrank.h:116
pastix_lrblock_s::u
void * u
Definition: pastix_lowrank.h:115
cpublok_scompress
pastix_fixdbl_t cpublok_scompress(const pastix_lr_t *lowrank, pastix_int_t M, pastix_int_t N, pastix_lrblock_t *lrA)
Compress a single block from full-rank to low-rank format.
Definition: cpucblk_scompress.c:57
pastix_lr_s::use_reltol
int use_reltol
Definition: pastix_lowrank.h:153
solver_cblk_s
Solver column block structure.
Definition: solver.h:127
LR_OffDiag
@ LR_OffDiag
Definition: pastix_lowrank.h:168
pastix_coefside_t
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
core_slralloc
void core_slralloc(pastix_int_t M, pastix_int_t N, pastix_int_t rkmax, pastix_lrblock_t *A)
Allocate a low-rank matrix.
Definition: core_sgelrops.c:56
solver_blok_s
Solver block structure.
Definition: solver.h:107
cpucblk_suncompress
void cpucblk_suncompress(pastix_coefside_t side, SolverCblk *cblk)
Uncompress a single column block from low-rank format to full-rank format.
Definition: cpucblk_scompress.c:197
pastix_lrblock_s
The block low-rank structure to hold a matrix in low-rank form.
Definition: pastix_lowrank.h:112
core_slrfree
void core_slrfree(pastix_lrblock_t *A)
Free a low-rank matrix.
Definition: core_sgelrops.c:116
PastixNoTrans
@ PastixNoTrans
Definition: api.h:424
solver_blok_s::iluklvl
int iluklvl
Definition: solver.h:118
solver_cblk_s::sndeidx
pastix_int_t sndeidx
Definition: solver.h:139
pastix_lr_s::core_ge2lr
fct_ge2lr_t core_ge2lr
Definition: pastix_lowrank.h:157
blok_is_preselected
static int blok_is_preselected(const SolverCblk *cblk, const SolverBlok *blok, const SolverCblk *fcbk)
Return if a block is preselected as either part of the projection, or as a sub-diagonal block.
Definition: solver.h:350
pastix_scores.h
PastixUCoef
@ PastixUCoef
Definition: api.h:457
cpucblk_scompress
pastix_int_t cpucblk_scompress(const SolverMatrix *solvmtx, pastix_coefside_t side, int max_ilulvl, SolverCblk *cblk)
Compress a single column block from full-rank to low-rank format.
Definition: cpucblk_scompress.c:110
LR_InSele
@ LR_InSele
Definition: pastix_lowrank.h:167
PastixLCoef
@ PastixLCoef
Definition: api.h:456
pastix_slrcores.h
pastix_lrblock_s::rk
int rk
Definition: pastix_lowrank.h:113
LR_InDiag
@ LR_InDiag
Definition: pastix_lowrank.h:166
pastix_lr_s::tolerance
double tolerance
Definition: pastix_lowrank.h:155
solver_cblk_s::cblktype
int8_t cblktype
Definition: solver.h:130
solver_blok_s::LRblock
pastix_lrblock_t * LRblock[2]
Definition: solver.h:121
core_slr2ge
int core_slr2ge(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, const pastix_lrblock_t *Alr, float *A, pastix_int_t lda)
Convert a low rank matrix into a dense matrix.
Definition: core_sgelrops.c:300
pastix_lrblock_s::rkmax
int rkmax
Definition: pastix_lowrank.h:114
cpucblk_smemory
void cpucblk_smemory(pastix_coefside_t side, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_int_t *orig, pastix_int_t *gain)
Return the memory gain of the low-rank form over the full-rank form for a single column-block.
Definition: cpucblk_scompress.c:277
solver_blok_s::fcblknm
pastix_int_t fcblknm
Definition: solver.h:110