PaStiX Handbook  6.3.2
symbol_fax_iluk.c
Go to the documentation of this file.
1 /**
2  *
3  * @file symbol_fax_iluk.c
4  *
5  * PaStiX symbolic factorization routines for ILU(k)
6  *
7  * @copyright 2004-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.2
11  * @author Pascal Henon
12  * @author Mathieu Faverge
13  * @date 2023-07-21
14  *
15  **/
16 #include "common.h"
17 #include "graph/graph.h"
18 #include "pastix/order.h"
19 #include "symbol/symbol.h"
20 #include "fax_csr.h"
21 
22 /**
23  *******************************************************************************
24  *
25  * @ingroup symbol_dev_fax
26  *
27  * @brief Create the symbol matrix from the graph of the non zero pattern of the
28  * factorized matrix and the supernode partition.
29  *
30  *******************************************************************************
31  *
32  * @param[in] symbptr
33  * TODO
34  *
35  * @param[in] levelk
36  * TODO
37  *
38  * @param[in] graphA
39  * TODO
40  *
41  * @param[out] ordeptr
42  * TODO
43  *
44  *******************************************************************************
45  *
46  * @retval 0 on success.
47  * @retval !0 on failure.
48  *
49  *******************************************************************************/
50 int
52  pastix_int_t levelk,
53  const pastix_graph_t *graphA,
54  const pastix_order_t *ordeptr )
55 {
56  pastix_int_t i, j, k, l;
57  pastix_int_t cblknum;
58  pastix_int_t ind;
59  pastix_int_t *tmp = NULL;
60  pastix_int_t *node2cblk = NULL;
61  pastix_int_t *ja = NULL;
62  pastix_int_t n, bloknbr, tmpsize;
63  pastix_int_t cblknbr;
64  pastix_int_t *rangtab;
65  fax_csr_t graphPA;
66  fax_csr_t graphL;
67 
68  assert( ordeptr->baseval == 0 );
69  cblknbr = ordeptr->cblknbr;
70  rangtab = ordeptr->rangtab;
71 
72  n = rangtab[cblknbr];
73  tmpsize = pastix_iceil( n, 2 );
74  MALLOC_INTERN( tmp, tmpsize, pastix_int_t );
75  MALLOC_INTERN( node2cblk, n, pastix_int_t );
76 
77  /*
78  * Let's regenerate the compresse graph of L:
79  * 1) Apply the permutation
80  * 2) Apply the symbolic ILUk
81  * 3) Compress the obtained L, with the partition previsouly computed at order stage
82  */
83  /* Create the graph of P A */
84  faxCSRGenPA( graphA, ordeptr->permtab, &graphPA );
85 
86  /* Create the graph of L */
87  memset( &graphL, 0, sizeof(fax_csr_t) );
88  faxCSRFactILUk( &graphPA, ordeptr, levelk, &graphL );
89  faxCSRClean( &graphPA );
90 
91  assert( cblknbr == graphL.n );
92 
93  /**** First we transform the P matrix to find the block ****/
94  for ( k = 0; k < cblknbr; k++ ) {
95  for ( i = rangtab[k]; i < rangtab[k + 1]; i++ ) {
96  node2cblk[i] = k;
97  }
98  }
99 
100  /* Let's update P to store all the couples (frownum,lrownum) in rows arrays */
101  bloknbr = 0;
102  for ( k = 0; k < cblknbr; k++ ) {
103  assert( graphL.nnz[k] >= ( rangtab[k + 1] - rangtab[k] ) );
104 
105 #if defined( PASTIX_DEBUG_SYMBOL )
106  for ( l = 0; l < rangtab[k + 1] - rangtab[k]; l++ ) {
107  assert( graphL.rows[k][l] == rangtab[k] + l );
108  assert( node2cblk[graphL.rows[k][l]] == k );
109  }
110 #endif
111 
112  ja = graphL.rows[k];
113  j = 0;
114  ind = 0;
115  while ( j < graphL.nnz[k] ) {
116  cblknum = node2cblk[ja[j]];
117  l = j + 1;
118  while ( ( l < graphL.nnz[k] ) && ( ja[l] == ja[l - 1] + 1 ) &&
119  ( node2cblk[ja[l]] == cblknum ) ) {
120  l++;
121  }
122 
123  assert( ind < tmpsize );
124  tmp[ind++] = ja[j];
125  tmp[ind++] = ja[l - 1];
126  assert( ( ja[l - 1] - ja[j] + 1 ) == ( l - j ) );
127 
128  j = l;
129  }
130 
131  graphL.nnz[k] = ind;
132  bloknbr += ind / 2;
133 
134  memFree( graphL.rows[k] );
135  MALLOC_INTERN( graphL.rows[k], ind, pastix_int_t );
136  memcpy( graphL.rows[k], tmp, ind * sizeof( pastix_int_t ) );
137  }
138 
139  memFree( tmp );
140 
141  assert( bloknbr == faxCSRGetNNZ( &graphL ) / 2 );
142 
143 #if defined( PASTIX_DEBUG_SYMBOL )
144  for ( k = 0; k < cblknbr; k++ ) {
145  assert( graphL.nnz[k] > 0 );
146  assert( graphL.rows[k][0] == rangtab[k] );
147  assert( ( graphL.rows[k][1] - graphL.rows[k][0] + 1 ) == ( rangtab[k + 1] - rangtab[k] ) );
148  }
149 #endif
150 
151  /**********************************/
152  /*** Compute the symbol matrix ****/
153  /**********************************/
154  symbptr->baseval = 0;
155  symbptr->cblknbr = cblknbr;
156  symbptr->bloknbr = bloknbr;
157  symbptr->nodenbr = n;
158  symbptr->browtab = NULL;
159 
160  MALLOC_INTERN( symbptr->cblktab, cblknbr + 1, symbol_cblk_t );
161  MALLOC_INTERN( symbptr->bloktab, symbptr->bloknbr, symbol_blok_t );
162 
163  ind = 0;
164  for ( k = 0; k < cblknbr; k++ ) {
165  symbptr->cblktab[k].fcolnum = rangtab[k];
166  symbptr->cblktab[k].lcolnum = rangtab[k + 1] - 1;
167  symbptr->cblktab[k].bloknum = ind;
168  symbptr->cblktab[k].brownum = -1;
169 
170  for ( i = 0; i < graphL.nnz[k]; i += 2 ) {
171  j = graphL.rows[k][i];
172  symbptr->bloktab[ind].frownum = j;
173  symbptr->bloktab[ind].lrownum = graphL.rows[k][i + 1];
174  symbptr->bloktab[ind].lcblknm = k;
175  symbptr->bloktab[ind].fcblknm = node2cblk[j];
176  ind++;
177 
178  assert( node2cblk[j] == node2cblk[graphL.rows[k][i + 1]] );
179  }
180 
181 #if defined( PASTIX_DEBUG_SYMBOL )
182  assert( symbptr->bloktab[symbptr->cblktab[k].bloknum].frownum ==
183  symbptr->cblktab[k].fcolnum );
184  assert( symbptr->bloktab[symbptr->cblktab[k].bloknum].lrownum ==
185  symbptr->cblktab[k].lcolnum );
186  assert( symbptr->bloktab[symbptr->cblktab[k].bloknum].fcblknm == k );
187 #endif
188  }
189 
190  /* virtual cblk to avoid side effect in the loops on cblk bloks */
191  symbptr->cblktab[cblknbr].fcolnum = symbptr->cblktab[cblknbr - 1].lcolnum + 1;
192  symbptr->cblktab[cblknbr].lcolnum = symbptr->cblktab[cblknbr - 1].lcolnum + 1;
193  symbptr->cblktab[cblknbr].bloknum = ind;
194  symbptr->cblktab[cblknbr].brownum = -1;
195 
196  assert( ind == symbptr->bloknbr );
197  memFree( node2cblk );
198 
199  return 0;
200 }
201 
202 /**
203  *******************************************************************************
204  *
205  * @ingroup symbol_dev_fax
206  *
207  * @brief Patch the symbol matrix to add blocks in order to get a
208  * real elimination tree.
209  *
210  * This function is called when ILU(k) factorization is
211  * performed and the faxCSRBuildSymbol() function might have returned a symbol
212  * matrix that doesn't provide a real elimination tree.
213  *
214  *******************************************************************************
215  *
216  * @param[inout] symbmtx
217  * On entry, a generated symbol matrix with faxCSRBuildSymbol() for example.
218  * On exit, the patched symbol matrix with extra blocks to have a real
219  * elimination tree.
220  *
221  *******************************************************************************/
222 void
224 {
225  pastix_int_t i, j, k;
226  pastix_int_t *father = NULL; /** For the cblk of the symbol matrix **/
227  symbol_blok_t *newbloktab = NULL;
228  symbol_cblk_t *cblktab = NULL;
229  symbol_blok_t *bloktab = NULL;
230  fax_csr_t Q;
231 
232  cblktab = symbmtx->cblktab;
233  bloktab = symbmtx->bloktab;
234 
235  MALLOC_INTERN( father, symbmtx->cblknbr, pastix_int_t );
236  MALLOC_INTERN( newbloktab, symbmtx->cblknbr + symbmtx->bloknbr, symbol_blok_t );
237 
238  faxCSRInit( symbmtx->cblknbr, &Q );
239 
240  /*
241  * Count how many extra-diagonal bloks are facing each diagonal blok
242  * Double-loop to avoid diagonal blocks.
243  */
244  for ( i = 0; i < symbmtx->cblknbr; i++ ) {
245  for ( j = cblktab[i].bloknum + 1; j < cblktab[i + 1].bloknum; j++ ) {
246  Q.nnz[bloktab[j].fcblknm]++;
247  }
248  }
249 
250  /* Allocate nFacingBlok integer for each diagonal blok */
251  for ( i = 0; i < symbmtx->cblknbr; i++ ) {
252  if ( Q.nnz[i] > 0 ) {
253  MALLOC_INTERN( Q.rows[i], Q.nnz[i], pastix_int_t );
254  }
255  else {
256  Q.rows[i] = NULL;
257  }
258  }
259 
260  memset( Q.nnz, 0, symbmtx->cblknbr * sizeof( pastix_int_t ) );
261 
262  /*
263  * Q.rows[k] will contain, for each extra-diagonal facing blok
264  * of the column blok k, its column blok.
265  */
266  for ( i = 0; i < symbmtx->cblknbr; i++ ) {
267  for ( j = cblktab[i].bloknum + 1; j < cblktab[i + 1].bloknum; j++ ) {
268  k = bloktab[j].fcblknm;
269  Q.rows[k][Q.nnz[k]++] = i;
270  }
271  }
272 
273  for ( i = 0; i < Q.n; i++ ) {
274  father[i] = -1;
275  }
276 
277  for ( i = 0; i < Q.n; i++ ) {
278  /*
279  * For each blok facing diagonal blok i, belonging to column blok k.
280  */
281  for ( j = 0; j < Q.nnz[i]; j++ ) {
282  k = Q.rows[i][j];
283  assert( k < i );
284 
285  while ( ( father[k] != -1 ) && ( father[k] != i ) ) {
286  k = father[k];
287  }
288  father[k] = i;
289  }
290  }
291 
292  for ( i = 0; i < Q.n; i++ ) {
293  if ( father[i] == -1 ) {
294  father[i] = i + 1;
295  }
296  }
297 
298  faxCSRClean( &Q );
299 
300  k = 0;
301  for ( i = 0; i < symbmtx->cblknbr - 1; i++ ) {
302  pastix_int_t odb, fbloknum;
303 
304  fbloknum = cblktab[i].bloknum;
305  memcpy( newbloktab + k, bloktab + fbloknum, sizeof( symbol_blok_t ) );
306  cblktab[i].bloknum = k;
307  k++;
308  odb = cblktab[i + 1].bloknum - fbloknum;
309  if ( odb <= 1 || bloktab[fbloknum + 1].fcblknm != father[i] ) {
310  /** Add a blok toward the father **/
311  newbloktab[k].frownum = cblktab[father[i]].fcolnum;
312  newbloktab[k].lrownum = cblktab[father[i]].fcolnum; /** OIMBE try lcolnum **/
313  newbloktab[k].lcblknm = i;
314  newbloktab[k].fcblknm = father[i];
315 #if defined( PASTIX_DEBUG_SYMBOL )
316  if ( father[i] != i ) {
317  assert( cblktab[father[i]].fcolnum > cblktab[i].lcolnum );
318  }
319 #endif
320  k++;
321  }
322 
323  if ( odb > 1 ) {
324  memcpy( newbloktab + k, bloktab + fbloknum + 1, sizeof( symbol_blok_t ) * ( odb - 1 ) );
325  k += odb - 1;
326  }
327  }
328 
329  /** Copy the last one **/
330  memcpy( newbloktab + k,
331  bloktab + symbmtx->cblktab[symbmtx->cblknbr - 1].bloknum,
332  sizeof( symbol_blok_t ) );
333  cblktab[symbmtx->cblknbr - 1].bloknum = k;
334  k++;
335  /** Virtual cblk **/
336  symbmtx->cblktab[symbmtx->cblknbr].bloknum = k;
337 
338 #if defined( PASTIX_DEBUG_SYMBOL )
339  assert( k >= symbmtx->bloknbr );
340  assert( k < symbmtx->cblknbr + symbmtx->bloknbr );
341 #endif
342  symbmtx->bloknbr = k;
343  memFree( symbmtx->bloktab );
344  MALLOC_INTERN( symbmtx->bloktab, k, symbol_blok_t );
345  memcpy( symbmtx->bloktab, newbloktab, sizeof( symbol_blok_t ) * symbmtx->bloknbr );
346  /* virtual cblk to avoid side effect in the loops on cblk bloks */
347  cblktab[symbmtx->cblknbr].bloknum = k;
348 
349  memFree( father );
350  memFree( newbloktab );
351 }
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
pastix_int_t baseval
Definition: order.h:48
pastix_int_t * permtab
Definition: order.h:51
pastix_int_t cblknbr
Definition: order.h:50
pastix_int_t * rangtab
Definition: order.h:53
Order structure.
Definition: order.h:47
pastix_int_t fcblknm
Definition: symbol.h:63
pastix_int_t frownum
Definition: symbol.h:60
pastix_int_t lrownum
Definition: symbol.h:61
pastix_int_t bloknbr
Definition: symbol.h:80
pastix_int_t * browtab
Definition: symbol.h:85
pastix_int_t baseval
Definition: symbol.h:78
symbol_cblk_t * cblktab
Definition: symbol.h:83
pastix_int_t bloknum
Definition: symbol.h:48
pastix_int_t brownum
Definition: symbol.h:49
pastix_int_t fcolnum
Definition: symbol.h:46
pastix_int_t lcolnum
Definition: symbol.h:47
symbol_blok_t * bloktab
Definition: symbol.h:84
pastix_int_t lcblknm
Definition: symbol.h:62
pastix_int_t nodenbr
Definition: symbol.h:81
pastix_int_t cblknbr
Definition: symbol.h:79
Symbol block structure.
Definition: symbol.h:59
Symbol column block structure.
Definition: symbol.h:45
Symbol matrix structure.
Definition: symbol.h:77
pastix_int_t faxCSRFactILUk(const fax_csr_t *graphA, const pastix_order_t *order, pastix_int_t level, fax_csr_t *graphL)
Compute the non zero pattern of the levelized incomplete factor for a sparse lower triangular matrix ...
Definition: fax_csr_iluk.c:57
int faxCSRGenPA(const pastix_graph_t *graphA, const pastix_int_t *perm, fax_csr_t *graphPA)
Generate the graph of P*A from the graph of A and the permutation vector.
Definition: fax_csr.c:183
pastix_int_t faxCSRGetNNZ(const fax_csr_t *csr)
Computes the number of non zero entries in the graph.
Definition: fax_csr.c:99
void faxCSRClean(fax_csr_t *csr)
Free the data store in the structure.
Definition: fax_csr.c:65
void faxCSRInit(pastix_int_t n, fax_csr_t *csr)
Initialize the data structure by doing the first allocations within the structure and initializing th...
Definition: fax_csr.c:40
Fax blocked csr structure.
Definition: fax_csr.h:25
void faxCSRPatchSymbol(symbol_matrix_t *symbmtx)
Patch the symbol matrix to add blocks in order to get a real elimination tree.
int pastixSymbolFaxILUk(symbol_matrix_t *symbptr, pastix_int_t levelk, const pastix_graph_t *graphA, const pastix_order_t *ordeptr)
Create the symbol matrix from the graph of the non zero pattern of the factorized matrix and the supe...