PaStiX Handbook  6.4.0
cand.c
Go to the documentation of this file.
1 /**
2  *
3  * @file cand.c
4  *
5  * PaStiX analyse functions to manipulate candidates on the elimination tree
6  * structure.
7  *
8  * @copyright 2004-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
9  * Univ. Bordeaux. All rights reserved.
10  *
11  * @version 6.4.0
12  * @author Pascal Henon
13  * @author Mathieu Faverge
14  * @author Gregoire Pichon
15  * @author Pierre Ramet
16  * @date 2024-07-05
17  *
18  * @addtogroup blend_dev_cand
19  * @{
20  * This module contains all subroutines to initialize the candidates array
21  * for each supernode, as well as supernode properties that are defined by
22  * level such as 2D layouts and 2D tasks.
23  *
24  **/
25 #include "common.h"
26 #include "symbol/symbol.h"
27 #include "blend/elimintree.h"
28 #include "blend/cost.h"
29 #include "blend/cand.h"
30 #include "blend/solver.h"
31 
32 /**
33  *******************************************************************************
34  *
35  * @brief Initialize the candtab array with default values.
36  *
37  *******************************************************************************
38  *
39  * @param[in] cblknbr
40  * The size of the candtab array.
41  *
42  *******************************************************************************
43  *
44  * @return The array of size cblknbr+1 of Cand structure initialized.
45  *
46  *******************************************************************************/
47 Cand *
49 {
50  Cand *candtab, *cand;
51  pastix_int_t i;
52 
53  MALLOC_INTERN( candtab, cblknbr+1, Cand );
54  cand = candtab;
55 
56  for(i=-1;i<cblknbr;i++, cand++)
57  {
58  cand->costlevel = 0.0;
59  cand->treelevel = 0;
60  cand->fcandnum = -1;
61  cand->lcandnum = -1;
62  cand->fccandnum = -1;
63  cand->lccandnum = -1;
64  cand->cluster = -1;
65  cand->cblktype = CBLK_LAYOUT_2D | CBLK_TASKS_2D;
66  }
67 
68  return candtab+1;
69 }
70 
71 /**
72  *******************************************************************************
73  *
74  * @brief Exit and free the candtab structure given
75  *
76  *******************************************************************************
77  *
78  * @param[inout] candtab
79  * The candtab array to free.
80  *
81  *******************************************************************************/
82 void
83 candExit( Cand *candtab )
84 {
85  candtab--;
86  memFree_null( candtab );
87 }
88 
89 /**
90  *******************************************************************************
91  *
92  * @brief Print the candidates array into the candtab.txt file
93  *
94  *******************************************************************************
95  *
96  * @param[in] candtab
97  * The array of size cblknbr to print in the file.
98  *
99  * @param[in] cblknbr
100  * The size of the candtab array.
101  *
102  * @param[inout] directory
103  * Directory where to store the file. If NULL, initialized to
104  * pastix-XXXXXX, with XXXXXX a random generated string.
105  *
106  *******************************************************************************/
107 void
108 candSave( const Cand *candtab,
109  pastix_int_t cblknbr,
110  const char *directory )
111 {
112  pastix_int_t i;
113  FILE *f = NULL;
114 
115  f = pastix_fopenw( directory, "candtab.txt", "w" );
116 
117  fprintf(f, "%ld\n", (long)cblknbr );
118  for(i=-1;i<cblknbr;i++)
119  {
120  fprintf(f, "%lf %ld %ld %ld %ld %ld %ld %ld\n",
121  (double)candtab[i].costlevel,
122  (long)candtab[i].treelevel,
123  (long)candtab[i].fcandnum,
124  (long)candtab[i].lcandnum,
125  (long)candtab[i].fccandnum,
126  (long)candtab[i].lccandnum,
127  (long)candtab[i].cluster,
128  (long)candtab[i].cblktype );
129  }
130  fclose(f);
131 }
132 
133 /**
134  *******************************************************************************
135  *
136  * @brief Set the clusters candidates from the cores canditates
137  *
138  *******************************************************************************
139  *
140  * @param[inout] candtab
141  * On entry, the array of candidates with the first and last core
142  * candidates initialized.
143  * On exit, the array of candidate with the first and last cluster
144  * candidate information updated.
145  *
146  * @param[in] cblknbr
147  * The size of the candtab array.
148  *
149  * @param[in] core2clust
150  * An array that defines the cluster (MPI process) that owns each core
151  * candidate.
152  *
153  * @param[in] coresnbr
154  * The size of the core2clust array.
155  *
156  *******************************************************************************/
157 void
159  pastix_int_t cblknbr,
160  const pastix_int_t *core2clust,
161  pastix_int_t coresnbr )
162 {
163  pastix_int_t i;
164  (void)coresnbr;
165 
166  assert( candtab[-1].fcandnum == 0 );
167  assert( candtab[-1].lcandnum == coresnbr-1 );
168  candtab[-1].fccandnum = core2clust[ candtab[-1].fcandnum ];
169  candtab[-1].lccandnum = core2clust[ candtab[-1].lcandnum ];
170 
171  for(i=0; i<cblknbr; i++) {
172  assert( candtab[i].fcandnum >= 0 );
173  assert( candtab[i].lcandnum >= 0 );
174  assert( candtab[i].fcandnum < coresnbr );
175  assert( candtab[i].lcandnum < coresnbr );
176  candtab[i].fccandnum = core2clust[ candtab[i].fcandnum ];
177  candtab[i].lccandnum = core2clust[ candtab[i].lcandnum ];
178  }
179 }
180 
181 /**
182  *******************************************************************************
183  *
184  * @brief Check the correctness of the computed candidates
185  *
186  * Each node of the elimination tree must have a set of candidates included in
187  * its father's set.
188  *
189  *******************************************************************************
190  *
191  * @param[in] candtab
192  * On entry, the array of candidates to check.
193  *
194  * @param[in] symbmtx
195  * The symbol matrix structure associated to the candidate array.
196  *
197  *******************************************************************************
198  *
199  * @retval 0 if bad candidat set appear.
200  * @retval 1 if success.
201  *
202  *******************************************************************************/
203 int
204 candCheck( const Cand *candtab,
205  const symbol_matrix_t *symbmtx )
206 {
207  pastix_int_t i, j;
208  pastix_int_t facecblknum;
209 
210  for(i=0; i<symbmtx->cblknbr; i++)
211  {
212  for(j = symbmtx->cblktab[i].bloknum;
213  j < symbmtx->cblktab[i+1].bloknum; j++)
214  {
215  facecblknum = symbmtx->bloktab[j].fcblknm;
216 
217  if( (candtab[i].fcandnum < candtab[facecblknum].fcandnum) ||
218  (candtab[i].lcandnum > candtab[facecblknum].lcandnum) )
219  {
220  pastix_print_error( "bad processor candidat sets : cblk %ld candidat =[%ld %ld] father %ld candidat = [%ld %ld].",
221  (long)i, (long)candtab[i].fcandnum, (long)candtab[i].lcandnum,
222  (long)facecblknum, (long)candtab[facecblknum].fcandnum,
223  (long)candtab[facecblknum].lcandnum);
224  return 0;
225  }
226  }
227  }
228  return 1;
229 }
230 
231 /**
232  *******************************************************************************
233  *
234  * @brief Recursive function to update the cost fields of the both the candtab
235  * array, and the elimination tree structure.
236  *
237  *******************************************************************************
238  *
239  * @param[in] rootnum
240  * Root of the subtree.
241  *
242  * @param[inout] candtab
243  * Pointer to the global candtab array where fields treelevel and
244  * costlevel are updated. Treelevel represent the depth of the
245  * node in the elimination tree, and costlevel the cost of the
246  * path from rootnum to the root of the tree.
247  *
248  * @param[inout] etree
249  * Pointer to the global elimination tree structure. The node
250  * fields total and subtree are updated. Total represents the
251  * total cost of of the current node only, and subtree the cost of
252  * the current node and all its descendants.
253  *
254  * @param[in] symbmtx
255  * Pointer to the symbol matrix we are working with.
256  *
257  * @param[in] costmtx
258  * Pointer to the cost matrix associated to the symbol matrix and
259  * that holds the cost of each cblk and blok.
260  *
261  * @param[out] cripath
262  * On exit, contains the length of the critical path of the
263  * subtree.
264  *
265  *******************************************************************************
266  *
267  * @return The cost of the subtree.
268  *
269  *******************************************************************************/
270 static inline double
272  Cand *candtab,
273  EliminTree *etree,
274  const symbol_matrix_t *symbmtx,
275  const CostMatrix *costmtx,
276  double *cripath )
277 {
278  double cost, mycp = 0.0;
279  pastix_int_t i, son, sonsnbr;
280 
281  /* Get costs of current node */
282  if ( rootnum == -1 ) {
283  etree->nodetab[ rootnum ].ndecost = 0.;
284  }
285  else {
286  etree->nodetab[ rootnum ].ndecost = costmtx->cblkcost[rootnum];
287  }
288 
289  mycp = etree->nodetab[ rootnum ].ndepath;
290  cost = etree->nodetab[ rootnum ].ndecost;
291 
292  etree->nodetab[ rootnum ].subpath = mycp;
293  etree->nodetab[ rootnum ].subcost = cost;
294 
295  sonsnbr = etree->nodetab[rootnum].sonsnbr;
296  mycp = 0.0;
297 
298  for(i=0; i<sonsnbr; i++)
299  {
300  double soncp = 0.0;
301 
302  son = eTreeSonI(etree, rootnum, i);
303  candtab[ son ].treelevel = candtab[ rootnum ].treelevel - 1;
304  candtab[ son ].costlevel = candtab[ rootnum ].costlevel - cost;
305 
306  etree->nodetab[ rootnum ].subcost +=
307  candSubTreeBuild( son, candtab, etree, symbmtx, costmtx, &soncp );
308 
309  mycp = (mycp > soncp) ? mycp : soncp;
310  }
311 
312  etree->nodetab[ rootnum ].subpath += mycp;
313 
314  *cripath = etree->nodetab[ rootnum ].subpath;
315 
316  assert( etree->nodetab[ rootnum ].subpath <= etree->nodetab[ rootnum ].subcost );
317  assert( etree->nodetab[ rootnum ].ndepath <= etree->nodetab[ rootnum ].ndecost );
318  assert( etree->nodetab[ rootnum ].subpath >= etree->nodetab[ rootnum ].ndepath );
319  assert( etree->nodetab[ rootnum ].subcost >= etree->nodetab[ rootnum ].ndecost );
320 
321  return etree->nodetab[ rootnum ].subcost;
322 }
323 
324 /**
325  *******************************************************************************
326  *
327  * @brief Recursive function to compute the distribution of the nodes among the
328  * different levels.
329  *
330  * This function defines which cblk are candidates to be stored with a 2D
331  * layout, computed as 2D tasks, compressible, and/or be part of the Schur
332  * complement. The criterion to remove the 2D task flag or the LR flag is the
333  * width of the cblk. As soon as one cblk that does not match the criterion is
334  * found, all its descendant loose the flag too.
335  *
336  *******************************************************************************
337  *
338  * @param[in] rootnum
339  * Root of the subtree.
340  *
341  * @param[in] cblktype
342  * List of flags that can be forwarded to rootnum and its
343  * descendants.
344  *
345  * @param[in] ratiolimit2D
346  * Ratio that defines the minimal size to allow the flag 2D to be
347  * forwarded to the sons.
348  *
349  * @param[in] ratiolimitLR
350  * Ratio that defines the minimal size to allow the flag LR to be
351  * forwarded to the sons.
352  *
353  * @param[inout] candtab
354  * Pointer to the global candtab array where field cblktype is
355  * updated. cblktype defines the optimization/properties that are
356  * defined on each cblk and which are defined by level in the
357  * tree.
358  *
359  * @param[in] etree
360  * Pointer to the global elimination tree structure that is used
361  * to travel through the cblk, and affect the properies with the
362  * correct filiation property.
363  *
364  * @param[in] symbmtx
365  * Pointer to the symbol matrix we are working with.
366  *
367  *******************************************************************************/
368 void
370  pastix_int_t cblktype,
371  pastix_int_t ratiolimit2D,
372  pastix_int_t ratiolimitLR,
373  Cand *candtab,
374  const EliminTree *etree,
375  const symbol_matrix_t *symbmtx )
376 {
377  pastix_int_t i, son;
378  pastix_int_t width = symbmtx->cblktab[ rootnum ].lcolnum - symbmtx->cblktab[ rootnum ].fcolnum + 1;
379 
380  if ( (cblktype & CBLK_IN_SCHUR) &&
381  (symbmtx->cblktab[ rootnum ].lcolnum < symbmtx->schurfcol) )
382  {
383  cblktype &= ~(CBLK_IN_SCHUR);
384  }
385 
386  if( (cblktype & CBLK_TASKS_2D) && (width < ratiolimit2D) ) {
387  cblktype = cblktype & (~CBLK_TASKS_2D);
388  }
389 
390  if( (cblktype & CBLK_COMPRESSED) && (width < ratiolimitLR) ) {
391  cblktype = cblktype & (~CBLK_COMPRESSED);
392  }
393 
394  candtab[ rootnum ].cblktype = cblktype;
395 
396  for(i=0; i<etree->nodetab[rootnum].sonsnbr; i++)
397  {
398  son = eTreeSonI(etree, rootnum, i);
399  candSubTreeDistribFirstWidth( son, candtab[ rootnum ].cblktype,
400  ratiolimit2D, ratiolimitLR,
401  candtab, etree, symbmtx );
402  }
403 }
404 
405 /**
406  *******************************************************************************
407  *
408  * @brief Recursive function to compute the distribution of the nodes among the
409  * different levels.
410  *
411  * This function defines which cblk are candidates to be stored with a 2D
412  * layout, computed as 2D tasks, compressible, and/or be part of the Schur
413  * complement. The criterion to remove the 2D task flag and the LR flag is the
414  * width of the cblk.
415  * This function enables the flags to the deepest nodes that respect the
416  * condition, as well as to all their ascendants in the elimination tree.
417  *
418  *******************************************************************************
419  *
420  * @param[in] rootnum
421  * Root of the subtree.
422  *
423  * @param[in] cblktype
424  * List of flags that can be forwarded to rootnum and its
425  * descendants.
426  *
427  * @param[in] ratiolimit2D
428  * Ratio that defines the minimal size to allow the flag 2D for a
429  * cblk and its ascendants.
430  *
431  * @param[in] ratiolimitLR
432  * Ratio that defines the minimal size to allow the flag LR for a
433  * cblk and its ascendants.
434  *
435  * @param[inout] candtab
436  * Pointer to the global candtab array where field cblktype is
437  * updated. cblktype defines the optimization/properties that are
438  * defined on each cblk and which are defined by level in the
439  * tree.
440  *
441  * @param[in] etree
442  * Pointer to the global elimination tree structure that is used
443  * to travel through the cblk, and affect the properies with the
444  * correct filiation property.
445  *
446  * @param[in] symbmtx
447  * Pointer to the symbol matrix we are working with.
448  *
449  *******************************************************************************
450  *
451  * @return the cblktype flag of the root of the subtree.
452  *
453  *******************************************************************************/
456  pastix_int_t cblktype,
457  pastix_int_t ratiolimit2D,
458  pastix_int_t ratiolimitLR,
459  Cand *candtab,
460  const EliminTree *etree,
461  const symbol_matrix_t *symbmtx )
462 {
463  pastix_int_t i, son;
464  pastix_int_t sonstype = 0;
465  pastix_int_t width = symbmtx->cblktab[ rootnum ].lcolnum - symbmtx->cblktab[ rootnum ].fcolnum + 1;
466 
467  if ( (cblktype & CBLK_IN_SCHUR) &&
468  (symbmtx->cblktab[ rootnum ].lcolnum < symbmtx->schurfcol) )
469  {
470  cblktype &= ~(CBLK_IN_SCHUR);
471  }
472 
473  for(i=0; i<etree->nodetab[rootnum].sonsnbr; i++)
474  {
475  son = eTreeSonI(etree, rootnum, i);
476  sonstype |= candSubTreeDistribDeepestWidth( son, cblktype,
477  ratiolimit2D, ratiolimitLR,
478  candtab, etree, symbmtx );
479  }
480 
481  if( (cblktype & CBLK_TASKS_2D) && (width < ratiolimit2D) ) {
482  cblktype = cblktype & (~CBLK_TASKS_2D);
483  }
484 
485  if( (cblktype & CBLK_COMPRESSED) && (width < ratiolimitLR) ) {
486  cblktype = cblktype & (~CBLK_COMPRESSED);
487  }
488 
489  candtab[ rootnum ].cblktype = cblktype | sonstype;
490  return candtab[ rootnum ].cblktype;
491 }
492 
493 /**
494  *******************************************************************************
495  *
496  * @brief Recursive function to compute the distribution of the nodes among the
497  * different levels based on depth.
498  *
499  * This function defines which cblk are candidates to be stored with a 2D layout,
500  * computed as 2D tasks, and/or be part of the Schur complement. The criterion to
501  * remove the 2D task flag is the depth on the elimination tree.
502  *
503  *******************************************************************************
504  *
505  * @param[in] rootnum
506  * Root of the subtree.
507  *
508  * @param[in] cblktype
509  * List of flags that can be forwarded to rootnum and its
510  * descendants.
511  *
512  * @param[in] level2D
513  * The number of levels of the tree that will be flagged as 2D
514  * tasks.
515  *
516  * @param[in] ratiolimitLR
517  * Ratio that defines the minimal size to allow the flag LR for a
518  * cblk and its ascendants.
519  *
520  * @param[inout] candtab
521  * Pointer to the global candtab array where field cblktype is
522  * updated. cblktype defines the optimization/properties that are
523  * defined on each cblk and which are defined by level in the
524  * tree.
525  *
526  * @param[in] etree
527  * Pointer to the global elimination tree structure that is used
528  * to travel through the cblk, and affect the properies with the
529  * correct filiation property.
530  *
531  * @param[in] symbmtx
532  * Pointer to the symbol matrix we are working with.
533  *
534  *******************************************************************************
535  *
536  * @return the cblktype flag of the root of the subtree.
537  *
538  *******************************************************************************/
541  pastix_int_t cblktype,
542  pastix_int_t level2D,
543  pastix_int_t ratiolimitLR,
544  Cand *candtab,
545  const EliminTree *etree,
546  const symbol_matrix_t *symbmtx )
547 {
548  pastix_int_t i, son;
549  pastix_int_t sonstype = 0;
550  pastix_int_t width = symbmtx->cblktab[ rootnum ].lcolnum - symbmtx->cblktab[ rootnum ].fcolnum + 1;
551 
552  if ( (cblktype & CBLK_IN_SCHUR) &&
553  (symbmtx->cblktab[ rootnum ].lcolnum < symbmtx->schurfcol) )
554  {
555  cblktype &= ~(CBLK_IN_SCHUR);
556  }
557 
558  if( (cblktype & CBLK_TASKS_2D) && (level2D <= 0) ) {
559  cblktype = cblktype & (~CBLK_TASKS_2D);
560  }
561 
562  level2D--;
563  for(i=0; i<etree->nodetab[rootnum].sonsnbr; i++)
564  {
565  son = eTreeSonI(etree, rootnum, i);
566  sonstype |= candSubTreeDistribDeepestLevel( son, cblktype,
567  level2D, ratiolimitLR,
568  candtab, etree, symbmtx );
569  }
570 
571  if( (cblktype & CBLK_COMPRESSED) && (width < ratiolimitLR) ) {
572  cblktype = cblktype & (~CBLK_COMPRESSED);
573  }
574 
575  candtab[ rootnum ].cblktype = cblktype | sonstype;
576  return candtab[ rootnum ].cblktype;
577 }
578 
579 /**
580  *******************************************************************************
581  *
582  * @brief Recursive function to compute the distribution of the nodes among the
583  * different levels based on depth.
584  *
585  * This function defines which cblk are candidates to be stored with a 2D layout,
586  * computed as 2D tasks, and/or be part of the Schur complement. The criterion to
587  * remove the 2D task flag is the depth on the elimination tree.
588  *
589  *******************************************************************************
590  *
591  * @param[in] rootnum
592  * Root of the subtree.
593  *
594  * @param[in] cblktype
595  * List of flags that can be forwarded to rootnum and its
596  * descendants.
597  *
598  * @param[in] level2D
599  * The number of levels of the tree that will be flagged as 2D
600  * tasks.
601  *
602  * @param[in] ratiolimitLR
603  * Ratio that defines the minimal size to allow the flag LR for a
604  * cblk and its ascendants.
605  *
606  * @param[inout] candtab
607  * Pointer to the global candtab array where field cblktype is
608  * updated. cblktype defines the optimization/properties that are
609  * defined on each cblk and which are defined by level in the
610  * tree.
611  *
612  * @param[in] etree
613  * Pointer to the global elimination tree structure that is used
614  * to travel through the cblk, and affect the properies with the
615  * correct filiation property.
616  *
617  * @param[in] symbmtx
618  * Pointer to the symbol matrix we are working with.
619  *
620  *******************************************************************************/
621 void
623  pastix_int_t cblktype,
624  pastix_int_t level2D,
625  pastix_int_t ratiolimitLR,
626  Cand *candtab,
627  const EliminTree *etree,
628  const symbol_matrix_t *symbmtx )
629 {
630  pastix_int_t i, son;
631  pastix_int_t width = symbmtx->cblktab[ rootnum ].lcolnum - symbmtx->cblktab[ rootnum ].fcolnum + 1;
632 
633  if ( (cblktype & CBLK_IN_SCHUR) &&
634  (symbmtx->cblktab[ rootnum ].lcolnum < symbmtx->schurfcol) )
635  {
636  cblktype &= ~(CBLK_IN_SCHUR);
637  }
638 
639  if( (cblktype & CBLK_TASKS_2D) && (level2D <= 0) ) {
640  cblktype = cblktype & (~CBLK_TASKS_2D);
641  }
642 
643  if( (cblktype & CBLK_COMPRESSED) && (width < ratiolimitLR) ) {
644  cblktype = cblktype & (~CBLK_COMPRESSED);
645  }
646 
647  level2D--;
648  for(i=0; i<etree->nodetab[rootnum].sonsnbr; i++)
649  {
650  son = eTreeSonI(etree, rootnum, i);
651  candSubTreeDistribFirstLevel( son, cblktype,
652  level2D, ratiolimitLR,
653  candtab, etree, symbmtx );
654  }
655 
656  candtab[ rootnum ].cblktype = cblktype;
657 }
658 
659 /**
660  *******************************************************************************
661  *
662  * @brief Finish to build the candtab array for the proportionnal mapping.
663  *
664  * This function defines which cblk are candidates to be stored with a 2D
665  * layout, computed as 2D tasks, and/or be part of the Schur complement. It also
666  * set the cost of each cblk and the total cost of each subtree.
667  *
668  *******************************************************************************
669  *
670  * @param[in] level_tasks2d
671  * Defines the level in the elimination tree to switch from 1D to
672  * 2D tasks in the cblk flag.
673  * If < 0, automatic level is defined based on the width of the
674  * cblk with respect to the minimal width_tasks2d width.
675  * If 0, all cblks are tagged as 1D.
676  * If > 0, only the first level_tasks2d level of the tree are
677  * tagged as 2D.
678  *
679  * @param[in] width_tasks2d
680  * If level_tasks2d < 0, defines the minimal width to keep the 2D
681  * flag on a goven cblk.
682  *
683  * @param[in] lr_when
684  * Define if compression technics will be used or not. If not
685  * PastixCompressNever, then all cblk with a width larger than
686  * lr_width are tagged as compressible.
687  *
688  * @param[in] lr_width
689  * Define the minimal width to flag a cblk as compressible if
690  * lr_when != PastixCompressNever.
691  *
692  * @param[inout] candtab
693  * Pointer to the global candtab array that needs to be initialized.
694  *
695  * @param[inout] etree
696  * Pointer to the elimination tree that needs to be construct on entry.
697  * On exit, the cost of each node, and the total cost of its
698  * associated subtree is initialized.
699  *
700  * @param[in] symbmtx
701  * Pointer to the symbol matrix we are working with.
702  *
703  * @param[in] costmtx
704  * Pointer to the cost matrix associated to the symbol matrix and
705  * that holds the cost of each cblk and blok.
706  *
707  *******************************************************************************/
708 void
709 candBuild( pastix_int_t level_tasks2d, pastix_int_t width_tasks2d,
710  pastix_compress_when_t lr_when, pastix_int_t lr_width,
711  Cand *candtab,
712  EliminTree *etree,
713  const symbol_matrix_t *symbmtx,
714  const CostMatrix *costmtx )
715 {
716  double cp = 0.0;
717  pastix_int_t i, son, root = eTreeRoot(etree);
718  pastix_int_t cblktype = CBLK_LAYOUT_2D | CBLK_TASKS_2D | CBLK_IN_SCHUR | CBLK_COMPRESSED;
719 
720 #if defined(PASTIX_CUDA_FERMI)
721  cblktype = CBLK_IN_SCHUR | CBLK_COMPRESSED;
722 #endif
723 
724  /* Let's start with the root */
725  candtab[ root ].costlevel = 0.;
726  candtab[ root ].treelevel = 0;
727 
728  candSubTreeBuild( root, candtab, etree, symbmtx, costmtx, &cp );
729 
730  if ( lr_when == PastixCompressNever ) {
731  lr_width = PASTIX_INT_MAX;
732  }
733 
734  for(i=0; i<etree->nodetab[root].sonsnbr; i++)
735  {
736  son = eTreeSonI(etree, root, i);
737 #if defined(PASTIX_BLEND_DEEPEST_DISTRIB)
738  /*
739  * Find the deepest node that matches the criterion for a flag, and assign
740  * the flag to all its ancestors to the root
741  */
742  if( level_tasks2d < 0 )
743  {
744  candSubTreeDistribDeepestWidth( son, cblktype,
745  width_tasks2d, lr_width,
746  candtab, etree, symbmtx );
747  }
748  else
749  {
750  candSubTreeDistribDeepestLevel( son, cblktype,
751  level_tasks2d, lr_width,
752  candtab, etree, symbmtx );
753  }
754 #else
755  /*
756  * Propagate the flags to all the sons as long as the node matches the
757  * criterion to keep them. Stops earlier than previous case with btterfly
758  * like meshes.
759  */
760  if( level_tasks2d < 0 )
761  {
762  candSubTreeDistribFirstWidth( son, cblktype,
763  width_tasks2d, lr_width,
764  candtab, etree, symbmtx );
765  }
766  else
767  {
768  candSubTreeDistribFirstLevel( son, cblktype,
769  level_tasks2d, lr_width,
770  candtab, etree, symbmtx );
771  }
772 #endif
773  }
774 }
775 
776 /**
777  *******************************************************************************
778  *
779  * @brief Update the candtab array costs after the symbol split algorithm has
780  * been applied.
781  *
782  * This function update the costs and critical path of each node after the
783  * symbol matrix has been split to generate more parallelism.
784  *
785  *******************************************************************************
786  *
787  * @param[inout] candtab
788  * Pointer to the global candtab array that needs to be updated.
789  *
790  * @param[inout] etree
791  * Pointer to the elimination tree that needs to be construct on entry.
792  * On exit, the cost of each node, and the total cost of its
793  * associated subtree is updated.
794  *
795  * @param[in] symbmtx
796  * Pointer to the symbol matrix we are working with.
797  *
798  * @param[in] costmtx
799  * Pointer to the cost matrix associated to the symbol matrix and
800  * that holds the cost of each cblk and blok.
801  *
802  *******************************************************************************/
803 void
804 candUpdate( Cand *candtab,
805  EliminTree *etree,
806  const symbol_matrix_t *symbmtx,
807  const CostMatrix *costmtx )
808 {
809  double cp = 0.0;
810  pastix_int_t root = eTreeRoot(etree);
811 
812  /* Let's start with the root */
813  candtab[ root ].costlevel = 0.;
814  candtab[ root ].treelevel = 0;
815 
816  candSubTreeBuild( root, candtab, etree, symbmtx, costmtx, &cp );
817 }
818 
819 /**
820  *@}
821  */
822 
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
pastix_int_t lcandnum
Definition: cand.h:32
pastix_int_t cluster
Definition: cand.h:35
pastix_int_t fcandnum
Definition: cand.h:31
double costlevel
Definition: cand.h:29
pastix_int_t treelevel
Definition: cand.h:30
pastix_int_t lccandnum
Definition: cand.h:34
pastix_int_t fccandnum
Definition: cand.h:33
int8_t cblktype
Definition: cand.h:36
int candCheck(const Cand *candtab, const symbol_matrix_t *symbmtx)
Check the correctness of the computed candidates.
Definition: cand.c:204
void candSubTreeDistribFirstLevel(pastix_int_t rootnum, pastix_int_t cblktype, pastix_int_t level2D, pastix_int_t ratiolimitLR, Cand *candtab, const EliminTree *etree, const symbol_matrix_t *symbmtx)
Recursive function to compute the distribution of the nodes among the different levels based on depth...
Definition: cand.c:622
void candUpdate(Cand *candtab, EliminTree *etree, const symbol_matrix_t *symbmtx, const CostMatrix *costmtx)
Update the candtab array costs after the symbol split algorithm has been applied.
Definition: cand.c:804
static double candSubTreeBuild(pastix_int_t rootnum, Cand *candtab, EliminTree *etree, const symbol_matrix_t *symbmtx, const CostMatrix *costmtx, double *cripath)
Recursive function to update the cost fields of the both the candtab array, and the elimination tree ...
Definition: cand.c:271
Cand * candInit(pastix_int_t cblknbr)
Initialize the candtab array with default values.
Definition: cand.c:48
void candSave(const Cand *candtab, pastix_int_t cblknbr, const char *directory)
Print the candidates array into the candtab.txt file.
Definition: cand.c:108
pastix_int_t candSubTreeDistribDeepestLevel(pastix_int_t rootnum, pastix_int_t cblktype, pastix_int_t level2D, pastix_int_t ratiolimitLR, Cand *candtab, const EliminTree *etree, const symbol_matrix_t *symbmtx)
Recursive function to compute the distribution of the nodes among the different levels based on depth...
Definition: cand.c:540
void candExit(Cand *candtab)
Exit and free the candtab structure given.
Definition: cand.c:83
void candSetClusterCand(Cand *candtab, pastix_int_t cblknbr, const pastix_int_t *core2clust, pastix_int_t coresnbr)
Set the clusters candidates from the cores canditates.
Definition: cand.c:158
void candBuild(pastix_int_t level_tasks2d, pastix_int_t width_tasks2d, pastix_compress_when_t lr_when, pastix_int_t lr_width, Cand *candtab, EliminTree *etree, const symbol_matrix_t *symbmtx, const CostMatrix *costmtx)
Finish to build the candtab array for the proportionnal mapping.
Definition: cand.c:709
pastix_int_t candSubTreeDistribDeepestWidth(pastix_int_t rootnum, pastix_int_t cblktype, pastix_int_t ratiolimit2D, pastix_int_t ratiolimitLR, Cand *candtab, const EliminTree *etree, const symbol_matrix_t *symbmtx)
Recursive function to compute the distribution of the nodes among the different levels.
Definition: cand.c:455
void candSubTreeDistribFirstWidth(pastix_int_t rootnum, pastix_int_t cblktype, pastix_int_t ratiolimit2D, pastix_int_t ratiolimitLR, Cand *candtab, const EliminTree *etree, const symbol_matrix_t *symbmtx)
Recursive function to compute the distribution of the nodes among the different levels.
Definition: cand.c:369
Processor candidate group to own a column blok.
Definition: cand.h:28
double * cblkcost
Definition: cost.h:33
Arrays of double to store the cost of each element in the matrix.
Definition: cost.h:30
double subpath
Definition: elimintree.h:29
double subcost
Definition: elimintree.h:28
double ndepath
Definition: elimintree.h:27
double ndecost
Definition: elimintree.h:26
eTreeNode_t * nodetab
Definition: elimintree.h:42
static pastix_int_t eTreeRoot(const EliminTree *etree)
Return the root of the elimination tree.
Definition: elimintree.h:126
static pastix_int_t eTreeSonI(const EliminTree *etree, pastix_int_t node, pastix_int_t i)
Return the i^{th} son of a given node.
Definition: elimintree.h:105
Elimination tree.
Definition: elimintree.h:39
FILE * pastix_fopenw(const char *dirname, const char *filename, const char *mode)
Open a file in the unique directory of the pastix instance.
Definition: api.c:251
enum pastix_compress_when_e pastix_compress_when_t
Compression strategy available for IPARM_COMPRESS_WHEN parameter.
@ PastixCompressNever
Definition: api.h:385
pastix_int_t fcblknm
Definition: symbol.h:63
symbol_cblk_t * cblktab
Definition: symbol.h:83
pastix_int_t schurfcol
Definition: symbol.h:82
pastix_int_t bloknum
Definition: symbol.h:48
pastix_int_t fcolnum
Definition: symbol.h:46
pastix_int_t lcolnum
Definition: symbol.h:47
symbol_blok_t * bloktab
Definition: symbol.h:84
pastix_int_t cblknbr
Definition: symbol.h:79
Symbol matrix structure.
Definition: symbol.h:77
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.