PaStiX Handbook  6.3.2
pastix_subtask_blend.c
Go to the documentation of this file.
1 /**
2  *
3  * @file pastix_subtask_blend.c
4  *
5  * PaStiX analyse blend subtask function
6  *
7  * @copyright 2004-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.2
11  * @author Pascal Henon
12  * @author Xavier Lacoste
13  * @author Pierre Ramet
14  * @author Mathieu Faverge
15  * @author Gregoire Pichon
16  * @author Tony Delarue
17  * @date 2023-12-11
18  *
19  **/
20 #include "common.h"
21 #include <spm.h>
22 #include "graph/graph.h"
23 #include "order/order_internal.h"
24 #include "blend/perf.h"
25 #include "blend/elimintree.h"
26 #include "blend/cost.h"
27 #include "blend/cand.h"
28 #include "blend/extendVector.h"
29 #include "blendctrl.h"
30 #include "blend/solver.h"
31 #include "blend/simu.h"
32 #include "blend/blend.h"
33 
34 /**
35  *******************************************************************************
36  *
37  * @ingroup pastix_analyze
38  *
39  * @brief Compute the proportional mapping and the final solver structure.
40  *
41  * This function computes the structural information required to factorize
42  * and solve the problem. It requires an ordering structure, as well as the
43  * symbolic factorization structure. It computes a solver structure that contains
44  * all the information, both architecture- and problem-dependent, needed to
45  * efficiently solve the system.
46  * On exit, the symbol structure is destroyed and only local uncompressed
47  * information is stored in the solver structure.
48  *
49  * This routine is affected by, or returns, the following parameters:
50  * IPARM_CUDA_NBR, IPARM_TASKS2D_LEVEL, IPARM_TASKS2D_WIDTH,
51  * IPARM_COMPRESS_WHEN, IPARM_COMPRESS_MIN_WIDTH, IPARM_DOF_NBR,
52  * IPARM_FACTORIZATION, IPARM_FLOAT, IPARM_GPU_CRITERIUM,
53  * IPARM_GPU_MEMORY_PERCENTAGE, IPARM_GPU_NBR, IPARM_INCOMPLETE,
54  * IPARM_MAX_BLOCKSIZE, IPARM_MIN_BLOCKSIZE, IPARM_NNZEROS,
55  * IPARM_NNZEROS_BLOCK_LOCAL, IPARM_STARPU, IPARM_THREAD_NBR, IPARM_VERBOSE
56  *
57  * DPARM_BLEND_TIME, DPARM_FACT_FLOPS, DPARM_FACT_RLFLOPS,
58  * DPARM_FACT_THFLOPS, DPARM_FILL_IN, DPARM_PRED_FACT_TIME, DPARM_SOLV_FLOPS
59  *
60  * This function is constructed as a sequence of steps that are described below.
61  *
62  * #### Construct an elimination tree
63  * An elimination tree structure is constructed out of the symbol matrix to be
64  * able to traverse the tree in a top-down fashion for the proportional
65  * mapping step.
66  *
67  * #### Construct the cost matrix
68  * For each column block and each block of the symbolic structure, the cost of
69  * each operation is computed to evaluate the cost of each branch of the
70  * elimination tree. The cost of a block is the cost of the update generated
71  * by this block when it is used as the B matrix in the GEMM update. The cost
72  * of a column block is the total cost associated with it: factorization, solve,
73  * and update in a right-looking algorithm. This means that the update cost is
74  * the one generated by this column block, and not the one received by this
75  * column block.
76  *
77  * #### Construct the candidate array
78  * Dispatch properties such as low-rank compression and 2D tasks from the top
79  * to the bottom of the tree. The candidate array and the elimination tree are
80  * updated simultaneously, using the costs computed previously.
81  * This step is impacted by IPARM_TASKS2D_LEVEL and IPARM_TASKS2D_WIDTH, which
82  * define the minimal width of nodes that can forward the 2D tasks property to
83  * their sons.
84  * Similarly, IPARM_COMPRESS_WHEN and IPARM_COMPRESS_MIN_WIDTH define the
85  * minimal width of nodes that can forward the low-rank property to their sons.
86  *
87  * #### Proportional Mapping
88  * This step performs the actual proportional mapping algorithm to define the
89  * subset of candidates to compute each supernode.
90  *
91  * #### Split symbol matrix
92  * Once the proportional mapping is performed on the original set of
93  * supernodes, the symbol matrix is split into smaller supernodes/blocks to
94  * allow for more parallelism.
95  *
96  * #### Simulation
97  * The simulation step defines the actual mapping per core of each supernode
98  * based on a simulation of the numerical factorization where each task is
99  * attributed to the first resource available to compute it.
100  *
101  * #### Solver structure generation
102  * Out of the previous step, the solver generator builds the local structure
103  * that is required for the numerical factorization and solve steps. It is
104  * mainly represented by a CSC-like structure of the local blocks, linked to a
105  * CSR for the solve step and the structure that will hold the coefficients
106  * of the factorized matrix.
107  *
108  *******************************************************************************
109  *
110  * @param[inout] pastix_data
111  * The pastix_data structure that describes the solver instance.
112  *
113  *******************************************************************************
114  *
115  * @retval PASTIX_SUCCESS on successful exit
116  * @retval PASTIX_ERR_BADPARAMETER if one parameter is incorrect.
117  * @retval PASTIX_ERR_OUTOFMEMORY if one allocation failed.
118  *
119  *******************************************************************************/
120 int
122 {
123  BlendCtrl ctrl;
124  pastix_int_t procnum, verbose;
125  pastix_int_t *iparm;
126  double *dparm;
127  pastix_order_t *ordeptr;
128  symbol_matrix_t *symbmtx;
129  SolverMatrix *solvmtx_loc;
130  SolverMatrix *solvmtx_glob;
131  SimuCtrl *simuctrl;
132  double timer_all = 0.;
133  double timer_current = 0.;
134  size_t nnz;
135 
136  /*
137  * Check parameters
138  */
139  if (pastix_data == NULL) {
140  pastix_print_error( "pastix_subtask_blend: wrong pastix_data parameter" );
142  }
143  if ( !(pastix_data->steps & STEP_SYMBFACT) ) {
144  pastix_print_error( "pastix_subtask_blend: pastix_subtask_symbfact() has to be called before calling this function" );
146  }
147 
148  iparm = pastix_data->iparm;
149  dparm = pastix_data->dparm;
150  procnum = pastix_data->inter_node_procnum;
151  ordeptr = pastix_data->ordemesh;
152  symbmtx = pastix_data->symbmtx;
153  verbose = iparm[IPARM_VERBOSE];
154 
155  if (ordeptr == NULL) {
156  pastix_print_error( "pastix_subtask_blend: the pastix_data->ordemesh field has not been initialized, pastix_task_order should be called first" );
158  }
159  if (symbmtx == NULL) {
160  pastix_print_error( "pastix_subtask_blend: the pastix_data->symbmtx has not been initialized, pastix_task_symbfact should be called first" );
162  }
163  if (symbmtx->dof < 1) {
164  pastix_print_error( "pastix_subtask_blend: Dof number has not been correctly initialized" );
166  }
167 
168  /* Free graph structure, we don't need it anymore */
169  if ( pastix_data->graph != NULL ) {
170  graphExit( pastix_data->graph );
171  memFree_null( pastix_data->graph );
172  }
173 
174  /* Cleanup the solver structure if we already computed it */
175  if ( pastix_data->solvloc != NULL ) {
176  solverExit( pastix_data->solvloc );
177  memFree_null( pastix_data->solvloc );
178  }
179  if ( pastix_data->solvglob != NULL ) {
180  solverExit( pastix_data->solvglob );
181  memFree_null( pastix_data->solvglob );
182  }
183  pastix_data->solvmatr = NULL;
184 
185  solvmtx_loc = (SolverMatrix*)malloc(sizeof(SolverMatrix));
186  solvmtx_glob = (SolverMatrix*)malloc(sizeof(SolverMatrix));
187  pastix_data->solvloc = solvmtx_loc;
188  pastix_data->solvglob = solvmtx_glob;
189 
190  /* The problem is more likely to be solved by the local problem, may change later */
191  pastix_data->solvmatr = pastix_data->solvloc;
192 
193  /* Start the analyze step */
194  clockStart(timer_all);
195  if ( verbose > PastixVerboseNot ) {
196  pastix_print( procnum, 0, OUT_STEP_BLEND );
197  }
198 
199  /* Create the control structure that parameterize the analyze step */
200  blendCtrlInit( pastix_data, &ctrl );
201 
202  if( verbose > PastixVerboseNo) {
203  pastix_print( procnum, 0, OUT_BLEND_CONF,
204  (long)ctrl.clustnbr, (long)ctrl.local_nbcores, (long)ctrl.local_nbthrds);
205  }
206 
207  /* Prepare the directories for the output files if needed */
208  if ( verbose > PastixVerboseYes ) {
209  pastix_gendirectories( pastix_data );
210  }
211 
212  /* Verify the coherence of the initial symbol matrix */
213  if(ctrl.debug)
214  {
215  if( verbose > PastixVerboseYes ) {
216  pastix_print( procnum, 0, OUT_BLEND_CHKSMBMTX );
217  }
218  pastixSymbolCheck(symbmtx);
219  }
220 
221 #if defined(PASTIX_ORDER_DRAW_LASTSEP)
222  /*
223  * Draw last separator before split
224  */
226  pastixSymbolDrawMap( pastix_data, "bsplit", ordeptr->sndenbr-1 );
227  }
228 #endif
229 
230 #if !defined(PASTIX_BLEND_PROPMAP_2STEPS)
231  /*
232  * Split the existing symbol matrix according to the number of candidates
233  * and cblk types.
234  * It takes the original symbol and candtab, and returns the new symbol and
235  * candtab. If the symbmtx is modified, the costmtx is updated, as well as
236  * the tree.
237  */
238  {
239  if( verbose > PastixVerboseYes ) {
240  pastix_print( procnum, 0, OUT_BLEND_SPLITSYMB );
241  }
242  clockStart(timer_current);
243 
244  splitSymbol(&ctrl, symbmtx);
245 
246  clockStop(timer_current);
247  if( verbose > PastixVerboseNo ) {
248  pastix_print( procnum, 0, OUT_BLEND_SPLITSYMB_TIME,
249  clockVal(timer_current) );
250  }
251  }
252 #if defined(PASTIX_ORDER_DRAW_LASTSEP)
253  /*
254  * Draw last separator after split
255  */
256  pastixSymbolDrawMap( pastix_data, "asplit", ordeptr->sndenbr-1 );
257 #endif
258 #endif
259 
260  /* Build the elimination tree from the symbolic partition */
261  {
262  if( verbose > PastixVerboseYes) {
263  pastix_print( procnum, 0, OUT_BLEND_ELIMTREE );
264  }
265  clockStart(timer_current);
266 
267  ctrl.etree = eTreeBuild(symbmtx);
268 
269  clockStop(timer_current);
270  if( verbose > PastixVerboseNo ) {
271  pastix_print( procnum, 0, OUT_BLEND_ELIMTREE_TIME,
272  clockVal(timer_current) );
273  }
274  }
275 
276  /* Build the cost matrix from the symbolic partition */
277  {
278  if( verbose > PastixVerboseYes ) {
279  pastix_print( procnum, 0, OUT_BLEND_COSTMATRIX );
280  }
281  clockStart(timer_current);
282 
283  ctrl.costmtx = costMatrixBuild( symbmtx,
284  iparm[IPARM_FLOAT],
285  iparm[IPARM_FACTORIZATION] );
286 
287  clockStop(timer_current);
288  if( verbose > PastixVerboseNo ) {
289  pastix_print( procnum, 0, OUT_BLEND_COSTMATRIX_TIME,
290  clockVal(timer_current) );
291  }
292  }
293 
294  /* Build the candtab array to store candidate information on each cblk */
295  {
296  ctrl.candtab = candInit( symbmtx->cblknbr );
297 
298  /* Initialize costs in elimination tree and candtab array for proportionnal mapping */
301  ctrl.candtab,
302  ctrl.etree,
303  symbmtx,
304  ctrl.costmtx );
305 
306  if( verbose > PastixVerboseNo ) {
307  pastix_print( procnum, 0, OUT_BLEND_ELIMTREE_TOTAL_COST,
308  ctrl.etree->nodetab[ eTreeRoot(ctrl.etree) ].subcost,
309  ctrl.etree->nodetab[ eTreeRoot(ctrl.etree) ].subpath );
310  }
311  }
312 
313  /* Proportional mapping step that distributes the candidates over the tree */
314  {
315  if( verbose > PastixVerboseYes ) {
316  pastix_print( procnum, 0, OUT_BLEND_PROPMAP );
317  }
318  clockStart(timer_current);
319 
320  propMappTree( ctrl.candtab,
321  ctrl.etree,
322  ctrl.total_nbcores,
323  ctrl.nocrossproc,
324  ctrl.allcand );
325 
326  /* Set the cluster candidates according to the processor candidates */
327  candSetClusterCand( ctrl.candtab, symbmtx->cblknbr,
328  ctrl.core2clust, ctrl.total_nbcores );
329 
330  clockStop(timer_current);
331 
332  if( verbose > PastixVerboseNo ) {
333  pastix_print( procnum, 0, OUT_BLEND_PROPMAP_TIME,
334  clockVal(timer_current) );
335  }
336 
337  /* Let's check the result if ask */
338  if ( ctrl.debug ) {
339  assert( candCheck( ctrl.candtab, symbmtx ) );
340  }
341  }
342 
343 #if defined(PASTIX_BLEND_PROPMAP_2STEPS)
344  /* Dump the dot of the eTree before split */
345  if (( verbose > PastixVerboseYes ) &&
346  ( pastix_data->procnum == 0 ) )
347  {
348  FILE *stream = NULL;
349  stream = pastix_fopenw( pastix_data->dir_global, "etree.dot", "w" );
350  if ( stream ) {
351  candGenDotLevel( ctrl.etree, ctrl.candtab, stream, 5);
352  fclose(stream);
353  }
354 
355  stream = pastix_fopenw( pastix_data->dir_global, "ctree.dot", "w" );
356  if ( stream ) {
357  candGenCompressedDot( ctrl.etree, ctrl.candtab, stream );
358  fclose(stream);
359  }
360  }
361 
362  /*
363  * Split the existing symbol matrix according to the number of candidates
364  * and cblk types.
365  * It takes the original symbol and candtab, and returns the new symbol and
366  * candtab. If the symbmtx is modified, the costmtx is updated, as well as
367  * the tree.
368  */
369  {
370  if( verbose > PastixVerboseYes ) {
371  pastix_print( procnum, 0, OUT_BLEND_SPLITSYMB );
372  }
373  clockStart(timer_current);
374 
375  ctrl.up_after_split = 1;
376  splitSymbol(&ctrl, symbmtx);
377  ctrl.up_after_split = 0;
378 
379  clockStop(timer_current);
380  if( verbose > PastixVerboseNo ) {
381  pastix_print( procnum, 0, OUT_BLEND_SPLITSYMB_TIME,
382  clockVal(timer_current) );
383  }
384  }
385 
386  /* Dump the dot of the eTree after split */
387  if (( verbose > PastixVerboseYes ) &&
388  ( pastix_data->procnum == 0 ) )
389  {
390  FILE *stream = NULL;
391  stream = pastix_fopenw( pastix_data->dir_global, "etree_split.dot", "w" );
392  if ( stream ) {
393  candGenDot( ctrl.etree, ctrl.candtab, stream );
394  fclose(stream);
395  }
396 
397  stream = pastix_fopenw( pastix_data->dir_global, "ctree_split.dot", "w" );
398  if ( stream ) {
399  candGenCompressedDot( ctrl.etree, ctrl.candtab, stream );
400  fclose(stream);
401  }
402  }
403 #endif
404 
405  if(ctrl.count_ops && (ctrl.leader == procnum)) {
406  pastixSymbolGetFlops( symbmtx,
407  iparm[IPARM_FLOAT],
408  iparm[IPARM_FACTORIZATION],
409  &(dparm[DPARM_FACT_THFLOPS]),
410  &(dparm[DPARM_FACT_RLFLOPS]) );
411  }
412 
413 #if defined(PASTIX_SYMBOL_DUMP_SYMBMTX)
414  {
415  FILE *stream = NULL;
416  pastix_gendirectories( pastix_data );
417  stream = pastix_fopenw( pastix_data->dir_global, "symbol_after_split.eps", "w" );
418  if ( stream ) {
419  pastixSymbolDraw( symbmtx, stream );
420  fclose( stream );
421  }
422  }
423 #endif
424 
425  if (0)
426  {
427  FILE *file = NULL;
428  pastix_gendirectories( pastix_data );
429  file = pastix_fopenw( pastix_data->dir_global, "symbol_after_split", "w" );
430  if ( file ) {
431  pastixSymbolSave( symbmtx, file );
432  fclose( file );
433  }
434  }
435 
436  /* Simulation step to perform the data distribution over the nodes and compute the priorities of each task */
437  {
438  if( verbose > PastixVerboseYes ) {
439  pastix_print( procnum, 0, OUT_BLEND_BUILDSIMU );
440  }
441  clockStart(timer_current);
442 
443  /* Initialize simulation structure */
444  MALLOC_INTERN(simuctrl, 1, SimuCtrl);
445  simuInit( simuctrl, symbmtx, ctrl.candtab,
446  ctrl.clustnbr,
447  ctrl.total_nbcores );
448 
449  /* Create task array */
450  simuTaskBuild( simuctrl, symbmtx );
451  clockStop(timer_current);
452 
453  if( verbose > PastixVerboseNo ) {
454  pastix_print( procnum, 0, OUT_BLEND_BUILDSIMU_TIME,
455  clockVal(timer_current),
456  (long)simuctrl->tasknbr );
457  }
458 
459  if( verbose > PastixVerboseYes ) {
460  pastix_print( procnum, 0, OUT_BLEND_SIMU );
461  }
462  clockStart(timer_current);
463 
464  simuRun( simuctrl, &ctrl, symbmtx );
465 
466  clockStop(timer_current);
467  if( verbose > PastixVerboseNo ) {
468  pastix_print( procnum, 0, OUT_BLEND_SIMU_TIME,
469  clockVal(timer_current) );
470  }
471  }
472 
473 #ifdef PASTIX_DYNSCHED
474  /**
475  * If dynamic scheduling is asked, let's perform a second proportionnal
476  * mapping step:
477  * - this is made only on local data
478  * - no crossing is allowed between branches
479  */
480  {
481  clockStart(timer_current);
482 
483  splitPartLocal( &ctrl, simuctrl, symbmtx );
484 
485  clockStop(timer_current);
486  if( verbose>PastixVerboseNo)
487  pastix_print( procnum, 0, " -- Split build at time: %g --\n", clockVal(timer_current));
488  }
489 #endif
490 
491  /* CostMatrix and Elimination Tree are no further used */
492  costMatrixExit( ctrl.costmtx );
493  memFree_null( ctrl.costmtx );
494  eTreeExit( ctrl.etree );
495 
496  /**
497  * Generate the final solver structure that collects data from the different
498  * simulation structures and convert to local numbering
499  */
500  {
501  if( verbose > PastixVerboseYes ) {
502  pastix_print( procnum, 0, OUT_BLEND_SOLVER );
503  }
504  clockStart(timer_current);
505 
506  solverMatrixGenSeq( solvmtx_glob, symbmtx,
507  pastix_data->ordemesh, simuctrl, &ctrl,
508  pastix_data->inter_node_comm, pastix_data->isched, 0 );
509 
510  solverMatrixGen( solvmtx_loc, symbmtx,
511  pastix_data->ordemesh, simuctrl, &ctrl,
512  pastix_data->inter_node_comm, pastix_data->isched );
513 
514  clockStop(timer_current);
515  if( verbose > PastixVerboseNo ) {
516  pastix_print( procnum, 0, OUT_BLEND_SOLVER_TIME,
517  clockVal(timer_current) );
518  if( verbose > PastixVerboseYes ) {
519  solverPrintStats( solvmtx_loc );
520  }
521  }
522  }
523 
524  /* Free allocated memory */
525  simuExit(simuctrl, ctrl.clustnbr, ctrl.total_nbcores, ctrl.local_nbctxts);
526 
527  /* Realloc solver memory in a contiguous way */
528  {
529  solverRealloc(solvmtx_loc);
530  solverRealloc(solvmtx_glob);
531 #if defined(PASTIX_DEBUG_BLEND)
532  if (!ctrl.ricar) {
533  if( verbose > PastixVerboseYes ) {
534  pastix_print( procnum, 0, OUT_BLEND_CHKSOLVER );
535  }
536  solverCheck(solvmtx_loc);
537  solverCheck(solvmtx_glob);
538  }
539 #endif
540  }
541 
542  blendCtrlExit(&ctrl);
543 
544  /* End timing */
545  clockStop(timer_all);
546  pastix_data->dparm[DPARM_BLEND_TIME] = clockVal(timer_all);
547 
548  if (verbose > PastixVerboseYes) {
549  pastixSymbolPrintStats( pastix_data->symbmtx );
550  }
551 
552  /* Recompute nnz if overflow (int32) */
553  nnz = iparm[IPARM_NNZEROS];
554  if ( iparm[IPARM_NNZEROS] < 0 ) {
555  nnz = pastixSymbolGetNNZ( pastix_data->symbmtx );
556  }
557 
558  /* Symbol is not used anymore */
559  pastixSymbolExit(pastix_data->symbmtx);
560  memFree_null(pastix_data->symbmtx);
561 
562  /* Computes and print statistics */
563  {
564  if (iparm[IPARM_FACTORIZATION] == PastixFactLU)
565  {
566  nnz *= 2;
567  dparm[DPARM_PRED_FACT_TIME] *= 2.;
568  }
569  dparm[DPARM_SOLV_FLOPS] = (double)nnz; /* number of operations for solve */
570 
571  iparm[IPARM_NNZEROS_BLOCK_LOCAL] = solvmtx_loc->coefnbr;
572 
573  /* Affichage */
574  dparm[DPARM_FILL_IN] = (double)(nnz) / (double)(pastix_data->csc->gnnzexp);
575 
576  if (verbose > PastixVerboseNot) {
577  pastix_print( procnum, 0, OUT_BLEND_SUMMARY,
578  nnz, (double)dparm[DPARM_FILL_IN],
579  pastixFactotypeStr( iparm[IPARM_FACTORIZATION] ),
580  pastix_print_value( dparm[DPARM_FACT_THFLOPS] ),
581  pastix_print_unit( dparm[DPARM_FACT_THFLOPS] ),
582  PERF_MODEL, dparm[DPARM_PRED_FACT_TIME],
583  dparm[DPARM_BLEND_TIME] );
584 
585  if (0) /* TODO: consider that when moving to distributed */
586  {
587  if ((verbose > PastixVerboseNo))
588  {
589  fprintf(stdout, NNZERO_WITH_FILLIN, (int)procnum, (long)iparm[IPARM_NNZEROS_BLOCK_LOCAL]);
590  }
591  if (verbose > PastixVerboseYes)
592  {
593  PASTIX_Comm pastix_comm = pastix_data->inter_node_comm;
594  pastix_int_t sizeL = solvmtx_loc->coefnbr;
595  pastix_int_t sizeG = 0;
596 
597  MPI_Reduce(&sizeL, &sizeG, 1, PASTIX_MPI_INT, MPI_MAX, 0, pastix_comm);
598 
599  if (procnum == 0)
600  {
601  sizeG *= sizeof(pastix_complex64_t);
602  if (iparm[IPARM_FACTORIZATION] == PastixFactLU) {
603  sizeG *= 2;
604  }
605 
606  fprintf( stdout, OUT_COEFSIZE,
607  pastix_print_value(sizeG),
608  pastix_print_unit(sizeG) );
609  }
610  }
611  }
612  }
613  }
614 
615  /* Backup the solver for debug */
616  if (0)
617  {
618  FILE *file = NULL;
619  pastix_gendirectories( pastix_data );
620  file = pastix_fopenw( pastix_data->dir_global, "solvergen", "w" );
621  if ( file ) {
622  solverSave( solvmtx_loc, file );
623  fclose(file);
624  }
625  }
626 
627  /* Invalidate following steps, and add analyze step to the ones performed */
628  pastix_data->steps &= ~( STEP_CSC2BCSC |
629  STEP_BCSC2CTAB |
630  STEP_NUMFACT |
631  STEP_SOLVE |
632  STEP_REFINE );
633  pastix_data->steps |= STEP_ANALYSE;
634 
635  return PASTIX_SUCCESS;
636 }
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
int candCheck(const Cand *candtab, const symbol_matrix_t *symbmtx)
Check the correctness of the computed candidates.
Definition: cand.c:204
Cand * candInit(pastix_int_t cblknbr)
Initialize the candtab array with default values.
Definition: cand.c:48
void candGenCompressedDot(const EliminTree *etree, const Cand *candtab, FILE *stream)
Print the compressed elimination tree in a dot file, where all nodes with the same candidates are mer...
Definition: cand_gendot.c:441
void candSetClusterCand(Cand *candtab, pastix_int_t cblknbr, const pastix_int_t *core2clust, pastix_int_t coresnbr)
Set the clusters candidates from the cores canditates.
Definition: cand.c:158
void candBuild(pastix_int_t level_tasks2d, pastix_int_t width_tasks2d, pastix_compress_when_t lr_when, pastix_int_t lr_width, Cand *candtab, EliminTree *etree, const symbol_matrix_t *symbmtx, const CostMatrix *costmtx)
Finish to build the candtab array for the proportionnal mapping.
Definition: cand.c:709
void candGenDot(const EliminTree *etree, const Cand *candtab, FILE *stream)
Print the elimination tree in a dot file.
Definition: cand_gendot.c:253
void candGenDotLevel(const EliminTree *etree, const Cand *candtab, FILE *stream, pastix_int_t level)
Print the first levels of the elimination tree in a dot file.
Definition: cand_gendot.c:405
void costMatrixExit(CostMatrix *costmtx)
Free the cost matrix structure.
Definition: cost.c:57
CostMatrix * costMatrixBuild(const symbol_matrix_t *symbmtx, pastix_coeftype_t flttype, pastix_factotype_t factotype)
Build the cost matrix structure from the symbol matrix structure.
Definition: cost.c:91
pastix_int_t * core2clust
Definition: blendctrl.h:79
pastix_int_t up_after_split
Definition: blendctrl.h:55
pastix_int_t leader
Definition: blendctrl.h:34
pastix_int_t ricar
Definition: blendctrl.h:33
EliminTree * etree
Definition: blendctrl.h:96
pastix_int_t debug
Definition: blendctrl.h:30
pastix_int_t local_nbcores
Definition: blendctrl.h:74
pastix_int_t allcand
Definition: blendctrl.h:40
pastix_int_t width_tasks2d
Definition: blendctrl.h:63
pastix_int_t clustnbr
Definition: blendctrl.h:71
pastix_int_t local_nbctxts
Definition: blendctrl.h:76
Cand * candtab
Definition: blendctrl.h:98
pastix_int_t level_tasks2d
Definition: blendctrl.h:62
pastix_int_t local_nbthrds
Definition: blendctrl.h:75
CostMatrix * costmtx
Definition: blendctrl.h:97
pastix_int_t total_nbcores
Definition: blendctrl.h:72
pastix_int_t count_ops
Definition: blendctrl.h:29
pastix_int_t nocrossproc
Definition: blendctrl.h:41
int blendCtrlInit(pastix_data_t *pastix_data, BlendCtrl *ctrl)
Initialize the Blend control structure.
Definition: blendctrl.c:162
void blendCtrlExit(BlendCtrl *)
Finalize the Blend control structure.
Definition: blendctrl.c:303
The type and structure definitions.
Definition: blendctrl.h:28
double subpath
Definition: elimintree.h:29
double subcost
Definition: elimintree.h:28
eTreeNode_t * nodetab
Definition: elimintree.h:42
static pastix_int_t eTreeRoot(const EliminTree *etree)
Return the root of the elimination tree.
Definition: elimintree.h:126
EliminTree * eTreeBuild(const symbol_matrix_t *)
Build the elimination tree.
Definition: elimintree.c:478
void eTreeExit(EliminTree *)
Free the elimination tree structure.
Definition: elimintree.c:89
pastix_int_t tasknbr
Definition: simu.h:119
void simuExit(SimuCtrl *, pastix_int_t, pastix_int_t, pastix_int_t)
Free the simulation structure.
Definition: simu.c:254
pastix_int_t simuInit(SimuCtrl *, const symbol_matrix_t *, const Cand *, pastix_int_t, pastix_int_t)
Initialize the simulation structures.
Definition: simu.c:66
void simuTaskBuild(SimuCtrl *, const symbol_matrix_t *)
Initialize the tasktab array of the simulation structure.
Definition: simu_task.c:49
Control structure for the simulation.
Definition: simu.h:116
void solverRealloc(SolverMatrix *solvptr)
Realloc in a contiguous way a given solver structure.
Definition: solver_copy.c:205
int solverSave(const SolverMatrix *solvptr, FILE *stream)
Save a solver matrix structure into a file.
Definition: solver_io.c:261
void solverPrintStats(const SolverMatrix *solvptr)
Print statistical information about the solver matrix structure.
Definition: solver.c:196
int solverCheck(const SolverMatrix *solvmtx)
Checks the consistency of the given solver matrix structure.
Definition: solver_check.c:54
void solverExit(SolverMatrix *solvmtx)
Free the content of the solver matrix structure.
Definition: solver.c:143
int pastix_subtask_blend(pastix_data_t *pastix_data)
Compute the proportional mapping and the final solver structure.
FILE * pastix_fopenw(const char *dirname, const char *filename, const char *mode)
Open a file in the unique directory of the pastix instance.
Definition: api.c:242
void pastix_gendirectories(pastix_data_t *pastix_data)
Generate a unique temporary directory to store output files.
Definition: api.c:76
@ PastixFactLU
Definition: api.h:317
@ DPARM_PRED_FACT_TIME
Definition: api.h:169
@ DPARM_FACT_THFLOPS
Definition: api.h:172
@ DPARM_BLEND_TIME
Definition: api.h:167
@ DPARM_SOLV_FLOPS
Definition: api.h:178
@ DPARM_FILL_IN
Definition: api.h:160
@ DPARM_FACT_RLFLOPS
Definition: api.h:173
@ IPARM_FACTORIZATION
Definition: api.h:99
@ IPARM_COMPRESS_WHEN
Definition: api.h:131
@ IPARM_COMPRESS_MIN_WIDTH
Definition: api.h:129
@ IPARM_FLOAT
Definition: api.h:149
@ IPARM_NNZEROS_BLOCK_LOCAL
Definition: api.h:41
@ IPARM_VERBOSE
Definition: api.h:36
@ IPARM_NNZEROS
Definition: api.h:40
@ IPARM_SPLITTING_STRATEGY
Definition: api.h:80
@ PastixVerboseYes
Definition: api.h:222
@ PastixVerboseNot
Definition: api.h:220
@ PastixVerboseNo
Definition: api.h:221
@ PASTIX_SUCCESS
Definition: api.h:367
@ PASTIX_ERR_BADPARAMETER
Definition: api.h:374
@ PastixSplitKwayProjections
Definition: api.h:418
void splitSymbol(BlendCtrl *ctrl, symbol_matrix_t *symbmtx)
Split the column blocks of the symbol matrix to generate parallelism.
Definition: splitsymbol.c:522
void propMappTree(Cand *candtab, const EliminTree *etree, pastix_int_t candnbr, int nocrossproc, int allcand)
Apply the proportional mapping algorithm.
Definition: propmap.c:415
void simuRun(SimuCtrl *, const BlendCtrl *, const symbol_matrix_t *)
Run the simulation to map the data on the nodes.
Definition: simu_run.c:986
int solverMatrixGen(SolverMatrix *solvmtx, const symbol_matrix_t *symbmtx, const pastix_order_t *ordeptr, const SimuCtrl *simuctl, const BlendCtrl *ctrl, PASTIX_Comm comm, isched_t *isched)
Initialize the solver matrix structure.
int solverMatrixGenSeq(SolverMatrix *solvmtx, const symbol_matrix_t *symbmtx, const pastix_order_t *ordeptr, const SimuCtrl *simuctl, const BlendCtrl *ctrl, PASTIX_Comm comm, isched_t *isched, pastix_int_t is_dbg)
Initialize the solver matrix structure in sequential.
void graphExit(pastix_graph_t *graph)
Free the content of the graph structure.
Definition: graph.c:73
pastix_int_t sndenbr
Definition: order.h:56
Order structure.
Definition: order.h:47
pastix_int_t dof
Definition: symbol.h:87
pastix_int_t cblknbr
Definition: symbol.h:79
void pastixSymbolPrintStats(const symbol_matrix_t *symbptr)
Print statistical information about the symbolic matrix structure.
Definition: symbol.c:389
int pastixSymbolDraw(const symbol_matrix_t *symbptr, FILE *stream)
Export the symbol structure in a PostScript format.
Definition: symbol_draw.c:248
size_t pastixSymbolGetNNZ(const symbol_matrix_t *symbptr)
Computes the number of non-zero elements in L.
Definition: symbol_cost.c:325
void pastixSymbolDrawMap(pastix_data_t *pastix_data, const char *extname, pastix_int_t sndeidx)
Dump a separator mapping into a map file.
int pastixSymbolSave(const symbol_matrix_t *symbptr, FILE *stream)
Save the given block matrix structure to the given stream.
Definition: symbol_io.c:147
void pastixSymbolGetFlops(const symbol_matrix_t *symbmtx, pastix_coeftype_t flttype, pastix_factotype_t factotype, double *thflops, double *rlflops)
Computes the number of theoretical and real flops.
Definition: symbol_cost.c:422
void pastixSymbolExit(symbol_matrix_t *symbptr)
Free the content of symbolic matrix.
Definition: symbol.c:137
int pastixSymbolCheck(const symbol_matrix_t *symbptr)
Checks the consistency of the given symbolic block matrix.
Definition: symbol_check.c:47
Symbol matrix structure.
Definition: symbol.h:77
int inter_node_procnum
Definition: pastixdata.h:83
SolverMatrix * solvmatr
Definition: pastixdata.h:102
SolverMatrix * solvglob
Definition: pastixdata.h:104
SolverMatrix * solvloc
Definition: pastixdata.h:103
pastix_order_t * ordemesh
Definition: pastixdata.h:97
pastix_int_t * iparm
Definition: pastixdata.h:69
double * dparm
Definition: pastixdata.h:70
const spmatrix_t * csc
Definition: pastixdata.h:89
pastix_graph_t * graph
Definition: pastixdata.h:91
isched_t * isched
Definition: pastixdata.h:85
symbol_matrix_t * symbmtx
Definition: pastixdata.h:99
PASTIX_Comm inter_node_comm
Definition: pastixdata.h:77
char * dir_global
Definition: pastixdata.h:109
pastix_int_t steps
Definition: pastixdata.h:72
Main PaStiX data structure.
Definition: pastixdata.h:67
pastix_int_t coefnbr
Definition: solver.h:206
Solver column block structure.
Definition: solver.h:200