PaStiX Handbook  6.2.1
pastix_subtask_blend.c
Go to the documentation of this file.
1 /**
2  *
3  * @file pastix_subtask_blend.c
4  *
5  * PaStiX analyse blend subtask function
6  *
7  * @copyright 2004-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.2.1
11  * @author Pascal Henon
12  * @author Xavier Lacoste
13  * @author Pierre Ramet
14  * @author Mathieu Faverge
15  * @author Gregoire Pichon
16  * @author Tony Delarue
17  * @date 2021-07-01
18  *
19  **/
20 #include "common.h"
21 #include <spm.h>
22 #include "graph/graph.h"
23 #include "order/order_internal.h"
24 #include "blend/perf.h"
25 #include "blend/elimintree.h"
26 #include "blend/cost.h"
27 #include "blend/cand.h"
28 #include "blend/extendVector.h"
29 #include "blendctrl.h"
30 #include "blend/solver.h"
31 #include "blend/simu.h"
32 #include "blend/blend.h"
33 
34 /**
35  *******************************************************************************
36  *
37  * @ingroup pastix_analyze
38  *
39  * @brief Compute the proportional mapping and the final solver structure.
40  *
41  * This function computes the structural information required to factorize
42  * and solve the problem. It requires an ordering structure, as well as the
43  * symbolic factorization structure. It computes a solver structure that contains
44  * all the information, both architecture and problem dependent, needed to
45  * efficiently solve the system.
46  * On exit, the symbol structure is destroyed and only local uncompressed
47  * information is stored in the solver structure.
48  *
49  * This routine is affected by, or returns, the following parameters:
50  * IPARM_CUDA_NBR, IPARM_TASKS2D_LEVEL, IPARM_TASKS2D_WIDTH,
51  * IPARM_COMPRESS_WHEN, IPARM_COMPRESS_MIN_WIDTH, IPARM_DOF_NBR,
52  * IPARM_FACTORIZATION, IPARM_FLOAT, IPARM_GPU_CRITERIUM,
53  * IPARM_GPU_MEMORY_PERCENTAGE, IPARM_GPU_NBR, IPARM_INCOMPLETE,
54  * IPARM_MAX_BLOCKSIZE, IPARM_MIN_BLOCKSIZE, IPARM_NNZEROS,
55  * IPARM_NNZEROS_BLOCK_LOCAL, IPARM_STARPU, IPARM_THREAD_NBR, IPARM_VERBOSE
56  *
57  * DPARM_BLEND_TIME, DPARM_FACT_FLOPS, DPARM_FACT_RLFLOPS,
58  * DPARM_FACT_THFLOPS, DPARM_FILL_IN, DPARM_PRED_FACT_TIME, DPARM_SOLV_FLOPS
59  *
60  * This function is constructed as a sequence of steps that are described below.
61  *
62  * #### Construct an elimination tree
63  * An elimination tree structure is constructed out of the symbol matrix to be
64  * able to traverse the tree in a top-down fashion for the proportional
65  * mapping step.
66  *
67  * #### Construct the cost matrix
68  * For each column-block, and block of the symbolic structure, the cost of
69  * each operation is computed to evaluate the cost of each branch of the
70  * elimination tree. The cost of a block is the cost of the update generated
71  * out of this block when used as the B matrix in the GEMM update. The cost of
72  * a column block is the total cost associated to it: factorization, solve,
73  * and update in a right-looking algorithm. This means that the update cost is
74  * the one generated by this column block, and not the one received by the
75  * column-block.
76  *
77  * #### Construct the candidate array
78  * Dispatch properties such as low-rank compression, 2D tasks from the top to
79  * the bottom of the tree. Candidate array, and elimination tree are computed,
80  * and updated, simultaneously with the costs computed previously.
81  * This step is impacted by IPARM_TASKS2D_LEVEL and IPARM_TASKS2D_WIDTH that
82  * define the minimal width of nodes which can forward 2D tasks property to
83  * their sons.
84  * Similarly, IPARM_COMPRESS_WHEN and IPARM_COMPRESS_MIN_WIDTH define the
85  * minimal width of nodes which can forward low-rank property to their sons.
86  *
87  * #### Proportional Mapping
88  * This step performs the actual proportional mapping algorithm to define the
89  * subset of candidates to compute each supernode.
90  *
91  * #### Split symbol matrix
92  * Once the proportional mapping is performed on the original set of
93  * supernodes, the symbol matrix is split in smaller supernodes/blocks to
94  * allow for more parallelism.
95  *
96  * #### Simulation
97  * The simulation step defines the actual mapping per core of each supernode
98  * based on a simulation of the numerical factorization where each task is
99  * attributed to the first resource available to compute it.
100  *
101  * #### Solver structure generation
102  * Out of the previous step, the solver generator builds the local structure
103  * that is required for the numerical factorization and solve steps. It is
104  * mainly represented by a CSC like structure of the local blocks, linked to a
105  * CSR for the solve step and the structure that will hold the coefficients
106  * of the factorized matrix.
107  *
108  *******************************************************************************
109  *
110  * @param[inout] pastix_data
111  * The pastix_data structure that describes the solver instance.
112  *
113  *******************************************************************************
114  *
115  * @retval PASTIX_SUCCESS on successful exit
116  * @retval PASTIX_ERR_BADPARAMETER if one parameter is incorrect.
117  * @retval PASTIX_ERR_OUTOFMEMORY if one allocation failed.
118  *
119  *******************************************************************************/
120 int
121 pastix_subtask_blend( pastix_data_t *pastix_data )
122 {
123  BlendCtrl ctrl;
124  pastix_int_t procnum, verbose;
125  pastix_int_t *iparm;
126  double *dparm;
127  pastix_order_t *ordeptr;
128  symbol_matrix_t *symbmtx;
129  SolverMatrix *solvmtx_loc;
130  SolverMatrix *solvmtx_glob;
131  SimuCtrl *simuctrl;
132  double timer_all = 0.;
133  double timer_current = 0.;
134 
135  /*
136  * Check parameters
137  */
138  if (pastix_data == NULL) {
139  errorPrint("pastix_subtask_blend: wrong pastix_data parameter");
141  }
142  if ( !(pastix_data->steps & STEP_SYMBFACT) ) {
143  errorPrint("pastix_subtask_blend: pastix_subtask_symbfact() has to be called before calling this function");
145  }
146 
147  iparm = pastix_data->iparm;
148  dparm = pastix_data->dparm;
149  procnum = pastix_data->inter_node_procnum;
150  ordeptr = pastix_data->ordemesh;
151  symbmtx = pastix_data->symbmtx;
152  verbose = iparm[IPARM_VERBOSE];
153 
154  if (ordeptr == NULL) {
155  errorPrint("pastix_subtask_blend: the pastix_data->ordemesh field has not been initialized, pastix_task_order should be called first");
157  }
158  if (symbmtx == NULL) {
159  errorPrint("pastix_subtask_blend: the pastix_data->symbmtx has not been initialized, pastix_task_symbfact should be called first");
161  }
162  if (symbmtx->dof < 1) {
163  errorPrint("pastix_subtask_blend: Dof number has not been correctly initialized");
165  }
166 
167  /* Free graph structure, we don't need it anymore */
168  if ( pastix_data->graph != NULL ) {
169  graphExit( pastix_data->graph );
170  memFree_null( pastix_data->graph );
171  }
172 
173  /* Cleanup the solver structure if we already computed it */
174  if ( pastix_data->solvmatr != NULL ) {
175  solverExit( pastix_data->solvmatr );
176  memFree_null( pastix_data->solvmatr );
177  }
178  if ( pastix_data->solvglob != NULL ) {
179  solverExit( pastix_data->solvglob );
180  memFree_null( pastix_data->solvglob );
181  }
182 
183  solvmtx_loc = (SolverMatrix*)malloc(sizeof(SolverMatrix));
184  solvmtx_glob = (SolverMatrix*)malloc(sizeof(SolverMatrix));
185  pastix_data->solvloc = solvmtx_loc;
186  pastix_data->solvglob = solvmtx_glob;
187 
188  /* The problem is more likely to be solved by the local problem, may change later */
189  pastix_data->solvmatr = pastix_data->solvloc;
190 
191  /* Start the analyze step */
192  clockStart(timer_all);
193  if ( verbose > PastixVerboseNot ) {
194  pastix_print( procnum, 0, OUT_STEP_BLEND );
195  }
196 
197  /* Create the control structure that parameterize the analyze step */
198  blendCtrlInit( pastix_data, &ctrl );
199 
200  if( verbose > PastixVerboseNo) {
201  pastix_print( procnum, 0, OUT_BLEND_CONF,
202  (long)ctrl.clustnbr, (long)ctrl.local_nbcores, (long)ctrl.local_nbthrds);
203  }
204 
205  /* Prepare the directories for the output files if needed */
206  if ( verbose > PastixVerboseYes ) {
207  pastix_gendirectories( pastix_data );
208  }
209 
210  /* Verify the coherence of the initial symbol matrix */
211  if(ctrl.debug)
212  {
213  if( verbose > PastixVerboseYes ) {
214  pastix_print( procnum, 0, OUT_BLEND_CHKSMBMTX );
215  }
216  pastixSymbolCheck(symbmtx);
217  }
218 
219 #if defined(PASTIX_ORDER_DRAW_LASTSEP)
220  /*
221  * Draw last separator before split
222  */
224  pastixSymbolDrawMap( pastix_data, "bsplit", ordeptr->sndenbr-1 );
225  }
226 #endif
227 
228 #if !defined(PASTIX_BLEND_PROPMAP_2STEPS)
229  /*
230  * Split the existing symbol matrix according to the number of candidates
231  * and cblk types.
232  * It takes the original symbol and candtab, and returns the new symbol and
233  * candtab. If the symbmtx is modified, the costmtx is updated, as well as
234  * the tree.
235  */
236  {
237  if( verbose > PastixVerboseYes ) {
238  pastix_print( procnum, 0, OUT_BLEND_SPLITSYMB );
239  }
240  clockStart(timer_current);
241 
242  splitSymbol(&ctrl, symbmtx);
243 
244  clockStop(timer_current);
245  if( verbose > PastixVerboseNo ) {
246  pastix_print( procnum, 0, OUT_BLEND_SPLITSYMB_TIME,
247  clockVal(timer_current) );
248  }
249  }
250 #if defined(PASTIX_ORDER_DRAW_LASTSEP)
251  /*
252  * Draw last separator after split
253  */
254  pastixSymbolDrawMap( pastix_data, "asplit", ordeptr->sndenbr-1 );
255 #endif
256 #endif
257 
258  /* Build the elimination tree from the symbolic partition */
259  {
260  if( verbose > PastixVerboseYes) {
261  pastix_print( procnum, 0, OUT_BLEND_ELIMTREE );
262  }
263  clockStart(timer_current);
264 
265  ctrl.etree = eTreeBuild(symbmtx);
266 
267  clockStop(timer_current);
268  if( verbose > PastixVerboseNo ) {
269  pastix_print( procnum, 0, OUT_BLEND_ELIMTREE_TIME,
270  clockVal(timer_current) );
271  }
272  }
273 
274  /* Build the cost matrix from the symbolic partition */
275  {
276  if( verbose > PastixVerboseYes ) {
277  pastix_print( procnum, 0, OUT_BLEND_COSTMATRIX );
278  }
279  clockStart(timer_current);
280 
281  ctrl.costmtx = costMatrixBuild( symbmtx,
282  iparm[IPARM_FLOAT],
283  iparm[IPARM_FACTORIZATION] );
284 
285  clockStop(timer_current);
286  if( verbose > PastixVerboseNo ) {
287  pastix_print( procnum, 0, OUT_BLEND_COSTMATRIX_TIME,
288  clockVal(timer_current) );
289  }
290  }
291 
292  /* Build the candtab array to store candidate information on each cblk */
293  {
294  ctrl.candtab = candInit( symbmtx->cblknbr );
295 
296  /* Initialize costs in elimination tree and candtab array for proportionnal mapping */
299  ctrl.candtab,
300  ctrl.etree,
301  symbmtx,
302  ctrl.costmtx );
303 
304  if( verbose > PastixVerboseNo ) {
305  pastix_print( procnum, 0, OUT_BLEND_ELIMTREE_TOTAL_COST,
306  ctrl.etree->nodetab[ eTreeRoot(ctrl.etree) ].subcost,
307  ctrl.etree->nodetab[ eTreeRoot(ctrl.etree) ].subpath );
308  }
309  }
310 
311  /* Proportional mapping step that distributes the candidates over the tree */
312  {
313  if( verbose > PastixVerboseYes ) {
314  pastix_print( procnum, 0, OUT_BLEND_PROPMAP );
315  }
316  clockStart(timer_current);
317 
318  propMappTree( ctrl.candtab,
319  ctrl.etree,
320  ctrl.total_nbcores,
321  ctrl.nocrossproc,
322  ctrl.allcand );
323 
324  /* Set the cluster candidates according to the processor candidates */
325  candSetClusterCand( ctrl.candtab, symbmtx->cblknbr,
326  ctrl.core2clust, ctrl.total_nbcores );
327 
328  clockStop(timer_current);
329 
330  if( verbose > PastixVerboseNo ) {
331  pastix_print( procnum, 0, OUT_BLEND_PROPMAP_TIME,
332  clockVal(timer_current) );
333  }
334 
335  /* Let's check the result if ask */
336  if ( ctrl.debug ) {
337  assert( candCheck( ctrl.candtab, symbmtx ) );
338  }
339  }
340 
341 #if defined(PASTIX_BLEND_PROPMAP_2STEPS)
342  /* Dump the dot of the eTree before split */
343  if (( verbose > PastixVerboseYes ) &&
344  ( pastix_data->procnum == 0 ) )
345  {
346  FILE *stream = NULL;
347  stream = pastix_fopenw( pastix_data->dir_global, "etree.dot", "w" );
348  if ( stream ) {
349  candGenDotLevel( ctrl.etree, ctrl.candtab, stream, 5);
350  fclose(stream);
351  }
352 
353  stream = pastix_fopenw( pastix_data->dir_global, "ctree.dot", "w" );
354  if ( stream ) {
355  candGenCompressedDot( ctrl.etree, ctrl.candtab, stream );
356  fclose(stream);
357  }
358  }
359 
360  /*
361  * Split the existing symbol matrix according to the number of candidates
362  * and cblk types.
363  * It takes the original symbol and candtab, and returns the new symbol and
364  * candtab. If the symbmtx is modified, the costmtx is updated, as well as
365  * the tree.
366  */
367  {
368  if( verbose > PastixVerboseYes ) {
369  pastix_print( procnum, 0, OUT_BLEND_SPLITSYMB );
370  }
371  clockStart(timer_current);
372 
373  ctrl.up_after_split = 1;
374  splitSymbol(&ctrl, symbmtx);
375  ctrl.up_after_split = 0;
376 
377  clockStop(timer_current);
378  if( verbose > PastixVerboseNo ) {
379  pastix_print( procnum, 0, OUT_BLEND_SPLITSYMB_TIME,
380  clockVal(timer_current) );
381  }
382  }
383 
384  /* Dump the dot of the eTree after split */
385  if (( verbose > PastixVerboseYes ) &&
386  ( pastix_data->procnum == 0 ) )
387  {
388  FILE *stream = NULL;
389  stream = pastix_fopenw( pastix_data->dir_global, "etree_split.dot", "w" );
390  if ( stream ) {
391  candGenDot( ctrl.etree, ctrl.candtab, stream );
392  fclose(stream);
393  }
394 
395  stream = pastix_fopenw( pastix_data->dir_global, "ctree_split.dot", "w" );
396  if ( stream ) {
397  candGenCompressedDot( ctrl.etree, ctrl.candtab, stream );
398  fclose(stream);
399  }
400  }
401 #endif
402 
403  if(ctrl.count_ops && (ctrl.leader == procnum)) {
404  pastixSymbolGetFlops( symbmtx,
405  iparm[IPARM_FLOAT],
406  iparm[IPARM_FACTORIZATION],
407  &(dparm[DPARM_FACT_THFLOPS]),
408  &(dparm[DPARM_FACT_RLFLOPS]) );
409  }
410 
411 #if defined(PASTIX_SYMBOL_DUMP_SYMBMTX)
412  {
413  FILE *stream = NULL;
414  pastix_gendirectories( pastix_data );
415  stream = pastix_fopenw( pastix_data->dir_global, "symbol_after_split.eps", "w" );
416  if ( stream ) {
417  pastixSymbolDraw( symbmtx, stream );
418  fclose( stream );
419  }
420  }
421 #endif
422 
423  if (0)
424  {
425  FILE *file = NULL;
426  pastix_gendirectories( pastix_data );
427  file = pastix_fopenw( pastix_data->dir_global, "symbol_after_split", "w" );
428  if ( file ) {
429  pastixSymbolSave( symbmtx, file );
430  fclose( file );
431  }
432  }
433 
434  /* Simulation step to perform the data distribution over the nodes and compute the priorities of each task */
435  {
436  if( verbose > PastixVerboseYes ) {
437  pastix_print( procnum, 0, OUT_BLEND_BUILDSIMU );
438  }
439  clockStart(timer_current);
440 
441  /* Initialize simulation structure */
442  MALLOC_INTERN(simuctrl, 1, SimuCtrl);
443  simuInit( simuctrl, symbmtx, ctrl.candtab,
444  ctrl.clustnbr,
445  ctrl.total_nbcores );
446 
447  /* Create task array */
448  simuTaskBuild( simuctrl, symbmtx );
449  clockStop(timer_current);
450 
451  if( verbose > PastixVerboseNo ) {
452  pastix_print( procnum, 0, OUT_BLEND_BUILDSIMU_TIME,
453  clockVal(timer_current),
454  (long)simuctrl->tasknbr );
455  }
456 
457  if( verbose > PastixVerboseYes ) {
458  pastix_print( procnum, 0, OUT_BLEND_SIMU );
459  }
460  clockStart(timer_current);
461 
462  simuRun( simuctrl, &ctrl, symbmtx );
463 
464  clockStop(timer_current);
465  if( verbose > PastixVerboseNo ) {
466  pastix_print( procnum, 0, OUT_BLEND_SIMU_TIME,
467  clockVal(timer_current) );
468  }
469  }
470 
471 #ifdef PASTIX_DYNSCHED
472  /**
473  * If dynamic scheduling is asked, let's perform a second proportionnal
474  * mapping step:
475  * - this is made only on local data
476  * - no crossing is allowed between branches
477  */
478  {
479  clockStart(timer_current);
480 
481  splitPartLocal( &ctrl, simuctrl, symbmtx );
482 
483  clockStop(timer_current);
484  if( verbose>PastixVerboseNo)
485  pastix_print( procnum, 0, " -- Split build at time: %g --\n", clockVal(timer_current));
486  }
487 #endif
488 
489  /* CostMatrix and Elimination Tree are no further used */
490  costMatrixExit( ctrl.costmtx );
491  memFree_null( ctrl.costmtx );
492  eTreeExit( ctrl.etree );
493 
494  /**
495  * Generate the final solver structure that collects data from the different
496  * simulation structures and convert to local numbering
497  */
498  {
499  if( verbose > PastixVerboseYes ) {
500  pastix_print( procnum, 0, OUT_BLEND_SOLVER );
501  }
502  clockStart(timer_current);
503 
504  solverMatrixGenSeq( solvmtx_glob, symbmtx,
505  pastix_data->ordemesh, simuctrl, &ctrl,
506  pastix_data->inter_node_comm, pastix_data->isched, 0 );
507 
508  solverMatrixGen( solvmtx_loc, symbmtx,
509  pastix_data->ordemesh, simuctrl, &ctrl,
510  pastix_data->inter_node_comm, pastix_data->isched );
511 
512  clockStop(timer_current);
513  if( verbose > PastixVerboseNo ) {
514  pastix_print( procnum, 0, OUT_BLEND_SOLVER_TIME,
515  clockVal(timer_current) );
516  if( verbose > PastixVerboseYes ) {
517  solverPrintStats( solvmtx_loc );
518  }
519  }
520  }
521 
522  /* Free allocated memory */
523  simuExit(simuctrl, ctrl.clustnbr, ctrl.total_nbcores, ctrl.local_nbctxts);
524 
525  /* Realloc solver memory in a contiguous way */
526  {
527  solverRealloc(solvmtx_loc);
528  solverRealloc(solvmtx_glob);
529 #if defined(PASTIX_DEBUG_BLEND)
530  if (!ctrl.ricar) {
531  if( verbose > PastixVerboseYes ) {
532  pastix_print( procnum, 0, OUT_BLEND_CHKSOLVER );
533  }
534  solverCheck(solvmtx_loc);
535  solverCheck(solvmtx_glob);
536  }
537 #endif
538  }
539 
540  blendCtrlExit(&ctrl);
541 
542  /* End timing */
543  clockStop(timer_all);
544  pastix_data->dparm[DPARM_BLEND_TIME] = clockVal(timer_all);
545 
546  if (verbose > PastixVerboseYes) {
547  pastixSymbolPrintStats( pastix_data->symbmtx );
548  }
549 
550  /* Symbol is not used anymore */
551  pastixSymbolExit(pastix_data->symbmtx);
552  memFree_null(pastix_data->symbmtx);
553 
554  /* Computes and print statistics */
555  {
556  if (iparm[IPARM_FACTORIZATION] == PastixFactLU)
557  {
558  iparm[IPARM_NNZEROS] *= 2;
559  dparm[DPARM_PRED_FACT_TIME] *= 2.;
560  }
561  dparm[DPARM_SOLV_FLOPS] = (double)iparm[IPARM_NNZEROS]; /* number of operations for solve */
562 
563  iparm[IPARM_NNZEROS_BLOCK_LOCAL] = solvmtx_loc->coefnbr;
564 
565  /* Affichage */
566  dparm[DPARM_FILL_IN] = (double)(iparm[IPARM_NNZEROS]) / (double)(pastix_data->csc->gnnzexp);
567 
568  if (verbose > PastixVerboseNot) {
569  pastix_print( procnum, 0, OUT_BLEND_SUMMARY,
570  (long)iparm[IPARM_NNZEROS],
571  (double)dparm[DPARM_FILL_IN],
572  pastixFactotypeStr( iparm[IPARM_FACTORIZATION] ),
573  pastix_print_value( dparm[DPARM_FACT_THFLOPS] ),
574  pastix_print_unit( dparm[DPARM_FACT_THFLOPS] ),
575  PERF_MODEL, dparm[DPARM_PRED_FACT_TIME],
576  dparm[DPARM_BLEND_TIME] );
577 
578  if (0) /* TODO: consider that when moving to distributed */
579  {
580  if ((verbose > PastixVerboseNo))
581  {
582  fprintf(stdout, NNZERO_WITH_FILLIN, (int)procnum, (long)iparm[IPARM_NNZEROS_BLOCK_LOCAL]);
583  }
584  if (verbose > PastixVerboseYes)
585  {
586  PASTIX_Comm pastix_comm = pastix_data->inter_node_comm;
587  pastix_int_t sizeL = solvmtx_loc->coefnbr;
588  pastix_int_t sizeG = 0;
589 
590  MPI_Reduce(&sizeL, &sizeG, 1, PASTIX_MPI_INT, MPI_MAX, 0, pastix_comm);
591 
592  if (procnum == 0)
593  {
594  sizeG *= sizeof(pastix_complex64_t);
595  if (iparm[IPARM_FACTORIZATION] == PastixFactLU) {
596  sizeG *= 2;
597  }
598 
599  fprintf( stdout, OUT_COEFSIZE,
600  pastix_print_value(sizeG),
601  pastix_print_unit(sizeG) );
602  }
603  }
604  }
605  }
606  }
607 
608  /* Backup the solver for debug */
609  if (0)
610  {
611  FILE *file = NULL;
612  pastix_gendirectories( pastix_data );
613  file = pastix_fopenw( pastix_data->dir_global, "solvergen", "w" );
614  if ( file ) {
615  solverSave( solvmtx_loc, file );
616  fclose(file);
617  }
618  }
619 
620  /* Invalidate following steps, and add analyze step to the ones performed */
621  pastix_data->steps &= ~( STEP_CSC2BCSC |
622  STEP_BCSC2CTAB |
623  STEP_NUMFACT |
624  STEP_SOLVE |
625  STEP_REFINE );
626  pastix_data->steps |= STEP_ANALYSE;
627 
628  return PASTIX_SUCCESS;
629 }
solverSave
int solverSave(const SolverMatrix *solvptr, FILE *stream)
Save a solver matrix structure into a file.
Definition: solver_io.c:261
blendctrl_s::clustnbr
pastix_int_t clustnbr
Definition: blendctrl.h:71
solver.h
splitSymbol
void splitSymbol(BlendCtrl *ctrl, symbol_matrix_t *symbmtx)
Split the column blocks of the symbol matrix to generate parallelism.
Definition: splitsymbol.c:515
symbol_matrix_s
Symbol matrix structure.
Definition: symbol.h:75
blendctrl_s::local_nbctxts
pastix_int_t local_nbctxts
Definition: blendctrl.h:76
solverMatrixGenSeq
int solverMatrixGenSeq(SolverMatrix *solvmtx, const symbol_matrix_t *symbmtx, const pastix_order_t *ordeptr, const SimuCtrl *simuctl, const BlendCtrl *ctrl, PASTIX_Comm comm, isched_t *isched, pastix_int_t is_dbg)
Initialize the solver matrix structure in sequential.
Definition: solver_matrix_gen.c:424
blendctrl_s::up_after_split
pastix_int_t up_after_split
Definition: blendctrl.h:55
extendVector.h
pastixSymbolSave
int pastixSymbolSave(const symbol_matrix_t *symbptr, FILE *stream)
Save the given block matrix structure to the given stream.
Definition: symbol_io.c:147
simuExit
void simuExit(SimuCtrl *, pastix_int_t, pastix_int_t, pastix_int_t)
Free the simulation structure.
Definition: simu.c:259
etree_s::nodetab
eTreeNode_t * nodetab
Definition: elimintree.h:42
candInit
Cand * candInit(pastix_int_t cblknbr)
Initialize the candtab array with default values.
Definition: cand.c:48
pastixSymbolExit
void pastixSymbolExit(symbol_matrix_t *symbptr)
Free the content of symbolic matrix.
Definition: symbol.c:140
pastix_gendirectories
void pastix_gendirectories(pastix_data_t *pastix_data)
Generate a unique temporary directory to store output files.
Definition: api.c:69
pastix_order_s
Order structure.
Definition: order.h:45
IPARM_NNZEROS_BLOCK_LOCAL
@ IPARM_NNZEROS_BLOCK_LOCAL
Definition: api.h:41
costMatrixBuild
CostMatrix * costMatrixBuild(const symbol_matrix_t *symbmtx, pastix_coeftype_t flttype, pastix_factotype_t factotype)
Build the cost matrix structure from the symbol matrix structure.
Definition: cost.c:91
symbol_matrix_s::cblknbr
pastix_int_t cblknbr
Definition: symbol.h:77
blendCtrlExit
void blendCtrlExit(BlendCtrl *)
Finalize the Blend control structure.
Definition: blendctrl.c:303
blendctrl_s
The type and structure definitions.
Definition: blendctrl.h:28
blendCtrlInit
int blendCtrlInit(pastix_data_t *pastix_data, BlendCtrl *ctrl)
Initialize the Blend control structure.
Definition: blendctrl.c:162
blendctrl_s::core2clust
pastix_int_t * core2clust
Definition: blendctrl.h:79
IPARM_NNZEROS
@ IPARM_NNZEROS
Definition: api.h:40
DPARM_PRED_FACT_TIME
@ DPARM_PRED_FACT_TIME
Definition: api.h:162
solverMatrixGen
int solverMatrixGen(SolverMatrix *solvmtx, const symbol_matrix_t *symbmtx, const pastix_order_t *ordeptr, const SimuCtrl *simuctl, const BlendCtrl *ctrl, PASTIX_Comm comm, isched_t *isched)
Initialize the solver matrix structure.
Definition: solver_matrix_gen.c:81
etree_node_s::subcost
double subcost
Definition: elimintree.h:28
symbol_matrix_s::dof
pastix_int_t dof
Definition: symbol.h:85
pastixSymbolDraw
int pastixSymbolDraw(const symbol_matrix_t *symbptr, FILE *stream)
Export the symbol structure in a PostScript format.
Definition: symbol_draw.c:248
simuctrl_s
Control structure for the simulation.
Definition: simu.h:116
solverPrintStats
void solverPrintStats(const SolverMatrix *solvptr)
Print statistical information about the solver matrix structure.
Definition: solver.c:196
DPARM_FACT_THFLOPS
@ DPARM_FACT_THFLOPS
Definition: api.h:165
blendctrl_s::total_nbcores
pastix_int_t total_nbcores
Definition: blendctrl.h:72
pastix_subtask_blend
int pastix_subtask_blend(pastix_data_t *pastix_data)
Compute the proportional mapping and the final solver structure.
Definition: pastix_subtask_blend.c:121
candCheck
int candCheck(const Cand *candtab, const symbol_matrix_t *symbmtx)
Check the correctness of the computed candidates.
Definition: cand.c:204
blendctrl_s::width_tasks2d
pastix_int_t width_tasks2d
Definition: blendctrl.h:63
blendctrl_s::leader
pastix_int_t leader
Definition: blendctrl.h:34
blendctrl_s::costmtx
CostMatrix * costmtx
Definition: blendctrl.h:97
pastixSymbolGetFlops
void pastixSymbolGetFlops(const symbol_matrix_t *symbmtx, pastix_coeftype_t flttype, pastix_factotype_t factotype, double *thflops, double *rlflops)
Computes the number of theoretical and real flops.
Definition: symbol_cost.c:424
IPARM_COMPRESS_MIN_WIDTH
@ IPARM_COMPRESS_MIN_WIDTH
Definition: api.h:126
IPARM_SPLITTING_STRATEGY
@ IPARM_SPLITTING_STRATEGY
Definition: api.h:80
blendctrl.h
blendctrl_s::nocrossproc
pastix_int_t nocrossproc
Definition: blendctrl.h:41
PastixVerboseNot
@ PastixVerboseNot
Definition: api.h:207
DPARM_FILL_IN
@ DPARM_FILL_IN
Definition: api.h:153
PASTIX_SUCCESS
@ PASTIX_SUCCESS
Definition: api.h:344
candBuild
void candBuild(pastix_int_t level_tasks2d, pastix_int_t width_tasks2d, pastix_compress_when_t lr_when, pastix_int_t lr_width, Cand *candtab, EliminTree *etree, const symbol_matrix_t *symbmtx, const CostMatrix *costmtx)
Finish to build the candtab array for the proportionnal mapping.
Definition: cand.c:709
PastixVerboseNo
@ PastixVerboseNo
Definition: api.h:208
cand.h
pastix_fopenw
FILE * pastix_fopenw(const char *dirname, const char *filename, const char *mode)
Open a file in the unique directory of the pastix instance.
Definition: api.c:232
candSetClusterCand
void candSetClusterCand(Cand *candtab, pastix_int_t cblknbr, const pastix_int_t *core2clust, pastix_int_t coresnbr)
Set the clusters candidates from the cores canditates.
Definition: cand.c:158
IPARM_COMPRESS_WHEN
@ IPARM_COMPRESS_WHEN
Definition: api.h:128
elimintree.h
solverCheck
int solverCheck(const SolverMatrix *solvmtx)
Checks the consistency of the given solver matrix structure.
Definition: solver_check.c:54
solverExit
void solverExit(SolverMatrix *solvmtx)
Free the content of the solver matrix structure.
Definition: solver.c:143
etree_node_s::subpath
double subpath
Definition: elimintree.h:29
pastixSymbolCheck
int pastixSymbolCheck(const symbol_matrix_t *symbptr)
Checks the consistency of the given symbolic block matrix.
Definition: symbol_check.c:47
blendctrl_s::local_nbcores
pastix_int_t local_nbcores
Definition: blendctrl.h:74
IPARM_FLOAT
@ IPARM_FLOAT
Definition: api.h:142
eTreeExit
void eTreeExit(EliminTree *)
Free the elimination tree structure.
Definition: elimintree.c:85
candGenCompressedDot
void candGenCompressedDot(const EliminTree *etree, const Cand *candtab, FILE *stream)
Print the compressed elimination tree in a dot file, where all nodes with the same candidates are mer...
Definition: cand_gendot.c:441
simuInit
pastix_int_t simuInit(SimuCtrl *, const symbol_matrix_t *, const Cand *, pastix_int_t, pastix_int_t)
Initialize the simulation structures.
Definition: simu.c:66
DPARM_SOLV_FLOPS
@ DPARM_SOLV_FLOPS
Definition: api.h:168
perf.h
blendctrl_s::count_ops
pastix_int_t count_ops
Definition: blendctrl.h:29
blend.h
graph.h
simuRun
void simuRun(SimuCtrl *, const BlendCtrl *, const symbol_matrix_t *)
Run the simulation to map the data on the nodes.
Definition: simu_run.c:979
candGenDot
void candGenDot(const EliminTree *etree, const Cand *candtab, FILE *stream)
Print the elimination tree in a dot file.
Definition: cand_gendot.c:253
blendctrl_s::etree
EliminTree * etree
Definition: blendctrl.h:96
eTreeBuild
EliminTree * eTreeBuild(const symbol_matrix_t *)
Build the elimination tree.
Definition: elimintree.c:474
simuTaskBuild
void simuTaskBuild(SimuCtrl *, const symbol_matrix_t *)
Initialize the tasktab array of the simulation structure.
Definition: simu_task.c:49
costMatrixExit
void costMatrixExit(CostMatrix *costmtx)
Free the cost matrix structure.
Definition: cost.c:57
IPARM_VERBOSE
@ IPARM_VERBOSE
Definition: api.h:36
blendctrl_s::local_nbthrds
pastix_int_t local_nbthrds
Definition: blendctrl.h:75
PastixFactLU
@ PastixFactLU
Definition: api.h:302
eTreeRoot
static pastix_int_t eTreeRoot(const EliminTree *etree)
Return the root of the elimination tree.
Definition: elimintree.h:126
candGenDotLevel
void candGenDotLevel(const EliminTree *etree, const Cand *candtab, FILE *stream, pastix_int_t level)
Print the first levels of the elimination tree in a dot file.
Definition: cand_gendot.c:405
solverRealloc
void solverRealloc(SolverMatrix *solvptr)
Realloc in a contiguous way a given solver structure.
Definition: solver_copy.c:205
DPARM_BLEND_TIME
@ DPARM_BLEND_TIME
Definition: api.h:160
simu.h
DPARM_FACT_RLFLOPS
@ DPARM_FACT_RLFLOPS
Definition: api.h:166
pastix_order_s::sndenbr
pastix_int_t sndenbr
Definition: order.h:54
graphExit
void graphExit(pastix_graph_t *graph)
Free the content of the graph structure.
Definition: graph.c:41
blendctrl_s::level_tasks2d
pastix_int_t level_tasks2d
Definition: blendctrl.h:62
blendctrl_s::debug
pastix_int_t debug
Definition: blendctrl.h:30
pastixSymbolPrintStats
void pastixSymbolPrintStats(const symbol_matrix_t *symbptr)
Print statistical information about the symbolic matrix structure.
Definition: symbol.c:392
simuctrl_s::tasknbr
pastix_int_t tasknbr
Definition: simu.h:119
blendctrl_s::candtab
Cand * candtab
Definition: blendctrl.h:98
cost.h
PastixSplitKwayProjections
@ PastixSplitKwayProjections
Definition: api.h:395
PASTIX_ERR_BADPARAMETER
@ PASTIX_ERR_BADPARAMETER
Definition: api.h:351
blendctrl_s::ricar
pastix_int_t ricar
Definition: blendctrl.h:33
IPARM_FACTORIZATION
@ IPARM_FACTORIZATION
Definition: api.h:99
PastixVerboseYes
@ PastixVerboseYes
Definition: api.h:209
propMappTree
void propMappTree(Cand *candtab, const EliminTree *etree, pastix_int_t candnbr, int nocrossproc, int allcand)
Apply the proportional mapping algorithm.
Definition: propmap.c:415
blendctrl_s::allcand
pastix_int_t allcand
Definition: blendctrl.h:40