PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
pastix_subtask_blend.c
Go to the documentation of this file.
1/**
2 *
3 * @file pastix_subtask_blend.c
4 *
5 * PaStiX analyse blend subtask function
6 *
7 * @copyright 2004-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Pascal Henon
12 * @author Xavier Lacoste
13 * @author Pierre Ramet
14 * @author Mathieu Faverge
15 * @author Gregoire Pichon
16 * @author Tony Delarue
17 * @date 2024-07-05
18 *
19 **/
20#include "common.h"
21#include <spm.h>
22#include "graph/graph.h"
23#include "order/order_internal.h"
24#include "blend/perf.h"
25#include "blend/elimintree.h"
26#include "blend/cost.h"
27#include "blend/cand.h"
28#include "blend/extendVector.h"
29#include "blendctrl.h"
30#include "blend/solver.h"
31#include "blend/simu.h"
32#include "blend/blend.h"
33
34/**
35 *******************************************************************************
36 *
37 * @ingroup pastix_analyze
38 *
39 * @brief Compute the proportional mapping and the final solver structure.
40 *
41 * This function computes the structural information required to factorize
42 * and solve the problem. It requires an ordering structure, as well as the
43 * symbolic factorization structure. It computes a solver structure that contains
44 * all the information, both architecture- and problem-dependent, needed to
45 * efficiently solve the system.
46 * On exit, the symbol structure is destroyed and only local uncompressed
47 * information is stored in the solver structure.
48 *
49 * This routine is affected by, or returns, the following parameters:
50 * IPARM_CUDA_NBR, IPARM_TASKS2D_LEVEL, IPARM_TASKS2D_WIDTH,
51 * IPARM_COMPRESS_WHEN, IPARM_COMPRESS_MIN_WIDTH, IPARM_DOF_NBR,
52 * IPARM_FACTORIZATION, IPARM_FLOAT, IPARM_GPU_CRITERIUM,
53 * IPARM_GPU_MEMORY_PERCENTAGE, IPARM_GPU_NBR, IPARM_INCOMPLETE,
54 * IPARM_MAX_BLOCKSIZE, IPARM_MIN_BLOCKSIZE, IPARM_NNZEROS,
55 * IPARM_NNZEROS_BLOCK_LOCAL, IPARM_STARPU, IPARM_THREAD_NBR, IPARM_VERBOSE
56 *
57 * DPARM_BLEND_TIME, DPARM_FACT_FLOPS, DPARM_FACT_RLFLOPS,
58 * DPARM_FACT_THFLOPS, DPARM_FILL_IN, DPARM_PRED_FACT_TIME, DPARM_SOLV_FLOPS
59 *
60 * This function is constructed as a sequence of steps that are described below.
61 *
62 * #### Construct an elimination tree
63 * An elimination tree structure is constructed out of the symbol matrix to be
64 * able to traverse the tree in a top-down fashion for the proportional
65 * mapping step.
66 *
67 * #### Construct the cost matrix
68 * For each column block and each block of the symbolic structure, the cost of
69 * each operation is computed to evaluate the cost of each branch of the
70 * elimination tree. The cost of a block is the cost of the update generated
71 * out of this block when used as the B matrix in the GEMM update. The cost of
72 * a column block is the total cost associated to it: factorization, solve,
73 * and update in a right-looking algorithm. This means that the update cost is
74 * the one generated by this column block, and not the one received by the
75 * column block.
76 *
77 * #### Construct the candidate array
78 * Dispatch properties such as low-rank compression and 2D tasks from the top to
79 * the bottom of the tree. The candidate array and the elimination tree are computed,
80 * and updated, simultaneously with the costs computed previously.
81 * This step is impacted by IPARM_TASKS2D_LEVEL and IPARM_TASKS2D_WIDTH, which
82 * define the minimal width of nodes which can forward the 2D tasks property to
83 * their sons.
84 * Similarly, IPARM_COMPRESS_WHEN and IPARM_COMPRESS_MIN_WIDTH define the
85 * minimal width of nodes which can forward the low-rank property to their sons.
86 *
87 * #### Proportional Mapping
88 * This step performs the actual proportional mapping algorithm to define the
89 * subset of candidates to compute each supernode.
90 *
91 * #### Split symbol matrix
92 * Once the proportional mapping is performed on the original set of
93 * supernodes, the symbol matrix is split in smaller supernodes/blocks to
94 * allow for more parallelism.
95 *
96 * #### Simulation
97 * The simulation step defines the actual mapping per core of each supernode
98 * based on a simulation of the numerical factorization where each task is
99 * attributed to the first resource available to compute it.
100 *
101 * #### Solver structure generation
102 * Out of the previous step, the solver generator builds the local structure
103 * that is required for the numerical factorization and solve steps. It is
104 * mainly represented by a CSC-like structure of the local blocks, linked to a
105 * CSR for the solve step and the structure that will hold the coefficients
106 * of the factorized matrix.
107 *
108 *******************************************************************************
109 *
110 * @param[inout] pastix_data
111 * The pastix_data structure that describes the solver instance.
112 *
113 *******************************************************************************
114 *
115 * @retval PASTIX_SUCCESS on successful exit
116 * @retval PASTIX_ERR_BADPARAMETER if one parameter is incorrect.
117 * @retval PASTIX_ERR_OUTOFMEMORY if one allocation failed.
118 *
119 *******************************************************************************/
/*
 * Listing of pastix_subtask_blend(): runs the blend analysis on pastix_data and
 * stores the resulting solver structures in pastix_data->solvloc/solvglob.
 *
 * NOTE(review): the embedded line numbers skip values (120 -> 122, 140 -> 142,
 * 144 -> 146, 156 -> 158, 160 -> 162, 164 -> 166, 224 -> 226, 298 -> 301), so
 * some original lines are absent from this listing -- presumably the parameter
 * list, the statement following each pastix_print_error() call, the condition
 * opening the "bsplit" draw block and the start of the call whose arguments
 * begin at 301. Check the real source file before relying on this text.
 */
120int
122{
123 BlendCtrl ctrl;
124 pastix_int_t procnum, verbose;
125 pastix_int_t *iparm;
126 double *dparm;
127 pastix_order_t *ordeptr;
128 symbol_matrix_t *symbmtx;
129 SolverMatrix *solvmtx_loc;
130 SolverMatrix *solvmtx_glob;
131 SimuCtrl *simuctrl;
132 double timer_all = 0.;
133 double timer_current = 0.;
134 size_t nnz;
135
136 /*
137 * Check parameters
138 */
139 if (pastix_data == NULL) {
140 pastix_print_error( "pastix_subtask_blend: wrong pastix_data parameter" );
142 }
143 if ( !(pastix_data->steps & STEP_SYMBFACT) ) {
144 pastix_print_error( "pastix_subtask_blend: pastix_subtask_symbfact() has to be called before calling this function" );
146 }
147
148 iparm = pastix_data->iparm;
149 dparm = pastix_data->dparm;
150 procnum = pastix_data->inter_node_procnum;
151 ordeptr = pastix_data->ordemesh;
152 symbmtx = pastix_data->symbmtx;
153 verbose = iparm[IPARM_VERBOSE];
154
155 if (ordeptr == NULL) {
156 pastix_print_error( "pastix_subtask_blend: the pastix_data->ordemesh field has not been initialized, pastix_task_order should be called first" );
158 }
159 if (symbmtx == NULL) {
160 pastix_print_error( "pastix_subtask_blend: the pastix_data->symbmtx has not been initialized, pastix_task_symbfact should be called first" );
162 }
163 if (symbmtx->dof < 1) {
164 pastix_print_error( "pastix_subtask_blend: Dof number has not been correctly initialized" );
166 }
167
168 /* Free graph structure, we don't need it anymore */
169 if ( pastix_data->graph != NULL ) {
170 graphExit( pastix_data->graph );
171 memFree_null( pastix_data->graph );
172 }
173
174 /* Cleanup the solver structures if we already computed them (repeated call) */
175 if ( pastix_data->solvloc != NULL ) {
176 solverExit( pastix_data->solvloc );
177 memFree_null( pastix_data->solvloc );
178 }
179 if ( pastix_data->solvglob != NULL ) {
180 solverExit( pastix_data->solvglob );
181 memFree_null( pastix_data->solvglob );
182 }
183 pastix_data->solvmatr = NULL;
184
    /*
     * NOTE(review): neither malloc() result is checked before being stored in
     * pastix_data and handed to the solver generators below; confirm whether an
     * out-of-memory path (the documented PASTIX_ERR_OUTOFMEMORY) is needed here.
     */
185 solvmtx_loc = (SolverMatrix*)malloc(sizeof(SolverMatrix));
186 solvmtx_glob = (SolverMatrix*)malloc(sizeof(SolverMatrix));
187 pastix_data->solvloc = solvmtx_loc;
188 pastix_data->solvglob = solvmtx_glob;
189
190 /* The problem is more likely to be solved by the local problem, may change later */
191 pastix_data->solvmatr = pastix_data->solvloc;
192
193 /* Start the analyze step */
194 clockStart(timer_all);
195 if ( verbose > PastixVerboseNot ) {
196 pastix_print( procnum, 0, OUT_STEP_BLEND );
197 }
198
199 /* Create the control structure that parameterizes the analyze step */
200 blendCtrlInit( pastix_data, &ctrl );
201
202 if( verbose > PastixVerboseNo) {
203 pastix_print( procnum, 0, OUT_BLEND_CONF,
204 (long)ctrl.clustnbr, (long)ctrl.local_nbcores, (long)ctrl.local_nbthrds);
205 }
206
207 /* Prepare the directories for the output files if needed */
208 if ( verbose > PastixVerboseYes ) {
209 pastix_gendirectories( pastix_data );
210 }
211
212 /* Verify the coherence of the initial symbol matrix */
213 if(ctrl.debug)
214 {
215 if( verbose > PastixVerboseYes ) {
216 pastix_print( procnum, 0, OUT_BLEND_CHKSMBMTX );
217 }
218 pastixSymbolCheck(symbmtx);
219 }
220
221#if defined(PASTIX_ORDER_DRAW_LASTSEP)
222 /*
223 * Draw last separator before split
224 */
226 pastixSymbolDrawMap( pastix_data, "bsplit", ordeptr->sndenbr-1 );
227 }
228#endif
229
230#if !defined(PASTIX_BLEND_PROPMAP_2STEPS)
231 /*
232 * Default (single-step) path: the split is done here, before the
233 * elimination tree, cost matrix and candidate array are built.
234 * Split the existing symbol matrix according to the number of candidates
235 * and cblk types. It takes the original symbol and candtab, and returns the
236 * new symbol and candtab. If the symbmtx is modified, the costmtx is
237 * updated, as well as the tree.
237 */
238 {
239 if( verbose > PastixVerboseYes ) {
240 pastix_print( procnum, 0, OUT_BLEND_SPLITSYMB );
241 }
242 clockStart(timer_current);
243
244 splitSymbol(&ctrl, symbmtx);
245
246 clockStop(timer_current);
247 if( verbose > PastixVerboseNo ) {
248 pastix_print( procnum, 0, OUT_BLEND_SPLITSYMB_TIME,
249 clockVal(timer_current) );
250 }
251 }
252#if defined(PASTIX_ORDER_DRAW_LASTSEP)
253 /*
254 * Draw last separator after split
255 */
256 pastixSymbolDrawMap( pastix_data, "asplit", ordeptr->sndenbr-1 );
257#endif
258#endif
259
260 /* Build the elimination tree from the symbolic partition */
261 {
262 if( verbose > PastixVerboseYes) {
263 pastix_print( procnum, 0, OUT_BLEND_ELIMTREE );
264 }
265 clockStart(timer_current);
266
267 ctrl.etree = eTreeBuild(symbmtx);
268
269 clockStop(timer_current);
270 if( verbose > PastixVerboseNo ) {
271 pastix_print( procnum, 0, OUT_BLEND_ELIMTREE_TIME,
272 clockVal(timer_current) );
273 }
274 }
275
276 /* Build the cost matrix from the symbolic partition */
277 {
278 if( verbose > PastixVerboseYes ) {
279 pastix_print( procnum, 0, OUT_BLEND_COSTMATRIX );
280 }
281 clockStart(timer_current);
282
283 ctrl.costmtx = costMatrixBuild( symbmtx,
284 iparm[IPARM_FLOAT],
285 iparm[IPARM_FACTORIZATION] );
286
287 clockStop(timer_current);
288 if( verbose > PastixVerboseNo ) {
289 pastix_print( procnum, 0, OUT_BLEND_COSTMATRIX_TIME,
290 clockVal(timer_current) );
291 }
292 }
293
294 /* Build the candtab array to store candidate information on each cblk */
295 {
296 ctrl.candtab = candInit( symbmtx->cblknbr );
297
298 /* Initialize costs in elimination tree and candtab array for proportional mapping */
301 ctrl.candtab,
302 ctrl.etree,
303 symbmtx,
304 ctrl.costmtx );
305
306 if( verbose > PastixVerboseNo ) {
307 pastix_print( procnum, 0, OUT_BLEND_ELIMTREE_TOTAL_COST,
308 ctrl.etree->nodetab[ eTreeRoot(ctrl.etree) ].subcost,
309 ctrl.etree->nodetab[ eTreeRoot(ctrl.etree) ].subpath );
310 }
311 }
312
313 /* Proportional mapping step that distributes the candidates over the tree */
314 {
315 if( verbose > PastixVerboseYes ) {
316 pastix_print( procnum, 0, OUT_BLEND_PROPMAP );
317 }
318 clockStart(timer_current);
319
320 propMappTree( ctrl.candtab,
321 ctrl.etree,
322 ctrl.total_nbcores,
323 ctrl.nocrossproc,
324 ctrl.allcand );
325
326 /* Set the cluster candidates according to the processor candidates */
327 candSetClusterCand( ctrl.candtab, symbmtx->cblknbr,
328 ctrl.core2clust, ctrl.total_nbcores );
329
330 clockStop(timer_current);
331
332 if( verbose > PastixVerboseNo ) {
333 pastix_print( procnum, 0, OUT_BLEND_PROPMAP_TIME,
334 clockVal(timer_current) );
335 }
336
337 /* Check the result if requested (the check lives in an assert, so it vanishes with NDEBUG) */
338 if ( ctrl.debug ) {
339 assert( candCheck( ctrl.candtab, symbmtx ) );
340 }
341 }
342
343#if defined(PASTIX_BLEND_PROPMAP_2STEPS)
344 /* Dump the dot of the eTree before split */
345 if (( verbose > PastixVerboseYes ) &&
346 ( pastix_data->procnum == 0 ) )
347 {
348 FILE *stream = NULL;
349 stream = pastix_fopenw( pastix_data->dir_global, "etree.dot", "w" );
350 if ( stream ) {
351 candGenDotLevel( ctrl.etree, ctrl.candtab, stream, 5);
352 fclose(stream);
353 }
354
355 stream = pastix_fopenw( pastix_data->dir_global, "ctree.dot", "w" );
356 if ( stream ) {
357 candGenCompressedDot( ctrl.etree, ctrl.candtab, stream );
358 fclose(stream);
359 }
360 }
361
362 /*
363 * Two-step path: the split is done here, after the proportional mapping.
364 * Split the existing symbol matrix according to the number of candidates
365 * and cblk types. It takes the original symbol and candtab, and returns the
366 * new symbol and candtab. If the symbmtx is modified, the costmtx is
367 * updated, as well as the tree.
368 */
369 {
370 if( verbose > PastixVerboseYes ) {
371 pastix_print( procnum, 0, OUT_BLEND_SPLITSYMB );
372 }
373 clockStart(timer_current);
374
    /* up_after_split is raised only for the duration of this split call */
375 ctrl.up_after_split = 1;
376 splitSymbol(&ctrl, symbmtx);
377 ctrl.up_after_split = 0;
378
379 clockStop(timer_current);
380 if( verbose > PastixVerboseNo ) {
381 pastix_print( procnum, 0, OUT_BLEND_SPLITSYMB_TIME,
382 clockVal(timer_current) );
383 }
384 }
385
386 /* Dump the dot of the eTree after split */
387 if (( verbose > PastixVerboseYes ) &&
388 ( pastix_data->procnum == 0 ) )
389 {
390 FILE *stream = NULL;
391 stream = pastix_fopenw( pastix_data->dir_global, "etree_split.dot", "w" );
392 if ( stream ) {
393 candGenDot( ctrl.etree, ctrl.candtab, stream );
394 fclose(stream);
395 }
396
397 stream = pastix_fopenw( pastix_data->dir_global, "ctree_split.dot", "w" );
398 if ( stream ) {
399 candGenCompressedDot( ctrl.etree, ctrl.candtab, stream );
400 fclose(stream);
401 }
402 }
403#endif
404
    /* Flop counts are only computed on the leader process, and only when count_ops is set */
405 if(ctrl.count_ops && (ctrl.leader == procnum)) {
406 pastixSymbolGetFlops( symbmtx,
407 iparm[IPARM_FLOAT],
408 iparm[IPARM_FACTORIZATION],
409 &(dparm[DPARM_FACT_THFLOPS]),
410 &(dparm[DPARM_FACT_RLFLOPS]) );
411 }
412
413#if defined(PASTIX_SYMBOL_DUMP_SYMBMTX)
414 {
415 FILE *stream = NULL;
416 pastix_gendirectories( pastix_data );
417 stream = pastix_fopenw( pastix_data->dir_global, "symbol_after_split.eps", "w" );
418 if ( stream ) {
419 pastixSymbolDraw( symbmtx, stream );
420 fclose( stream );
421 }
422 }
423#endif
424
    /* Disabled dump of the symbol matrix: the constant-false condition means this block never runs */
425 if (0)
426 {
427 FILE *file = NULL;
428 pastix_gendirectories( pastix_data );
429 file = pastix_fopenw( pastix_data->dir_global, "symbol_after_split", "w" );
430 if ( file ) {
431 pastixSymbolSave( symbmtx, file );
432 fclose( file );
433 }
434 }
435
436 /* Simulation step to perform the data distribution over the nodes and compute the priorities of each task */
437 {
438 if( verbose > PastixVerboseYes ) {
439 pastix_print( procnum, 0, OUT_BLEND_BUILDSIMU );
440 }
441 clockStart(timer_current);
442
443 /* Initialize simulation structure */
444 MALLOC_INTERN(simuctrl, 1, SimuCtrl);
445 simuInit( simuctrl, symbmtx, ctrl.candtab,
446 ctrl.clustnbr,
447 ctrl.total_nbcores );
448
449 /* Create task array */
450 simuTaskBuild( simuctrl, symbmtx );
451 clockStop(timer_current);
452
453 if( verbose > PastixVerboseNo ) {
454 pastix_print( procnum, 0, OUT_BLEND_BUILDSIMU_TIME,
455 clockVal(timer_current),
456 (long)simuctrl->tasknbr );
457 }
458
459 if( verbose > PastixVerboseYes ) {
460 pastix_print( procnum, 0, OUT_BLEND_SIMU );
461 }
462 clockStart(timer_current);
463
464 simuRun( simuctrl, &ctrl, symbmtx );
465
466 clockStop(timer_current);
467 if( verbose > PastixVerboseNo ) {
468 pastix_print( procnum, 0, OUT_BLEND_SIMU_TIME,
469 clockVal(timer_current) );
470 }
471 }
472
473#ifdef PASTIX_DYNSCHED
474 /**
475 * If dynamic scheduling is requested, perform a second proportional
476 * mapping step:
477 * - this is made only on local data
478 * - no crossing is allowed between branches
479 */
480 {
481 clockStart(timer_current);
482
483 splitPartLocal( &ctrl, simuctrl, symbmtx );
484
485 clockStop(timer_current);
486 if( verbose>PastixVerboseNo)
487 pastix_print( procnum, 0, " -- Split build at time: %g --\n", clockVal(timer_current));
488 }
489#endif
490
491 /* CostMatrix and Elimination Tree are no longer used */
492 costMatrixExit( ctrl.costmtx );
493 memFree_null( ctrl.costmtx );
    /* NOTE(review): unlike costmtx, ctrl.etree is not reset after eTreeExit(); confirm nothing reads it afterwards */
494 eTreeExit( ctrl.etree );
495
496 /**
497 * Generate the final solver structure that collects data from the different
498 * simulation structures and convert to local numbering
499 */
500 {
501 if( verbose > PastixVerboseYes ) {
502 pastix_print( procnum, 0, OUT_BLEND_SOLVER );
503 }
504 clockStart(timer_current);
505
506 solverMatrixGenSeq( solvmtx_glob, symbmtx,
507 pastix_data->ordemesh, simuctrl, &ctrl,
508 pastix_data->inter_node_comm, pastix_data->isched, 0 );
509
510 solverMatrixGen( solvmtx_loc, symbmtx,
511 pastix_data->ordemesh, simuctrl, &ctrl,
512 pastix_data->inter_node_comm, pastix_data->isched );
513
514 clockStop(timer_current);
515 if( verbose > PastixVerboseNo ) {
516 pastix_print( procnum, 0, OUT_BLEND_SOLVER_TIME,
517 clockVal(timer_current) );
518 if( verbose > PastixVerboseYes ) {
519 solverPrintStats( solvmtx_loc );
520 }
521 }
522 }
523
524 /* Free allocated memory */
525 simuExit(simuctrl, ctrl.clustnbr, ctrl.total_nbcores, ctrl.local_nbctxts);
526
527 /* Realloc solver memory in a contiguous way */
528 {
529 solverRealloc(solvmtx_loc);
530 solverRealloc(solvmtx_glob);
531#if defined(PASTIX_DEBUG_BLEND)
532 if (!ctrl.ricar) {
533 if( verbose > PastixVerboseYes ) {
534 pastix_print( procnum, 0, OUT_BLEND_CHKSOLVER );
535 }
536 solverCheck(solvmtx_loc);
537 solverCheck(solvmtx_glob);
538 }
539#endif
540 }
541
542 blendCtrlExit(&ctrl);
543
544 /* End timing */
545 clockStop(timer_all);
546 pastix_data->dparm[DPARM_BLEND_TIME] = clockVal(timer_all);
547
548 if (verbose > PastixVerboseYes) {
549 pastixSymbolPrintStats( pastix_data->symbmtx );
550 }
551
552 /* Recompute nnz from the symbol matrix if the stored IPARM_NNZEROS is negative, i.e. overflowed (int32) */
553 nnz = iparm[IPARM_NNZEROS];
554 if ( iparm[IPARM_NNZEROS] < 0 ) {
555 nnz = pastixSymbolGetNNZ( pastix_data->symbmtx );
556 }
557
558 /* Symbol is not used anymore; the local symbmtx alias of this pointer must not be used past this point */
559 pastixSymbolExit(pastix_data->symbmtx);
560 memFree_null(pastix_data->symbmtx);
561
562 /* Computes and print statistics */
563 {
    /* For LU both the nnz count and the predicted factorization time are doubled -- presumably to account for L and U; confirm */
564 if (iparm[IPARM_FACTORIZATION] == PastixFactLU)
565 {
566 nnz *= 2;
567 dparm[DPARM_PRED_FACT_TIME] *= 2.;
568 }
569 dparm[DPARM_SOLV_FLOPS] = (double)nnz; /* number of operations for solve */
570
571 iparm[IPARM_NNZEROS_BLOCK_LOCAL] = solvmtx_loc->coefnbr;
572
573 /* Display: fill-in ratio. NOTE(review): divides by csc->gnnzexp; confirm csc is set and gnnzexp is non-zero here */
574 dparm[DPARM_FILL_IN] = (double)(nnz) / (double)(pastix_data->csc->gnnzexp);
575
576 if (verbose > PastixVerboseNot) {
577 pastix_print( procnum, 0, OUT_BLEND_SUMMARY,
578 nnz, (double)dparm[DPARM_FILL_IN],
579 pastixFactotypeStr( iparm[IPARM_FACTORIZATION] ),
580 pastix_print_value( dparm[DPARM_FACT_THFLOPS] ),
581 pastix_print_unit( dparm[DPARM_FACT_THFLOPS] ),
582 PERF_MODEL, dparm[DPARM_PRED_FACT_TIME],
583 dparm[DPARM_BLEND_TIME] );
584
585 if (0) /* TODO: consider that when moving to distributed (currently never executed) */
586 {
587 if ((verbose > PastixVerboseNo))
588 {
589 fprintf(stdout, NNZERO_WITH_FILLIN, (int)procnum, (long)iparm[IPARM_NNZEROS_BLOCK_LOCAL]);
590 }
591 if (verbose > PastixVerboseYes)
592 {
593 PASTIX_Comm pastix_comm = pastix_data->inter_node_comm;
594 pastix_int_t sizeL = solvmtx_loc->coefnbr;
595 pastix_int_t sizeG = 0;
596
597 MPI_Reduce(&sizeL, &sizeG, 1, PASTIX_MPI_INT, MPI_MAX, 0, pastix_comm);
598
599 if (procnum == 0)
600 {
601 sizeG *= sizeof(pastix_complex64_t);
602 if (iparm[IPARM_FACTORIZATION] == PastixFactLU) {
603 sizeG *= 2;
604 }
605
606 fprintf( stdout, OUT_COEFSIZE,
607 pastix_print_value(sizeG),
608 pastix_print_unit(sizeG) );
609 }
610 }
611 }
612 }
613 }
614
615 /* Backup the solver for debug (disabled: constant-false condition) */
616 if (0)
617 {
618 FILE *file = NULL;
619 pastix_gendirectories( pastix_data );
620 file = pastix_fopenw( pastix_data->dir_global, "solvergen", "w" );
621 if ( file ) {
622 solverSave( solvmtx_loc, file );
623 fclose(file);
624 }
625 }
626
627 /* Invalidate following steps, and add analyze step to the ones performed */
628 pastix_data->steps &= ~( STEP_CSC2BCSC |
629 STEP_BCSC2CTAB |
630 STEP_NUMFACT |
631 STEP_SOLVE |
632 STEP_REFINE );
633 pastix_data->steps |= STEP_ANALYSE;
634
635 return PASTIX_SUCCESS;
636}
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
int candCheck(const Cand *candtab, const symbol_matrix_t *symbmtx)
Check the correctness of the computed candidates.
Definition cand.c:204
Cand * candInit(pastix_int_t cblknbr)
Initialize the candtab array with default values.
Definition cand.c:48
void candGenCompressedDot(const EliminTree *etree, const Cand *candtab, FILE *stream)
Print the compressed elimination tree in a dot file, where all nodes with the same candidates are mer...
void candSetClusterCand(Cand *candtab, pastix_int_t cblknbr, const pastix_int_t *core2clust, pastix_int_t coresnbr)
Set the cluster candidates from the core candidates.
Definition cand.c:158
void candBuild(pastix_int_t level_tasks2d, pastix_int_t width_tasks2d, pastix_compress_when_t lr_when, pastix_int_t lr_width, Cand *candtab, EliminTree *etree, const symbol_matrix_t *symbmtx, const CostMatrix *costmtx)
Finish building the candtab array for the proportional mapping.
Definition cand.c:709
void candGenDot(const EliminTree *etree, const Cand *candtab, FILE *stream)
Print the elimination tree in a dot file.
void candGenDotLevel(const EliminTree *etree, const Cand *candtab, FILE *stream, pastix_int_t level)
Print the first levels of the elimination tree in a dot file.
void costMatrixExit(CostMatrix *costmtx)
Free the cost matrix structure.
Definition cost.c:57
CostMatrix * costMatrixBuild(const symbol_matrix_t *symbmtx, pastix_coeftype_t flttype, pastix_factotype_t factotype)
Build the cost matrix structure from the symbol matrix structure.
Definition cost.c:91
pastix_int_t * core2clust
Definition blendctrl.h:79
pastix_int_t up_after_split
Definition blendctrl.h:55
pastix_int_t leader
Definition blendctrl.h:34
pastix_int_t ricar
Definition blendctrl.h:33
EliminTree * etree
Definition blendctrl.h:96
pastix_int_t debug
Definition blendctrl.h:30
pastix_int_t local_nbcores
Definition blendctrl.h:74
pastix_int_t allcand
Definition blendctrl.h:40
pastix_int_t width_tasks2d
Definition blendctrl.h:63
pastix_int_t clustnbr
Definition blendctrl.h:71
pastix_int_t local_nbctxts
Definition blendctrl.h:76
Cand * candtab
Definition blendctrl.h:98
pastix_int_t level_tasks2d
Definition blendctrl.h:62
pastix_int_t local_nbthrds
Definition blendctrl.h:75
CostMatrix * costmtx
Definition blendctrl.h:97
pastix_int_t total_nbcores
Definition blendctrl.h:72
pastix_int_t count_ops
Definition blendctrl.h:29
pastix_int_t nocrossproc
Definition blendctrl.h:41
int blendCtrlInit(pastix_data_t *pastix_data, BlendCtrl *ctrl)
Initialize the Blend control structure.
Definition blendctrl.c:162
void blendCtrlExit(BlendCtrl *)
Finalize the Blend control structure.
Definition blendctrl.c:303
The type and structure definitions.
Definition blendctrl.h:28
double subpath
Definition elimintree.h:29
double subcost
Definition elimintree.h:28
eTreeNode_t * nodetab
Definition elimintree.h:42
static pastix_int_t eTreeRoot(const EliminTree *etree)
Return the root of the elimination tree.
Definition elimintree.h:126
EliminTree * eTreeBuild(const symbol_matrix_t *)
Build the elimination tree.
Definition elimintree.c:478
void eTreeExit(EliminTree *)
Free the elimination tree structure.
Definition elimintree.c:89
pastix_int_t tasknbr
Definition simu.h:119
void simuExit(SimuCtrl *, pastix_int_t, pastix_int_t, pastix_int_t)
Free the simulation structure.
Definition simu.c:254
pastix_int_t simuInit(SimuCtrl *, const symbol_matrix_t *, const Cand *, pastix_int_t, pastix_int_t)
Initialize the simulation structures.
Definition simu.c:66
void simuTaskBuild(SimuCtrl *, const symbol_matrix_t *)
Initialize the tasktab array of the simulation structure.
Definition simu_task.c:49
Control structure for the simulation.
Definition simu.h:116
void solverRealloc(SolverMatrix *solvptr)
Realloc in a contiguous way a given solver structure.
int solverSave(const SolverMatrix *solvptr, FILE *stream)
Save a solver matrix structure into a file.
Definition solver_io.c:261
void solverPrintStats(const SolverMatrix *solvptr)
Print statistical information about the solver matrix structure.
Definition solver.c:187
int solverCheck(const SolverMatrix *solvmtx)
Checks the consistency of the given solver matrix structure.
void solverExit(SolverMatrix *solvmtx)
Free the content of the solver matrix structure.
Definition solver.c:143
int pastix_subtask_blend(pastix_data_t *pastix_data)
Compute the proportional mapping and the final solver structure.
FILE * pastix_fopenw(const char *dirname, const char *filename, const char *mode)
Open a file in the unique directory of the pastix instance.
Definition api.c:251
void pastix_gendirectories(pastix_data_t *pastix_data)
Generate a unique temporary directory to store output files.
Definition api.c:85
@ PastixFactLU
Definition api.h:317
@ DPARM_PRED_FACT_TIME
Definition api.h:169
@ DPARM_FACT_THFLOPS
Definition api.h:172
@ DPARM_BLEND_TIME
Definition api.h:167
@ DPARM_SOLV_FLOPS
Definition api.h:178
@ DPARM_FILL_IN
Definition api.h:160
@ DPARM_FACT_RLFLOPS
Definition api.h:173
@ IPARM_FACTORIZATION
Definition api.h:99
@ IPARM_COMPRESS_WHEN
Definition api.h:131
@ IPARM_COMPRESS_MIN_WIDTH
Definition api.h:129
@ IPARM_FLOAT
Definition api.h:149
@ IPARM_NNZEROS_BLOCK_LOCAL
Definition api.h:41
@ IPARM_VERBOSE
Definition api.h:36
@ IPARM_NNZEROS
Definition api.h:40
@ IPARM_SPLITTING_STRATEGY
Definition api.h:80
@ PastixVerboseYes
Definition api.h:222
@ PastixVerboseNot
Definition api.h:220
@ PastixVerboseNo
Definition api.h:221
@ PASTIX_SUCCESS
Definition api.h:367
@ PASTIX_ERR_BADPARAMETER
Definition api.h:374
@ PastixSplitKwayProjections
Definition api.h:418
void splitSymbol(BlendCtrl *ctrl, symbol_matrix_t *symbmtx)
Split the column blocks of the symbol matrix to generate parallelism.
void propMappTree(Cand *candtab, const EliminTree *etree, pastix_int_t candnbr, int nocrossproc, int allcand)
Apply the proportional mapping algorithm.
Definition propmap.c:422
void simuRun(SimuCtrl *, const BlendCtrl *, const symbol_matrix_t *)
Run the simulation to map the data on the nodes.
Definition simu_run.c:986
int solverMatrixGen(SolverMatrix *solvmtx, const symbol_matrix_t *symbmtx, const pastix_order_t *ordeptr, const SimuCtrl *simuctl, const BlendCtrl *ctrl, PASTIX_Comm comm, isched_t *isched)
Initialize the solver matrix structure.
int solverMatrixGenSeq(SolverMatrix *solvmtx, const symbol_matrix_t *symbmtx, const pastix_order_t *ordeptr, const SimuCtrl *simuctl, const BlendCtrl *ctrl, PASTIX_Comm comm, isched_t *isched, pastix_int_t is_dbg)
Initialize the solver matrix structure in sequential.
void graphExit(pastix_graph_t *graph)
Free the content of the graph structure.
Definition graph.c:73
pastix_int_t sndenbr
Definition order.h:56
Order structure.
Definition order.h:47
pastix_int_t dof
Definition symbol.h:87
pastix_int_t cblknbr
Definition symbol.h:79
void pastixSymbolPrintStats(const symbol_matrix_t *symbptr)
Print statistical information about the symbolic matrix structure.
Definition symbol.c:389
int pastixSymbolDraw(const symbol_matrix_t *symbptr, FILE *stream)
Export the symbol structure in a PostScript format.
size_t pastixSymbolGetNNZ(const symbol_matrix_t *symbptr)
Computes the number of non-zero elements in L.
void pastixSymbolDrawMap(pastix_data_t *pastix_data, const char *extname, pastix_int_t sndeidx)
Dump a separator mapping into a map file.
int pastixSymbolSave(const symbol_matrix_t *symbptr, FILE *stream)
Save the given block matrix structure to the given stream.
Definition symbol_io.c:147
void pastixSymbolGetFlops(const symbol_matrix_t *symbmtx, pastix_coeftype_t flttype, pastix_factotype_t factotype, double *thflops, double *rlflops)
Computes the number of theoretical and real flops.
void pastixSymbolExit(symbol_matrix_t *symbptr)
Free the content of symbolic matrix.
Definition symbol.c:137
int pastixSymbolCheck(const symbol_matrix_t *symbptr)
Checks the consistency of the given symbolic block matrix.
Symbol matrix structure.
Definition symbol.h:77
int inter_node_procnum
Definition pastixdata.h:84
SolverMatrix * solvmatr
Definition pastixdata.h:103
SolverMatrix * solvglob
Definition pastixdata.h:105
SolverMatrix * solvloc
Definition pastixdata.h:104
pastix_order_t * ordemesh
Definition pastixdata.h:98
pastix_int_t * iparm
Definition pastixdata.h:70
double * dparm
Definition pastixdata.h:71
const spmatrix_t * csc
Definition pastixdata.h:90
pastix_graph_t * graph
Definition pastixdata.h:92
isched_t * isched
Definition pastixdata.h:86
symbol_matrix_t * symbmtx
Definition pastixdata.h:100
PASTIX_Comm inter_node_comm
Definition pastixdata.h:78
char * dir_global
Definition pastixdata.h:110
pastix_int_t steps
Definition pastixdata.h:73
Main PaStiX data structure.
Definition pastixdata.h:68
pastix_int_t coefnbr
Definition solver.h:209
Solver column block structure.
Definition solver.h:203