PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
simu_run.c
Go to the documentation of this file.
1/**
2 *
3 * @file simu_run.c
4 *
5 * PaStiX simulation functions.
6 *
7 * @copyright 2004-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Pascal Henon
12 * @author Pierre Ramet
13 * @author Mathieu Faverge
14 * @author Gregoire Pichon
15 * @author Vincent Bridonneau
16 * @author Xavier Lacoste
17 * @date 2024-07-05
18 *
19 **/
20#ifndef DOXYGEN_SHOULD_SKIP_THIS
21#ifndef _GNU_SOURCE
22#define _GNU_SOURCE 1
23#endif
24#endif /* DOXYGEN_SHOULD_SKIP_THIS */
25#include <stdio.h>
26#include <string.h>
27#include <assert.h>
28#include <math.h>
29#include <sys/stat.h>
30#include <sys/types.h>
31#include <unistd.h>
32
33#include "common.h"
34#include "symbol/symbol.h"
35#include "extendVector.h"
36#include "queue.h"
37#include "elimintree.h"
38#include "cost.h"
39#include "cand.h"
40#include "blendctrl.h"
41#include "blend/solver.h"
42#include "simu.h"
43#include "perf.h"
44
45#if defined(PASTIX_BLEND_GENTRACE)
46#include <GTG.h>
47#include <GTGPaje.h>
48
49/**
50 *******************************************************************************
51 *
52 * @brief Increment a traced counter.
53 *
54 *******************************************************************************
55 *
56 * @param[in] time
57 * The timestamp of the event.
58 *
59 * @param[in] type
60 * The type string of the variable to modify.
61 *
62 * @param[in] cont
63 * The string of the container that holds the variable.
64 *
65 * @param[in] val
66 * The value to add to the counter.
67 *
68 *******************************************************************************/
69static inline void
70blendAddVar( int clustnum, varPrec time, const char* type,
71 const char* cont, varPrec val )
72{
73 if ( clustnum == 0 ) {
74 addVar( time, type, cont, val );
75 }
76}
77
78/**
79 *******************************************************************************
80 *
81 * @brief Decrement a traced counter.
82 *
83 *******************************************************************************
84 *
85 * @param[in] time
86 * The timestamp of the event.
87 *
88 * @param[in] type
89 * The type string of the variable to modify.
90 *
91 * @param[in] cont
92 * The string of the container that holds the variable.
93 *
94 * @param[in] val
95 * The value used to decrement the counter.
96 *
97 *******************************************************************************/
98static inline void
99blendSubVar( int clustnum, varPrec time, const char* type,
100 const char* cont, varPrec val )
101{
102 if ( clustnum == 0 ) {
103 subVar( time, type, cont, val );
104 }
105}
106
107#else /* !defined(PASTIX_BLEND_GENTRACE) */
108
109#ifndef DOXYGEN_SHOULD_SKIP_THIS
/* Without trace support both counter helpers expand to an empty statement; their arguments are not evaluated. */
110#define blendAddVar( clustnum, time, type, cont, val ) \
111 do { } while (0)
112
113#define blendSubVar( clustnum, time, type, cont, val ) \
114 do { } while (0)
115
116#endif /* DOXYGEN_SHOULD_SKIP_THIS */
117#endif /* defined(PASTIX_BLEND_GENTRACE) */
118
119/**
120 * @addtogroup blend_dev_simu
121 * @{
122 *
123 */
124
125/**
126 *******************************************************************************
127 *
128 * @brief Compute the cost of a communication and its update.
129 *
130 *******************************************************************************
131 *
132 * @param[in] ctrl
133 * The blend control structure that describes the architecture and the
134 * cost of the communication between nodes.
135 *
136 * @param[in] ftgt
137 * The fan-in for which the cost is computed.
138 *
139 * @param[in] clustsrc
140 * The index of the source pastix process.
141 *
142 * @param[in] sync_comm_nbr
143 * The number of simultaneous communication.
144 *
145 * @param[out] send
146 * The time cost of the send operation.
147 *
148 * @param[out] add
149 * The time cost of the addition operation.
150 *
151 *******************************************************************************/
152static inline void
154 const SimuFtgt *ftgt,
155 pastix_int_t clustsrc,
156 pastix_int_t sync_comm_nbr,
157 double *send,
158 double *add )
159{
160 pastix_int_t M, N;
161 pastix_int_t clustdst = ctrl->core2clust[ftgt->infotab[FTGT_PROCDST]];
162 double startup, bandwidth, addcost;
163
164 *send = 0.;
165 *add = 0.;
166
167 if( clustsrc == clustdst ) {
168 return;
169 }
170
171 assert( (clustsrc >= 0) && (clustdst >= 0) );
172
173 N = (ftgt->infotab[FTGT_LCOLNUM] - ftgt->infotab[FTGT_FCOLNUM] + 1);
174 M = (ftgt->infotab[FTGT_LROWNUM] - ftgt->infotab[FTGT_FROWNUM] + 1);
175
176 assert( (N > 0) && (M > 0) );
177
178 getCommunicationCosts( ctrl, clustsrc, clustdst, sync_comm_nbr, &startup, &bandwidth );
179
180 *send = (startup + bandwidth * (M * N * sizeof(double) + FTGT_MAXINFO * sizeof(pastix_int_t)));
181 addcost = PERF_GEAM( M, N );
182 *add = addcost > 0. ? addcost : 0.0;
183 return;
184}
185
186/**
187 *******************************************************************************
188 *
189 * @brief Compute the number of contributions to each block.
190 *
191 *******************************************************************************
192 *
193 * @param[in] symbptr
194 * The symbol matrix structure describing the problem.
195 *
196 * @param[inout] simuctrl
197 * The main simulation structure. On exit, the ctrbcnt field of the
198 * blocks is updated with the number of contributions that they each
199 * should receive.
200 *
201 * @param[in] ricar
202 * True if ILU(k) factorization is applied to change the algorithm to
203 * compute the number of contributions.
204 *
205 *******************************************************************************/
206static inline void
208 SimuCtrl *simuctrl,
209 pastix_int_t ricar )
210{
211 pastix_int_t i, j, k;
212 pastix_int_t facebloknum, firstbloknum;
213
214 /*
215 * Compute the number of contributions per block to each block.
216 * Might be optimized if we computed the input graph before.
217 */
218 {
219 symbol_cblk_t *curcblk;
220
221 curcblk = symbptr->cblktab;
222 for(i=0; i<symbptr->cblknbr; i++, curcblk++)
223 {
224 pastix_int_t fbloknum = curcblk[0].bloknum + 1;
225 pastix_int_t lbloknum = curcblk[1].bloknum;
226
227 /* 1D cblk computed */
228 for(j=fbloknum; j<lbloknum; j++)
229 {
230 firstbloknum = 0;
231
232 /* Add contribution due to E2 */
233 for(k=j; k<lbloknum; k++)
234 {
235 facebloknum = pastixSymbolGetFacingBloknum( symbptr, j, k, firstbloknum, ricar );
236 if(facebloknum >= 0) {
237 simuctrl->bloktab[facebloknum].ctrbcnt++;
238 firstbloknum = facebloknum;
239 }
240 }
241 }
242 }
243 }
244
245 /* Set up the task ctrbcnt and cblkcnt */
246 {
247 SimuTask *task = simuctrl->tasktab;
248
249 for(i=0;i<simuctrl->tasknbr;i++)
250 {
251 pastix_int_t fbloknum = symbptr->cblktab[task->cblknum ].bloknum;
252 pastix_int_t lbloknum = symbptr->cblktab[task->cblknum+1].bloknum;
253
254 task->ctrbcnt = 0;
255 for(j=fbloknum; j<lbloknum; j++) {
256 task->ctrbcnt += simuctrl->bloktab[j].ctrbcnt;
257 }
258
259 simuctrl->cblktab[task->cblknum].ctrbcnt = task->ctrbcnt;
260 task++;
261 }
262 }
263}
264
265
266/**
267 *******************************************************************************
268 *
269 * @brief Print the number of contributions per cblk and block for debug.
270 *
271 *******************************************************************************
272 *
273 * @param[in] ctrl
274 * The blendctrl structure with the simulation parameters.
275 *
276 * @param[in] symbptr
277 * The symbol matrix structure describing the problem.
278 *
279 * @param[in] simuctrl
280 * The main simulation structure.
281 *
282 *******************************************************************************/
283static inline void
285 const symbol_matrix_t *symbptr,
286 const SimuCtrl *simuctrl )
287{
288 FILE *fd1 = NULL;
289 FILE *fd2 = NULL;
290 pastix_int_t i, j;
291 symbol_cblk_t *curcblk;
292
293 fd1 = pastix_fopenw( ctrl->dirname, "contribblok.txt", "w" );
294 if ( fd1 == NULL ) {
295 return;
296 }
297 fd2 = pastix_fopenw( ctrl->dirname, "contribcblk.txt", "w" );
298 if ( fd2 == NULL ) {
299 return;
300 }
301
302 curcblk = symbptr->cblktab;
303 for(i=0; i<symbptr->cblknbr; i++, curcblk++)
304 {
305 pastix_int_t fbloknum = curcblk[0].bloknum + 1;
306 pastix_int_t lbloknum = curcblk[1].bloknum;
307
308 /* 1D cblk computed */
309 for(j=fbloknum; j<lbloknum; j++)
310 {
311 fprintf(fd1, "%ld %ld\n", (long)j, (long)simuctrl->bloktab[j].ctrbcnt);
312 }
313#if defined(PASTIX_SYMBOL_DUMP_SYMBMTX)
314 fprintf(fd2, "%ld %ld %ld\n", (long)i, (long)simuctrl->cblktab[i].ctrbcnt, (long)curcblk->split_cblk);
315#else
316 fprintf(fd2, "%ld %ld\n", (long)i, (long)simuctrl->cblktab[i].ctrbcnt);
317#endif
318 }
319
320 fclose( fd1 );
321 fclose( fd2 );
322}
323
324/**
325 *******************************************************************************
326 *
327 * @brief Insert a task in the ready task queues of all its candidates.
328 *
329 * This function according to the ready date of a task put this task on the
330 * ready queue of a processor.
331 * When the ready date of a task is inferior to the proc timer then the
332 * task is ordered according to its priorities in the elimination tree.
333 *
334 *******************************************************************************
335 *
336 * @param[in] ctrl
337 * The blend control structure to provide the candtab and the
338 * core2clust arrays.
339 *
340 * @param[inout] simuctrl
341 * The main simulation structure. On exit, the ready tasks queues of
342 * the candidates for tasknum are updated.
343 *
344 * @param[in] tasknum
345 * The index of the task to insert as a ready task.
346 *
347 *******************************************************************************/
348static inline void
350 SimuCtrl *simuctrl,
351 pastix_int_t tasknum )
352{
353 const SimuTask *task = simuctrl->tasktab + tasknum;
354 const Cand *cblkcand = ctrl->candtab + task->cblknum;
355 SimuProc *sproc;
356 double ready_date = 0.0;
357 pastix_int_t procnum;
358 pastix_int_t bloknum = task->bloknum;
359#if defined(PASTIX_BLEND_COSTLEVEL)
360 double level = cblkcand->costlevel;
361#else
362 pastix_int_t level = cblkcand->treelevel;
363#endif
364 assert( tasknum != -1 );
365
366 blendAddVar( ctrl->clustnum, timerVal( &(task->time) ), "VR_AP", "Appli", 1 );
367
368 /* Get the ready date of the task on the processor passed in parameter */
369 if( cblkcand->fccandnum == cblkcand->lccandnum )
370 {
371 ready_date = timerVal( &(task->time) );
372 sproc = &(simuctrl->proctab[cblkcand->fcandnum]);
373
374 for(procnum = cblkcand->fcandnum;
375 procnum <= cblkcand->lcandnum; procnum++, sproc++)
376 {
377 if( ready_date > timerVal( &(sproc->timer) ) ) {
378 pqueuePush2( sproc->futuretask, tasknum, ready_date, level );
379 }
380 else {
381 pqueuePush2( sproc->readytask, tasknum, level, bloknum );
382 }
383 blendAddVar( ctrl->clustnum, ready_date, "VR_TS", sproc->procalias, 1 );
384 }
385 }
386 else
387 {
388 sproc = &(simuctrl->proctab[cblkcand->fcandnum]);
389
390 for(procnum = cblkcand->fcandnum;
391 procnum <= cblkcand->lcandnum; procnum++, sproc++)
392 {
393 ready_date = timerVal( simuctrl->ftgttimetab + CLUST2INDEX(bloknum, ctrl->core2clust[procnum]) );
394
395 if( ready_date > timerVal( &(sproc->timer) ) ) {
396 pqueuePush2( sproc->futuretask, tasknum, ready_date, level );
397 }
398 else {
399 pqueuePush2( sproc->readytask, tasknum, level, bloknum );
400 }
401 blendAddVar( ctrl->clustnum, ready_date, "VR_TS", sproc->procalias, 1 );
402 }
403 }
404}
405
406/**
407 *******************************************************************************
408 *
409 * @brief Look for the best next couple (tasknum, corenum) that is ready to be
410 * executed.
411 *
412 * This function is the main and more costly one. It looks for each worker,
413 * which task is the first one available for execution, and from all those
414 * couples, which one is the first one to finish.
415 *
416 *******************************************************************************
417 *
418 * @param[in] ctrl
419 * The blend control structure to provide the candtab and the
420 * core2clust arrays.
421 *
422 * @param[inout] simuctrl
423 * The main simulation structure. On exit, the structure is updated
424 * with the extraction of the next best task to run.
425 *
426 * @param[out] procnumptr
427 * The index of the candidate to run the task.
428 *
429 *******************************************************************************
430 *
431 * @return The next task selected for execution in the simulator. The worker
432 * selected is returned in the procnumptr field.
433 *
434 *******************************************************************************/
435static inline pastix_int_t
437 SimuCtrl *simuctrl,
438 pastix_int_t *procnumptr )
439{
440 pastix_int_t p, rc;
441 pastix_int_t procnum = -1;
442 pastix_int_t tasknum;
443 double earlytimeready = PASTIX_INT_MAX;
444 double earlyproctimer = PASTIX_INT_MAX;
445 double timeready;
446 pastix_int_t earlytask = -1;
447
448 /* Find the earlier task in the processor heaps */
449 for(p=0;p<ctrl->total_nbcores;p++)
450 {
451 SimuProc *sproc = &(simuctrl->proctab[p]);
452 tasknum = -1;
453 /*
454 * First we search the earlier task in the set of tasks whose ready date
455 * is < proc timer
456 */
457 while( pqueueSize(sproc->readytask) > 0 )
458 {
459 tasknum = pqueueRead( sproc->readytask );
460 if( simuctrl->bloktab[simuctrl->tasktab[tasknum].bloknum].ownerclust >= 0 )
461 {
462 /* This task have to be remove from the heap (already mapped) */
463 rc = pqueuePop( sproc->readytask );
464 assert( rc >= 0 );
465 tasknum = -1;
466 }
467 else
468 break;
469 }
470 /*
471 * We found no task which ready date is < proc timer so we search one
472 * that minimizes ready date - proc-timer
473 */
474 if(tasknum == -1)
475 {
476 while(pqueueSize(simuctrl->proctab[p].futuretask)>0)
477 {
478 tasknum = pqueueRead(simuctrl->proctab[p].futuretask);
479 if( simuctrl->bloktab[simuctrl->tasktab[tasknum].bloknum].ownerclust >= 0 )
480 {
481 /* This task have to be remove from the heap (already mapped) */
482 rc = pqueuePop(simuctrl->proctab[p].futuretask);
483 assert( rc >= 0 );
484 tasknum = -1;
485 }
486 else {
487 break;
488 }
489 }
490 }
491
492 if(tasknum != -1)
493 {
494 const SimuTask *task = simuctrl->tasktab + tasknum;
495 timeready = MAX(timerVal(TIMER(p)),
496 timerVal(&(simuctrl->ftgttimetab[CLUST2INDEX(simuctrl->tasktab[tasknum].bloknum,
497 ctrl->core2clust[p])])));
498
499 timeready = MAX( timeready, timerVal( &(task->time) ) );
500
501 /*
502 * We prevent to distribute on the same processor set when all time
503 * are equal
504 */
505 if((timeready == earlytimeready) && (timerVal(TIMER(p)) < earlyproctimer))
506 {
507 procnum = p;
508 earlyproctimer = timerVal(TIMER(p));
509 earlytask = tasknum;
510 earlytimeready = timeready;
511 }
512
513 if(timeready < earlytimeready)
514 {
515 procnum = p;
516 earlytask = tasknum;
517 earlytimeready = timeready;
518 }
519 }
520 }
521
522#if defined(PASTIX_BLEND_GENTRACE)
523 if ( (earlytask != -1) && (ctrl->clustnum == 0) )
524 {
525 const SimuTask *task = simuctrl->tasktab + earlytask;
526 const Cand *cblkcand = ctrl->candtab + task->cblknum;
527 SimuProc *sproc = &(simuctrl->proctab[cblkcand->fcandnum]);
528
529 for(p = cblkcand->fcandnum;
530 p <= cblkcand->lcandnum; p++, sproc++)
531 {
532 blendSubVar( ctrl->clustnum, earlytimeready, "VR_TS", sproc->procalias, 1 );
533 }
534
535 blendSubVar( ctrl->clustnum, earlytimeready, "VR_AP", "Appli", 1 );
536 }
537#endif
538
539#if !defined(NDEBUG)
540 if(procnum != -1)
541 {
542 if( pqueueSize(simuctrl->proctab[procnum].readytask) > 0 ) {
543 assert(earlytask == pqueuePop(simuctrl->proctab[procnum].readytask));
544 }
545 else {
546 assert(earlytask == pqueuePop(simuctrl->proctab[procnum].futuretask));
547 }
548 }
549#endif
550
551 *procnumptr = procnum;
552 (void)rc;
553 return earlytask;
554}
555
556/**
557 *******************************************************************************
558 *
559 * @brief Compute the instant t where the task will be received by a node.
560 *
561 * Compute the time the cblk would have RECEIVED and ADDED all its contributions
562 * if it was mapped on a given cand CLUSTER.
563 * @warning These times do not include add time for fan in target
564 *
565 *******************************************************************************
566 *
567 * @param[in] ctrl
568 * The blend control structure to provide the candtab and the
569 * core2clust arrays.
570 *
571 * @param[in] symbptr
572 * The symbol matrix structure describing the problem.
573 *
574 * @param[inout] simuctrl
575 * The main simulation structure. On exit, the ready tasks queues of
576 * the candidates for tasknum are updated.
577 *
578 * @param[in] tasknum
579 * The index of the task to insert as a ready task.
580 *
581 *******************************************************************************/
582static inline void
584 const symbol_matrix_t *symbptr,
585 SimuCtrl *simuctrl,
586 pastix_int_t tasknum )
587{
588 pastix_int_t i, j;
589 double lftgttime = 0;
590 double sftgttime = 0;
591 pastix_int_t lftgtnum = -1;
592 pastix_int_t cblknum;
593 pastix_int_t bloknum;
594 pastix_int_t clustdst;
595
596 bloknum = simuctrl->tasktab[tasknum].bloknum;
597 cblknum = simuctrl->tasktab[tasknum].cblknum;
598
599 /* If the task is local, all sons sending contributions are local => no treatment */
600 if( ctrl->candtab[cblknum].fccandnum == ctrl->candtab[cblknum].lccandnum ) {
601 return;
602 }
603
604 /*
605 * Compute the cblk on proc timer that is time the cblk would have received
606 * all its contributions if it was mapped on a given cand processor These
607 * times INCLUDE add time for fan in target !!
608 */
609
610 /* Compute receive time (time at which a non-local processor should received the target) */
611 /* find the latest ftgt receive time and the second latest*/
612 for(i=simuctrl->bloktab[bloknum].ftgtnum; i<simuctrl->bloktab[bloknum+1].ftgtnum; i++)
613 {
614 /* Source of this ftgt */
615 clustdst = INDEX2CLUST(i, bloknum);
616
617 /* Task with several cand proc */
618 /* The information about ftgt costs are in the ftgt of the diagonal block;
619 this loop sums the cost of all the ftgt received by the blocks in this column block */
620 if(simuctrl->ftgttab[i].infotab[FTGT_CTRBNBR]>0) {
621 for(j=bloknum;j<symbptr->cblktab[cblknum+1].bloknum;j++)
622 {
623 if(simuctrl->ftgttab[simuctrl->bloktab[j].ftgtnum + i-simuctrl->bloktab[bloknum].ftgtnum].infotab[FTGT_CTRBNBR]>0)
624 {
625 double send, add;
626
627 simu_computeFtgtCosts( ctrl, simuctrl->ftgttab + CLUST2INDEX(j, clustdst), clustdst,
628 ctrl->candtab[cblknum].lccandnum - ctrl->candtab[cblknum].fccandnum + 1,
629 &send, &add );
630
631 simuctrl->ftgttab[i].costadd += add;
632 simuctrl->ftgttab[i].costsend += send;
633 }
634 }
635 }
636
637#if defined(PASTIX_DEBUG_BLEND)
638 if(!(simuctrl->ftgttab[i].costsend >= 0.0)) {
639 pastix_print_error( "ftgt %ld costsend %f", (long)i, simuctrl->ftgttab[i].costsend );
640 }
641 if(!(simuctrl->ftgttab[i].costadd >= 0.0)) {
642 pastix_print_error( "ftgt %ld costadd %f", (long)i, simuctrl->ftgttab[i].costadd );
643 }
644
645 assert(simuctrl->ftgttab[i].costsend >= 0.0);
646 assert(simuctrl->ftgttab[i].costadd >= 0.0);
647#endif
648
649 /* ftgttab[].timerecv is the time this ftgt will be receive */
650 timerSet(&(simuctrl->ftgttab[i].timerecv), timerVal(&(simuctrl->ftgttimetab[i])) + simuctrl->ftgttab[i].costsend + simuctrl->ftgttab[i].costadd);
651
652 /* If this ftgt the last reveived or the second last received ?? */
653 if(timerVal(&(simuctrl->ftgttab[i].timerecv)) > lftgttime)
654 {
655 lftgttime = timerVal(&(simuctrl->ftgttab[i].timerecv));
656 lftgtnum = i;
657 }
658 else {
659 if(timerVal(&(simuctrl->ftgttab[i].timerecv)) > sftgttime) {
660 sftgttime = timerVal(&(simuctrl->ftgttab[i].timerecv));
661 }
662 }
663 }
664
665
666 /*
667 * Put in ftgttimetab[] the date at which the cluster would have received
668 * and add all the ftgt if the task was mapped on it.
669 */
670 for(i=simuctrl->bloktab[bloknum].ftgtnum; i<simuctrl->bloktab[bloknum+1].ftgtnum;i++)
671 {
672 if(i != lftgtnum) {
673 timerSet(&(simuctrl->ftgttimetab[i]), lftgttime);
674 }
675 else {
676 timerSetMax( &(simuctrl->ftgttimetab[i]), sftgttime );
677 }
678 }
679}
680
681/**
682 *******************************************************************************
683 *
684 * @brief Update the Fan In target structure
685 *
686 * Increment the contribution counter of the fan-in and integrate to the ftgt
687 * area the new contribution.
688 *
689 *******************************************************************************
690 *
691 * @param[in] symbptr
692 * The pointer to the symbolic matrix structure.
693 *
694 * @param[inout] simuctrl
695 * The pointer to the simulation structure. On exit, data regarding the
696 * computational unit pr are updated.
697 *
698 * @param[in] ftgtnum
699 * Index of the fanin target to update.
700 *
701 * @param[in] bloknum
702 * Index of the first off-diagonal block generating a contribution to
703 * the ftgtnum Fan In.
704 *
705 * @param[in] fbloknum
706 * Index of the off-diagonal block that is multiplied by blocknum to
707 * produce the update.
708 *
709 *******************************************************************************/
710static inline void
712 SimuCtrl *simuctrl,
713 pastix_int_t ftgtnum,
714 pastix_int_t bloknum,
715 pastix_int_t fbloknum )
716{
717 pastix_int_t *infotab = simuctrl->ftgttab[ftgtnum].infotab;
718 symbol_blok_t *blokptr = (symbptr->bloktab) + bloknum;
719 symbol_blok_t *fblokptr = (symbptr->bloktab) + fbloknum;
720
721 infotab[FTGT_CTRBNBR]++;
722
723 /* Update ftgt dimensions to the maximum area covering all contributions */
724 if( blokptr->frownum < infotab[FTGT_FCOLNUM] ) {
725 infotab[FTGT_FCOLNUM] = blokptr->frownum;
726 }
727
728 if( blokptr->lrownum > infotab[FTGT_LCOLNUM] ) {
729 infotab[FTGT_LCOLNUM] = blokptr->lrownum;
730 }
731
732 if( fblokptr->frownum < infotab[FTGT_FROWNUM] ) {
733 infotab[FTGT_FROWNUM] = fblokptr->frownum;
734 }
735
736 if( fblokptr->lrownum > infotab[FTGT_LROWNUM] ) {
737 infotab[FTGT_LROWNUM] = fblokptr->lrownum;
738 }
739
740 assert( (infotab[FTGT_LCOLNUM] - infotab[FTGT_FCOLNUM] + 1) > 0 );
741 assert( (infotab[FTGT_LROWNUM] - infotab[FTGT_FROWNUM] + 1) > 0 );
742}
743
744/**
745 *******************************************************************************
746 *
747 * @brief Simulate the task execution.
748 *
749 * Update the timer of the worker owning the cblk, the contribution counters of
750 * the facing cblks and blocks, and the ready time of the targeted tasks.
751 *
752 *******************************************************************************
753 *
754 * @param[in] ctrl
755 * The pointer to the global blend control structure.
756 *
757 * @param[in] symbptr
758 * The pointer to the symbolic matrix structure.
759 *
760 * @param[inout] simuctrl
761 * The pointer to the simulation structure. On exit, the worker timer, the
762 * contribution counters and the timers of the facing cblks are updated.
763 *
764 * @param[in] tasknum
765 * The index of the task whose execution is simulated.
766 *
767 *******************************************************************************
768 *
769 * @remark In this function, we use the standard [f|l]bloknum for first and
770 * last bloknum, and facingcblk, facingblok for the facing block and column
771 * block.
772 *
773 *******************************************************************************/
/* NOTE(review): listing line 775 (function name and first parameter) is missing from this extract; the body uses that parameter as ctrl. */
774static inline void
776 const symbol_matrix_t *symbptr,
777 SimuCtrl *simuctrl,
778 pastix_int_t tasknum )
779{
780 pastix_int_t i, j;
781 pastix_int_t cblknum;
782 pastix_int_t fbloknum;
783 pastix_int_t lbloknum;
784 pastix_int_t firstfacingblok;
785 pastix_int_t facingblok;
786 pastix_int_t facingcblk;
787 pastix_int_t local;
788 pastix_int_t ftgtnum;
789 pastix_int_t procnum;
790 pastix_int_t clustnum;
791 SimuProc *sproc;
792 CostMatrix *costmtx;
793
794 cblknum = simuctrl->tasktab[tasknum].cblknum;
795 procnum = simuctrl->ownetab[cblknum];
796 clustnum = ctrl->core2clust[procnum];
797 sproc = &(simuctrl->proctab[procnum]);
798 costmtx = ctrl->costmtx;
799
800 fbloknum = symbptr->cblktab[cblknum ].bloknum;
801 lbloknum = symbptr->cblktab[cblknum+1].bloknum;
802
/* The owner of the cblk must be one of its candidate processors */
803 assert( (procnum >= ctrl->candtab[cblknum].fcandnum) &&
804 (procnum <= ctrl->candtab[cblknum].lcandnum) );
805
806 /* Add the factorization time of the diagonal block + the cost of the TRSM operation on the cblk */
807 timerAdd(&(sproc->timer), costmtx->blokcost[fbloknum]);
808
809 for(i=fbloknum+1; i<lbloknum; i++)
810 {
811 /* Add cost of the GEMM update related to this off-diagonal block */
812 timerAdd(&(sproc->timer), costmtx->blokcost[i]);
813
814 facingcblk = symbptr->bloktab[i].fcblknm;
815
816 /*
817 * If only one candidate cluster, we can consider the facingcblk as
818 * local because it is an ancestor of the current cblk in the
819 * elimination tree.
820 */
821 local = ( ctrl->candtab[facingcblk].fccandnum == ctrl->candtab[facingcblk].lccandnum ) ? 1 : 0;
822
823 firstfacingblok = symbptr->cblktab[facingcblk].bloknum;
824
825 for(j=i; j<lbloknum; j++)
826 {
827 /* TODO: symbolGetFacingBloknum is too expensive !! */
828 facingblok = pastixSymbolGetFacingBloknum(symbptr, i, j, firstfacingblok, ctrl->ricar);
829
830 /* If the couple (i, j) generates a contribution, apply it */
831 if( facingblok >= 0 ) {
832 pastix_int_t facingdiagblok;
833 pastix_int_t facingtask;
834
835 /* Decrease contributions on block and column block */
836 simuctrl->cblktab[facingcblk].ctrbcnt--;
837 simuctrl->bloktab[facingblok].ctrbcnt--;
838
839 /* Checks */
840 assert(simuctrl->cblktab[facingcblk].ctrbcnt >= 0);
841 assert(simuctrl->bloktab[facingblok].ctrbcnt >= 0);
842
843 /* Update to start next search from the last facing block */
844 firstfacingblok = facingblok;
845
846 facingdiagblok = symbptr->cblktab[facingcblk].bloknum;
847 facingtask = simuctrl->bloktab[facingdiagblok].tasknum;
848
849 assert( facingcblk == simuctrl->tasktab[facingtask].cblknum );
850 assert( facingtask < simuctrl->tasknbr );
851
852 if(!local)
853 {
/* Record the contribution in the fan-in target of the facing block for this cluster */
854 ftgtnum = CLUST2INDEX(facingblok, clustnum);
855 simu_updateFtgt( symbptr, simuctrl, ftgtnum, i, j );
856
857 /* Update the ready timer of the ftgt attached to the facing diagonal block */
858 ftgtnum = CLUST2INDEX( facingdiagblok, clustnum );
859 timerSetMax( &(simuctrl->ftgttimetab[ftgtnum]),
860 timerVal(&(sproc->timer)) );
861
862 }
863 else {
864
865 /* Update timer of the task (associated to the diagonal block) */
866 timerSetMax( &(simuctrl->tasktab[facingtask].time),
867 timerVal(&(sproc->timer)) );
868 }
869
/* Last contribution received: the facing task becomes ready */
870 if( simuctrl->cblktab[facingcblk].ctrbcnt == 0 ) {
871 if (!local) {
872 simu_computeTaskReceiveTime(ctrl, symbptr, simuctrl, facingtask );
873 }
874
875 /* Put the task in the ready heaps of all its candidate processors */
876 simu_putInAllReadyQueues( ctrl, simuctrl, facingtask );
877 }
878 }
879 }
880 }
881}
882
883/**
884 *******************************************************************************
885 *
886 * @brief Push all tasks from future to ready
887 *
888 * This routine moves tasks from the future task heap to the ready one, as
889 * long as the head of the future heap passes the timerComp() test against the
890 * timer of the computational unit.
891 *
892 *******************************************************************************
893 *
894 * @param[in] ctrl
895 * The pointer to the global blend control structure.
896 *
897 * @param[inout] simuctrl
898 * The pointer to the simulation structure. On exit, the future and ready
899 * task heaps of the computational unit procnum are updated.
900 *
901 * @param[in] procnum
902 * The index of the computational unit whose tasks need to be transferred
903 * from the future task heap to ready task heap if the computational
904 * unit timer is more advanced than the ready time of the tasks.
905 *
906 *******************************************************************************/
/* NOTE(review): listing line 908 (function name and first parameter) is missing from this extract; the body uses that parameter as ctrl. */
907static inline void
909 SimuCtrl *simuctrl,
910 pastix_int_t procnum )
911{
912 SimuProc *sproc;
913 SimuTimer *timer;
914 pastix_int_t tasknum;
915 pastix_int_t cblknum;
916 pastix_int_t clustnum;
917
918 clustnum = ctrl->core2clust[procnum];
919 sproc = &(simuctrl->proctab[procnum]);
920
921 /*
922 * Move each task from future task heap to ready heap if the timer is
923 * further in the future than the ready time. The loop stops at the first
 * task that is not ready yet.
924 */
925 while( pqueueSize(sproc->futuretask) > 0 )
926 {
927 tasknum = pqueueRead(sproc->futuretask);
928 cblknum = simuctrl->tasktab[tasknum].cblknum;
929
/* Single candidate cluster: use the task timer; otherwise use the fan-in timer of this cluster */
930 if( ctrl->candtab[cblknum].fccandnum == ctrl->candtab[cblknum].lccandnum) {
931 timer = &(simuctrl->tasktab[tasknum].time);
932 }
933 else {
934 timer = &(simuctrl->ftgttimetab[CLUST2INDEX(simuctrl->tasktab[tasknum].bloknum, clustnum )]);
935 }
936
/* NOTE(review): presumably true when timer is not later than the worker timer; confirm timerComp() semantics */
937 if( timerComp( timer, &(sproc->timer) ) )
938 {
939 tasknum = pqueuePop(sproc->futuretask);
940
/* Same keys as for the ready heap insertion: level first, then bloknum */
941 pqueuePush2( sproc->readytask, tasknum,
942#if defined(PASTIX_BLEND_COSTLEVEL)
943 ctrl->candtab[cblknum].costlevel,
944#else
945 ctrl->candtab[cblknum].treelevel,
946#endif
947 simuctrl->tasktab[tasknum].bloknum );
948 }
949 else {
950 break;
951 }
952 }
953}
954
955/**
956 * @}
957 */
958
959/**
960 *******************************************************************************
961 *
962 * @ingroup pastix_blend
963 *
964 * @brief Run the simulation to map the data on the nodes
965 *
966 * This routine simulates the numerical factorization to generate the static
967 * scheduling and the final mapping of the column block onto the PaStiX
968 * processes.
969 *
970 *******************************************************************************
971 *
972 * @param[inout] simuctrl
973 * The pointer to the simulation structure initialized by simuInit().
974 *
975 * @param[in] ctrl
976 * The pointer to the blend control structure which contains the
977 * required data, such as the worker distribution among the processes,
978 * the candidates array for each column block, and the cost of the
979 * computations.
980 *
981 * @param[in] symbptr
982 * The block symbol structure of the problem.
983 *
984 *******************************************************************************/
985void
986simuRun( SimuCtrl *simuctrl,
987 const BlendCtrl *ctrl,
988 const symbol_matrix_t *symbptr )
989{
990
991 pastix_int_t i, j, b;
992 pastix_int_t cblknum, bloknum;
993 /*pastix_int_t c;*/
994 pastix_int_t pr;
995
996#if defined(PASTIX_BLEND_GENTRACE)
997 static volatile pastix_atomic_lock_t trace_lock = PASTIX_ATOMIC_UNLOCKED;
998 char **procnames = NULL;
999
1000 if (ctrl->clustnum == 0)
1001 {
1002 pastix_atomic_lock( &trace_lock );
1003 char *tracename = NULL;
1004 int rc;
1005
1006 if ( ctrl->dirname == NULL ) {
1007 tracename = strdup( "blend" );
1008 }
1009 else {
1010 rc = asprintf( &tracename, "%s/blend", ctrl->dirname );
1011 }
1012
1013 setTraceType (PAJE);
1014 initTrace (tracename, 0, GTG_FLAG_NONE);
1015 pajeEventDefAddParam( GTG_PAJE_EVTDEF_SetState, "TaskId", GTG_PAJE_FIELDTYPE_Int );
1016 free(tracename);
1017
1018 addContType ("CT_Appli", "0", "Application" );
1019 addContType ("CT_P", "CT_Appli", "Process" );
1020 addContType ("CT_T", "CT_P", "Thread" );
1021 addStateType("ST_TS", "CT_T", "Thread State");
1022 addVarType ("VR_TS", "Ready tasks per thread", "CT_T" );
1023 addVarType ("VR_AP", "Ready tasks", "CT_Appli" );
1024
1025 addLinkType ("LT_TL", "Split Event Link", "CT_P", "CT_T", "CT_T");
1026
1027 /* Create root container of the application */
1028 addContainer (0.00000, "Appli", "CT_Appli", "0", "PaStiX Blend Simulation", "");
1029
1030 /* Add all possible states */
1031 addEntityValue ("Wait", "ST_TS", "Waiting", GTG_LIGHTGREY);
1032 addEntityValue ("Comp", "ST_TS", "Computing", GTG_RED);
1033
1034 setVar( 0.0, "VR_AP", "Appli", 0 );
1035
1036 /* Add each process and thread */
1037 SimuProc *sproc = simuctrl->proctab;
1038
1039 procnames = (char**) malloc ( ctrl->total_nbthrds * sizeof(char*) );
1040 pr = 0;
1041 for (i=0; i<ctrl->clustnbr; i++) {
1042 char *clustname;
1043 char *clustalias;
1044
1045 rc = asprintf( &clustname, "Process %02d", (int)i); assert(rc!=-1);
1046 rc = asprintf( &clustalias, "P%d", (int)i); assert(rc!=-1);
1047 addContainer (0.00000, clustalias, "CT_P", "Appli", clustname, "");
1048
1049 for (j=0; j<ctrl->local_nbthrds; j++, pr++, sproc++) {
1050 char *procname;
1051 char *procalias;
1052
1053 rc = asprintf( &procname, "Thread %02d", (int)pr); assert(rc!=-1);
1054 rc = asprintf( &procalias, "T%d", (int)pr); assert(rc!=-1);
1055 addContainer (0.00000, procalias, "CT_T", clustname, procname, "");
1056 setVar( 0.0, "VR_TS", procalias, pqueueSize( sproc->readytask ) );
1057
1058 sproc->procalias = procalias;
1059 procnames[pr] = procalias;
1060 free(procname);
1061 }
1062
1063 free(clustname); free(clustalias);
1064 }
1065 (void)rc;
1066 }
1067#endif /* defined(PASTIX_BLEND_GENTRACE) */
1068
1069 /* Compute number of contributions per blocks, cblks, tasks */
1070 simu_computeBlockCtrbNbr( symbptr, simuctrl, ctrl->ricar );
1071
1072 if ( ctrl->iparm[IPARM_VERBOSE] > 4 ) {
1073 simu_printBlockCtrbNbr( ctrl, symbptr, simuctrl );
1074 }
1075
1076 /*
1077 * All ready tasks are put in the task heaps of their respective candidates
1078 */
1079 for(i=0;i<symbptr->cblknbr;i++)
1080 {
1081 pastix_int_t tasknum;
1082 if(simuctrl->cblktab[i].ctrbcnt == 0)
1083 {
1084 tasknum = simuctrl->bloktab[symbptr->cblktab[i].bloknum].tasknum;
1085 assert(ctrl->candtab[i].treelevel < 0);
1086
1087 if( ctrl->costlevel ) {
1088 assert(ctrl->candtab[i].costlevel <= 0);
1089 }
1090
1091 assert(simuctrl->tasktab[tasknum].cblknum == i);
1092 //assert(ctrl->candtab[i].cblktype == CBLK_1D);
1093
1094 simu_putInAllReadyQueues( ctrl, simuctrl, tasknum );
1095 }
1096 }
1097
1098 /*
1099 * Run simulation and map the task onto a single candidate
1100 */
1101 while(1)
1102 {
1103 SimuTask *task;
1104 pastix_int_t clustnum;
1105
1106 /* Get the next earlier task index and the processor on which it is mapped */
1107 i = simu_getNextTaskNextProc(ctrl, simuctrl, &pr);
1108
1109 /* No more tasks */
1110 if( i == -1 ) {
1111 break;
1112 }
1113
1114 task = &(simuctrl->tasktab[i]);
1115 bloknum = task->bloknum;
1116 cblknum = task->cblknum;
1117 clustnum = ctrl->core2clust[pr];
1118
1119 assert(cblknum < symbptr->cblknbr);
1120 assert(bloknum < symbptr->bloknbr);
1121
1122 /* Make sure the cblk is not already atributed to someone and give it to the selected proc */
1123 assert( simuctrl->ownetab[cblknum] < 0 );
1124 simuctrl->ownetab[cblknum] = pr;
1125 simuctrl->cblktab[cblknum].owned = ( clustnum == ctrl->clustnum );
1126 for(j = symbptr->cblktab[cblknum].bloknum;
1127 j < symbptr->cblktab[cblknum+1].bloknum; j++)
1128 {
1129 simuctrl->bloktab[j].ownerclust = clustnum;
1130 }
1131 task->prionum = simuctrl->clustab[clustnum].prionum;
1132 simuctrl->clustab[clustnum].prionum++;
1133
1134 /* Add task to the selected processor list */
1135 extendint_Add(simuctrl->proctab[pr].tasktab, i);
1136
1137 /* Backup which cluster will get the data for the second run of proportionnal mapping */
1138 ctrl->candtab[cblknum].cluster = clustnum;
1139
1140 /*
1141 * Compute the time at which each proc cand will have added its ftgt and
1142 * received block target if the task is mapped on
1143 */
1144 if( ctrl->candtab[cblknum].fccandnum == ctrl->candtab[cblknum].lccandnum ) {
1145 /*
1146 * All contributions come from the same node
1147 * Time do not depend on the reception of a ftgt
1148 */
1149 timerSetMax( TIMER(pr), timerVal(&(task->time)) );
1150 }
1151 else {
1152 /*
1153 * Contributions might come from different nodes
1154 * Time depends on the reception of a ftgt
1155 */
1156 timerSetMax( TIMER(pr),
1157 timerVal(&(simuctrl->ftgttimetab[CLUST2INDEX(bloknum, clustnum)])) );
1158 }
1159
1160#if defined(PASTIX_BLEND_GENTRACE)
1161 if (ctrl->clustnum == 0) {
1162 char *str_val;
1163 int rc;
1164 assert( (procnames != NULL) && (pr < ctrl->total_nbthrds) );
1165 assert( procnames[pr] != NULL );
1166 rc = asprintf( &str_val, "Comp\" \"%d", (int)i );
1167 setState( timerVal( TIMER(pr) ), "ST_TS", procnames[pr], str_val );
1168 free(str_val);
1169 (void)rc;
1170 }
1171#endif
1172
1173 /*
1174 * Fill some fanintarget info (task of type E2 does not have any ftgt)
1175 */
1176 if(simuctrl->bloktab[bloknum].ftgtnum < simuctrl->bloktab[bloknum+1].ftgtnum)
1177 {
1178 /* Task with several cand cluster */
1179 for(b=bloknum; b<symbptr->cblktab[cblknum+1].bloknum; b++)
1180 {
1181 for(j=simuctrl->bloktab[b].ftgtnum; j<simuctrl->bloktab[b+1].ftgtnum; j++)
1182 {
1183 if( (simuctrl->ftgttab[j].infotab[FTGT_CTRBNBR] > 0) &&
1184 (j != CLUST2INDEX(b, clustnum)) )
1185 {
1186 simuctrl->ftgttab[j].clustnum = INDEX2CLUST(j, b);
1187 simuctrl->ftgttab[j].infotab[FTGT_PRIONUM] = task->prionum;
1188 simuctrl->ftgttab[j].infotab[FTGT_PROCDST] = pr;
1189 simuctrl->ftgttab[j].infotab[FTGT_BLOKDST] = b;
1190 simuctrl->ftgttab[j].infotab[FTGT_TASKDST] = simuctrl->bloktab[bloknum].tasknum;
1191 extendint_Add(&(simuctrl->clustab[INDEX2CLUST(j,b)].ftgtsend[clustnum]), j);
1192
1193 simuctrl->tasktab[simuctrl->bloktab[bloknum].tasknum].ftgtcnt++;
1194
1195 if (clustnum == ctrl->clustnum) {
1196 simuctrl->ftgtcnt++;
1197 }
1198 }
1199 }
1200 }
1201 simuctrl->ftgtprio++;
1202 }
1203 else {
1204 assert(ctrl->candtab[cblknum].fccandnum == ctrl->candtab[cblknum].lccandnum);
1205 }
1206
1207 /* Simulate the task computation */
1208 simu_computeTask( ctrl, symbptr, simuctrl, i );
1209
1210#if defined(PASTIX_BLEND_GENTRACE)
1211 if (ctrl->clustnum == 0) {
1212 char *str_val;
1213 int rc;
1214 assert( (procnames != NULL) && (pr < ctrl->total_nbthrds) );
1215 assert( procnames[pr] != NULL );
1216 rc = asprintf( &str_val, "Wait\" \"%d", (int)i );
1217 setState( timerVal( TIMER(pr) ), "ST_TS", procnames[pr], str_val );
1218 free(str_val);
1219 (void)rc;
1220 }
1221#endif
1222 simu_pushToReadyHeap(ctrl, simuctrl, pr);
1223 }
1224
1225 /* Compute maximum time */
1226 {
1227 double maxtime = 0;
1228 for(pr=0; pr<ctrl->total_nbcores; pr++)
1229 {
1230 if(timerVal(TIMER(pr)) > maxtime) {
1231 maxtime = timerVal(TIMER(pr));
1232 }
1233 }
1234 set_dparm(ctrl->dparm, DPARM_PRED_FACT_TIME, maxtime);
1235 }
1236
1237#if defined(PASTIX_BLEND_GENTRACE)
1238 if (ctrl->clustnum == 0) {
1239 assert( procnames != NULL );
1240 for(pr=0; pr<ctrl->total_nbthrds; pr++) {
1241 free(procnames[pr]);
1242 }
1243 free(procnames);
1244
1245 endTrace();
1246 pastix_atomic_unlock( &trace_lock );
1247 }
1248#endif
1249
1250#if defined(PASTIX_DEBUG_BLEND)
1251 for(i=0;i<simuctrl->cblknbr;i++) {
1252 /* Check valid for 1D distribution only */
1253 assert( simuctrl->ownetab[i] >= 0 );
1254 }
1255 for(i=0;i<symbptr->bloknbr;i++) {
1256 assert( simuctrl->bloktab[i].ownerclust >= 0 );
1257 }
1258#endif
1259}
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
pastix_int_t lcandnum
Definition cand.h:32
pastix_int_t cluster
Definition cand.h:35
pastix_int_t fcandnum
Definition cand.h:31
double costlevel
Definition cand.h:29
pastix_int_t treelevel
Definition cand.h:30
pastix_int_t lccandnum
Definition cand.h:34
pastix_int_t fccandnum
Definition cand.h:33
Processor candidate group to own a column blok.
Definition cand.h:28
double * blokcost
Definition cost.h:31
Arrays of double to store the cost of each element in the matrix.
Definition cost.h:30
pastix_int_t * core2clust
Definition blendctrl.h:79
pastix_int_t ricar
Definition blendctrl.h:33
pastix_int_t costlevel
Definition blendctrl.h:43
double * dparm
Definition blendctrl.h:87
const char * dirname
Definition blendctrl.h:88
pastix_int_t clustnbr
Definition blendctrl.h:71
pastix_int_t clustnum
Definition blendctrl.h:70
Cand * candtab
Definition blendctrl.h:98
pastix_int_t local_nbthrds
Definition blendctrl.h:75
CostMatrix * costmtx
Definition blendctrl.h:97
pastix_int_t total_nbcores
Definition blendctrl.h:72
pastix_int_t total_nbthrds
Definition blendctrl.h:73
pastix_int_t * iparm
Definition blendctrl.h:86
void getCommunicationCosts(const BlendCtrl *ctrl, pastix_int_t clustsrc, pastix_int_t clustdst, pastix_int_t sync_comm_nbr, double *startup, double *bandwidth)
Return the communication cost between two cores.
Definition blendctrl.c:60
The type and structure definitions.
Definition blendctrl.h:28
void extendint_Add(ExtendVectorINT *, pastix_int_t)
Add an element elt to the end of the vector.
void pqueuePush2(pastix_queue_t *, pastix_int_t, double, double)
Insert an element into the sorted queue.
Definition queue.c:178
pastix_int_t pqueueRead(const pastix_queue_t *)
Read the first element of the queue.
Definition queue.c:239
pastix_int_t pqueueSize(const pastix_queue_t *)
Return the size of the queue.
Definition queue.c:135
static pastix_int_t pqueuePop(pastix_queue_t *q)
Pop the head of the queue without returning the keys.
Definition queue.h:75
pastix_int_t tasknum
Definition simu.h:87
pastix_int_t clustnum
Definition simu.h:69
pastix_int_t infotab[FTGT_MAXINFO]
Definition simu.h:68
SimuCluster * clustab
Definition simu.h:123
SimuFtgt * ftgttab
Definition simu.h:127
SimuProc * proctab
Definition simu.h:122
pastix_int_t ftgtprio
Definition simu.h:118
char * procalias
Definition simu.h:61
pastix_queue_t * readytask
Definition simu.h:58
pastix_int_t prionum
Definition simu.h:50
pastix_queue_t * futuretask
Definition simu.h:59
int8_t owned
Definition simu.h:80
SimuTask * tasktab
Definition simu.h:121
int ownerclust
Definition simu.h:93
pastix_int_t ftgtcnt
Definition simu.h:120
SimuTimer * ftgttimetab
Definition simu.h:129
ExtendVectorINT * tasktab
Definition simu.h:60
pastix_int_t ctrbcnt
Definition simu.h:108
pastix_int_t cblknum
Definition simu.h:101
double costadd
Definition simu.h:72
SimuBlok * bloktab
Definition simu.h:126
SimuTimer time
Definition simu.h:105
pastix_int_t bloknum
Definition simu.h:102
pastix_int_t ftgtcnt
Definition simu.h:109
SimuCblk * cblktab
Definition simu.h:125
pastix_int_t ctrbcnt
Definition simu.h:91
pastix_int_t prionum
Definition simu.h:100
pastix_int_t * ownetab
Definition simu.h:124
pastix_int_t ftgtnum
Definition simu.h:88
pastix_int_t ctrbcnt
Definition simu.h:79
pastix_int_t cblknbr
Definition simu.h:117
pastix_int_t tasknbr
Definition simu.h:119
SimuTimer timerecv
Definition simu.h:70
SimuTimer timer
Definition simu.h:57
double costsend
Definition simu.h:71
static void simu_printBlockCtrbNbr(const BlendCtrl *ctrl, const symbol_matrix_t *symbptr, const SimuCtrl *simuctrl)
Print the number of contributions per cblk and block for debug.
Definition simu_run.c:284
static void simu_computeBlockCtrbNbr(const symbol_matrix_t *symbptr, SimuCtrl *simuctrl, pastix_int_t ricar)
Compute the number of contributions to each block.
Definition simu_run.c:207
static void simu_computeFtgtCosts(const BlendCtrl *ctrl, const SimuFtgt *ftgt, pastix_int_t clustsrc, pastix_int_t sync_comm_nbr, double *send, double *add)
Compute the cost of a communication and its update.
Definition simu_run.c:153
static void timerSetMax(SimuTimer *timer, double t)
Set the timer value if the value is greater than the actual one.
Definition simu_timer.h:97
static double timerVal(const SimuTimer *timer)
Get the timer value.
Definition simu_timer.h:71
static void simu_updateFtgt(const symbol_matrix_t *symbptr, SimuCtrl *simuctrl, pastix_int_t ftgtnum, pastix_int_t bloknum, pastix_int_t fbloknum)
Update the Fan In target structure.
Definition simu_run.c:711
static void simu_putInAllReadyQueues(const BlendCtrl *ctrl, SimuCtrl *simuctrl, pastix_int_t tasknum)
Insert a task in the ready task queues of all its candidates.
Definition simu_run.c:349
static void timerAdd(SimuTimer *timer, double t)
Increment the timer.
Definition simu_timer.h:59
static void simu_pushToReadyHeap(const BlendCtrl *ctrl, SimuCtrl *simuctrl, pastix_int_t procnum)
Push all tasks from future to ready.
Definition simu_run.c:908
static void simu_computeTask(const BlendCtrl *ctrl, const symbol_matrix_t *symbptr, SimuCtrl *simuctrl, pastix_int_t tasknum)
Simulate the task execution.
Definition simu_run.c:775
static void simu_computeTaskReceiveTime(const BlendCtrl *ctrl, const symbol_matrix_t *symbptr, SimuCtrl *simuctrl, pastix_int_t tasknum)
Compute the instant t where the task will be received by a node.
Definition simu_run.c:583
static int timerComp(const SimuTimer *t1, const SimuTimer *t2)
Compare two timings.
Definition simu_timer.h:39
static void timerSet(SimuTimer *timer, double t)
Set the timer value.
Definition simu_timer.h:84
static pastix_int_t simu_getNextTaskNextProc(const BlendCtrl *ctrl, SimuCtrl *simuctrl, pastix_int_t *procnumptr)
Look for the best next couple (tasknum, corenum) that is ready to be executed.
Definition simu_run.c:436
@ FTGT_LCOLNUM
Definition simu.h:37
@ FTGT_PROCDST
Definition simu.h:32
@ FTGT_CTRBNBR
Definition simu.h:30
@ FTGT_TASKDST
Definition simu.h:33
@ FTGT_FCOLNUM
Definition simu.h:36
@ FTGT_BLOKDST
Definition simu.h:34
@ FTGT_FROWNUM
Definition simu.h:38
@ FTGT_PRIONUM
Definition simu.h:35
@ FTGT_LROWNUM
Definition simu.h:39
Fan-in structure for the simulation.
Definition simu.h:67
Thread structure for the simulation.
Definition simu.h:56
Task structure for the simulation.
Definition simu.h:99
Timer for the simulation.
Definition simu_timer.h:25
Control structure for the simulation.
Definition simu.h:116
FILE * pastix_fopenw(const char *dirname, const char *filename, const char *mode)
Open a file in the unique directory of the pastix instance.
Definition api.c:251
@ DPARM_PRED_FACT_TIME
Definition api.h:169
@ IPARM_VERBOSE
Definition api.h:36
void simuRun(SimuCtrl *, const BlendCtrl *, const symbol_matrix_t *)
Run the simulation to map the data on the nodes.
Definition simu_run.c:986
pastix_int_t fcblknm
Definition symbol.h:63
pastix_int_t frownum
Definition symbol.h:60
pastix_int_t lrownum
Definition symbol.h:61
pastix_int_t bloknbr
Definition symbol.h:80
symbol_cblk_t * cblktab
Definition symbol.h:83
pastix_int_t bloknum
Definition symbol.h:48
symbol_blok_t * bloktab
Definition symbol.h:84
pastix_int_t cblknbr
Definition symbol.h:79
pastix_int_t pastixSymbolGetFacingBloknum(const symbol_matrix_t *symbptr, pastix_int_t bloksrc, pastix_int_t bloknum, pastix_int_t startsearch, int ricar)
Search the targeted block C for a couple of blocks A and B.
Definition symbol.c:232
Symbol block structure.
Definition symbol.h:59
Symbol column block structure.
Definition symbol.h:45
Symbol matrix structure.
Definition symbol.h:77