PaStiX Handbook  6.2.1
simu_run.c
Go to the documentation of this file.
1 /**
2  *
3  * @file simu_run.c
4  *
5  * PaStiX simulation functions.
6  *
7  * @copyright 2004-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.2.1
11  * @author Pascal Henon
12  * @author Pierre Ramet
13  * @author Mathieu Faverge
14  * @author Gregoire Pichon
15  * @author Vincent Bridonneau
16  * @author Xavier Lacoste
17  * @date 2021-06-29
18  *
19  **/
20 #ifndef _GNU_SOURCE
21 #define _GNU_SOURCE 1
22 #endif
23 #include <stdio.h>
24 #include <string.h>
25 #include <assert.h>
26 #include <math.h>
27 #include <sys/stat.h>
28 #include <sys/types.h>
29 #include <unistd.h>
30 
31 #include "common.h"
32 #include "symbol/symbol.h"
33 #include "extendVector.h"
34 #include "queue.h"
35 #include "elimintree.h"
36 #include "cost.h"
37 #include "cand.h"
38 #include "blendctrl.h"
39 #include "blend/solver.h"
40 #include "simu.h"
41 #include "perf.h"
42 
43 #if defined(PASTIX_BLEND_GENTRACE)
44 #include <GTG.h>
45 #include <GTGPaje.h>
46 
47 /**
48  *******************************************************************************
49  *
50  * @brief Increment a traced counter.
51  *
52  *******************************************************************************
53  *
54  * @param[in] time
55  * The timestamp of the event.
56  *
57  * @param[in] type
58  * The type string of the variable to modify.
59  *
60  * @param[in] cont
61  * The string of the container that holds the variable.
62  *
63  * @param[in] val
64  * The value to add to the counter.
65  *
66  *******************************************************************************/
67 static inline void
68 blendAddVar( int clustnum, varPrec time, const char* type,
69  const char* cont, varPrec val )
70 {
71  if ( clustnum == 0 ) {
72  addVar( time, type, cont, val );
73  }
74 }
75 
76 /**
77  *******************************************************************************
78  *
79  * @brief Decrement a traced counter.
80  *
81  *******************************************************************************
82  *
83  * @param[in] time
84  * The timestamp of the event.
85  *
86  * @param[in] type
87  * The type string of the variable to modify.
88  *
89  * @param[in] cont
90  * The string of the container that holds the variable.
91  *
92  * @param[in] val
93  * The value used to decrement the counter.
94  *
95  *******************************************************************************/
96 static inline void
97 blendSubVar( int clustnum, varPrec time, const char* type,
98  const char* cont, varPrec val )
99 {
100  if ( clustnum == 0 ) {
101  subVar( time, type, cont, val );
102  }
103 }
104 
105 #else
106 
/*
 * Trace generation is compiled out: both helpers expand to an empty statement,
 * so none of their arguments is evaluated.
 */
#define blendAddVar( clustnum, time, type, cont, val ) \
    do { } while (0)

#define blendSubVar( clustnum, time, type, cont, val ) \
    do { } while (0)
112 
113 #endif
114 
115 /**
116  * @addtogroup blend_dev_simu
117  * @{
118  *
119  */
120 
121 /**
122  *******************************************************************************
123  *
124  * @brief Compute the cost of a communication and its update.
125  *
126  *******************************************************************************
127  *
128  * @param[in] ctrl
129  * The blend control structure that describes the architecture and the
130  * cost of the communication between nodes.
131  *
132  * @param[in] ftgt
133  * The fan-in for which the cost is computed.
134  *
135  * @param[in] clustsrc
136  * The index of the source pastix process.
137  *
138  * @param[in] sync_comm_nbr
139  * The number of simultaneous communication.
140  *
141  * @param[out] send
142  * The time cost of the send operation.
143  *
144  * @param[out] add
145  * The time cost of the addition operation.
146  *
147  *******************************************************************************/
148 static inline void
150  const SimuFtgt *ftgt,
151  pastix_int_t clustsrc,
152  pastix_int_t sync_comm_nbr,
153  double *send,
154  double *add )
155 {
156  pastix_int_t M, N;
157  pastix_int_t clustdst = ctrl->core2clust[ftgt->infotab[FTGT_PROCDST]];
158  double startup, bandwidth, addcost;
159 
160  *send = 0.;
161  *add = 0.;
162 
163  if( clustsrc == clustdst ) {
164  return;
165  }
166 
167  assert( (clustsrc >= 0) && (clustdst >= 0) );
168 
169  N = (ftgt->infotab[FTGT_LCOLNUM] - ftgt->infotab[FTGT_FCOLNUM] + 1);
170  M = (ftgt->infotab[FTGT_LROWNUM] - ftgt->infotab[FTGT_FROWNUM] + 1);
171 
172  assert( (N > 0) && (M > 0) );
173 
174  getCommunicationCosts( ctrl, clustsrc, clustdst, sync_comm_nbr, &startup, &bandwidth );
175 
176  *send = (startup + bandwidth * (M * N * sizeof(double) + FTGT_MAXINFO * sizeof(pastix_int_t)));
177  addcost = PERF_GEAM( M, N );
178  *add = addcost > 0. ? addcost : 0.0;
179  return;
180 }
181 
182 /**
183  *******************************************************************************
184  *
185  * @brief Compute the number of contributions to each block.
186  *
187  *******************************************************************************
188  *
189  * @param[in] symbptr
190  * The symbol matrix structure describing the problem.
191  *
192  * @param[inout] simuctrl
193  * The main simulation structure. On exit, the ctrbcnt field of the
194  * blocks is updated with the number of contributions that they each
195  * should receive.
196  *
197  * @param[in] ricar
198  * True if ILU(k) factorization is applied to change the algorithm to
199  * compute the number of contributions.
200  *
201  *******************************************************************************/
202 static inline void
204  SimuCtrl *simuctrl,
205  pastix_int_t ricar )
206 {
207  pastix_int_t i, j, k;
208  pastix_int_t facebloknum, firstbloknum;
209 
210  /*
211  * Compute the number of contributions per block to each block.
212  * Might be optimized if we computed the input graph before.
213  */
214  {
215  symbol_cblk_t *curcblk;
216 
217  curcblk = symbptr->cblktab;
218  for(i=0; i<symbptr->cblknbr; i++, curcblk++)
219  {
220  pastix_int_t fbloknum = curcblk[0].bloknum + 1;
221  pastix_int_t lbloknum = curcblk[1].bloknum;
222 
223  /* 1D cblk computed */
224  for(j=fbloknum; j<lbloknum; j++)
225  {
226  firstbloknum = 0;
227 
228  /* Add contribution due to E2 */
229  for(k=j; k<lbloknum; k++)
230  {
231  facebloknum = pastixSymbolGetFacingBloknum( symbptr, j, k, firstbloknum, ricar );
232  if(facebloknum >= 0) {
233  simuctrl->bloktab[facebloknum].ctrbcnt++;
234  firstbloknum = facebloknum;
235  }
236  }
237  }
238  }
239  }
240 
241  /* Set up the task ctrbcnt and cblkcnt */
242  {
243  SimuTask *task = simuctrl->tasktab;
244 
245  for(i=0;i<simuctrl->tasknbr;i++)
246  {
247  pastix_int_t fbloknum = symbptr->cblktab[task->cblknum ].bloknum;
248  pastix_int_t lbloknum = symbptr->cblktab[task->cblknum+1].bloknum;
249 
250  task->ctrbcnt = 0;
251  for(j=fbloknum; j<lbloknum; j++) {
252  task->ctrbcnt += simuctrl->bloktab[j].ctrbcnt;
253  }
254 
255  simuctrl->cblktab[task->cblknum].ctrbcnt = task->ctrbcnt;
256  task++;
257  }
258  }
259 }
260 
261 
262 /**
263  *******************************************************************************
264  *
265  * @brief Print the number of contributions per cblk and block for debug.
266  *
267  *******************************************************************************
268  *
269  * @param[in] ctrl
270  * The blendctrl structure with the simulation parameters.
271  *
272  * @param[in] symbptr
273  * The symbol matrix structure describing the problem.
274  *
275  * @param[in] simuctrl
276  * The main simulation structure.
277  *
278  *******************************************************************************/
279 static inline void
281  const symbol_matrix_t *symbptr,
282  const SimuCtrl *simuctrl )
283 {
284  FILE *fd1 = NULL;
285  FILE *fd2 = NULL;
286  pastix_int_t i, j;
287  symbol_cblk_t *curcblk;
288 
289  fd1 = pastix_fopenw( ctrl->dirname, "contribblok.txt", "w" );
290  if ( fd1 == NULL ) {
291  return;
292  }
293  fd2 = pastix_fopenw( ctrl->dirname, "contribcblk.txt", "w" );
294  if ( fd2 == NULL ) {
295  return;
296  }
297 
298  curcblk = symbptr->cblktab;
299  for(i=0; i<symbptr->cblknbr; i++, curcblk++)
300  {
301  pastix_int_t fbloknum = curcblk[0].bloknum + 1;
302  pastix_int_t lbloknum = curcblk[1].bloknum;
303 
304  /* 1D cblk computed */
305  for(j=fbloknum; j<lbloknum; j++)
306  {
307  fprintf(fd1, "%ld %ld\n", (long)j, (long)simuctrl->bloktab[j].ctrbcnt);
308  }
309 #if defined(PASTIX_SYMBOL_DUMP_SYMBMTX)
310  fprintf(fd2, "%ld %ld %ld\n", (long)i, (long)simuctrl->cblktab[i].ctrbcnt, (long)curcblk->split_cblk);
311 #else
312  fprintf(fd2, "%ld %ld\n", (long)i, (long)simuctrl->cblktab[i].ctrbcnt);
313 #endif
314  }
315 
316  fclose( fd1 );
317  fclose( fd2 );
318 }
319 
320 /**
321  *******************************************************************************
322  *
323  * @brief Insert a task in the ready task queues of all its candidates.
324  *
325  * This function according to the ready date of a task put this task on the
326  * ready queue of a processor.
327  * When the ready date of a task is inferior to the proc timer then the
328  * task is ordered according to its priorities in the elimination tree.
329  *
330  *******************************************************************************
331  *
332  * @param[in] ctrl
333  * The blend control structure to provide the candtab and the
334  * core2clust arrays.
335  *
336  * @param[inout] simuctrl
337  * The main simulation structure. On exit, the ready tasks queues of
338  * the candidates for tasknum are updated.
339  *
340  * @param[in] tasknum
341  * The index of the task to insert as a ready task.
342  *
343  *******************************************************************************/
344 static inline void
346  SimuCtrl *simuctrl,
347  pastix_int_t tasknum )
348 {
349  const SimuTask *task = simuctrl->tasktab + tasknum;
350  const Cand *cblkcand = ctrl->candtab + task->cblknum;
351  SimuProc *sproc;
352  double ready_date = 0.0;
353  pastix_int_t procnum;
354  pastix_int_t bloknum = task->bloknum;
355 #if defined(PASTIX_BLEND_COSTLEVEL)
356  double level = cblkcand->costlevel;
357 #else
358  pastix_int_t level = cblkcand->treelevel;
359 #endif
360  assert( tasknum != -1 );
361 
362  blendAddVar( ctrl->clustnum, timerVal( &(task->time) ), "VR_AP", "Appli", 1 );
363 
364  /* Get the ready date of the task on the processor passed in parameter */
365  if( cblkcand->fccandnum == cblkcand->lccandnum )
366  {
367  ready_date = timerVal( &(task->time) );
368  sproc = &(simuctrl->proctab[cblkcand->fcandnum]);
369 
370  for(procnum = cblkcand->fcandnum;
371  procnum <= cblkcand->lcandnum; procnum++, sproc++)
372  {
373  if( ready_date > timerVal( &(sproc->timer) ) ) {
374  pqueuePush2( sproc->futuretask, tasknum, ready_date, level );
375  }
376  else {
377  pqueuePush2( sproc->readytask, tasknum, level, bloknum );
378  }
379  blendAddVar( ctrl->clustnum, ready_date, "VR_TS", sproc->procalias, 1 );
380  }
381  }
382  else
383  {
384  sproc = &(simuctrl->proctab[cblkcand->fcandnum]);
385 
386  for(procnum = cblkcand->fcandnum;
387  procnum <= cblkcand->lcandnum; procnum++, sproc++)
388  {
389  ready_date = timerVal( simuctrl->ftgttimetab + CLUST2INDEX(bloknum, ctrl->core2clust[procnum]) );
390 
391  if( ready_date > timerVal( &(sproc->timer) ) ) {
392  pqueuePush2( sproc->futuretask, tasknum, ready_date, level );
393  }
394  else {
395  pqueuePush2( sproc->readytask, tasknum, level, bloknum );
396  }
397  blendAddVar( ctrl->clustnum, ready_date, "VR_TS", sproc->procalias, 1 );
398  }
399  }
400 }
401 
402 /**
403  *******************************************************************************
404  *
405  * @brief Look for the best next couple (tasknum, corenum) that is ready to be
406  * executed.
407  *
408  * This function is the main and more costly one. It looks for each worker,
409  * which task is the first one available for execution, and from all those
410  * couples, which one is the first one to finish.
411  *
412  *******************************************************************************
413  *
414  * @param[in] ctrl
415  * The blend control structure to provide the candtab and the
416  * core2clust arrays.
417  *
418  * @param[inout] simuctrl
419  * The main simulation structure. On exit, the structure is updated
420  * with the extraction of the next best task to run.
421  *
422  * @param[out] procnumptr
423  * The index of the candidate to run the task.
424  *
425  *******************************************************************************
426  *
427  * @return The next task selected for execution in the simulator. The worker
428  * selected is returned in the procnumptr field.
429  *
430  *******************************************************************************/
431 static inline pastix_int_t
433  SimuCtrl *simuctrl,
434  pastix_int_t *procnumptr )
435 {
436  pastix_int_t p;
437  pastix_int_t procnum = -1;
438  pastix_int_t tasknum;
439  double earlytimeready = PASTIX_INT_MAX;
440  double earlyproctimer = PASTIX_INT_MAX;
441  double timeready;
442  pastix_int_t earlytask = -1;
443 
444  /* Find the earlier task in the processor heaps */
445  for(p=0;p<ctrl->total_nbcores;p++)
446  {
447  SimuProc *sproc = &(simuctrl->proctab[p]);
448  tasknum = -1;
449  /*
450  * First we search the earlier task in the set of tasks whose ready date
451  * is < proc timer
452  */
453  while( pqueueSize(sproc->readytask) > 0 )
454  {
455  tasknum = pqueueRead( sproc->readytask );
456  if( simuctrl->bloktab[simuctrl->tasktab[tasknum].bloknum].ownerclust >= 0 )
457  {
458  /* This task have to be remove from the heap (already mapped) */
459  pqueuePop( sproc->readytask );
460  tasknum = -1;
461  }
462  else
463  break;
464  }
465  /*
466  * We found no task which ready date is < proc timer so we search one
467  * that minimizes ready date - proc-timer
468  */
469  if(tasknum == -1)
470  {
471  while(pqueueSize(simuctrl->proctab[p].futuretask)>0)
472  {
473  tasknum = pqueueRead(simuctrl->proctab[p].futuretask);
474  if( simuctrl->bloktab[simuctrl->tasktab[tasknum].bloknum].ownerclust >= 0 )
475  {
476  /* This task have to be remove from the heap (already mapped) */
477  pqueuePop(simuctrl->proctab[p].futuretask);
478  tasknum = -1;
479  }
480  else {
481  break;
482  }
483  }
484  }
485 
486  if(tasknum != -1)
487  {
488  const SimuTask *task = simuctrl->tasktab + tasknum;
489  timeready = MAX(timerVal(TIMER(p)),
490  timerVal(&(simuctrl->ftgttimetab[CLUST2INDEX(simuctrl->tasktab[tasknum].bloknum,
491  ctrl->core2clust[p])])));
492 
493  timeready = MAX( timeready, timerVal( &(task->time) ) );
494 
495  /*
496  * We prevent to distribute on the same processor set when all time
497  * are equal
498  */
499  if((timeready == earlytimeready) && (timerVal(TIMER(p)) < earlyproctimer))
500  {
501  procnum = p;
502  earlyproctimer = timerVal(TIMER(p));
503  earlytask = tasknum;
504  earlytimeready = timeready;
505  }
506 
507  if(timeready < earlytimeready)
508  {
509  procnum = p;
510  earlytask = tasknum;
511  earlytimeready = timeready;
512  }
513  }
514  }
515 
516 #if defined(PASTIX_BLEND_GENTRACE)
517  if ( (earlytask != -1) && (ctrl->clustnum == 0) )
518  {
519  const SimuTask *task = simuctrl->tasktab + earlytask;
520  const Cand *cblkcand = ctrl->candtab + task->cblknum;
521  SimuProc *sproc = &(simuctrl->proctab[cblkcand->fcandnum]);
522 
523  for(p = cblkcand->fcandnum;
524  p <= cblkcand->lcandnum; p++, sproc++)
525  {
526  blendSubVar( ctrl->clustnum, earlytimeready, "VR_TS", sproc->procalias, 1 );
527  }
528 
529  blendSubVar( ctrl->clustnum, earlytimeready, "VR_AP", "Appli", 1 );
530  }
531 #endif
532 
533 #if !defined(NDEBUG)
534  if(procnum != -1)
535  {
536  if( pqueueSize(simuctrl->proctab[procnum].readytask) > 0 ) {
537  assert(earlytask == pqueuePop(simuctrl->proctab[procnum].readytask));
538  }
539  else {
540  assert(earlytask == pqueuePop(simuctrl->proctab[procnum].futuretask));
541  }
542  }
543 #endif
544 
545  *procnumptr = procnum;
546  return earlytask;
547 }
548 
549 /**
550  *******************************************************************************
551  *
552  * @brief Compute the instant t where the task will be received by a node.
553  *
554  * Compute the time the cblk would have RECEIVED and ADDED all its contributions
555  * if it was mapped on a given cand CLUSTER.
556  * @warning These times do not include add time for fan in target
557  *
558  *******************************************************************************
559  *
560  * @param[in] ctrl
561  * The blend control structure to provide the candtab and the
562  * core2clust arrays.
563  *
564  * @param[in] symbptr
565  * The symbol matrix structure describing the problem.
566  *
567  * @param[inout] simuctrl
568  * The main simulation structure. On exit, the ready tasks queues of
569  * the candidates for tasknum are updated.
570  *
571  * @param[in] tasknum
572  * The index of the task to insert as a ready task.
573  *
574  *******************************************************************************/
575 static inline void
577  const symbol_matrix_t *symbptr,
578  SimuCtrl *simuctrl,
579  pastix_int_t tasknum )
580 {
581  pastix_int_t i, j;
582  double lftgttime = 0;
583  double sftgttime = 0;
584  pastix_int_t lftgtnum = -1;
585  pastix_int_t cblknum;
586  pastix_int_t bloknum;
587  pastix_int_t clustdst;
588 
589  bloknum = simuctrl->tasktab[tasknum].bloknum;
590  cblknum = simuctrl->tasktab[tasknum].cblknum;
591 
592  /* If the task is local, all sons sending contributions are local => no treatment */
593  if( ctrl->candtab[cblknum].fccandnum == ctrl->candtab[cblknum].lccandnum ) {
594  return;
595  }
596 
597  /*
598  * Compute the cblk on proc timer that is time the cblk would have received
599  * all its contributions if it was mapped on a given cand processor These
600  * times INCLUDE add time for fan in target !!
601  */
602 
603  /* Compute receive time (time at which a non-local processor should received the target) */
604  /* find the latest ftgt receive time and the second latest*/
605  for(i=simuctrl->bloktab[bloknum].ftgtnum; i<simuctrl->bloktab[bloknum+1].ftgtnum; i++)
606  {
607  /* Source of this ftgt */
608  clustdst = INDEX2CLUST(i, bloknum);
609 
610  /* Task with several cand proc */
611  /* The information about ftgt costs are in the ftgt of the diagonal block;
612  this loop sums the cost of all the ftgt received by the blocks in this column block */
613  if(simuctrl->ftgttab[i].infotab[FTGT_CTRBNBR]>0) {
614  for(j=bloknum;j<symbptr->cblktab[cblknum+1].bloknum;j++)
615  {
616  if(simuctrl->ftgttab[simuctrl->bloktab[j].ftgtnum + i-simuctrl->bloktab[bloknum].ftgtnum].infotab[FTGT_CTRBNBR]>0)
617  {
618  double send, add;
619 
620  simu_computeFtgtCosts( ctrl, simuctrl->ftgttab + CLUST2INDEX(j, clustdst), clustdst,
621  ctrl->candtab[cblknum].lccandnum - ctrl->candtab[cblknum].fccandnum + 1,
622  &send, &add );
623 
624  simuctrl->ftgttab[i].costadd += add;
625  simuctrl->ftgttab[i].costsend += send;
626  }
627  }
628  }
629 
630 #if defined(PASTIX_DEBUG_BLEND)
631  if(!(simuctrl->ftgttab[i].costsend >= 0.0)) {
632  errorPrint("ftgt %ld costsend %f", (long)i, simuctrl->ftgttab[i].costsend);
633  }
634  if(!(simuctrl->ftgttab[i].costadd >= 0.0)) {
635  errorPrint("ftgt %ld costadd %f", (long)i, simuctrl->ftgttab[i].costadd);
636  }
637 
638  assert(simuctrl->ftgttab[i].costsend >= 0.0);
639  assert(simuctrl->ftgttab[i].costadd >= 0.0);
640 #endif
641 
642  /* ftgttab[].timerecv is the time this ftgt will be receive */
643  timerSet(&(simuctrl->ftgttab[i].timerecv), timerVal(&(simuctrl->ftgttimetab[i])) + simuctrl->ftgttab[i].costsend + simuctrl->ftgttab[i].costadd);
644 
645  /* If this ftgt the last reveived or the second last received ?? */
646  if(timerVal(&(simuctrl->ftgttab[i].timerecv)) > lftgttime)
647  {
648  lftgttime = timerVal(&(simuctrl->ftgttab[i].timerecv));
649  lftgtnum = i;
650  }
651  else {
652  if(timerVal(&(simuctrl->ftgttab[i].timerecv)) > sftgttime) {
653  sftgttime = timerVal(&(simuctrl->ftgttab[i].timerecv));
654  }
655  }
656  }
657 
658 
659  /*
660  * Put in ftgttimetab[] the date at which the cluster would have received
661  * and add all the ftgt if the task was mapped on it.
662  */
663  for(i=simuctrl->bloktab[bloknum].ftgtnum; i<simuctrl->bloktab[bloknum+1].ftgtnum;i++)
664  {
665  if(i != lftgtnum) {
666  timerSet(&(simuctrl->ftgttimetab[i]), lftgttime);
667  }
668  else {
669  timerSetMax( &(simuctrl->ftgttimetab[i]), sftgttime );
670  }
671  }
672 }
673 
674 /**
675  *******************************************************************************
676  *
677  * @brief Update the Fan In target structure
678  *
679  * Increment the contribution counter of the fan-in and integrate to the ftgt
680  * area the new contribution.
681  *
682  *******************************************************************************
683  *
684  * @param[in] symbptr
685  * The pointer to the symbolic matrix structure.
686  *
687  * @param[inout] simuctrl
688  * The pointer to the simulation structure. On exit, data regarding the
689  * computational unit pr are updated.
690  *
691  * @param[in] ftgtnum
692  * Index of the fanin target to update.
693  *
694  * @param[in] bloknum
695  * Index of the first off-diagonal block generating a contribution to
696  * the ftgtnum Fan In.
697  *
698  * @param[in] fbloknum
699  * Index of the off-diagonal block that is multiplied by blocknum to
700  * produce the update.
701  *
702  *******************************************************************************/
703 static inline void
705  SimuCtrl *simuctrl,
706  pastix_int_t ftgtnum,
707  pastix_int_t bloknum,
708  pastix_int_t fbloknum )
709 {
710  pastix_int_t *infotab = simuctrl->ftgttab[ftgtnum].infotab;
711  symbol_blok_t *blokptr = (symbptr->bloktab) + bloknum;
712  symbol_blok_t *fblokptr = (symbptr->bloktab) + fbloknum;
713 
714  infotab[FTGT_CTRBNBR]++;
715 
716  /* Update ftgt dimensions to the maximum area covering all contributions */
717  if( blokptr->frownum < infotab[FTGT_FCOLNUM] ) {
718  infotab[FTGT_FCOLNUM] = blokptr->frownum;
719  }
720 
721  if( blokptr->lrownum > infotab[FTGT_LCOLNUM] ) {
722  infotab[FTGT_LCOLNUM] = blokptr->lrownum;
723  }
724 
725  if( fblokptr->frownum < infotab[FTGT_FROWNUM] ) {
726  infotab[FTGT_FROWNUM] = fblokptr->frownum;
727  }
728 
729  if( fblokptr->lrownum > infotab[FTGT_LROWNUM] ) {
730  infotab[FTGT_LROWNUM] = fblokptr->lrownum;
731  }
732 
733  assert( (infotab[FTGT_LCOLNUM] - infotab[FTGT_FCOLNUM] + 1) > 0 );
734  assert( (infotab[FTGT_LROWNUM] - infotab[FTGT_FROWNUM] + 1) > 0 );
735 }
736 
737 /**
738  *******************************************************************************
739  *
740  * @brief Simulate the task execution.
741  *
742  * Update the timers of the selected worker, as well as those of the current
743  * cblk, and the targeted cblks by the update.
744  *
745  *******************************************************************************
746  *
747  * @param[in] ctrl
748  * The pointer to the global blend control structure.
749  *
750  * @param[in] symbptr
751  * The pointer to the symbolic matrix structure.
752  *
753  * @param[inout] simuctrl
754  * The pointer to the simulation structure. On exit, data regarding the
755  * computational unit pr are updated.
756  *
757  * @param[in] tasknum
758  * The task index of the one, we want to simulate the execution.
759  *
760  *******************************************************************************
761  *
762  * @remark In this function, we use the standard [f|l]blocknum for first and
763  * last bloknum, and facingcblk, facingblok for the facing block and column
764  * block.
765  *
766  *******************************************************************************/
767 static inline void
769  const symbol_matrix_t *symbptr,
770  SimuCtrl *simuctrl,
771  pastix_int_t tasknum )
772 {
773  pastix_int_t i, j;
774  pastix_int_t cblknum;
775  pastix_int_t fbloknum;
776  pastix_int_t lbloknum;
777  pastix_int_t firstfacingblok;
778  pastix_int_t facingblok;
779  pastix_int_t facingcblk;
780  pastix_int_t local;
781  pastix_int_t ftgtnum;
782  pastix_int_t procnum;
783  pastix_int_t clustnum;
784  SimuProc *sproc;
785  CostMatrix *costmtx;
786 
787  cblknum = simuctrl->tasktab[tasknum].cblknum;
788  procnum = simuctrl->ownetab[cblknum];
789  clustnum = ctrl->core2clust[procnum];
790  sproc = &(simuctrl->proctab[procnum]);
791  costmtx = ctrl->costmtx;
792 
793  fbloknum = symbptr->cblktab[cblknum ].bloknum;
794  lbloknum = symbptr->cblktab[cblknum+1].bloknum;
795 
796  assert( (procnum >= ctrl->candtab[cblknum].fcandnum) &&
797  (procnum <= ctrl->candtab[cblknum].lcandnum) );
798 
799  /* Add factorization time of the diagonal blok + cost of the TRSM operation on the cblk*/
800  timerAdd(&(sproc->timer), costmtx->blokcost[fbloknum]);
801 
802  for(i=fbloknum+1; i<lbloknum; i++)
803  {
804  /* Add cost of the GEMM update related to this off-diagonal block */
805  timerAdd(&(sproc->timer), costmtx->blokcost[i]);
806 
807  facingcblk = symbptr->bloktab[i].fcblknm;
808 
809  /*
810  * If only one candidate cluster, we can consider the facingcblk as
811  * local because it is an ancestor of the current cblk in the
812  * elimination tree.
813  */
814  local = ( ctrl->candtab[facingcblk].fccandnum == ctrl->candtab[facingcblk].lccandnum ) ? 1 : 0;
815 
816  firstfacingblok = symbptr->cblktab[facingcblk].bloknum;
817 
818  for(j=i; j<lbloknum; j++)
819  {
820  /* TODO: symbolGetFacingBloknum is too expensive !! */
821  facingblok = pastixSymbolGetFacingBloknum(symbptr, i, j, firstfacingblok, ctrl->ricar);
822 
823  /* If the couple (i, j) generates a contribution, applies it */
824  if( facingblok >= 0 ) {
825  pastix_int_t facingdiagblok;
826  pastix_int_t facingtask;
827 
828  /* Decrease contributions on block and column block */
829  simuctrl->cblktab[facingcblk].ctrbcnt--;
830  simuctrl->bloktab[facingblok].ctrbcnt--;
831 
832  /* Checks */
833  assert(simuctrl->cblktab[facingcblk].ctrbcnt >= 0);
834  assert(simuctrl->bloktab[facingblok].ctrbcnt >= 0);
835 
836  /* Update to start next search from the last facing block */
837  firstfacingblok = facingblok;
838 
839  facingdiagblok = symbptr->cblktab[facingcblk].bloknum;
840  facingtask = simuctrl->bloktab[facingdiagblok].tasknum;
841 
842  assert( facingcblk == simuctrl->tasktab[facingtask].cblknum );
843  assert( facingtask < simuctrl->tasknbr );
844 
845  if(!local)
846  {
847  ftgtnum = CLUST2INDEX(facingblok, clustnum);
848  simu_updateFtgt( symbptr, simuctrl, ftgtnum, i, j );
849 
850  /* Update timer ready for receiver of the ftgt */
851  ftgtnum = CLUST2INDEX( facingdiagblok, clustnum );
852  timerSetMax( &(simuctrl->ftgttimetab[ftgtnum]),
853  timerVal(&(sproc->timer)) );
854 
855  }
856  else {
857 
858  /* Update timer of the task (associated to the diagonal block) */
859  timerSetMax( &(simuctrl->tasktab[facingtask].time),
860  timerVal(&(sproc->timer)) );
861  }
862 
863  if( simuctrl->cblktab[facingcblk].ctrbcnt == 0 ) {
864  if (!local) {
865  simu_computeTaskReceiveTime(ctrl, symbptr, simuctrl, facingtask );
866  }
867 
868  /* Put the task in the ready heap of its local candidat processor */
869  simu_putInAllReadyQueues( ctrl, simuctrl, facingtask );
870  }
871  }
872  }
873  }
874 }
875 
876 /**
877  *******************************************************************************
878  *
879  * @brief Push all tasks from future to ready
880  *
881  * This routine pushes all future tasks from the future task heap to the ready
882  * one, if the time at which the task will be ready is already passed by the
883  * computation unit.
884  *
885  *******************************************************************************
886  *
887  * @param[in] ctrl
888  * The pointer to the global blend control structure.
889  *
890  * @param[inout] simuctrl
891  * The pointer to the simulation structure. On exit, data regarding the
892  * computational unit pr are updated.
893  *
894  * @param[in] procnum
895  * The computational unit index for which the data need to be transfer
896  * from the future task heap to ready task heap if the computational
897  * unit timer is more advanced than the ready time of the tasks.
898  *
899  *******************************************************************************/
900 static inline void
902  SimuCtrl *simuctrl,
903  pastix_int_t procnum )
904 {
905  SimuProc *sproc;
906  SimuTimer *timer;
907  pastix_int_t tasknum;
908  pastix_int_t cblknum;
909  pastix_int_t clustnum;
910 
911  clustnum = ctrl->core2clust[procnum];
912  sproc = &(simuctrl->proctab[procnum]);
913 
914  /*
915  * Move each task from future task heap to ready heap if the timer is
916  * further in the future than the ready time
917  */
918  while( pqueueSize(sproc->futuretask) > 0 )
919  {
920  tasknum = pqueueRead(sproc->futuretask);
921  cblknum = simuctrl->tasktab[tasknum].cblknum;
922 
923  if( ctrl->candtab[cblknum].fccandnum == ctrl->candtab[cblknum].lccandnum) {
924  timer = &(simuctrl->tasktab[tasknum].time);
925  }
926  else {
927  timer = &(simuctrl->ftgttimetab[CLUST2INDEX(simuctrl->tasktab[tasknum].bloknum, clustnum )]);
928  }
929 
930  if( timerComp( timer, &(sproc->timer) ) )
931  {
932  tasknum = pqueuePop(sproc->futuretask);
933 
934  pqueuePush2( sproc->readytask, tasknum,
935 #if defined(PASTIX_BLEND_COSTLEVEL)
936  ctrl->candtab[cblknum].costlevel,
937 #else
938  ctrl->candtab[cblknum].treelevel,
939 #endif
940  simuctrl->tasktab[tasknum].bloknum );
941  }
942  else {
943  break;
944  }
945  }
946 }
947 
948 /**
949  * @}
950  */
951 
952 /**
953  *******************************************************************************
954  *
955  * @ingroup pastix_blend
956  *
957  * @brief Run the simulation to map the data on the nodes
958  *
959  * This routine simulates the numerical factorization to generate the static
960  * scheduling and the final mapping of the column block onto the PaStiX
961  * processes.
962  *
963  *******************************************************************************
964  *
965  * @param[inout] simuctrl
966  * The pointer to the simulation structure initialized by simuInit().
967  *
968  * @param[in] ctrl
969  * The pointer to the blend control structure which contains the
970  * required data, such as the worker distribution among the processes,
971  * the candidates array for each column block, and the cost of the
972  * computations.
973  *
974  * @param[in] symbptr
975  * The block symbol structure of the problem.
976  *
977  *******************************************************************************/
978 void
979 simuRun( SimuCtrl *simuctrl,
980  const BlendCtrl *ctrl,
981  const symbol_matrix_t *symbptr )
982 {
983 
984  pastix_int_t i, j, b;
985  pastix_int_t cblknum, bloknum;
986  /*pastix_int_t c;*/
987  pastix_int_t pr;
988 
989 #if defined(PASTIX_BLEND_GENTRACE)
990  static volatile pastix_atomic_lock_t trace_lock = PASTIX_ATOMIC_UNLOCKED;
991  char **procnames = NULL;
992 
993  if (ctrl->clustnum == 0)
994  {
995  pastix_atomic_lock( &trace_lock );
996  char *tracename = NULL;
997  int rc;
998 
999  if ( ctrl->dirname == NULL ) {
1000  tracename = strdup( "blend" );
1001  }
1002  else {
1003  rc = asprintf( &tracename, "%s/blend", ctrl->dirname );
1004  }
1005 
1006  setTraceType (PAJE);
1007  initTrace (tracename, 0, GTG_FLAG_NONE);
1008  pajeEventDefAddParam( GTG_PAJE_EVTDEF_SetState, "TaskId", GTG_PAJE_FIELDTYPE_Int );
1009  free(tracename);
1010 
1011  addContType ("CT_Appli", "0", "Application" );
1012  addContType ("CT_P", "CT_Appli", "Process" );
1013  addContType ("CT_T", "CT_P", "Thread" );
1014  addStateType("ST_TS", "CT_T", "Thread State");
1015  addVarType ("VR_TS", "Ready tasks per thread", "CT_T" );
1016  addVarType ("VR_AP", "Ready tasks", "CT_Appli" );
1017 
1018  addLinkType ("LT_TL", "Split Event Link", "CT_P", "CT_T", "CT_T");
1019 
1020  /* Create root container of the application */
1021  addContainer (0.00000, "Appli", "CT_Appli", "0", "PaStiX Blend Simulation", "");
1022 
1023  /* Add all possible states */
1024  addEntityValue ("Wait", "ST_TS", "Waiting", GTG_LIGHTGREY);
1025  addEntityValue ("Comp", "ST_TS", "Computing", GTG_RED);
1026 
1027  setVar( 0.0, "VR_AP", "Appli", 0 );
1028 
1029  /* Add each process and thread */
1030  SimuProc *sproc = simuctrl->proctab;
1031 
1032  procnames = (char**) malloc ( ctrl->total_nbthrds * sizeof(char*) );
1033  pr = 0;
1034  for (i=0; i<ctrl->clustnbr; i++) {
1035  char *clustname;
1036  char *clustalias;
1037 
1038  rc = asprintf( &clustname, "Process %02d", (int)i); assert(rc!=-1);
1039  rc = asprintf( &clustalias, "P%d", (int)i); assert(rc!=-1);
1040  addContainer (0.00000, clustalias, "CT_P", "Appli", clustname, "");
1041 
1042  for (j=0; j<ctrl->local_nbthrds; j++, pr++, sproc++) {
1043  char *procname;
1044  char *procalias;
1045 
1046  rc = asprintf( &procname, "Thread %02d", (int)pr); assert(rc!=-1);
1047  rc = asprintf( &procalias, "T%d", (int)pr); assert(rc!=-1);
1048  addContainer (0.00000, procalias, "CT_T", clustname, procname, "");
1049  setVar( 0.0, "VR_TS", procalias, pqueueSize( sproc->readytask ) );
1050 
1051  sproc->procalias = procalias;
1052  procnames[pr] = procalias;
1053  free(procname);
1054  }
1055 
1056  free(clustname); free(clustalias);
1057  }
1058  (void)rc;
1059  }
1060 #endif /* defined(PASTIX_BLEND_GENTRACE) */
1061 
1062  /* Compute number of contributions per blocks, cblks, tasks */
1063  simu_computeBlockCtrbNbr( symbptr, simuctrl, ctrl->ricar );
1064 
1065  if ( ctrl->iparm[IPARM_VERBOSE] > 4 ) {
1066  simu_printBlockCtrbNbr( ctrl, symbptr, simuctrl );
1067  }
1068 
1069  /*
1070  * All ready tasks are put in the task heaps of their respective candidates
1071  */
1072  for(i=0;i<symbptr->cblknbr;i++)
1073  {
1074  pastix_int_t tasknum;
1075  if(simuctrl->cblktab[i].ctrbcnt == 0)
1076  {
1077  tasknum = simuctrl->bloktab[symbptr->cblktab[i].bloknum].tasknum;
1078  assert(ctrl->candtab[i].treelevel < 0);
1079 
1080  if( ctrl->costlevel ) {
1081  assert(ctrl->candtab[i].costlevel <= 0);
1082  }
1083 
1084  assert(simuctrl->tasktab[tasknum].cblknum == i);
1085  //assert(ctrl->candtab[i].cblktype == CBLK_1D);
1086 
1087  simu_putInAllReadyQueues( ctrl, simuctrl, tasknum );
1088  }
1089  }
1090 
1091  /*
1092  * Run simulation and map the task onto a single candidate
1093  */
1094  while(1)
1095  {
1096  SimuTask *task;
1097  pastix_int_t clustnum;
1098 
1099  /* Get the next earlier task index and the processor on which it is mapped */
1100  i = simu_getNextTaskNextProc(ctrl, simuctrl, &pr);
1101 
1102  /* No more tasks */
1103  if( i == -1 ) {
1104  break;
1105  }
1106 
1107  task = &(simuctrl->tasktab[i]);
1108  bloknum = task->bloknum;
1109  cblknum = task->cblknum;
1110  clustnum = ctrl->core2clust[pr];
1111 
1112  assert(cblknum < symbptr->cblknbr);
1113  assert(bloknum < symbptr->bloknbr);
1114 
1115  /* Make sure the cblk is not already atributed to someone and give it to the selected proc */
1116  assert( simuctrl->ownetab[cblknum] < 0 );
1117  simuctrl->ownetab[cblknum] = pr;
1118  simuctrl->cblktab[cblknum].owned = ( clustnum == ctrl->clustnum );
1119  for(j = symbptr->cblktab[cblknum].bloknum;
1120  j < symbptr->cblktab[cblknum+1].bloknum; j++)
1121  {
1122  simuctrl->bloktab[j].ownerclust = clustnum;
1123  }
1124  task->prionum = simuctrl->clustab[clustnum].prionum;
1125  simuctrl->clustab[clustnum].prionum++;
1126 
1127  /* Add task to the selected processor list */
1128  extendint_Add(simuctrl->proctab[pr].tasktab, i);
1129 
1130  /* Backup which cluster will get the data for the second run of proportionnal mapping */
1131  ctrl->candtab[cblknum].cluster = clustnum;
1132 
1133  /*
1134  * Compute the time at which each proc cand will have added its ftgt and
1135  * received block target if the task is mapped on
1136  */
1137  if( ctrl->candtab[cblknum].fccandnum == ctrl->candtab[cblknum].lccandnum ) {
1138  /*
1139  * All contributions come from the same node
1140  * Time do not depend on the reception of a ftgt
1141  */
1142  timerSetMax( TIMER(pr), timerVal(&(task->time)) );
1143  }
1144  else {
1145  /*
1146  * Contributions might come from different nodes
1147  * Time depends on the reception of a ftgt
1148  */
1149  timerSetMax( TIMER(pr),
1150  timerVal(&(simuctrl->ftgttimetab[CLUST2INDEX(bloknum, clustnum)])) );
1151  }
1152 
1153 #if defined(PASTIX_BLEND_GENTRACE)
1154  if (ctrl->clustnum == 0) {
1155  char *str_val;
1156  int rc;
1157  assert( (procnames != NULL) && (pr < ctrl->total_nbthrds) );
1158  assert( procnames[pr] != NULL );
1159  rc = asprintf( &str_val, "Comp\" \"%d", (int)i );
1160  setState( timerVal( TIMER(pr) ), "ST_TS", procnames[pr], str_val );
1161  free(str_val);
1162  (void)rc;
1163  }
1164 #endif
1165 
1166  /*
1167  * Fill some fanintarget info (task of type E2 does not have any ftgt)
1168  */
1169  if(simuctrl->bloktab[bloknum].ftgtnum < simuctrl->bloktab[bloknum+1].ftgtnum)
1170  {
1171  /* Task with several cand cluster */
1172  for(b=bloknum; b<symbptr->cblktab[cblknum+1].bloknum; b++)
1173  {
1174  for(j=simuctrl->bloktab[b].ftgtnum; j<simuctrl->bloktab[b+1].ftgtnum; j++)
1175  {
1176  if( (simuctrl->ftgttab[j].infotab[FTGT_CTRBNBR] > 0) &&
1177  (j != CLUST2INDEX(b, clustnum)) )
1178  {
1179  simuctrl->ftgttab[j].clustnum = INDEX2CLUST(j, b);
1180  simuctrl->ftgttab[j].infotab[FTGT_PRIONUM] = task->prionum;
1181  simuctrl->ftgttab[j].infotab[FTGT_PROCDST] = pr;
1182  simuctrl->ftgttab[j].infotab[FTGT_BLOKDST] = b;
1183  simuctrl->ftgttab[j].infotab[FTGT_TASKDST] = simuctrl->bloktab[bloknum].tasknum;
1184  extendint_Add(&(simuctrl->clustab[INDEX2CLUST(j,b)].ftgtsend[clustnum]), j);
1185 
1186  simuctrl->tasktab[simuctrl->bloktab[bloknum].tasknum].ftgtcnt++;
1187 
1188  if (clustnum == ctrl->clustnum) {
1189  simuctrl->ftgtcnt++;
1190  }
1191  }
1192  }
1193  }
1194  simuctrl->ftgtprio++;
1195  }
1196  else {
1197  assert(ctrl->candtab[cblknum].fccandnum == ctrl->candtab[cblknum].lccandnum);
1198  }
1199 
1200  /* Simulate the task computation */
1201  simu_computeTask( ctrl, symbptr, simuctrl, i );
1202 
1203 #if defined(PASTIX_BLEND_GENTRACE)
1204  if (ctrl->clustnum == 0) {
1205  char *str_val;
1206  int rc;
1207  assert( (procnames != NULL) && (pr < ctrl->total_nbthrds) );
1208  assert( procnames[pr] != NULL );
1209  rc = asprintf( &str_val, "Wait\" \"%d", (int)i );
1210  setState( timerVal( TIMER(pr) ), "ST_TS", procnames[pr], str_val );
1211  free(str_val);
1212  (void)rc;
1213  }
1214 #endif
1215  simu_pushToReadyHeap(ctrl, simuctrl, pr);
1216  }
1217 
1218  /* Compute maximum time */
1219  {
1220  double maxtime = 0;
1221  for(pr=0; pr<ctrl->total_nbcores; pr++)
1222  {
1223  if(timerVal(TIMER(pr)) > maxtime) {
1224  maxtime = timerVal(TIMER(pr));
1225  }
1226  }
1227  set_dparm(ctrl->dparm, DPARM_PRED_FACT_TIME, maxtime);
1228  }
1229 
1230 #if defined(PASTIX_BLEND_GENTRACE)
1231  if (ctrl->clustnum == 0) {
1232  assert( procnames != NULL );
1233  for(pr=0; pr<ctrl->total_nbthrds; pr++) {
1234  free(procnames[pr]);
1235  }
1236  free(procnames);
1237 
1238  endTrace();
1239  pastix_atomic_unlock( &trace_lock );
1240  }
1241 #endif
1242 
1243 #if defined(PASTIX_DEBUG_BLEND)
1244  for(i=0;i<simuctrl->cblknbr;i++) {
1245  /* Check valid for 1D distribution only */
1246  assert( simuctrl->ownetab[i] >= 0 );
1247  }
1248  for(i=0;i<symbptr->bloknbr;i++) {
1249  assert( simuctrl->bloktab[i].ownerclust >= 0 );
1250  }
1251 #endif
1252 }
simu_proc_s::procalias
char * procalias
Definition: simu.h:61
blendctrl_s::clustnbr
pastix_int_t clustnbr
Definition: blendctrl.h:71
solver.h
simu_task_s::ctrbcnt
pastix_int_t ctrbcnt
Definition: simu.h:108
symbol_matrix_s
Symbol matrix structure.
Definition: symbol.h:75
cost_matrix_s
Arrays of double to store the cost of each element in the matrix.
Definition: cost.h:30
simu_ftgt_s::costadd
double costadd
Definition: simu.h:72
blendctrl_s::dparm
double * dparm
Definition: blendctrl.h:87
simu_cblk_s::ctrbcnt
pastix_int_t ctrbcnt
Definition: simu.h:79
simuctrl_s::ftgtcnt
pastix_int_t ftgtcnt
Definition: simu.h:120
simu_task_s::time
SimuTimer time
Definition: simu.h:105
blendctrl_s::iparm
pastix_int_t * iparm
Definition: blendctrl.h:86
extendVector.h
simu_blok_s::ownerclust
int ownerclust
Definition: simu.h:93
FTGT_BLOKDST
@ FTGT_BLOKDST
Definition: simu.h:34
symbol_matrix_s::bloktab
symbol_blok_t * bloktab
Definition: symbol.h:82
simu_task_s
Task structure for the simulation.
Definition: simu.h:99
simu_task_s::ftgtcnt
pastix_int_t ftgtcnt
Definition: simu.h:109
FTGT_CTRBNBR
@ FTGT_CTRBNBR
Definition: simu.h:30
simu_ftgt_s::costsend
double costsend
Definition: simu.h:71
simuctrl_s::tasktab
SimuTask * tasktab
Definition: simu.h:121
FTGT_FCOLNUM
@ FTGT_FCOLNUM
Definition: simu.h:36
FTGT_PROCDST
@ FTGT_PROCDST
Definition: simu.h:32
pqueuePush2
void pqueuePush2(pastix_queue_t *, pastix_int_t, double, double)
Insert an element into the sorted queue.
Definition: queue.c:178
simu_blok_s::tasknum
pastix_int_t tasknum
Definition: simu.h:87
simuctrl_s::proctab
SimuProc * proctab
Definition: simu.h:122
simu_computeTask
static void simu_computeTask(const BlendCtrl *ctrl, const symbol_matrix_t *symbptr, SimuCtrl *simuctrl, pastix_int_t tasknum)
Simulate the task execution.
Definition: simu_run.c:768
getCommunicationCosts
void getCommunicationCosts(const BlendCtrl *ctrl, pastix_int_t clustsrc, pastix_int_t clustdst, pastix_int_t sync_comm_nbr, double *startup, double *bandwidth)
Return the communication cost between two cores.
Definition: blendctrl.c:60
simu_proc_s::futuretask
pastix_queue_t * futuretask
Definition: simu.h:59
simuctrl_s::bloktab
SimuBlok * bloktab
Definition: simu.h:126
pqueueRead
pastix_int_t pqueueRead(const pastix_queue_t *)
Read the first element of the queue.
Definition: queue.c:239
simu_ftgt_s
Fan-in structure for the simulation.
Definition: simu.h:67
simuctrl_s::clustab
SimuCluster * clustab
Definition: simu.h:123
blendctrl_s::clustnum
pastix_int_t clustnum
Definition: blendctrl.h:70
symbol_matrix_s::cblknbr
pastix_int_t cblknbr
Definition: symbol.h:77
blendctrl_s
The type and structure definitions.
Definition: blendctrl.h:28
simu_computeTaskReceiveTime
static void simu_computeTaskReceiveTime(const BlendCtrl *ctrl, const symbol_matrix_t *symbptr, SimuCtrl *simuctrl, pastix_int_t tasknum)
Compute the instant t where the task will be received by a node.
Definition: simu_run.c:576
blendctrl_s::core2clust
pastix_int_t * core2clust
Definition: blendctrl.h:79
simu_task_s::prionum
pastix_int_t prionum
Definition: simu.h:100
simu_proc_s
Thread structure for the simulation.
Definition: simu.h:56
DPARM_PRED_FACT_TIME
@ DPARM_PRED_FACT_TIME
Definition: api.h:162
simu_putInAllReadyQueues
static void simu_putInAllReadyQueues(const BlendCtrl *ctrl, SimuCtrl *simuctrl, pastix_int_t tasknum)
Insert a task in the ready task queues of all its candidates.
Definition: simu_run.c:345
blendctrl_s::total_nbthrds
pastix_int_t total_nbthrds
Definition: blendctrl.h:73
symbol_matrix_s::bloknbr
pastix_int_t bloknbr
Definition: symbol.h:78
symbol_blok_s::fcblknm
pastix_int_t fcblknm
Definition: symbol.h:61
cand_s
Processor candidate group to own a column blok.
Definition: cand.h:28
FTGT_TASKDST
@ FTGT_TASKDST
Definition: simu.h:33
simuctrl_s
Control structure for the simulation.
Definition: simu.h:116
cost_matrix_s::blokcost
double * blokcost
Definition: cost.h:31
simu_proc_s::tasktab
ExtendVectorINT * tasktab
Definition: simu.h:60
blendctrl_s::total_nbcores
pastix_int_t total_nbcores
Definition: blendctrl.h:72
symbol_cblk_s::bloknum
pastix_int_t bloknum
Definition: symbol.h:46
simu_blok_s::ftgtnum
pastix_int_t ftgtnum
Definition: simu.h:88
cand_s::lccandnum
pastix_int_t lccandnum
Definition: cand.h:34
simu_proc_s::timer
SimuTimer timer
Definition: simu.h:57
blendctrl_s::costlevel
pastix_int_t costlevel
Definition: blendctrl.h:43
blendctrl_s::costmtx
CostMatrix * costmtx
Definition: blendctrl.h:97
blendctrl.h
cand_s::treelevel
pastix_int_t treelevel
Definition: cand.h:30
cand_s::fcandnum
pastix_int_t fcandnum
Definition: cand.h:31
cand.h
pastix_fopenw
FILE * pastix_fopenw(const char *dirname, const char *filename, const char *mode)
Open a file in the unique directory of the pastix instance.
Definition: api.c:232
simu_updateFtgt
static void simu_updateFtgt(const symbol_matrix_t *symbptr, SimuCtrl *simuctrl, pastix_int_t ftgtnum, pastix_int_t bloknum, pastix_int_t fbloknum)
Update the Fan In target structure.
Definition: simu_run.c:704
extendint_Add
void extendint_Add(ExtendVectorINT *, pastix_int_t)
Add an element elt to the end of the vector.
Definition: extendVector.c:90
simu_getNextTaskNextProc
static pastix_int_t simu_getNextTaskNextProc(const BlendCtrl *ctrl, SimuCtrl *simuctrl, pastix_int_t *procnumptr)
Look for the best next couple (tasknum, corenum) that is ready to be executed.
Definition: simu_run.c:432
symbol.h
elimintree.h
FTGT_FROWNUM
@ FTGT_FROWNUM
Definition: simu.h:38
FTGT_LCOLNUM
@ FTGT_LCOLNUM
Definition: simu.h:37
queue.h
blendctrl_s::dirname
const char * dirname
Definition: blendctrl.h:88
simuctrl_s::ftgttimetab
SimuTimer * ftgttimetab
Definition: simu.h:129
timerAdd
static void timerAdd(SimuTimer *timer, double t)
Increment the timer.
Definition: simu_timer.h:59
simu_ftgt_s::timerecv
SimuTimer timerecv
Definition: simu.h:70
symbol_blok_s::frownum
pastix_int_t frownum
Definition: symbol.h:58
timerSet
static void timerSet(SimuTimer *timer, double t)
Set the timer value.
Definition: simu_timer.h:84
cand_s::cluster
pastix_int_t cluster
Definition: cand.h:35
perf.h
pqueueSize
pastix_int_t pqueueSize(const pastix_queue_t *)
Return the size of the queue.
Definition: queue.c:135
simu_cluster_s::prionum
pastix_int_t prionum
Definition: simu.h:50
simuRun
void simuRun(SimuCtrl *, const BlendCtrl *, const symbol_matrix_t *)
Run the simulation to map the data on the nodes.
Definition: simu_run.c:979
simu_ftgt_s::clustnum
pastix_int_t clustnum
Definition: simu.h:69
symbol_matrix_s::cblktab
symbol_cblk_t * cblktab
Definition: symbol.h:81
simu_computeFtgtCosts
static void simu_computeFtgtCosts(const BlendCtrl *ctrl, const SimuFtgt *ftgt, pastix_int_t clustsrc, pastix_int_t sync_comm_nbr, double *send, double *add)
Compute the cost of a communication and its update.
Definition: simu_run.c:149
simu_ftgt_s::infotab
pastix_int_t infotab[FTGT_MAXINFO]
Definition: simu.h:68
symbol_blok_s
Symbol block structure.
Definition: symbol.h:57
IPARM_VERBOSE
@ IPARM_VERBOSE
Definition: api.h:36
blendctrl_s::local_nbthrds
pastix_int_t local_nbthrds
Definition: blendctrl.h:75
simuctrl_s::ownetab
pastix_int_t * ownetab
Definition: simu.h:124
simuctrl_s::cblknbr
pastix_int_t cblknbr
Definition: simu.h:117
simu_task_s::bloknum
pastix_int_t bloknum
Definition: simu.h:102
symbol_cblk_s
Symbol column block structure.
Definition: symbol.h:43
cand_s::fccandnum
pastix_int_t fccandnum
Definition: cand.h:33
simu_pushToReadyHeap
static void simu_pushToReadyHeap(const BlendCtrl *ctrl, SimuCtrl *simuctrl, pastix_int_t procnum)
Push all tasks from future to ready.
Definition: simu_run.c:901
simuctrl_s::cblktab
SimuCblk * cblktab
Definition: simu.h:125
timerComp
static int timerComp(const SimuTimer *t1, const SimuTimer *t2)
Compare two timings.
Definition: simu_timer.h:39
simu_proc_s::readytask
pastix_queue_t * readytask
Definition: simu.h:58
simu_blok_s::ctrbcnt
pastix_int_t ctrbcnt
Definition: simu.h:91
simu_timer_s
Timer for the simulation.
Definition: simu_timer.h:25
simu_task_s::cblknum
pastix_int_t cblknum
Definition: simu.h:101
simu.h
pqueuePop
static pastix_int_t pqueuePop(pastix_queue_t *q)
Pop the head of the queue without returning the keys.
Definition: queue.h:75
simu_cblk_s::owned
int8_t owned
Definition: simu.h:80
FTGT_PRIONUM
@ FTGT_PRIONUM
Definition: simu.h:35
cand_s::costlevel
double costlevel
Definition: cand.h:29
simuctrl_s::tasknbr
pastix_int_t tasknbr
Definition: simu.h:119
blendctrl_s::candtab
Cand * candtab
Definition: blendctrl.h:98
simu_printBlockCtrbNbr
static void simu_printBlockCtrbNbr(const BlendCtrl *ctrl, const symbol_matrix_t *symbptr, const SimuCtrl *simuctrl)
Print the number of contributions per cblk and block for debug.
Definition: simu_run.c:280
cost.h
timerSetMax
static void timerSetMax(SimuTimer *timer, double t)
Set the timer value if the value is greater than the actual one.
Definition: simu_timer.h:97
symbol_blok_s::lrownum
pastix_int_t lrownum
Definition: symbol.h:59
simuctrl_s::ftgttab
SimuFtgt * ftgttab
Definition: simu.h:127
simuctrl_s::ftgtprio
pastix_int_t ftgtprio
Definition: simu.h:118
pastixSymbolGetFacingBloknum
pastix_int_t pastixSymbolGetFacingBloknum(const symbol_matrix_t *symbptr, pastix_int_t bloksrc, pastix_int_t bloknum, pastix_int_t startsearch, int ricar)
Search the targeted block C for a couple of blocks A and B.
Definition: symbol.c:235
blendctrl_s::ricar
pastix_int_t ricar
Definition: blendctrl.h:33
simu_computeBlockCtrbNbr
static void simu_computeBlockCtrbNbr(const symbol_matrix_t *symbptr, SimuCtrl *simuctrl, pastix_int_t ricar)
Compute the number of contributions to each block.
Definition: simu_run.c:203
timerVal
static double timerVal(const SimuTimer *timer)
Get the timer value.
Definition: simu_timer.h:71
FTGT_LROWNUM
@ FTGT_LROWNUM
Definition: simu.h:39