PaStiX Handbook  6.3.2
simu_run.c
Go to the documentation of this file.
1 /**
2  *
3  * @file simu_run.c
4  *
5  * PaStiX simulation functions.
6  *
7  * @copyright 2004-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.2
11  * @author Pascal Henon
12  * @author Pierre Ramet
13  * @author Mathieu Faverge
14  * @author Gregoire Pichon
15  * @author Vincent Bridonneau
16  * @author Xavier Lacoste
17  * @date 2023-07-21
18  *
19  **/
20 #ifndef DOXYGEN_SHOULD_SKIP_THIS
21 #ifndef _GNU_SOURCE
22 #define _GNU_SOURCE 1
23 #endif
24 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
25 #include <stdio.h>
26 #include <string.h>
27 #include <assert.h>
28 #include <math.h>
29 #include <sys/stat.h>
30 #include <sys/types.h>
31 #include <unistd.h>
32 
33 #include "common.h"
34 #include "symbol/symbol.h"
35 #include "extendVector.h"
36 #include "queue.h"
37 #include "elimintree.h"
38 #include "cost.h"
39 #include "cand.h"
40 #include "blendctrl.h"
41 #include "blend/solver.h"
42 #include "simu.h"
43 #include "perf.h"
44 
45 #if defined(PASTIX_BLEND_GENTRACE)
46 #include <GTG.h>
47 #include <GTGPaje.h>
48 
49 /**
50  *******************************************************************************
51  *
52  * @brief Increment a traced counter.
53  *
54  *******************************************************************************
55  *
56  * @param[in] time
57  * The timestamp of the event.
58  *
59  * @param[in] type
60  * The type string of the variable to modify.
61  *
62  * @param[in] cont
63  * The string of the container that holds the variable.
64  *
65  * @param[in] val
66  * The value to add to the counter.
67  *
68  *******************************************************************************/
69 static inline void
70 blendAddVar( int clustnum, varPrec time, const char* type,
71  const char* cont, varPrec val )
72 {
73  if ( clustnum == 0 ) {
74  addVar( time, type, cont, val );
75  }
76 }
77 
78 /**
79  *******************************************************************************
80  *
81  * @brief Decrement a traced counter.
82  *
83  *******************************************************************************
84  *
85  * @param[in] time
86  * The timestamp of the event.
87  *
88  * @param[in] type
89  * The type string of the variable to modify.
90  *
91  * @param[in] cont
92  * The string of the container that holds the variable.
93  *
94  * @param[in] val
95  * The value used to decrement the counter.
96  *
97  *******************************************************************************/
98 static inline void
99 blendSubVar( int clustnum, varPrec time, const char* type,
100  const char* cont, varPrec val )
101 {
102  if ( clustnum == 0 ) {
103  subVar( time, type, cont, val );
104  }
105 }
106 
107 #else
108 
#ifndef DOXYGEN_SHOULD_SKIP_THIS
/*
 * Trace generation is disabled: both helpers expand to an empty statement and
 * none of their arguments is evaluated.
 */
#define blendAddVar( clustnum, time, type, cont, val ) ((void)0)
#define blendSubVar( clustnum, time, type, cont, val ) ((void)0)

#endif /* DOXYGEN_SHOULD_SKIP_THIS */
#endif /* defined(PASTIX_BLEND_GENTRACE) */
118 
119 /**
120  * @addtogroup blend_dev_simu
121  * @{
122  *
123  */
124 
125 /**
126  *******************************************************************************
127  *
128  * @brief Compute the cost of a communication and its update.
129  *
130  *******************************************************************************
131  *
132  * @param[in] ctrl
133  * The blend control structure that describes the architecture and the
134  * cost of the communication between nodes.
135  *
136  * @param[in] ftgt
137  * The fan-in for which the cost is computed.
138  *
139  * @param[in] clustsrc
140  * The index of the source pastix process.
141  *
142  * @param[in] sync_comm_nbr
143  * The number of simultaneous communication.
144  *
145  * @param[out] send
146  * The time cost of the send operation.
147  *
148  * @param[out] add
149  * The time cost of the addition operation.
150  *
151  *******************************************************************************/
152 static inline void
154  const SimuFtgt *ftgt,
155  pastix_int_t clustsrc,
156  pastix_int_t sync_comm_nbr,
157  double *send,
158  double *add )
159 {
160  pastix_int_t M, N;
161  pastix_int_t clustdst = ctrl->core2clust[ftgt->infotab[FTGT_PROCDST]];
162  double startup, bandwidth, addcost;
163 
164  *send = 0.;
165  *add = 0.;
166 
167  if( clustsrc == clustdst ) {
168  return;
169  }
170 
171  assert( (clustsrc >= 0) && (clustdst >= 0) );
172 
173  N = (ftgt->infotab[FTGT_LCOLNUM] - ftgt->infotab[FTGT_FCOLNUM] + 1);
174  M = (ftgt->infotab[FTGT_LROWNUM] - ftgt->infotab[FTGT_FROWNUM] + 1);
175 
176  assert( (N > 0) && (M > 0) );
177 
178  getCommunicationCosts( ctrl, clustsrc, clustdst, sync_comm_nbr, &startup, &bandwidth );
179 
180  *send = (startup + bandwidth * (M * N * sizeof(double) + FTGT_MAXINFO * sizeof(pastix_int_t)));
181  addcost = PERF_GEAM( M, N );
182  *add = addcost > 0. ? addcost : 0.0;
183  return;
184 }
185 
186 /**
187  *******************************************************************************
188  *
189  * @brief Compute the number of contributions to each block.
190  *
191  *******************************************************************************
192  *
193  * @param[in] symbptr
194  * The symbol matrix structure describing the problem.
195  *
196  * @param[inout] simuctrl
197  * The main simulation structure. On exit, the ctrbcnt field of the
198  * blocks is updated with the number of contributions that they each
199  * should receive.
200  *
201  * @param[in] ricar
202  * True if ILU(k) factorization is applied to change the algorithm to
203  * compute the number of contributions.
204  *
205  *******************************************************************************/
206 static inline void
208  SimuCtrl *simuctrl,
209  pastix_int_t ricar )
210 {
211  pastix_int_t i, j, k;
212  pastix_int_t facebloknum, firstbloknum;
213 
214  /*
215  * Compute the number of contributions per block to each block.
216  * Might be optimized if we computed the input graph before.
217  */
218  {
219  symbol_cblk_t *curcblk;
220 
221  curcblk = symbptr->cblktab;
222  for(i=0; i<symbptr->cblknbr; i++, curcblk++)
223  {
224  pastix_int_t fbloknum = curcblk[0].bloknum + 1;
225  pastix_int_t lbloknum = curcblk[1].bloknum;
226 
227  /* 1D cblk computed */
228  for(j=fbloknum; j<lbloknum; j++)
229  {
230  firstbloknum = 0;
231 
232  /* Add contribution due to E2 */
233  for(k=j; k<lbloknum; k++)
234  {
235  facebloknum = pastixSymbolGetFacingBloknum( symbptr, j, k, firstbloknum, ricar );
236  if(facebloknum >= 0) {
237  simuctrl->bloktab[facebloknum].ctrbcnt++;
238  firstbloknum = facebloknum;
239  }
240  }
241  }
242  }
243  }
244 
245  /* Set up the task ctrbcnt and cblkcnt */
246  {
247  SimuTask *task = simuctrl->tasktab;
248 
249  for(i=0;i<simuctrl->tasknbr;i++)
250  {
251  pastix_int_t fbloknum = symbptr->cblktab[task->cblknum ].bloknum;
252  pastix_int_t lbloknum = symbptr->cblktab[task->cblknum+1].bloknum;
253 
254  task->ctrbcnt = 0;
255  for(j=fbloknum; j<lbloknum; j++) {
256  task->ctrbcnt += simuctrl->bloktab[j].ctrbcnt;
257  }
258 
259  simuctrl->cblktab[task->cblknum].ctrbcnt = task->ctrbcnt;
260  task++;
261  }
262  }
263 }
264 
265 
266 /**
267  *******************************************************************************
268  *
269  * @brief Print the number of contributions per cblk and block for debug.
270  *
271  *******************************************************************************
272  *
273  * @param[in] ctrl
274  * The blendctrl structure with the simulation parameters.
275  *
276  * @param[in] symbptr
277  * The symbol matrix structure describing the problem.
278  *
279  * @param[in] simuctrl
280  * The main simulation structure.
281  *
282  *******************************************************************************/
283 static inline void
285  const symbol_matrix_t *symbptr,
286  const SimuCtrl *simuctrl )
287 {
288  FILE *fd1 = NULL;
289  FILE *fd2 = NULL;
290  pastix_int_t i, j;
291  symbol_cblk_t *curcblk;
292 
293  fd1 = pastix_fopenw( ctrl->dirname, "contribblok.txt", "w" );
294  if ( fd1 == NULL ) {
295  return;
296  }
297  fd2 = pastix_fopenw( ctrl->dirname, "contribcblk.txt", "w" );
298  if ( fd2 == NULL ) {
299  return;
300  }
301 
302  curcblk = symbptr->cblktab;
303  for(i=0; i<symbptr->cblknbr; i++, curcblk++)
304  {
305  pastix_int_t fbloknum = curcblk[0].bloknum + 1;
306  pastix_int_t lbloknum = curcblk[1].bloknum;
307 
308  /* 1D cblk computed */
309  for(j=fbloknum; j<lbloknum; j++)
310  {
311  fprintf(fd1, "%ld %ld\n", (long)j, (long)simuctrl->bloktab[j].ctrbcnt);
312  }
313 #if defined(PASTIX_SYMBOL_DUMP_SYMBMTX)
314  fprintf(fd2, "%ld %ld %ld\n", (long)i, (long)simuctrl->cblktab[i].ctrbcnt, (long)curcblk->split_cblk);
315 #else
316  fprintf(fd2, "%ld %ld\n", (long)i, (long)simuctrl->cblktab[i].ctrbcnt);
317 #endif
318  }
319 
320  fclose( fd1 );
321  fclose( fd2 );
322 }
323 
324 /**
325  *******************************************************************************
326  *
327  * @brief Insert a task in the ready task queues of all its candidates.
328  *
329  * This function according to the ready date of a task put this task on the
330  * ready queue of a processor.
331  * When the ready date of a task is inferior to the proc timer then the
332  * task is ordered according to its priorities in the elimination tree.
333  *
334  *******************************************************************************
335  *
336  * @param[in] ctrl
337  * The blend control structure to provide the candtab and the
338  * core2clust arrays.
339  *
340  * @param[inout] simuctrl
341  * The main simulation structure. On exit, the ready tasks queues of
342  * the candidates for tasknum are updated.
343  *
344  * @param[in] tasknum
345  * The index of the task to insert as a ready task.
346  *
347  *******************************************************************************/
348 static inline void
350  SimuCtrl *simuctrl,
351  pastix_int_t tasknum )
352 {
353  const SimuTask *task = simuctrl->tasktab + tasknum;
354  const Cand *cblkcand = ctrl->candtab + task->cblknum;
355  SimuProc *sproc;
356  double ready_date = 0.0;
357  pastix_int_t procnum;
358  pastix_int_t bloknum = task->bloknum;
359 #if defined(PASTIX_BLEND_COSTLEVEL)
360  double level = cblkcand->costlevel;
361 #else
362  pastix_int_t level = cblkcand->treelevel;
363 #endif
364  assert( tasknum != -1 );
365 
366  blendAddVar( ctrl->clustnum, timerVal( &(task->time) ), "VR_AP", "Appli", 1 );
367 
368  /* Get the ready date of the task on the processor passed in parameter */
369  if( cblkcand->fccandnum == cblkcand->lccandnum )
370  {
371  ready_date = timerVal( &(task->time) );
372  sproc = &(simuctrl->proctab[cblkcand->fcandnum]);
373 
374  for(procnum = cblkcand->fcandnum;
375  procnum <= cblkcand->lcandnum; procnum++, sproc++)
376  {
377  if( ready_date > timerVal( &(sproc->timer) ) ) {
378  pqueuePush2( sproc->futuretask, tasknum, ready_date, level );
379  }
380  else {
381  pqueuePush2( sproc->readytask, tasknum, level, bloknum );
382  }
383  blendAddVar( ctrl->clustnum, ready_date, "VR_TS", sproc->procalias, 1 );
384  }
385  }
386  else
387  {
388  sproc = &(simuctrl->proctab[cblkcand->fcandnum]);
389 
390  for(procnum = cblkcand->fcandnum;
391  procnum <= cblkcand->lcandnum; procnum++, sproc++)
392  {
393  ready_date = timerVal( simuctrl->ftgttimetab + CLUST2INDEX(bloknum, ctrl->core2clust[procnum]) );
394 
395  if( ready_date > timerVal( &(sproc->timer) ) ) {
396  pqueuePush2( sproc->futuretask, tasknum, ready_date, level );
397  }
398  else {
399  pqueuePush2( sproc->readytask, tasknum, level, bloknum );
400  }
401  blendAddVar( ctrl->clustnum, ready_date, "VR_TS", sproc->procalias, 1 );
402  }
403  }
404 }
405 
406 /**
407  *******************************************************************************
408  *
409  * @brief Look for the best next couple (tasknum, corenum) that is ready to be
410  * executed.
411  *
412  * This function is the main and more costly one. It looks for each worker,
413  * which task is the first one available for execution, and from all those
414  * couples, which one is the first one to finish.
415  *
416  *******************************************************************************
417  *
418  * @param[in] ctrl
419  * The blend control structure to provide the candtab and the
420  * core2clust arrays.
421  *
422  * @param[inout] simuctrl
423  * The main simulation structure. On exit, the structure is updated
424  * with the extraction of the next best task to run.
425  *
426  * @param[out] procnumptr
427  * The index of the candidate to run the task.
428  *
429  *******************************************************************************
430  *
431  * @return The next task selected for execution in the simulator. The worker
432  * selected is returned in the procnumptr field.
433  *
434  *******************************************************************************/
435 static inline pastix_int_t
437  SimuCtrl *simuctrl,
438  pastix_int_t *procnumptr )
439 {
440  pastix_int_t p, rc;
441  pastix_int_t procnum = -1;
442  pastix_int_t tasknum;
443  double earlytimeready = PASTIX_INT_MAX;
444  double earlyproctimer = PASTIX_INT_MAX;
445  double timeready;
446  pastix_int_t earlytask = -1;
447 
448  /* Find the earlier task in the processor heaps */
449  for(p=0;p<ctrl->total_nbcores;p++)
450  {
451  SimuProc *sproc = &(simuctrl->proctab[p]);
452  tasknum = -1;
453  /*
454  * First we search the earlier task in the set of tasks whose ready date
455  * is < proc timer
456  */
457  while( pqueueSize(sproc->readytask) > 0 )
458  {
459  tasknum = pqueueRead( sproc->readytask );
460  if( simuctrl->bloktab[simuctrl->tasktab[tasknum].bloknum].ownerclust >= 0 )
461  {
462  /* This task have to be remove from the heap (already mapped) */
463  rc = pqueuePop( sproc->readytask );
464  assert( rc >= 0 );
465  tasknum = -1;
466  }
467  else
468  break;
469  }
470  /*
471  * We found no task which ready date is < proc timer so we search one
472  * that minimizes ready date - proc-timer
473  */
474  if(tasknum == -1)
475  {
476  while(pqueueSize(simuctrl->proctab[p].futuretask)>0)
477  {
478  tasknum = pqueueRead(simuctrl->proctab[p].futuretask);
479  if( simuctrl->bloktab[simuctrl->tasktab[tasknum].bloknum].ownerclust >= 0 )
480  {
481  /* This task have to be remove from the heap (already mapped) */
482  rc = pqueuePop(simuctrl->proctab[p].futuretask);
483  assert( rc >= 0 );
484  tasknum = -1;
485  }
486  else {
487  break;
488  }
489  }
490  }
491 
492  if(tasknum != -1)
493  {
494  const SimuTask *task = simuctrl->tasktab + tasknum;
495  timeready = MAX(timerVal(TIMER(p)),
496  timerVal(&(simuctrl->ftgttimetab[CLUST2INDEX(simuctrl->tasktab[tasknum].bloknum,
497  ctrl->core2clust[p])])));
498 
499  timeready = MAX( timeready, timerVal( &(task->time) ) );
500 
501  /*
502  * We prevent to distribute on the same processor set when all time
503  * are equal
504  */
505  if((timeready == earlytimeready) && (timerVal(TIMER(p)) < earlyproctimer))
506  {
507  procnum = p;
508  earlyproctimer = timerVal(TIMER(p));
509  earlytask = tasknum;
510  earlytimeready = timeready;
511  }
512 
513  if(timeready < earlytimeready)
514  {
515  procnum = p;
516  earlytask = tasknum;
517  earlytimeready = timeready;
518  }
519  }
520  }
521 
522 #if defined(PASTIX_BLEND_GENTRACE)
523  if ( (earlytask != -1) && (ctrl->clustnum == 0) )
524  {
525  const SimuTask *task = simuctrl->tasktab + earlytask;
526  const Cand *cblkcand = ctrl->candtab + task->cblknum;
527  SimuProc *sproc = &(simuctrl->proctab[cblkcand->fcandnum]);
528 
529  for(p = cblkcand->fcandnum;
530  p <= cblkcand->lcandnum; p++, sproc++)
531  {
532  blendSubVar( ctrl->clustnum, earlytimeready, "VR_TS", sproc->procalias, 1 );
533  }
534 
535  blendSubVar( ctrl->clustnum, earlytimeready, "VR_AP", "Appli", 1 );
536  }
537 #endif
538 
539 #if !defined(NDEBUG)
540  if(procnum != -1)
541  {
542  if( pqueueSize(simuctrl->proctab[procnum].readytask) > 0 ) {
543  assert(earlytask == pqueuePop(simuctrl->proctab[procnum].readytask));
544  }
545  else {
546  assert(earlytask == pqueuePop(simuctrl->proctab[procnum].futuretask));
547  }
548  }
549 #endif
550 
551  *procnumptr = procnum;
552  (void)rc;
553  return earlytask;
554 }
555 
556 /**
557  *******************************************************************************
558  *
559  * @brief Compute the instant t where the task will be received by a node.
560  *
561  * Compute the time the cblk would have RECEIVED and ADDED all its contributions
562  * if it was mapped on a given cand CLUSTER.
563  * @warning These times do not include add time for fan in target
564  *
565  *******************************************************************************
566  *
567  * @param[in] ctrl
568  * The blend control structure to provide the candtab and the
569  * core2clust arrays.
570  *
571  * @param[in] symbptr
572  * The symbol matrix structure describing the problem.
573  *
574  * @param[inout] simuctrl
575  * The main simulation structure. On exit, the ready tasks queues of
576  * the candidates for tasknum are updated.
577  *
578  * @param[in] tasknum
579  * The index of the task to insert as a ready task.
580  *
581  *******************************************************************************/
582 static inline void
584  const symbol_matrix_t *symbptr,
585  SimuCtrl *simuctrl,
586  pastix_int_t tasknum )
587 {
588  pastix_int_t i, j;
589  double lftgttime = 0;
590  double sftgttime = 0;
591  pastix_int_t lftgtnum = -1;
592  pastix_int_t cblknum;
593  pastix_int_t bloknum;
594  pastix_int_t clustdst;
595 
596  bloknum = simuctrl->tasktab[tasknum].bloknum;
597  cblknum = simuctrl->tasktab[tasknum].cblknum;
598 
599  /* If the task is local, all sons sending contributions are local => no treatment */
600  if( ctrl->candtab[cblknum].fccandnum == ctrl->candtab[cblknum].lccandnum ) {
601  return;
602  }
603 
604  /*
605  * Compute the cblk on proc timer that is time the cblk would have received
606  * all its contributions if it was mapped on a given cand processor These
607  * times INCLUDE add time for fan in target !!
608  */
609 
610  /* Compute receive time (time at which a non-local processor should received the target) */
611  /* find the latest ftgt receive time and the second latest*/
612  for(i=simuctrl->bloktab[bloknum].ftgtnum; i<simuctrl->bloktab[bloknum+1].ftgtnum; i++)
613  {
614  /* Source of this ftgt */
615  clustdst = INDEX2CLUST(i, bloknum);
616 
617  /* Task with several cand proc */
618  /* The information about ftgt costs are in the ftgt of the diagonal block;
619  this loop sums the cost of all the ftgt received by the blocks in this column block */
620  if(simuctrl->ftgttab[i].infotab[FTGT_CTRBNBR]>0) {
621  for(j=bloknum;j<symbptr->cblktab[cblknum+1].bloknum;j++)
622  {
623  if(simuctrl->ftgttab[simuctrl->bloktab[j].ftgtnum + i-simuctrl->bloktab[bloknum].ftgtnum].infotab[FTGT_CTRBNBR]>0)
624  {
625  double send, add;
626 
627  simu_computeFtgtCosts( ctrl, simuctrl->ftgttab + CLUST2INDEX(j, clustdst), clustdst,
628  ctrl->candtab[cblknum].lccandnum - ctrl->candtab[cblknum].fccandnum + 1,
629  &send, &add );
630 
631  simuctrl->ftgttab[i].costadd += add;
632  simuctrl->ftgttab[i].costsend += send;
633  }
634  }
635  }
636 
637 #if defined(PASTIX_DEBUG_BLEND)
638  if(!(simuctrl->ftgttab[i].costsend >= 0.0)) {
639  pastix_print_error( "ftgt %ld costsend %f", (long)i, simuctrl->ftgttab[i].costsend );
640  }
641  if(!(simuctrl->ftgttab[i].costadd >= 0.0)) {
642  pastix_print_error( "ftgt %ld costadd %f", (long)i, simuctrl->ftgttab[i].costadd );
643  }
644 
645  assert(simuctrl->ftgttab[i].costsend >= 0.0);
646  assert(simuctrl->ftgttab[i].costadd >= 0.0);
647 #endif
648 
649  /* ftgttab[].timerecv is the time this ftgt will be receive */
650  timerSet(&(simuctrl->ftgttab[i].timerecv), timerVal(&(simuctrl->ftgttimetab[i])) + simuctrl->ftgttab[i].costsend + simuctrl->ftgttab[i].costadd);
651 
652  /* If this ftgt the last reveived or the second last received ?? */
653  if(timerVal(&(simuctrl->ftgttab[i].timerecv)) > lftgttime)
654  {
655  lftgttime = timerVal(&(simuctrl->ftgttab[i].timerecv));
656  lftgtnum = i;
657  }
658  else {
659  if(timerVal(&(simuctrl->ftgttab[i].timerecv)) > sftgttime) {
660  sftgttime = timerVal(&(simuctrl->ftgttab[i].timerecv));
661  }
662  }
663  }
664 
665 
666  /*
667  * Put in ftgttimetab[] the date at which the cluster would have received
668  * and add all the ftgt if the task was mapped on it.
669  */
670  for(i=simuctrl->bloktab[bloknum].ftgtnum; i<simuctrl->bloktab[bloknum+1].ftgtnum;i++)
671  {
672  if(i != lftgtnum) {
673  timerSet(&(simuctrl->ftgttimetab[i]), lftgttime);
674  }
675  else {
676  timerSetMax( &(simuctrl->ftgttimetab[i]), sftgttime );
677  }
678  }
679 }
680 
681 /**
682  *******************************************************************************
683  *
684  * @brief Update the Fan In target structure
685  *
686  * Increment the contribution counter of the fan-in and integrate to the ftgt
687  * area the new contribution.
688  *
689  *******************************************************************************
690  *
691  * @param[in] symbptr
692  * The pointer to the symbolic matrix structure.
693  *
694  * @param[inout] simuctrl
695  * The pointer to the simulation structure. On exit, data regarding the
696  * computational unit pr are updated.
697  *
698  * @param[in] ftgtnum
699  * Index of the fanin target to update.
700  *
701  * @param[in] bloknum
702  * Index of the first off-diagonal block generating a contribution to
703  * the ftgtnum Fan In.
704  *
705  * @param[in] fbloknum
706  * Index of the off-diagonal block that is multiplied by blocknum to
707  * produce the update.
708  *
709  *******************************************************************************/
710 static inline void
712  SimuCtrl *simuctrl,
713  pastix_int_t ftgtnum,
714  pastix_int_t bloknum,
715  pastix_int_t fbloknum )
716 {
717  pastix_int_t *infotab = simuctrl->ftgttab[ftgtnum].infotab;
718  symbol_blok_t *blokptr = (symbptr->bloktab) + bloknum;
719  symbol_blok_t *fblokptr = (symbptr->bloktab) + fbloknum;
720 
721  infotab[FTGT_CTRBNBR]++;
722 
723  /* Update ftgt dimensions to the maximum area covering all contributions */
724  if( blokptr->frownum < infotab[FTGT_FCOLNUM] ) {
725  infotab[FTGT_FCOLNUM] = blokptr->frownum;
726  }
727 
728  if( blokptr->lrownum > infotab[FTGT_LCOLNUM] ) {
729  infotab[FTGT_LCOLNUM] = blokptr->lrownum;
730  }
731 
732  if( fblokptr->frownum < infotab[FTGT_FROWNUM] ) {
733  infotab[FTGT_FROWNUM] = fblokptr->frownum;
734  }
735 
736  if( fblokptr->lrownum > infotab[FTGT_LROWNUM] ) {
737  infotab[FTGT_LROWNUM] = fblokptr->lrownum;
738  }
739 
740  assert( (infotab[FTGT_LCOLNUM] - infotab[FTGT_FCOLNUM] + 1) > 0 );
741  assert( (infotab[FTGT_LROWNUM] - infotab[FTGT_FROWNUM] + 1) > 0 );
742 }
743 
744 /**
745  *******************************************************************************
746  *
747  * @brief Simulate the task execution.
748  *
749  * Update the timers of the selected worker, as well as those of the current
750  * cblk, and the targeted cblks by the update.
751  *
752  *******************************************************************************
753  *
754  * @param[in] ctrl
755  * The pointer to the global blend control structure.
756  *
757  * @param[in] symbptr
758  * The pointer to the symbolic matrix structure.
759  *
760  * @param[inout] simuctrl
761  * The pointer to the simulation structure. On exit, data regarding the
762  * computational unit pr are updated.
763  *
764  * @param[in] tasknum
765  * The task index of the one, we want to simulate the execution.
766  *
767  *******************************************************************************
768  *
769  * @remark In this function, we use the standard [f|l]blocknum for first and
770  * last bloknum, and facingcblk, facingblok for the facing block and column
771  * block.
772  *
773  *******************************************************************************/
774 static inline void
776  const symbol_matrix_t *symbptr,
777  SimuCtrl *simuctrl,
778  pastix_int_t tasknum )
779 {
780  pastix_int_t i, j;
781  pastix_int_t cblknum;
782  pastix_int_t fbloknum;
783  pastix_int_t lbloknum;
784  pastix_int_t firstfacingblok;
785  pastix_int_t facingblok;
786  pastix_int_t facingcblk;
787  pastix_int_t local;
788  pastix_int_t ftgtnum;
789  pastix_int_t procnum;
790  pastix_int_t clustnum;
791  SimuProc *sproc;
792  CostMatrix *costmtx;
793 
794  cblknum = simuctrl->tasktab[tasknum].cblknum;
795  procnum = simuctrl->ownetab[cblknum];
796  clustnum = ctrl->core2clust[procnum];
797  sproc = &(simuctrl->proctab[procnum]);
798  costmtx = ctrl->costmtx;
799 
800  fbloknum = symbptr->cblktab[cblknum ].bloknum;
801  lbloknum = symbptr->cblktab[cblknum+1].bloknum;
802 
803  assert( (procnum >= ctrl->candtab[cblknum].fcandnum) &&
804  (procnum <= ctrl->candtab[cblknum].lcandnum) );
805 
806  /* Add factorization time of the diagonal blok + cost of the TRSM operation on the cblk*/
807  timerAdd(&(sproc->timer), costmtx->blokcost[fbloknum]);
808 
809  for(i=fbloknum+1; i<lbloknum; i++)
810  {
811  /* Add cost of the GEMM update related to this off-diagonal block */
812  timerAdd(&(sproc->timer), costmtx->blokcost[i]);
813 
814  facingcblk = symbptr->bloktab[i].fcblknm;
815 
816  /*
817  * If only one candidate cluster, we can consider the facingcblk as
818  * local because it is an ancestor of the current cblk in the
819  * elimination tree.
820  */
821  local = ( ctrl->candtab[facingcblk].fccandnum == ctrl->candtab[facingcblk].lccandnum ) ? 1 : 0;
822 
823  firstfacingblok = symbptr->cblktab[facingcblk].bloknum;
824 
825  for(j=i; j<lbloknum; j++)
826  {
827  /* TODO: symbolGetFacingBloknum is too expensive !! */
828  facingblok = pastixSymbolGetFacingBloknum(symbptr, i, j, firstfacingblok, ctrl->ricar);
829 
830  /* If the couple (i, j) generates a contribution, applies it */
831  if( facingblok >= 0 ) {
832  pastix_int_t facingdiagblok;
833  pastix_int_t facingtask;
834 
835  /* Decrease contributions on block and column block */
836  simuctrl->cblktab[facingcblk].ctrbcnt--;
837  simuctrl->bloktab[facingblok].ctrbcnt--;
838 
839  /* Checks */
840  assert(simuctrl->cblktab[facingcblk].ctrbcnt >= 0);
841  assert(simuctrl->bloktab[facingblok].ctrbcnt >= 0);
842 
843  /* Update to start next search from the last facing block */
844  firstfacingblok = facingblok;
845 
846  facingdiagblok = symbptr->cblktab[facingcblk].bloknum;
847  facingtask = simuctrl->bloktab[facingdiagblok].tasknum;
848 
849  assert( facingcblk == simuctrl->tasktab[facingtask].cblknum );
850  assert( facingtask < simuctrl->tasknbr );
851 
852  if(!local)
853  {
854  ftgtnum = CLUST2INDEX(facingblok, clustnum);
855  simu_updateFtgt( symbptr, simuctrl, ftgtnum, i, j );
856 
857  /* Update timer ready for receiver of the ftgt */
858  ftgtnum = CLUST2INDEX( facingdiagblok, clustnum );
859  timerSetMax( &(simuctrl->ftgttimetab[ftgtnum]),
860  timerVal(&(sproc->timer)) );
861 
862  }
863  else {
864 
865  /* Update timer of the task (associated to the diagonal block) */
866  timerSetMax( &(simuctrl->tasktab[facingtask].time),
867  timerVal(&(sproc->timer)) );
868  }
869 
870  if( simuctrl->cblktab[facingcblk].ctrbcnt == 0 ) {
871  if (!local) {
872  simu_computeTaskReceiveTime(ctrl, symbptr, simuctrl, facingtask );
873  }
874 
875  /* Put the task in the ready heap of its local candidat processor */
876  simu_putInAllReadyQueues( ctrl, simuctrl, facingtask );
877  }
878  }
879  }
880  }
881 }
882 
883 /**
884  *******************************************************************************
885  *
886  * @brief Push all tasks from future to ready
887  *
888  * This routine pushes all future tasks from the future task heap to the ready
889  * one, if the time at which the task will be ready is already passed by the
890  * computation unit.
891  *
892  *******************************************************************************
893  *
894  * @param[in] ctrl
895  * The pointer to the global blend control structure.
896  *
897  * @param[inout] simuctrl
898  * The pointer to the simulation structure. On exit, data regarding the
899  * computational unit pr are updated.
900  *
901  * @param[in] procnum
902  * The computational unit index for which the data need to be transfer
903  * from the future task heap to ready task heap if the computational
904  * unit timer is more advanced than the ready time of the tasks.
905  *
906  *******************************************************************************/
907 static inline void
909  SimuCtrl *simuctrl,
910  pastix_int_t procnum )
911 {
912  SimuProc *sproc;
913  SimuTimer *timer;
914  pastix_int_t tasknum;
915  pastix_int_t cblknum;
916  pastix_int_t clustnum;
917 
918  clustnum = ctrl->core2clust[procnum];
919  sproc = &(simuctrl->proctab[procnum]);
920 
921  /*
922  * Move each task from future task heap to ready heap if the timer is
923  * further in the future than the ready time
924  */
925  while( pqueueSize(sproc->futuretask) > 0 )
926  {
927  tasknum = pqueueRead(sproc->futuretask);
928  cblknum = simuctrl->tasktab[tasknum].cblknum;
929 
930  if( ctrl->candtab[cblknum].fccandnum == ctrl->candtab[cblknum].lccandnum) {
931  timer = &(simuctrl->tasktab[tasknum].time);
932  }
933  else {
934  timer = &(simuctrl->ftgttimetab[CLUST2INDEX(simuctrl->tasktab[tasknum].bloknum, clustnum )]);
935  }
936 
937  if( timerComp( timer, &(sproc->timer) ) )
938  {
939  tasknum = pqueuePop(sproc->futuretask);
940 
941  pqueuePush2( sproc->readytask, tasknum,
942 #if defined(PASTIX_BLEND_COSTLEVEL)
943  ctrl->candtab[cblknum].costlevel,
944 #else
945  ctrl->candtab[cblknum].treelevel,
946 #endif
947  simuctrl->tasktab[tasknum].bloknum );
948  }
949  else {
950  break;
951  }
952  }
953 }
954 
955 /**
956  * @}
957  */
958 
959 /**
960  *******************************************************************************
961  *
962  * @ingroup pastix_blend
963  *
964  * @brief Run the simulation to map the data on the nodes
965  *
966  * This routine simulates the numerical factorization to generate the static
967  * scheduling and the final mapping of the column block onto the PaStiX
968  * processes.
969  *
970  *******************************************************************************
971  *
972  * @param[inout] simuctrl
973  * The pointer to the simulation structure initialized by simuInit().
974  *
975  * @param[in] ctrl
976  * The pointer to the blend control structure which contains the
977  * required data, such as the worker distribution among the processes,
978  * the candidates array for each column block, and the cost of the
979  * computations.
980  *
981  * @param[in] symbptr
982  * The block symbol structure of the problem.
983  *
984  *******************************************************************************/
985 void
986 simuRun( SimuCtrl *simuctrl,
987  const BlendCtrl *ctrl,
988  const symbol_matrix_t *symbptr )
989 {
990 
991  pastix_int_t i, j, b;
992  pastix_int_t cblknum, bloknum;
993  /*pastix_int_t c;*/
994  pastix_int_t pr;
995 
996 #if defined(PASTIX_BLEND_GENTRACE)
997  static volatile pastix_atomic_lock_t trace_lock = PASTIX_ATOMIC_UNLOCKED;
998  char **procnames = NULL;
999 
1000  if (ctrl->clustnum == 0)
1001  {
1002  pastix_atomic_lock( &trace_lock );
1003  char *tracename = NULL;
1004  int rc;
1005 
1006  if ( ctrl->dirname == NULL ) {
1007  tracename = strdup( "blend" );
1008  }
1009  else {
1010  rc = asprintf( &tracename, "%s/blend", ctrl->dirname );
1011  }
1012 
1013  setTraceType (PAJE);
1014  initTrace (tracename, 0, GTG_FLAG_NONE);
1015  pajeEventDefAddParam( GTG_PAJE_EVTDEF_SetState, "TaskId", GTG_PAJE_FIELDTYPE_Int );
1016  free(tracename);
1017 
1018  addContType ("CT_Appli", "0", "Application" );
1019  addContType ("CT_P", "CT_Appli", "Process" );
1020  addContType ("CT_T", "CT_P", "Thread" );
1021  addStateType("ST_TS", "CT_T", "Thread State");
1022  addVarType ("VR_TS", "Ready tasks per thread", "CT_T" );
1023  addVarType ("VR_AP", "Ready tasks", "CT_Appli" );
1024 
1025  addLinkType ("LT_TL", "Split Event Link", "CT_P", "CT_T", "CT_T");
1026 
1027  /* Create root container of the application */
1028  addContainer (0.00000, "Appli", "CT_Appli", "0", "PaStiX Blend Simulation", "");
1029 
1030  /* Add all possible states */
1031  addEntityValue ("Wait", "ST_TS", "Waiting", GTG_LIGHTGREY);
1032  addEntityValue ("Comp", "ST_TS", "Computing", GTG_RED);
1033 
1034  setVar( 0.0, "VR_AP", "Appli", 0 );
1035 
1036  /* Add each process and thread */
1037  SimuProc *sproc = simuctrl->proctab;
1038 
1039  procnames = (char**) malloc ( ctrl->total_nbthrds * sizeof(char*) );
1040  pr = 0;
1041  for (i=0; i<ctrl->clustnbr; i++) {
1042  char *clustname;
1043  char *clustalias;
1044 
1045  rc = asprintf( &clustname, "Process %02d", (int)i); assert(rc!=-1);
1046  rc = asprintf( &clustalias, "P%d", (int)i); assert(rc!=-1);
1047  addContainer (0.00000, clustalias, "CT_P", "Appli", clustname, "");
1048 
1049  for (j=0; j<ctrl->local_nbthrds; j++, pr++, sproc++) {
1050  char *procname;
1051  char *procalias;
1052 
1053  rc = asprintf( &procname, "Thread %02d", (int)pr); assert(rc!=-1);
1054  rc = asprintf( &procalias, "T%d", (int)pr); assert(rc!=-1);
1055  addContainer (0.00000, procalias, "CT_T", clustname, procname, "");
1056  setVar( 0.0, "VR_TS", procalias, pqueueSize( sproc->readytask ) );
1057 
1058  sproc->procalias = procalias;
1059  procnames[pr] = procalias;
1060  free(procname);
1061  }
1062 
1063  free(clustname); free(clustalias);
1064  }
1065  (void)rc;
1066  }
1067 #endif /* defined(PASTIX_BLEND_GENTRACE) */
1068 
1069  /* Compute number of contributions per blocks, cblks, tasks */
1070  simu_computeBlockCtrbNbr( symbptr, simuctrl, ctrl->ricar );
1071 
1072  if ( ctrl->iparm[IPARM_VERBOSE] > 4 ) {
1073  simu_printBlockCtrbNbr( ctrl, symbptr, simuctrl );
1074  }
1075 
1076  /*
1077  * All ready tasks are put in the task heaps of their respective candidates
1078  */
1079  for(i=0;i<symbptr->cblknbr;i++)
1080  {
1081  pastix_int_t tasknum;
1082  if(simuctrl->cblktab[i].ctrbcnt == 0)
1083  {
1084  tasknum = simuctrl->bloktab[symbptr->cblktab[i].bloknum].tasknum;
1085  assert(ctrl->candtab[i].treelevel < 0);
1086 
1087  if( ctrl->costlevel ) {
1088  assert(ctrl->candtab[i].costlevel <= 0);
1089  }
1090 
1091  assert(simuctrl->tasktab[tasknum].cblknum == i);
1092  //assert(ctrl->candtab[i].cblktype == CBLK_1D);
1093 
1094  simu_putInAllReadyQueues( ctrl, simuctrl, tasknum );
1095  }
1096  }
1097 
1098  /*
1099  * Run simulation and map the task onto a single candidate
1100  */
1101  while(1)
1102  {
1103  SimuTask *task;
1104  pastix_int_t clustnum;
1105 
1106  /* Get the next earlier task index and the processor on which it is mapped */
1107  i = simu_getNextTaskNextProc(ctrl, simuctrl, &pr);
1108 
1109  /* No more tasks */
1110  if( i == -1 ) {
1111  break;
1112  }
1113 
1114  task = &(simuctrl->tasktab[i]);
1115  bloknum = task->bloknum;
1116  cblknum = task->cblknum;
1117  clustnum = ctrl->core2clust[pr];
1118 
1119  assert(cblknum < symbptr->cblknbr);
1120  assert(bloknum < symbptr->bloknbr);
1121 
1122  /* Make sure the cblk is not already atributed to someone and give it to the selected proc */
1123  assert( simuctrl->ownetab[cblknum] < 0 );
1124  simuctrl->ownetab[cblknum] = pr;
1125  simuctrl->cblktab[cblknum].owned = ( clustnum == ctrl->clustnum );
1126  for(j = symbptr->cblktab[cblknum].bloknum;
1127  j < symbptr->cblktab[cblknum+1].bloknum; j++)
1128  {
1129  simuctrl->bloktab[j].ownerclust = clustnum;
1130  }
1131  task->prionum = simuctrl->clustab[clustnum].prionum;
1132  simuctrl->clustab[clustnum].prionum++;
1133 
1134  /* Add task to the selected processor list */
1135  extendint_Add(simuctrl->proctab[pr].tasktab, i);
1136 
1137  /* Backup which cluster will get the data for the second run of proportionnal mapping */
1138  ctrl->candtab[cblknum].cluster = clustnum;
1139 
1140  /*
1141  * Compute the time at which each proc cand will have added its ftgt and
1142  * received block target if the task is mapped on
1143  */
1144  if( ctrl->candtab[cblknum].fccandnum == ctrl->candtab[cblknum].lccandnum ) {
1145  /*
1146  * All contributions come from the same node
1147  * Time do not depend on the reception of a ftgt
1148  */
1149  timerSetMax( TIMER(pr), timerVal(&(task->time)) );
1150  }
1151  else {
1152  /*
1153  * Contributions might come from different nodes
1154  * Time depends on the reception of a ftgt
1155  */
1156  timerSetMax( TIMER(pr),
1157  timerVal(&(simuctrl->ftgttimetab[CLUST2INDEX(bloknum, clustnum)])) );
1158  }
1159 
1160 #if defined(PASTIX_BLEND_GENTRACE)
1161  if (ctrl->clustnum == 0) {
1162  char *str_val;
1163  int rc;
1164  assert( (procnames != NULL) && (pr < ctrl->total_nbthrds) );
1165  assert( procnames[pr] != NULL );
1166  rc = asprintf( &str_val, "Comp\" \"%d", (int)i );
1167  setState( timerVal( TIMER(pr) ), "ST_TS", procnames[pr], str_val );
1168  free(str_val);
1169  (void)rc;
1170  }
1171 #endif
1172 
1173  /*
1174  * Fill some fanintarget info (task of type E2 does not have any ftgt)
1175  */
1176  if(simuctrl->bloktab[bloknum].ftgtnum < simuctrl->bloktab[bloknum+1].ftgtnum)
1177  {
1178  /* Task with several cand cluster */
1179  for(b=bloknum; b<symbptr->cblktab[cblknum+1].bloknum; b++)
1180  {
1181  for(j=simuctrl->bloktab[b].ftgtnum; j<simuctrl->bloktab[b+1].ftgtnum; j++)
1182  {
1183  if( (simuctrl->ftgttab[j].infotab[FTGT_CTRBNBR] > 0) &&
1184  (j != CLUST2INDEX(b, clustnum)) )
1185  {
1186  simuctrl->ftgttab[j].clustnum = INDEX2CLUST(j, b);
1187  simuctrl->ftgttab[j].infotab[FTGT_PRIONUM] = task->prionum;
1188  simuctrl->ftgttab[j].infotab[FTGT_PROCDST] = pr;
1189  simuctrl->ftgttab[j].infotab[FTGT_BLOKDST] = b;
1190  simuctrl->ftgttab[j].infotab[FTGT_TASKDST] = simuctrl->bloktab[bloknum].tasknum;
1191  extendint_Add(&(simuctrl->clustab[INDEX2CLUST(j,b)].ftgtsend[clustnum]), j);
1192 
1193  simuctrl->tasktab[simuctrl->bloktab[bloknum].tasknum].ftgtcnt++;
1194 
1195  if (clustnum == ctrl->clustnum) {
1196  simuctrl->ftgtcnt++;
1197  }
1198  }
1199  }
1200  }
1201  simuctrl->ftgtprio++;
1202  }
1203  else {
1204  assert(ctrl->candtab[cblknum].fccandnum == ctrl->candtab[cblknum].lccandnum);
1205  }
1206 
1207  /* Simulate the task computation */
1208  simu_computeTask( ctrl, symbptr, simuctrl, i );
1209 
1210 #if defined(PASTIX_BLEND_GENTRACE)
1211  if (ctrl->clustnum == 0) {
1212  char *str_val;
1213  int rc;
1214  assert( (procnames != NULL) && (pr < ctrl->total_nbthrds) );
1215  assert( procnames[pr] != NULL );
1216  rc = asprintf( &str_val, "Wait\" \"%d", (int)i );
1217  setState( timerVal( TIMER(pr) ), "ST_TS", procnames[pr], str_val );
1218  free(str_val);
1219  (void)rc;
1220  }
1221 #endif
1222  simu_pushToReadyHeap(ctrl, simuctrl, pr);
1223  }
1224 
1225  /* Compute maximum time */
1226  {
1227  double maxtime = 0;
1228  for(pr=0; pr<ctrl->total_nbcores; pr++)
1229  {
1230  if(timerVal(TIMER(pr)) > maxtime) {
1231  maxtime = timerVal(TIMER(pr));
1232  }
1233  }
1234  set_dparm(ctrl->dparm, DPARM_PRED_FACT_TIME, maxtime);
1235  }
1236 
1237 #if defined(PASTIX_BLEND_GENTRACE)
1238  if (ctrl->clustnum == 0) {
1239  assert( procnames != NULL );
1240  for(pr=0; pr<ctrl->total_nbthrds; pr++) {
1241  free(procnames[pr]);
1242  }
1243  free(procnames);
1244 
1245  endTrace();
1246  pastix_atomic_unlock( &trace_lock );
1247  }
1248 #endif
1249 
1250 #if defined(PASTIX_DEBUG_BLEND)
1251  for(i=0;i<simuctrl->cblknbr;i++) {
1252  /* Check valid for 1D distribution only */
1253  assert( simuctrl->ownetab[i] >= 0 );
1254  }
1255  for(i=0;i<symbptr->bloknbr;i++) {
1256  assert( simuctrl->bloktab[i].ownerclust >= 0 );
1257  }
1258 #endif
1259 }
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
pastix_int_t cluster
Definition: cand.h:35
pastix_int_t fcandnum
Definition: cand.h:31
double costlevel
Definition: cand.h:29
pastix_int_t treelevel
Definition: cand.h:30
pastix_int_t lccandnum
Definition: cand.h:34
pastix_int_t fccandnum
Definition: cand.h:33
Processor candidate group to own a column block.
Definition: cand.h:28
double * blokcost
Definition: cost.h:31
Arrays of double to store the cost of each element in the matrix.
Definition: cost.h:30
pastix_int_t * core2clust
Definition: blendctrl.h:79
pastix_int_t ricar
Definition: blendctrl.h:33
pastix_int_t costlevel
Definition: blendctrl.h:43
double * dparm
Definition: blendctrl.h:87
const char * dirname
Definition: blendctrl.h:88
pastix_int_t clustnbr
Definition: blendctrl.h:71
pastix_int_t clustnum
Definition: blendctrl.h:70
Cand * candtab
Definition: blendctrl.h:98
pastix_int_t local_nbthrds
Definition: blendctrl.h:75
CostMatrix * costmtx
Definition: blendctrl.h:97
pastix_int_t total_nbcores
Definition: blendctrl.h:72
pastix_int_t total_nbthrds
Definition: blendctrl.h:73
pastix_int_t * iparm
Definition: blendctrl.h:86
void getCommunicationCosts(const BlendCtrl *ctrl, pastix_int_t clustsrc, pastix_int_t clustdst, pastix_int_t sync_comm_nbr, double *startup, double *bandwidth)
Return the communication cost between two cores.
Definition: blendctrl.c:60
The type and structure definitions.
Definition: blendctrl.h:28
void extendint_Add(ExtendVectorINT *, pastix_int_t)
Add an element elt to the end of the vector.
Definition: extendVector.c:90
void pqueuePush2(pastix_queue_t *, pastix_int_t, double, double)
Insert an element into the sorted queue.
Definition: queue.c:178
pastix_int_t pqueueRead(const pastix_queue_t *)
Read the first element of the queue.
Definition: queue.c:239
pastix_int_t pqueueSize(const pastix_queue_t *)
Return the size of the queue.
Definition: queue.c:135
static pastix_int_t pqueuePop(pastix_queue_t *q)
Pop the head of the queue without returning the keys.
Definition: queue.h:75
pastix_int_t tasknum
Definition: simu.h:87
pastix_int_t clustnum
Definition: simu.h:69
pastix_int_t infotab[FTGT_MAXINFO]
Definition: simu.h:68
SimuCluster * clustab
Definition: simu.h:123
SimuFtgt * ftgttab
Definition: simu.h:127
SimuProc * proctab
Definition: simu.h:122
pastix_int_t ftgtprio
Definition: simu.h:118
char * procalias
Definition: simu.h:61
pastix_queue_t * readytask
Definition: simu.h:58
pastix_int_t prionum
Definition: simu.h:50
pastix_queue_t * futuretask
Definition: simu.h:59
int8_t owned
Definition: simu.h:80
SimuTask * tasktab
Definition: simu.h:121
int ownerclust
Definition: simu.h:93
pastix_int_t ftgtcnt
Definition: simu.h:120
SimuTimer * ftgttimetab
Definition: simu.h:129
ExtendVectorINT * tasktab
Definition: simu.h:60
pastix_int_t ctrbcnt
Definition: simu.h:108
pastix_int_t cblknum
Definition: simu.h:101
double costadd
Definition: simu.h:72
SimuBlok * bloktab
Definition: simu.h:126
SimuTimer time
Definition: simu.h:105
pastix_int_t bloknum
Definition: simu.h:102
pastix_int_t ftgtcnt
Definition: simu.h:109
SimuCblk * cblktab
Definition: simu.h:125
pastix_int_t ctrbcnt
Definition: simu.h:91
pastix_int_t prionum
Definition: simu.h:100
pastix_int_t * ownetab
Definition: simu.h:124
pastix_int_t ftgtnum
Definition: simu.h:88
pastix_int_t ctrbcnt
Definition: simu.h:79
pastix_int_t cblknbr
Definition: simu.h:117
pastix_int_t tasknbr
Definition: simu.h:119
SimuTimer timerecv
Definition: simu.h:70
SimuTimer timer
Definition: simu.h:57
double costsend
Definition: simu.h:71
static void simu_printBlockCtrbNbr(const BlendCtrl *ctrl, const symbol_matrix_t *symbptr, const SimuCtrl *simuctrl)
Print the number of contributions per cblk and block for debug.
Definition: simu_run.c:284
static void simu_computeBlockCtrbNbr(const symbol_matrix_t *symbptr, SimuCtrl *simuctrl, pastix_int_t ricar)
Compute the number of contributions to each block.
Definition: simu_run.c:207
static void simu_computeFtgtCosts(const BlendCtrl *ctrl, const SimuFtgt *ftgt, pastix_int_t clustsrc, pastix_int_t sync_comm_nbr, double *send, double *add)
Compute the cost of a communication and its update.
Definition: simu_run.c:153
static void timerSetMax(SimuTimer *timer, double t)
Set the timer value if the value is greater than the actual one.
Definition: simu_timer.h:97
static double timerVal(const SimuTimer *timer)
Get the timer value.
Definition: simu_timer.h:71
static void simu_updateFtgt(const symbol_matrix_t *symbptr, SimuCtrl *simuctrl, pastix_int_t ftgtnum, pastix_int_t bloknum, pastix_int_t fbloknum)
Update the Fan In target structure.
Definition: simu_run.c:711
static void simu_putInAllReadyQueues(const BlendCtrl *ctrl, SimuCtrl *simuctrl, pastix_int_t tasknum)
Insert a task in the ready task queues of all its candidates.
Definition: simu_run.c:349
static void timerAdd(SimuTimer *timer, double t)
Increment the timer.
Definition: simu_timer.h:59
static void simu_pushToReadyHeap(const BlendCtrl *ctrl, SimuCtrl *simuctrl, pastix_int_t procnum)
Push all tasks from future to ready.
Definition: simu_run.c:908
static void simu_computeTask(const BlendCtrl *ctrl, const symbol_matrix_t *symbptr, SimuCtrl *simuctrl, pastix_int_t tasknum)
Simulate the task execution.
Definition: simu_run.c:775
static void simu_computeTaskReceiveTime(const BlendCtrl *ctrl, const symbol_matrix_t *symbptr, SimuCtrl *simuctrl, pastix_int_t tasknum)
Compute the instant t where the task will be received by a node.
Definition: simu_run.c:583
static int timerComp(const SimuTimer *t1, const SimuTimer *t2)
Compare two timings.
Definition: simu_timer.h:39
static void timerSet(SimuTimer *timer, double t)
Set the timer value.
Definition: simu_timer.h:84
static pastix_int_t simu_getNextTaskNextProc(const BlendCtrl *ctrl, SimuCtrl *simuctrl, pastix_int_t *procnumptr)
Look for the best next couple (tasknum, corenum) that is ready to be executed.
Definition: simu_run.c:436
@ FTGT_LCOLNUM
Definition: simu.h:37
@ FTGT_PROCDST
Definition: simu.h:32
@ FTGT_CTRBNBR
Definition: simu.h:30
@ FTGT_TASKDST
Definition: simu.h:33
@ FTGT_FCOLNUM
Definition: simu.h:36
@ FTGT_BLOKDST
Definition: simu.h:34
@ FTGT_FROWNUM
Definition: simu.h:38
@ FTGT_PRIONUM
Definition: simu.h:35
@ FTGT_LROWNUM
Definition: simu.h:39
Fan-in structure for the simulation.
Definition: simu.h:67
Thread structure for the simulation.
Definition: simu.h:56
Task structure for the simulation.
Definition: simu.h:99
Timer for the simulation.
Definition: simu_timer.h:25
Control structure for the simulation.
Definition: simu.h:116
FILE * pastix_fopenw(const char *dirname, const char *filename, const char *mode)
Open a file in the unique directory of the pastix instance.
Definition: api.c:242
@ DPARM_PRED_FACT_TIME
Definition: api.h:169
@ IPARM_VERBOSE
Definition: api.h:36
void simuRun(SimuCtrl *, const BlendCtrl *, const symbol_matrix_t *)
Run the simulation to map the data on the nodes.
Definition: simu_run.c:986
pastix_int_t fcblknm
Definition: symbol.h:63
pastix_int_t frownum
Definition: symbol.h:60
pastix_int_t lrownum
Definition: symbol.h:61
pastix_int_t bloknbr
Definition: symbol.h:80
symbol_cblk_t * cblktab
Definition: symbol.h:83
pastix_int_t bloknum
Definition: symbol.h:48
symbol_blok_t * bloktab
Definition: symbol.h:84
pastix_int_t cblknbr
Definition: symbol.h:79
pastix_int_t pastixSymbolGetFacingBloknum(const symbol_matrix_t *symbptr, pastix_int_t bloksrc, pastix_int_t bloknum, pastix_int_t startsearch, int ricar)
Search the targeted block C for a couple of blocks A and B.
Definition: symbol.c:232
Symbol block structure.
Definition: symbol.h:59
Symbol column block structure.
Definition: symbol.h:45
Symbol matrix structure.
Definition: symbol.h:77