92 memset( localindex, 0, clustnbr *
sizeof(
pastix_int_t) );
93 for ( i = 0; i < simuctrl->
tasknbr; i++ ) {
96 tasklocalnum[i] = localindex[c];
99 solvmtx->tasknbr = localindex[clustnum];
105 memset( localindex, 0, clustnbr *
sizeof(
pastix_int_t ) );
113 for ( i = 0; i < symbmtx->
cblknbr; i++, symbcblk++ ) {
120 flaglocal = ( simuctrl->
cblktab[i].
owned ) || ( ftgttab[i] != NULL );
122 cblklocalnum[i] = -i - 1;
124 for ( j=symbcblk[0].bloknum; j<symbcblk[1].
bloknum; j++ ) {
125 bloklocalnum[j] = -1;
146 for ( j = symbcblk[0].brownum; j < symbcblk[1].
brownum; j++ ) {
148 symbblok = symbmtx->
bloktab + k;
151 assert( i == symbblok->
fcblknm );
154 if ( c != clustnum ) {
158 symbblok, symbcblk, c );
161 assert( brownbr >= 0 );
168 ftgtcblk = ftgttab[i];
169 while( ftgtcblk != NULL ) {
170 assert( (ftgtcblk->ownerid != -1) &&
171 (ftgtcblk->ownerid != clustnum) );
181 ftgtcblk = ftgtcblk->next;
190 for ( j=symbcblk[0].bloknum; j<symbcblk[1].
bloknum; j++, symbblok++ ) {
194 bloklocalnum[j] = localindex[clustnum];
195 localindex[clustnum]++;
200 symbfcbk = symbmtx->
cblktab + fcblknum;
208 if ( fc != clustnum ) {
210 symbmtx, symbcblk, symbblok, symbfcbk, fc );
221 for ( j = symbcblk[0].brownum; j < symbcblk[1].
brownum; j++ ) {
224 if ( c != clustnum ) {
237 assert( ftgtcblk != NULL );
238 assert( ftgtcblk->next == NULL );
243 faninnbr_tab[2 * ftgtcblk->ownerid]++;
245 for ( j=symbcblk[0].bloknum; j<symbcblk[1].
bloknum; j++, symbblok++, ftgtblok++ )
250 (ftgtblok->
frownum >= symbblok->frownum) &&
251 (ftgtblok->
lrownum <= symbblok->lrownum) )
253 bloklocalnum[j] = localindex[clustnum];
254 localindex[clustnum]++;
257 faninnbr_tab[2 * ftgtcblk->ownerid + 1]++;
260 bloklocalnum[j] = -1;
267 cblklocalnum[i] = cblknum;
272 assert( brownum <= symbcblk[1].brownum );
276 solvmtx->
bloknbr = localindex[clustnum];
283#if defined(PASTIX_WITH_MPI)
284 if ( clustnbr > 1 ) {
290 fanin_tab[0] = faninnbr;
291 fanin_tab[1] = faninbloknbr;
295 if ( clustnum != 0 ) {
297 MPI_Recv( &fanin_tab[2], 2, PASTIX_MPI_INT, clustnum-1, clustnum-1,
298 solvmtx->solv_comm, MPI_STATUS_IGNORE);
299 fanin_tab[0] += fanin_tab[2];
300 fanin_tab[1] += fanin_tab[3];
302 if ( clustnum != clustnbr - 1 ) {
304 MPI_Send( &fanin_tab[0], 2, PASTIX_MPI_INT, clustnum+1, clustnum,
305 solvmtx->solv_comm );
309 gfaninnbr[0] = fanin_tab[0];
310 gfaninnbr[1] = fanin_tab[1];
311 MPI_Bcast( &gfaninnbr, 2, PASTIX_MPI_INT, clustnbr-1, solvmtx->solv_comm );
315 fanincnt = fanin_tab[2];
316 faninblokcnt = fanin_tab[3];
317 for ( k = 0; k < clustnbr; k++ ) {
318 if ( faninnbr_tab[2 * k] == 0 ) {
320 faninnbr_tab[2 * k] = -1;
322 faninnbr_tab[2 * k + 1] = -1;
325 if ( faninnbr_tab[2 * k] != -1 ) {
327 fanin_tmp = faninnbr_tab[2 * k];
328 faninnbr_tab[ 2 * k] = fanincnt;
329 fanincnt += fanin_tmp;
331 faninblok_tmp = faninnbr_tab[2 * k + 1];
332 faninnbr_tab[ 2 * k + 1] = faninblokcnt;
333 faninblokcnt += faninblok_tmp;
338 MPI_Alltoall( faninnbr_tab, 2, PASTIX_MPI_INT, faninnbr_tab+2*clustnbr,
339 2, PASTIX_MPI_INT, solvmtx->solv_comm );
343 memFree_null( localindex );
411 assert( brownbr <= symbmtx->browmax );
437 for ( j=0, j1d=0; j < jmax; j++, b++ ) {
439 symbblok = symbmtx->
bloktab + (*b);
441 lcblknm = ( cblklocalnum == NULL ) ? symbblok->
lcblknm : cblklocalnum[ symbblok->
lcblknm ];
451 browcblk = solvmtx->
cblktab + lcblknm;
454 assert( !(browcblk->
cblktype & CBLK_RECV) );
457 if( browcblk->
cblktype & CBLK_FANIN ) {
464 if ( browcblk->
cblktype & CBLK_TASKS_2D ) {
465 j2d = ( j2d == -1 ) ? j : j2d;
470 lbloknm = ( bloklocalnum == NULL ) ? *b : bloklocalnum[ *b ];
471 solvblok = solvmtx->
bloktab + lbloknm;
473 assert( solvblok->
lcblknm == lcblknm );
478 assert( ( frownum == solvblok->
frownum ) &&
479 ( lrownum == solvblok->
lrownum ) );
483 assert( brownum + j1d < solvmtx->brownbr );
484 solvmtx->
browtab[brownum + j1d] = lbloknm;
485 solvblok->
browind = brownum + j1d;
491 assert( j1d <= brownbr );
497 for ( j = j2d; j < jmax; j++, b++ ) {
498 symbblok = symbmtx->
bloktab + ( *b );
503 lcblknm = ( cblklocalnum == NULL ) ? symbblok->
lcblknm : cblklocalnum[ symbblok->
lcblknm ];
504 assert( lcblknm >= 0 );
507 browcblk = solvmtx->
cblktab + lcblknm;
508 assert( (cblklocalnum == NULL) ||
509 (browcblk->
ownerid == solvmtx->clustnum) );
512 lbloknm = ( bloklocalnum == NULL ) ? *b : bloklocalnum[ *b ];
513 solvblok = solvmtx->
bloktab + lbloknm;
515 assert( solvblok->
lcblknm == lcblknm );
519 assert( brownum + j1d < solvmtx->brownbr );
520 solvmtx->
browtab[brownum + j1d] = lbloknm;
521 solvblok->
browind = brownum + j1d;
525 assert( j1d == brownbr );
560 struct args_ttsktab *arg = (
struct args_ttsktab*)args;
562 const SimuCtrl *simuctrl = arg->simuctrl;
565 int rank = ctx->rank;
574 solvmtx->ttsknbr[rank] = ttsknbr;
576 MALLOC_INTERN(solvmtx->ttsktab[rank], ttsknbr,
pastix_int_t);
579 solvmtx->ttsktab[rank] = NULL;
582 for(i=0; i<ttsknbr; i++)
585 if( tasklocalnum != NULL ){
586 jloc = tasklocalnum[j];
593 solvmtx->ttsktab[rank][i] = jloc;
596#if defined(PASTIX_DYNSCHED)
597 solvmtx->tasktab[jloc].threadid = rank;
599 priomax = pastix_imax( solvmtx->tasktab[jloc].
prionum, priomax );
600 priomin = pastix_imin( solvmtx->tasktab[jloc].
prionum, priomin );
603#if defined(PASTIX_DYNSCHED)
604 solvmtx->btree->nodetab[rank].priomin = priomin;
605 solvmtx->btree->nodetab[rank].priomax = priomax;
627 struct args_ttsktab *arg = (
struct args_ttsktab*)args;
631 int rank = ctx->rank;
632 int nthread = ctx->global_ctx->world_size;
637 size = (rank == nthread-1) ? (solvmtx->tasknbr - (nthread-1) * tasknbr) : tasknbr;
638 solvmtx->ttsknbr[rank] = size;
641 MALLOC_INTERN(solvmtx->ttsktab[rank], size,
pastix_int_t);
644 solvmtx->ttsktab[rank] = NULL;
647 j = ((solvmtx->tasknbr - (nthread-1) * tasknbr) * rank);
648 for(i=0; i < size; i++)
650 solvmtx->ttsktab[rank][i] = j;
652#if defined(PASTIX_DYNSCHED)
653 solvmtx->tasktab[j].threadid = rank;
655 priomax = pastix_imax( solvmtx->tasktab[j].
prionum, priomax );
656 priomin = pastix_imin( solvmtx->tasktab[j].
prionum, priomin );
660#if defined(PASTIX_DYNSCHED)
661 solvmtx->btree->nodetab[rank].priomin = priomin;
662 solvmtx->btree->nodetab[rank].priomax = priomax;
716 MALLOC_INTERN( solvmtx->tasktab, solvmtx->tasknbr+1,
Task );
717 solvtask = solvmtx->tasktab;
720 if ( tasklocalnum == NULL )
722 for(i=0; i<simuctrl->
tasknbr; i++, simutask++)
724 solvtask->
taskid = COMP_1D;
734 if ( solvcblk->
cblktype & (CBLK_IN_SCHUR | CBLK_FANIN | CBLK_RECV ) ) {
735 tasknum++; solvtask++;
740 if ( solvcblk->
cblktype & CBLK_TASKS_2D )
746 for( ; blok < lblk; blok++ ) {
749 while ( ( blok < lblk ) &&
750 ( blok[0].fcblknm == blok[1].fcblknm ) &&
751 ( blok[0].lcblknm == blok[1].lcblknm ) )
758 tasknum++; solvtask++;
763 for(i=0; i<simuctrl->
tasknbr; i++, simutask++)
770 assert( tasknum == tasklocalnum[i] );
772 solvtask->
taskid = COMP_1D;
782 if ( solvcblk->
cblktype & (CBLK_IN_SCHUR | CBLK_FANIN | CBLK_RECV ) ) {
783 tasknum++; solvtask++;
788 if ( solvcblk->
cblktype & CBLK_TASKS_2D )
794 for( ; blok < lblk; blok++ ) {
797 while ( ( blok < lblk ) &&
798 ( blok[0].fcblknm == blok[1].fcblknm ) &&
799 ( blok[0].lcblknm == blok[1].lcblknm ) )
807 tasknum++; solvtask++;
810 assert(tasknum == solvmtx->tasknbr);
811 solvmtx->tasknbr_1dp = tasknbr_1dp;
821 MALLOC_INTERN(solvmtx->ttsknbr, solvmtx->bublnbr,
pastix_int_t );
822 MALLOC_INTERN(solvmtx->ttsktab, solvmtx->bublnbr,
pastix_int_t* );
825 struct args_ttsktab args = { solvmtx, NULL, tasklocalnum, clustnum };
829 struct args_ttsktab args = { solvmtx, simuctrl, tasklocalnum, clustnum };
866 for(i=0; i<solvmtx->
cblknbr; i++, solvcblk++)
880 offdmax = pastix_imax( offdmax, offdarea );
888 for( ; solvblok<lblok; solvblok++ ) {
895 if ( !(solvcblk->
cblktype & CBLK_LAYOUT_2D) ) {
896 gemmarea = (m+1) * k;
897 gemmmax = pastix_imax( gemmmax, gemmarea );
904 if ( solvcblk->
cblktype & (CBLK_TASKS_2D | CBLK_COMPRESSED) ) {
905 if ( solvblok->
fcblknm == cblk_m ) {
915 blokmax = pastix_imax( n * (acc_m+1), blokmax );
921 solvmtx->offdmax = offdmax;
922 solvmtx->gemmmax = gemmmax;
923 solvmtx->blokmax = blokmax;
944#if defined(PASTIX_SUPERNODE_STATS)
948 for(i=0; i<solvmtx->
bloknbr; i++, solvblok++ ) {
951 if ( fcblk->
cblktype & CBLK_IN_LAST ) {
952 if ( lcblk->
cblktype & CBLK_IN_LAST ) {
1005 const Cand *candcblk,
1021 assert( solvblok != NULL );
1022 assert( brownum >= 0 );
1023 assert( symbblok->
lcblknm == gcblknm );
1024 assert( (cblklocalnum == NULL) || (lcblknm == cblklocalnum[gcblknm]) );
1034 for ( j=symbcblk[0].bloknum; j<symbcblk[1].
bloknum; j++, symbblok++ )
1039 assert( nbrows >= 1 );
1043 cblklocalnum == NULL ? symbblok->
fcblknm : cblklocalnum[symbblok->
fcblknm],
1044 frownum, lrownum, stride, nbcols,
1052 fcolnum, lcolnum, brownum, stride,
1111 const Cand *candcblk,
1130 assert( solvblok != NULL );
1131 assert( brownum >= 0 );
1132 assert( symbblok->
lcblknm == gcblknm );
1137 if ( symbmtx->
dof < 0 ) {
1139 lcolnum = symbmtx->
dofs[recvcblk->
lcolnum + 1] - 1;
1143 lcolnum = symbmtx->
dof * ( recvcblk->
lcolnum + 1 ) - 1;
1145 nbcols = lcolnum - fcolnum + 1;
1150 for ( j=symbcblk[0].bloknum; j<symbcblk[1].
bloknum; j++, recvblok++, symbblok++ )
1154 if ( symbmtx->
dof < 0 ) {
1156 lrownum = symbmtx->
dofs[recvblok->
lrownum + 1] - 1;
1160 lrownum = symbmtx->
dof * ( recvblok->
lrownum + 1 ) - 1;
1162 nbrows = lrownum - frownum + 1;
1170 lcblknm, cblklocalnum[symbblok->
fcblknm],
1171 frownum, lrownum, stride, nbcols,
1176 if ( solvcblk->
cblktype & CBLK_FANIN ) {
1177 assert( faninnbr_tab );
1179 faninnbr_tab[2 * ownerid + 1]++;
1181 if ( solvcblk->
cblktype & CBLK_RECV ) {
1182 assert( faninnbr_tab );
1183 solvblok->
gfaninnm = faninnbr_tab[2 * (clustnbr + ownerid) + 1] + solvmtx->
gbloknbr;
1184 faninnbr_tab[2 * (clustnbr + ownerid) + 1]++;
1190 fcolnum, lcolnum, brownum, stride,
1195#if defined(PASTIX_BLEND_FANIN_FR)
1196 if( solvcblk->
cblktype & CBLK_COMPRESSED ) {
1197 solvcblk->
cblktype &= (~CBLK_COMPRESSED);
1201 if( solvcblk->
cblktype & CBLK_IN_SCHUR ) {
1202 solvcblk->
cblktype &= (~CBLK_IN_SCHUR);
BEGIN_C_DECLS typedef int pastix_int_t
Processor candidate group to own a column blok.
pastix_int_t extendint_Size(const ExtendVectorINT *)
Return the number of elements stored in the vector.
pastix_int_t extendint_Read(const ExtendVectorINT *, pastix_int_t)
Return the element of index eltnum.
ExtendVectorINT * tasktab
Thread structure for the simulation.
Task structure for the simulation.
Control structure for the simulation.
SolverBlok * solvMatGen_register_local_cblk(const symbol_matrix_t *symbmtx, const Cand *candcblk, const pastix_int_t *cblklocalnum, SolverCblk *solvcblk, SolverBlok *solvblok, pastix_int_t lcblknm, pastix_int_t brownum, pastix_int_t gcblknm, pastix_int_t ownerid)
Register a local cblk from a symbol_cblk_t structure !(Fanin|Recv)
int solver_recv_get_bloknbr(const solver_cblk_recv_t *ftgtptr, const symbol_cblk_t *symbcblk, const symbol_blok_t *symbblok)
Compute the number of valid blocks in fanin/recv cblk.
SolverBlok * solvMatGen_register_remote_cblk(const SolverMatrix *solvmtx, const symbol_matrix_t *symbmtx, const solver_cblk_recv_t *recvcblk, const Cand *candcblk, const pastix_int_t *cblklocalnum, SolverCblk *solvcblk, SolverBlok *solvblok, pastix_int_t lcblknm, pastix_int_t brownum, pastix_int_t gcblknm, pastix_int_t *faninnbr_tab)
Register a remote cblk from a solver_recv_cblk_t structure (Fanin|Recv)
void solvMatGen_fill_localnums(const symbol_matrix_t *symbmtx, const SimuCtrl *simuctrl, SolverMatrix *solvmtx, pastix_int_t *cblklocalnum, pastix_int_t *bloklocalnum, pastix_int_t *tasklocalnum, solver_cblk_recv_t **ftgttab, pastix_int_t *faninnbr_tab)
Fill the local numbering arrays to compress the symbol information into solver.
void solver_recv_update_fanin(solver_cblk_recv_t **faninptr, const symbol_matrix_t *symbmtx, const symbol_cblk_t *cblk, const symbol_blok_t *blok, const symbol_cblk_t *fcblk, int ownerid)
Register a new contribution to a fanin cblk.
void solver_recv_update_recv(solver_cblk_recv_t **recvptr, const symbol_matrix_t *symbmtx, const symbol_cblk_t *cblk, const symbol_blok_t *blok, const symbol_cblk_t *fcblk, int ownerid)
Register a new contribution to a recv cblk.
static void solvMatGen_init_cblk(SolverCblk *solvcblk, SolverBlok *fblokptr, const Cand *candcblk, const symbol_cblk_t *symbcblk, pastix_int_t fcolnum, pastix_int_t lcolnum, pastix_int_t brownum, pastix_int_t stride, pastix_int_t cblknum, int ownerid)
Initialize a solver cblk.
void solvMatGen_fill_tasktab(SolverMatrix *solvmtx, isched_t *isched, const SimuCtrl *simuctrl, const pastix_int_t *tasklocalnum, const pastix_int_t *cblklocalnum, const pastix_int_t *bloklocalnum, pastix_int_t clustnum, int is_dbg)
Fill the global tasktab array, as well as the thread ttsktab arrays.
void solvMatGen_stats_last(SolverMatrix *solvmtx)
Mark blocks if they belong to the last supernode, or if they are facing it for statistical purpose on...
void solvMatGen_fill_ttsktab(isched_thread_t *ctx, void *args)
Fill the ttsktab for its own thread.
void solvMatGen_fill_ttsktab_dbg(isched_thread_t *ctx, void *args)
Fill in ttsktab for its own thread. Only for debugging factorization.
pastix_int_t solvMatGen_reorder_browtab(const symbol_matrix_t *symbmtx, const symbol_cblk_t *symbcblk, SolverMatrix *solvmtx, SolverCblk *solvcblk, pastix_int_t *browtmp, const pastix_int_t *cblklocalnum, const pastix_int_t *bloklocalnum, pastix_int_t brownum)
Reorder the browtab from the symbol structure in a distributed way. First store the 1D blocks and the...
void solvMatGen_max_buffers(SolverMatrix *solvmtx)
Compute the maximum area of the temporary buffers used during computation.
static void solvMatGen_init_blok(SolverBlok *solvblok, pastix_int_t lcblknm, pastix_int_t fcblknm, pastix_int_t frownum, pastix_int_t lrownum, pastix_int_t stride, pastix_int_t nbcols, pastix_int_t layout2D)
Initialize a solver block.
static pastix_int_t symbol_blok_get_rownum(const symbol_matrix_t *symbmtx, symbol_blok_t *symbblok, pastix_int_t *frownum, pastix_int_t *lrownum)
Get the expanded row index of a symbol_blok.
static pastix_int_t symbol_cblk_get_colnum(const symbol_matrix_t *symbmtx, symbol_cblk_t *symbcblk, pastix_int_t *fcolnum, pastix_int_t *lcolnum)
Get the expanded column indexes of a symbol_cblk.
Symbol column block structure.
pastix_int_t gfanincblknbr
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
SolverBlok *restrict bloktab
pastix_int_t gfaninbloknbr
pastix_int_t *restrict browtab
pastix_int_t volatile ctrbcnt
solver_blok_recv_t bloktab[1]
SolverCblk *restrict cblktab
Solver recv block structure.
Solver recv column block structure.
Solver column block structure.
Solver column block structure.
The task structure for the numerical factorization.