77 cost = PERF_POTRF( L ) + PERF_TRSM( L, G );
84 while ( ( i < n ) && ( ja[i] == ja[i - 1] + 1 ) ) {
88 cost += (double)( PERF_GEMM( G, H, L ) );
130 cost = PERF_TRSV( L ) + PERF_GEMV( L, n - L );
174 for ( i = sonindex[node]; i < sonindex[node + 1]; i++ ) {
177 if ( colweight[s] <= 0 ) {
227 costa = colweight[a];
228 costb = colweight[b];
233 while ( ( ia < na ) && ( ja[ia] < jb[0] ) )
237 while ( ( ia < na ) && ( ib < nb ) ) {
238 if ( ja[ia] < jb[ib] ) {
243 if ( ja[ia] > jb[ib] ) {
249 assert( ja[ia] == jb[ib] );
254 cost += costb * ( na - ia );
255 cost += costa * ( nb - ib );
303 double costa, costb, costm;
306 costa = cblktime( P->nnz[a], P->rows[a], colweight[a] );
307 costb = cblktime( P->nnz[b], P->rows[b], colweight[b] );
309 nm = pastix_intset_union( P->nnz[a], P->rows[a], P->nnz[b], P->rows[b], tmp );
311 costm = cblktime( nm, tmp, colweight[a] + colweight[b] );
313 return costm - costa - costb;
343 n = pastix_intset_union( P->nnz[a], P->rows[a], P->nnz[b], P->rows[b], tmp );
345 if ( P->rows[a] != NULL ) {
347 memFree( P->rows[a] );
350 if ( P->rows[b] != NULL ) {
351 memFree( P->rows[b] );
413 PASTIX_Comm pastix_comm )
446 MPI_Comm_rank( pastix_comm, &procnum );
462 if ( rat_blas < 0 ) {
463 rat_blas = -rat_blas;
464 rat_cblk = MIN( rat_blas,
RAT_CBLK );
479 assert( graphL->n == oldcblknbr );
480 for ( i = 0; i < n; i++ ) {
481 colweight[i] = oldrangtab[i + 1] - oldrangtab[i];
482 assert( ( graphL->nnz[i] >= colweight[i] ) && ( colweight[i] > 0 ) );
484 #if !defined( NDEBUG ) && defined( PASTIX_DEBUG_SYMBOL )
489 for ( j = 0; j < n; j++ ) {
491 for ( l = oldrangtab[j]; l < oldrangtab[j + 1]; l++ ) {
492 assert( graphL->rows[j][k] == l );
501 nn = oldrangtab[oldcblknbr];
515 nnzL = graphL->total_nnz;
516 fillcblk = (
pastix_int_t)lround( (
double)nnzL * rat_cblk );
517 fillblas = (
pastix_int_t)lround( (
double)nnzL * rat_blas );
518 fillwhile = fillcblk;
520 #if defined( PASTIX_DEBUG_SYMBOL )
524 pastix_print( procnum, 0,
"fillcblk %ld fillmax %ld \n", (
long)fillcblk, (
long)fillblas );
526 for ( i = 0; i < graphL->n; i++ ) {
527 key += cblktime( graphL->nnz[i], graphL->rows[i], colweight[i] );
529 pastix_print( procnum, 0,
"COST of the NON AMALGAMATED MATRIX = %e\n", key );
543 for ( i = 0; i < n - 1; i++ ) {
544 if ( oldtreetab[i] >= 0 ) {
545 tmp[oldtreetab[i]]++;
549 assert( nbsons < n );
559 for ( i = 0; i < n; i++ ) {
560 sonindex[i + 1] = sonindex[i] + tmp[i];
565 for ( i = 0; i < n - 1; i++ ) {
567 assert( ( father != i ) && ( father < n ) );
570 assert( sontab[tmp[father]] == -1 );
571 sontab[tmp[father]] = i;
573 assert( tmp[father] <= sonindex[father + 1] );
582 MALLOC_INTERN( gain, n,
double );
583 for ( i = 0; i < n; i++ ) {
584 father = oldtreetab[i];
585 if ( ( father == -1 ) || ( father == i ) ) {
592 gain[i] = (double)( nnzadd[i] );
599 for ( i = 0; i < n; i++ ) {
600 assert( colweight[i] > 0 );
602 if ( ( colweight[i] != 0 ) && ( nnzadd[i] == 0 ) ) {
603 father = oldtreetab[i];
604 assert( ( father > 0 ) && ( father != i ) );
612 colweight[father] += colweight[i];
616 k = oldtreetab[father];
617 if ( ( k != -1 ) && ( k != father ) ) {
619 gain[father] = (double)( nnzadd[father] );
624 for ( j = 0; j < ind; j++ ) {
626 assert( k != father );
627 oldtreetab[k] = father;
629 gain[k] = (double)( nnzadd[k] );
634 pastix_print( procnum,
636 "Number of cblk after amalgamation initialization = %ld (reduced by %ld)\n",
637 (
long)( n - nbcblk_merged ),
638 (
long)nbcblk_merged );
643 for ( i = 0; i < n; i++ ) {
644 if ( ( colweight[i] > 0 ) && ( oldtreetab[i] > 0 ) && ( oldtreetab[i] != i ) ) {
656 while ( (
pqueueSize( &heap ) > 0 ) && ( fill < fillwhile ) ) {
660 if ( ( gain[i] != key ) || ( colweight[i] <= 0 ) )
664 if ( ( fill + nnzadd[i] ) > fillwhile )
670 father = oldtreetab[i];
671 assert( ( father > 0 ) && ( father != i ) );
672 assert( colweight[father] > 0 );
678 colweight[father] += colweight[i];
682 k = oldtreetab[father];
683 if ( ( k != -1 ) && ( k != father ) ) {
685 if ( blas_gain == 1 ) {
689 if ( gain[father] <= 0 )
693 gain[father] = (double)( nnzadd[father] );
700 for ( j = 0; j < ind; j++ ) {
702 assert( k != father );
703 oldtreetab[k] = father;
706 if ( blas_gain == 1 ) {
713 gain[k] = (double)( nnzadd[k] );
720 pastix_print( procnum,
722 "Number of cblk after amalgamation phase = %ld (reduced by %ld)\n",
723 (
long)( n - nbcblk_merged ),
724 (
long)nbcblk_merged );
727 if ( fillwhile < fillblas ) {
729 assert( blas_gain == 0 );
732 fillwhile = fillblas;
736 for ( i = 0; i < n; i++ ) {
737 father = oldtreetab[i];
738 if ( father == -1 || father == i ) {
745 if ( ( colweight[i] > 0 ) && ( oldtreetab[i] > 0 ) && ( oldtreetab[i] != i ) &&
746 ( gain[i] <= 0. ) ) {
756 if ( tmp2 != NULL ) {
771 order->
cblknbr = n - nbcblk_merged;
785 for ( i = 0; i < n; i++ ) {
786 if ( colweight[i] > 0 ) {
788 newrangtab[k] = colweight[i];
794 for ( i = 0; i < order->
cblknbr; i++ ) {
795 newrangtab[i + 1] += newrangtab[i];
799 for ( i = 0; i < n; i++ ) {
801 if ( colweight[i] <= 0 ) {
803 while ( colweight[father] <= 0 ) {
804 father = oldtreetab[father];
805 assert( father > 0 );
809 newfather = newnum[father];
812 for ( j = oldrangtab[i]; j < oldrangtab[i + 1]; j++ ) {
813 newperitab[newrangtab[newfather]++] = oldperitab[j];
817 if ( oldtreetab[father] != -1 ) {
818 newtreetab[newfather] = newnum[oldtreetab[father]];
821 newtreetab[newfather] = -1;
826 for ( i = order->
cblknbr; i > 0; i-- ) {
827 newrangtab[i] = newrangtab[i - 1];
832 for ( i = 0; i < order->
vertnbr; i++ ) {
833 order->
permtab[newperitab[i]] = i;
836 #if defined( PASTIX_DEBUG_SYMBOL )
842 assert( graphL->n == order->
cblknbr );
845 for ( i = 0; i < graphL->n; i++ ) {
847 ja = graphL->rows[i];
848 for ( j = 0; j < graphL->nnz[i]; j++ ) {
853 for ( i = 0; i < graphL->n; i++ ) {
854 key += cblktime( graphL->nnz[i], graphL->rows[i], colweight[i] );
856 pastix_print( procnum, 0,
"COST of the AMALGAMATED MATRIX = %e\n", key );
860 memFree( oldrangtab );
861 memFree( oldtreetab );
862 memFree( oldperitab );
867 memFree( colweight );
869 #if !defined( NDEBUG )
BEGIN_C_DECLS typedef int pastix_int_t
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
void pqueueExit(pastix_queue_t *)
Free the structure associated to the queue.
pastix_int_t pqueueSize(const pastix_queue_t *)
Return the size of the queue.
static pastix_int_t pqueuePop1(pastix_queue_t *q, double *key1)
Pop the head of the queue and get the associated first key.
int pqueueInit(pastix_queue_t *, pastix_int_t)
Initialize the queue structure with an initial space to store the elements.
int pastixOrderCheck(const pastix_order_t *ordeptr)
This routine checks the correctness of the ordering structure.
static pastix_int_t amalgamate_get_sonslist(pastix_int_t node, const pastix_int_t *sonindex, const pastix_int_t *sontab, const pastix_int_t *colweight, pastix_int_t *list)
Return the list of sons of a given node.
#define INFINI
Define an infinite time.
#define RAT_CBLK
Minimal memory increase accepted by the amalgamation algorithm (2%)
static void amalgamate_merge_col(pastix_int_t a, pastix_int_t b, fax_csr_t *P, pastix_int_t *tmp)
Merge two nodes together withing the given graph.
static double cblk_time_solve(pastix_int_t n, const pastix_int_t *ja, pastix_int_t colnbr)
Compute a time estimation cost of the solve step.
static pastix_int_t amalgamate_merge_cost(pastix_int_t a, pastix_int_t b, const fax_csr_t *P, const pastix_int_t *colweight)
Compute the cost of merging two nodes a and b together. The additional cost is returned.
void faxCSRAmalgamate(int ilu, double rat_cblk, double rat_blas, fax_csr_t *graphL, pastix_order_t *order, PASTIX_Comm pastix_comm)
Amalgamate the given graph up to a given tolerance.
void faxCSRCompact(fax_csr_t *csr)
Compact a compressed graph.
static double amalgamate_merge_gain(pastix_int_t a, pastix_int_t b, const fax_csr_t *P, const pastix_int_t *colweight, pastix_int_t *tmp, double(*cblktime)(pastix_int_t, const pastix_int_t *, pastix_int_t))
Computes the computational gain of merging two nodes a and b together.
static double cblk_time_fact(pastix_int_t n, const pastix_int_t *ja, pastix_int_t colnbr)
Compute a time estimation cost of the factorization.
Fax blocked csr structure.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.