21 #include <parsec/data.h>
22 #include <parsec/data_distribution.h>
23 #if defined(PASTIX_CUDA_FERMI)
24 #include <parsec/devices/cuda/dev_cuda.h>
67 parsec_data_key_t key2;
86 *cblknum = cblkmin2d + n;
130 uplo = va_arg(ap,
int);
131 cblknum = va_arg(ap,
int);
132 bloknum = va_arg(ap,
int) - 1;
137 if ( bloknum == -1 ) {
138 return cblknum * 2 + uplo;
147 offset = cblknbr * 2;
148 n = cblknum - cblkmin2d;
150 return offset + n * ld + bloknum * 2 + uplo;
187 uplo = va_arg(ap,
int);
188 cblknum = va_arg(ap,
int);
189 bloknum = va_arg(ap,
int) - 1;
220 parsec_data_key_t key )
229 &uplo, &cblknum, &bloknum );
231 cblk = solvmtx->
cblktab + cblknum;
286 parsec_data_key_t key )
288 (void)mat; (void)key;
315 static parsec_data_t *
325 uplo = va_arg(ap,
int);
326 cblknum = va_arg(ap,
int);
327 bloknum = va_arg(ap,
int) - 1;
335 if ( bloknum == -1 ) {
337 return (parsec_data_t*)(cblk->
handler[uplo]);
344 return (parsec_data_t*)(blok->
handler[uplo]);
366 static parsec_data_t *
368 parsec_data_key_t key )
377 &uplo, &cblknum, &bloknum );
379 cblk = solvmtx->
cblktab + cblknum;
382 if ( bloknum == -1 ) {
384 return (parsec_data_t*)(cblk->
handler[uplo]);
391 return (parsec_data_t*)(blok->
handler[uplo]);
395 #if defined(PARSEC_PROF_TRACE)
423 parsec_sparse_matrix_key_to_string( parsec_data_collection_t *mat,
425 char *buffer, uint32_t buffer_size )
433 &uplo, &cblknum, &bloknum );
435 res = snprintf(buffer, buffer_size,
"(%d, %ld, %ld)",
436 uplo, (
long int)cblknum, (
long int)bloknum);
439 printf(
"error in key_to_string for tile (%d, %ld, %ld) key: %u\n",
440 uplo, (
long int)cblknum, (
long int)bloknum, key);
446 #if defined(PASTIX_CUDA_FERMI)
464 gpu_device_t* gpu_device;
470 ndevices = parsec_devices_enabled();
475 size = 2 * bloknbr *
sizeof(int);
480 bloktab = (
int*)malloc( size );
482 for (b=0, blok = solvmtx->
bloktab;
493 fprintf(stderr,
"ndevices = %ld\n", ndevices );
494 for(i = 0; i < ndevices; i++) {
495 if( NULL == (gpu_device = (gpu_device_t*)parsec_devices_get(i+2)) )
continue;
497 fprintf(stderr,
"cuda index = %d\n", gpu_device->cuda_index );
498 cudaSetDevice( gpu_device->cuda_index );
505 cudaMemcpyHostToDevice );
523 gpu_device_t* gpu_device;
526 ndevices = parsec_devices_enabled();
531 for(i = 0; i < ndevices; i++) {
532 if( NULL == (gpu_device = (gpu_device_t*)parsec_devices_get(i+2)) )
continue;
534 cudaSetDevice( gpu_device->cuda_index );
566 parsec_data_t **handler,
574 parsec_data_create( handler, o,
id, dataptr, size );
604 parsec_data_t **handler,
613 parsec_data_create( handler, o,
id, dataptr, size );
641 parsec_data_t **handler = (parsec_data_t **)( cblk->
handler );
643 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
654 o, handler + 1, cblknum * 2 + 1, spmtx, cblk,
PastixUCoef );
696 int nodes,
int myrank )
699 parsec_data_collection_t *o;
701 parsec_data_key_t key1, key2;
709 if ( spmtx != NULL ) {
716 o = (parsec_data_collection_t*)spmtx;
717 parsec_data_collection_init( o, nodes, myrank );
720 #if defined(PARSEC_PROF_TRACE)
721 o->key_to_string = parsec_sparse_matrix_key_to_string;
744 cblknum++, n++, cblk++ )
746 if ( cblk->
ownerid != myrank ) {
755 for(cblknum = cblkmin2d, n = 0;
757 cblknum++, n++, cblk++ )
759 if ( cblk->
ownerid != myrank ) {
765 if ( !(cblk->
cblktype & CBLK_TASKS_2D) ) {
769 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
780 assert( offset == 0 );
781 parsec_data_create( (parsec_data_t **)&( blok->
handler[0] ),
782 o, key1 + key2, dataptrL + offset, size );
785 parsec_data_create( (parsec_data_t **)&( blok->
handler[1] ),
786 o, key1 + key2 + 1, dataptrU + offset, size );
798 for ( ; blok < lblok; blok++, key2 += 2 ) {
805 while ( ( blok < lblok ) && ( blok[0].fcblknm == blok[1].fcblknm ) &&
806 ( blok[0].lcblknm == blok[1].lcblknm ) ) {
813 parsec_data_create( (parsec_data_t **)&( fblok->
handler[0] ),
820 parsec_data_create( (parsec_data_t **)&( fblok->
handler[1] ),
844 assert( offset == 0 );
846 (parsec_data_t **)&( blok->
handler[0] ), o, key1 + key2, ptrL + offset, size );
849 parsec_data_create( (parsec_data_t **)&( blok->
handler[1] ),
865 for ( ; blok < lblok; blok++, key2 += 2 ) {
871 while ( ( blok < lblok ) &&
873 ( blok[0].lcblknm == blok[1].lcblknm ) )
881 parsec_data_create( (parsec_data_t **)&( fblok->
handler[0] ),
888 parsec_data_create( (parsec_data_t **)&( fblok->
handler[1] ),
903 #if defined( PASTIX_CUDA_FERMI )
904 parsec_sparse_matrix_init_fermi( spmtx, solvmtx );
907 solvmtx->parsec_desc = spmtx;
931 #if defined(PASTIX_CUDA_FERMI)
932 parsec_sparse_matrix_destroy_fermi( spmtx );
937 for(i=0; i<cblkmin2d; i++, cblk++)
940 parsec_data_destroy( cblk->
handler[0] );
943 parsec_data_destroy( cblk->
handler[1] );
954 parsec_data_destroy( cblk->
handler[0] );
956 parsec_data_destroy( cblk->
handler[1] );
964 while( blok < cblk[1].fblokptr )
967 parsec_data_destroy( blok->
handler[0] );
969 parsec_data_destroy( blok->
handler[1] );
980 parsec_data_collection_destroy( (parsec_data_collection_t*)spmtx );
BEGIN_C_DECLS typedef int pastix_int_t
struct pastix_lrblock_s pastix_lrblock_t
The block low-rank structure to hold a matrix in low-rank form.
The block low-rank structure to hold a matrix in low-rank form.
spm_mtxtype_t pastix_mtxtype_t
Matrix symmetry type property.
parsec_data_collection_t super
void parsec_sparse_matrix_init(SolverMatrix *solvmtx, int typesize, pastix_mtxtype_t mtxtype, int nodes, int myrank)
Generate the PaRSEC descriptor of the sparse matrix.
static uint32_t parsec_sparse_matrix_data_key(parsec_data_collection_t *mat,...)
Compute the unique key from the triplet (uplo, cblknum, bloknum).
static void pastix_parsec_register_cblk_lr(parsec_data_collection_t *o, parsec_data_t **handler, pastix_int_t id, const SolverCblk *cblk, int side)
TODO.
static int32_t parsec_sparse_matrix_vpid_of_key(parsec_data_collection_t *mat, parsec_data_key_t key)
Return the rank of the virtual process owner of the piece of data (key)
static void pastix_parsec_register_cblk(parsec_data_collection_t *o, pastix_int_t cblknum, const parsec_sparse_matrix_desc_t *spmtx, const SolverCblk *cblk)
TODO.
static parsec_data_t * parsec_sparse_matrix_data_of_key(parsec_data_collection_t *mat, parsec_data_key_t key)
Return the data handler associated to the piece of data (key).
static uint32_t parsec_sparse_matrix_rank_of(parsec_data_collection_t *mat,...)
Return the rank of the owner of the piece of data (uplo, cblknum, bloknum).
static void pastix_parsec_register_cblk_fr(parsec_data_collection_t *o, parsec_data_t **handler, pastix_int_t id, const parsec_sparse_matrix_desc_t *spmtx, const SolverCblk *cblk, int side)
TODO.
static parsec_data_t * parsec_sparse_matrix_data_of(parsec_data_collection_t *mat,...)
Return the data handler associated to the piece of data (uplo, cblknum, bloknum).
static int32_t parsec_sparse_matrix_vpid_of(parsec_data_collection_t *mat,...)
Return the rank of the virtual process owner of the piece of data (uplo, cblknum, bloknum).
static uint32_t parsec_sparse_matrix_rank_of_key(parsec_data_collection_t *mat, parsec_data_key_t key)
Return the rank of the owner of the piece of data (key)
void parsec_sparse_matrix_destroy(parsec_sparse_matrix_desc_t *desc)
Free the PaRSEC descriptor of the sparse matrix.
static void spm_data_key_to_value(parsec_data_key_t key, const SolverMatrix *solvmtx, int *uplo, pastix_int_t *cblknum, pastix_int_t *bloknum)
Compute the triplet (uplo, cblknum, bloknum) from the key.
PaRSEC descriptor structure for the sparse matrix.
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
SolverBlok *restrict bloktab
pastix_lrblock_t * LRblock[2]
SolverCblk *restrict cblktab
Solver column block structure.
Solver column block structure.