21 #include <parsec/data.h>
22 #include <parsec/data_distribution.h>
23 #if defined(PASTIX_CUDA_FERMI)
24 #include <parsec/devices/cuda/dev_cuda.h>
/*
 * Fragment of a helper whose name, leading parameters and most of its body
 * fall outside this excerpt. The visible lines show that it receives the
 * solver matrix and reports results through the cblknum / bloknum pointers.
 * NOTE(review): this looks like the inverse of the key encoding (key ->
 * uplo, cblknum, bloknum) -- confirm against the complete function.
 */
62 const SolverMatrix *solvmtx,
64 pastix_int_t *cblknum,
65 pastix_int_t *bloknum )
67 parsec_data_key_t key2;
68 pastix_int_t cblkmin2d, cblknbr;
/* Cache the column-block count and the cblkmin2d index of the solver matrix. */
70 cblknbr = solvmtx->cblknbr;
71 cblkmin2d = solvmtx->cblkmin2d;
76 pastix_int_t m, n, ld;
/* ld is twice solvmtx->cblkmaxblk. */
79 ld = solvmtx->cblkmaxblk * 2;
/* n is relative to cblkmin2d; shift it back to an absolute cblk index. */
/* (How n is derived is not visible in this excerpt.) */
86 *cblknum = cblkmin2d + n;
/*
 * Fragment of the variadic key-computation routine (its signature is outside
 * this excerpt). It pulls (uplo, cblknum, bloknum) from the va_list and
 * returns an integer key:
 *   - once 1 has been subtracted, bloknum == -1 yields cblknum * 2 + uplo;
 *   - the other visible return is offset + n * ld + bloknum * 2 + uplo, with
 *     offset = 2 * cblknbr, n = cblknum - cblkmin2d and ld = 2 * cblkmaxblk.
 * NOTE(review): the three arguments are fetched as int although they are
 * stored in pastix_int_t variables -- confirm that callers pass int-sized
 * values.
 */
127 pastix_int_t cblknum, bloknum;
130 uplo = va_arg(ap,
int);
131 cblknum = va_arg(ap,
int);
/* The caller's block number is shifted down by one before use. */
132 bloknum = va_arg(ap,
int) - 1;
/* bloknum == -1 after the shift: the key depends only on cblknum and uplo. */
137 if ( bloknum == -1 ) {
138 return cblknum * 2 + uplo;
141 pastix_int_t offset, ld, cblknbr;
142 pastix_int_t cblkmin2d, n;
144 cblknbr = spmtx->
solvmtx->cblknbr;
145 cblkmin2d = spmtx->
solvmtx->cblkmin2d;
146 ld = spmtx->
solvmtx->cblkmaxblk * 2;
/* Keys below offset are exactly those the cblknum * 2 + uplo form can produce. */
147 offset = cblknbr * 2;
/* Column-block index relative to cblkmin2d. */
148 n = cblknum - cblkmin2d;
150 return offset + n * ld + bloknum * 2 + uplo;
/*
 * Fragment of a variadic accessor (name and signature outside this excerpt).
 * It reads (uplo, cblknum, bloknum) from the va_list, subtracting 1 from
 * bloknum, and resolves the column block in spmtx->solvmtx->cblktab.
 * NOTE(review): what is derived from cblk afterwards is not visible here.
 */
184 pastix_int_t cblknum, bloknum;
187 uplo = va_arg(ap,
int);
188 cblknum = va_arg(ap,
int);
189 bloknum = va_arg(ap,
int) - 1;
/* Pointer to entry number cblknum of the column-block table. */
194 cblk = spmtx->
solvmtx->cblktab + cblknum;
/*
 * Fragment of a key-based accessor (name outside this excerpt). The visible
 * lines pass &uplo, &cblknum and &bloknum to a call whose callee is not
 * shown, then resolve the column block in solvmtx->cblktab.
 * NOTE(review): presumably that call decodes the key -- confirm.
 */
220 parsec_data_key_t key )
223 SolverMatrix *solvmtx = spmtx->
solvmtx;
226 pastix_int_t cblknum, bloknum;
229 &uplo, &cblknum, &bloknum );
/* Pointer to entry number cblknum of the column-block table. */
231 cblk = solvmtx->cblktab + cblknum;
/*
 * Fragment of a key-based callback (name and return statement outside this
 * excerpt) that uses neither of its arguments.
 */
286 parsec_data_key_t key )
/* Void casts mark both parameters as intentionally unused. */
288 (void)mat; (void)key;
/*
 * Fragment of a static variadic accessor returning a parsec_data_t pointer
 * (its name is outside this excerpt). It reads (uplo, cblknum, bloknum) from
 * the va_list, subtracting 1 from bloknum, and returns a stored handler:
 *   - bloknum == -1: the handler of the column block, cblk->handler[uplo];
 *   - the other visible return: the handler of a block, blok->handler[uplo].
 * NOTE(review): how blok is located is not visible in this excerpt.
 */
315 static parsec_data_t *
322 pastix_int_t cblknum, bloknum;
325 uplo = va_arg(ap,
int);
326 cblknum = va_arg(ap,
int);
327 bloknum = va_arg(ap,
int) - 1;
332 cblk = spmtx->
solvmtx->cblktab + cblknum;
/* bloknum == -1 after the shift: return the column block's own handler. */
335 if ( bloknum == -1 ) {
337 return (parsec_data_t*)(cblk->
handler[uplo]);
/* Handler of an individual block. */
344 return (parsec_data_t*)(blok->
handler[uplo]);
/*
 * Fragment of a static key-based accessor returning a parsec_data_t pointer
 * (its name is outside this excerpt). uplo, cblknum and bloknum are filled in
 * by a call whose callee is not shown; the routine then returns a stored
 * handler:
 *   - bloknum == -1: the handler of the column block, cblk->handler[uplo];
 *   - the other visible return: the handler of a block, blok->handler[uplo].
 * NOTE(review): how blok is located is not visible in this excerpt.
 */
366 static parsec_data_t *
368 parsec_data_key_t key )
371 SolverMatrix *solvmtx = spmtx->
solvmtx;
374 pastix_int_t cblknum, bloknum;
377 &uplo, &cblknum, &bloknum );
379 cblk = solvmtx->cblktab + cblknum;
/* bloknum == -1: return the column block's own handler. */
382 if ( bloknum == -1 ) {
384 return (parsec_data_t*)(cblk->
handler[uplo]);
/* Handler of an individual block. */
391 return (parsec_data_t*)(blok->
handler[uplo]);
395 #if defined(PARSEC_PROF_TRACE)
/*
 * parsec_sparse_matrix_key_to_string (fragment): writes the text
 * "(uplo, cblknum, bloknum)" for a data key into buffer, limited to
 * buffer_size bytes by snprintf.
 * uplo, cblknum and bloknum are filled in by a call whose callee is not
 * visible here. The condition guarding the printf diagnostic and the return
 * value are also outside this excerpt.
 * NOTE(review): the diagnostic prints key with %u -- confirm this matches
 * the width of parsec_data_key_t.
 */
423 parsec_sparse_matrix_key_to_string( parsec_data_collection_t *mat,
425 char *buffer, uint32_t buffer_size )
429 pastix_int_t cblknum, bloknum;
433 &uplo, &cblknum, &bloknum );
/* The pastix_int_t values are cast to long int to match the %ld conversions. */
435 res = snprintf(buffer, buffer_size,
"(%d, %ld, %ld)",
436 uplo, (
long int)cblknum, (
long int)bloknum);
/* Diagnostic on standard output naming the tile and its key. */
439 printf(
"error in key_to_string for tile (%d, %ld, %ld) key: %u\n",
440 uplo, (
long int)cblknum, (
long int)bloknum, key);
446 #if defined(PASTIX_CUDA_FERMI)
/*
 * Fragment of the GPU set-up routine compiled under PASTIX_CUDA_FERMI
 * (NOTE(review): presumably parsec_sparse_matrix_init_fermi, which is called
 * with (spmtx, solvmtx) elsewhere in this file -- confirm).
 * Visible behaviour: allocate a host array of 2 * bloknbr ints, walk
 * solvmtx->bloktab, then select each available GPU device in turn; the
 * trailing cudaMemcpyHostToDevice argument belongs to a copy call whose
 * beginning is outside this excerpt.
 * NOTE(review): in the visible lines neither the malloc result nor the
 * cudaSetDevice return code is checked.
 */
449 const SolverMatrix *solvmtx )
451 gpu_device_t* gpu_device;
453 pastix_int_t i, b, bloknbr, ndevices;
457 ndevices = parsec_devices_enabled();
461 bloknbr = solvmtx->bloknbr;
/* Two ints per block. */
462 size = 2 * bloknbr *
sizeof(int);
467 bloktab = (
int*)malloc( size );
469 for (b=0, blok = solvmtx->bloktab;
/* NOTE(review): %ld is used with a pastix_int_t -- confirm the type is long. */
480 fprintf(stderr,
"ndevices = %ld\n", ndevices );
481 for(i = 0; i < ndevices; i++) {
/* NOTE(review): the i+2 index presumably skips two non-GPU devices -- confirm. */
/* Devices for which the lookup returns NULL are skipped. */
482 if( NULL == (gpu_device = (gpu_device_t*)parsec_devices_get(i+2)) )
continue;
484 fprintf(stderr,
"cuda index = %d\n", gpu_device->cuda_index );
/* Make this device current for the CUDA calls that follow. */
485 cudaSetDevice( gpu_device->cuda_index );
492 cudaMemcpyHostToDevice );
/*
 * Fragment of the GPU teardown routine compiled under PASTIX_CUDA_FERMI
 * (NOTE(review): presumably parsec_sparse_matrix_destroy_fermi -- confirm).
 * It repeats the device loop of the set-up code: each available GPU device
 * is made current; the per-device work that follows is outside this excerpt.
 */
500 gpu_device_t* gpu_device;
501 pastix_int_t i, ndevices;
503 ndevices = parsec_devices_enabled();
508 for(i = 0; i < ndevices; i++) {
/* Devices for which the lookup returns NULL are skipped. */
509 if( NULL == (gpu_device = (gpu_device_t*)parsec_devices_get(i+2)) )
continue;
511 cudaSetDevice( gpu_device->cuda_index );
/*
 * pastix_parsec_register_cblk_lr (fragment): creates the PaRSEC data entry
 * for one side of a column block and stores it through handler.
 * NOTE(review): the _lr suffix presumably means the low-rank (compressed)
 * case -- confirm. The remaining parameters and the computation of id,
 * dataptr and size are outside this excerpt.
 */
520 pastix_parsec_register_cblk_lr( parsec_data_collection_t *o,
521 parsec_data_t **handler,
529 parsec_data_create( handler, o,
id, dataptr, size );
/*
 * pastix_parsec_register_cblk_fr (fragment): creates the PaRSEC data entry
 * for one side of a column block and stores it through handler.
 * NOTE(review): the _fr suffix presumably means the full-rank case --
 * confirm. The remaining parameters and the computation of id, dataptr and
 * size are outside this excerpt.
 */
533 pastix_parsec_register_cblk_fr( parsec_data_collection_t *o,
534 parsec_data_t **handler,
543 parsec_data_create( handler, o,
id, dataptr, size );
/*
 * pastix_parsec_register_cblk (fragment): registers the data of one column
 * block with the collection o.
 * The lower part uses key cblknum * 2 and handler slot 0; the upper part
 * uses key cblknum * 2 + 1 and handler slot 1. Blocks flagged
 * CBLK_COMPRESSED go through the _lr helper; the _fr helper, which also
 * receives spmtx, is used in the other visible calls.
 * NOTE(review): any condition guarding the U-side registrations is not
 * visible in this excerpt.
 */
547 pastix_parsec_register_cblk( parsec_data_collection_t *o,
548 pastix_int_t cblknum,
/* View the column block's handler array as PaRSEC data pointers. */
552 parsec_data_t **handler = (parsec_data_t **)( cblk->
handler );
554 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
555 pastix_parsec_register_cblk_lr( o, handler, cblknum * 2, cblk,
PastixLCoef );
558 pastix_parsec_register_cblk_lr( o, handler + 1, cblknum * 2 + 1, cblk,
PastixUCoef );
562 pastix_parsec_register_cblk_fr( o, handler, cblknum * 2, spmtx, cblk,
PastixLCoef );
564 pastix_parsec_register_cblk_fr(
565 o, handler + 1, cblknum * 2 + 1, spmtx, cblk,
PastixUCoef );
/*
 * Fragment of the descriptor initialisation routine (its name and leading
 * parameters are outside this excerpt). Visible behaviour:
 *   1. initialise the PaRSEC data collection for nodes / myrank;
 *   2. register every locally owned column block, in two passes (from the
 *      start of cblktab, then from index cblkmin2d);
 *   3. in the second pass, also create per-block data entries with keys
 *      key1 + key2 (handler[0]) and key1 + key2 + 1 (handler[1]);
 *   4. optionally run the Fermi GPU set-up and store the descriptor in
 *      solvmtx->parsec_desc.
 * Many intermediate lines (loop bounds, key1/key2/offset/size computation)
 * are missing from this excerpt.
 */
606 int typesize,
int mtxtype,
607 int nodes,
int myrank )
610 parsec_data_collection_t *o;
611 pastix_int_t cblknbr, cblkmin2d, ld;
612 parsec_data_key_t key1, key2;
615 pastix_int_t m=0, n=0, cblknum, nbrow;
620 if ( spmtx != NULL ) {
/* The descriptor is used as a parsec_data_collection_t through this cast. */
627 o = (parsec_data_collection_t*)spmtx;
628 parsec_data_collection_init( o, nodes, myrank );
/* Key pretty-printer, installed only in PARSEC_PROF_TRACE builds. */
631 #if defined(PARSEC_PROF_TRACE)
632 o->key_to_string = parsec_sparse_matrix_key_to_string;
646 cblknbr = solvmtx->cblknbr;
647 cblkmin2d = solvmtx->cblkmin2d;
648 ld = solvmtx->cblkmaxblk * 2;
/* First pass, starting at the first entry of the column-block table. */
652 cblk = spmtx->
solvmtx->cblktab;
655 cblknum++, n++, cblk++ )
/* Column blocks owned by another rank take this branch. */
/* NOTE(review): the branch body is not visible; presumably they are skipped. */
657 if ( cblk->
ownerid != myrank ) {
660 pastix_parsec_register_cblk( o, cblknum, spmtx, cblk );
/* Second pass, starting at index cblkmin2d; n restarts at 0. */
665 cblk = spmtx->
solvmtx->cblktab + cblkmin2d;
666 for(cblknum = cblkmin2d, n = 0;
668 cblknum++, n++, cblk++ )
670 if ( cblk->
ownerid != myrank ) {
674 pastix_parsec_register_cblk( o, cblknum, spmtx, cblk );
/* Column blocks without the CBLK_TASKS_2D flag take this branch. */
/* NOTE(review): presumably they get no per-block entries -- confirm. */
676 if ( !(cblk->
cblktype & CBLK_TASKS_2D) ) {
680 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
/* The first per-block entry is expected at offset 0. */
691 assert( offset == 0 );
692 parsec_data_create( (parsec_data_t **)&( blok->
handler[0] ),
693 o, key1 + key2, dataptrL + offset, size );
696 parsec_data_create( (parsec_data_t **)&( blok->
handler[1] ),
697 o, key1 + key2 + 1, dataptrU + offset, size );
/* Each block advances key2 by 2, matching the two handler slots. */
709 for ( ; blok < lblok; blok++, key2 += 2 ) {
/* Advance while the next block has the same fcblknm and lcblknm. */
/* NOTE(review): blok[1] is read when blok is the last entry before lblok -- */
/* confirm that element is always valid. */
716 while ( ( blok < lblok ) && ( blok[0].fcblknm == blok[1].fcblknm ) &&
717 ( blok[0].lcblknm == blok[1].lcblknm ) ) {
/* The data entries are attached to fblok. */
/* NOTE(review): presumably fblok is the first block of the merged run. */
724 parsec_data_create( (parsec_data_t **)&( fblok->
handler[0] ),
731 parsec_data_create( (parsec_data_t **)&( fblok->
handler[1] ),
/* Same sequence again, with ptrL in place of dataptrL. */
/* NOTE(review): presumably this is the non-compressed case -- confirm. */
755 assert( offset == 0 );
757 (parsec_data_t **)&( blok->
handler[0] ), o, key1 + key2, ptrL + offset, size );
760 parsec_data_create( (parsec_data_t **)&( blok->
handler[1] ),
776 for ( ; blok < lblok; blok++, key2 += 2 ) {
782 while ( ( blok < lblok ) &&
784 ( blok[0].lcblknm == blok[1].lcblknm ) )
792 parsec_data_create( (parsec_data_t **)&( fblok->
handler[0] ),
799 parsec_data_create( (parsec_data_t **)&( fblok->
handler[1] ),
/* GPU-side set-up, only in PASTIX_CUDA_FERMI builds. */
814 #if defined( PASTIX_CUDA_FERMI )
815 parsec_sparse_matrix_init_fermi( spmtx, solvmtx );
/* Record the descriptor on the solver matrix. */
818 solvmtx->parsec_desc = spmtx;
/*
 * Fragment of the descriptor teardown routine (its name and signature are
 * outside this excerpt). Visible behaviour:
 *   1. in PASTIX_CUDA_FERMI builds, run the GPU teardown first;
 *   2. destroy handler[0] / handler[1] of each column block below cblkmin2d;
 *   3. for column blocks from cblkmin2d up to cblknbr, destroy the column
 *      block handlers and those of each block up to cblk[1].fblokptr;
 *   4. destroy the PaRSEC data collection itself.
 * NOTE(review): any conditions guarding the individual destroy calls are not
 * visible in this excerpt.
 */
840 pastix_int_t i, cblkmin2d;
842 #if defined(PASTIX_CUDA_FERMI)
843 parsec_sparse_matrix_destroy_fermi( spmtx );
846 cblkmin2d = spmtx->
solvmtx->cblkmin2d;
847 cblk = spmtx->
solvmtx->cblktab;
/* Column blocks with index below cblkmin2d: only their own handlers. */
848 for(i=0; i<cblkmin2d; i++, cblk++)
851 parsec_data_destroy( cblk->
handler[0] );
854 parsec_data_destroy( cblk->
handler[1] );
/* Remaining column blocks; cblk carries on from where the first loop stopped. */
862 for(i=cblkmin2d; i<spmtx->
solvmtx->cblknbr; i++, cblk++)
865 parsec_data_destroy( cblk->
handler[0] );
867 parsec_data_destroy( cblk->
handler[1] );
/* The loop over this column block's blocks stops at the next cblk's first block. */
875 while( blok < cblk[1].fblokptr )
878 parsec_data_destroy( blok->
handler[0] );
880 parsec_data_destroy( blok->
handler[1] );
/* Finally release the collection that the descriptor is cast to. */
891 parsec_data_collection_destroy( (parsec_data_collection_t*)spmtx );