19 #include "common/common.h"
28 #ifndef DOXYGEN_SHOULD_SKIP_THIS
30 #if defined(PASTIX_STARPU_INTERFACE_DEBUG)
31 #define pastix_starpu_logger fprintf( stderr, "pastix_starpu: %s\n", __func__ )
33 #define pastix_starpu_logger do {} while(0)
37 psi_register_data_handle( starpu_data_handle_t handle,
int home_node,
void *data_interface )
44 for ( node = 0; node < STARPU_MAXNODES; node++ ) {
50 if ( node != home_node ) {
51 local_interface->
dataptr = NULL;
56 static inline starpu_ssize_t
57 psi_allocate_data_on_node(
void *data_interface,
unsigned node )
60 starpu_ssize_t allocated_memory;
67 if ( allocated_memory <= 0 ) {
71 handle = starpu_malloc_on_node( node, allocated_memory );
76 #if defined(PASTIX_DEBUG_STARPU)
80 fprintf( stderr,
"allocate fanin %d [%p](%ld)\n",
81 cblk->
gfaninnum, (
void*)handle, allocated_memory );
84 fprintf( stderr,
"allocate recv %d [%p](%ld)\n",
85 cblk->
gfaninnum, (
void*)handle, allocated_memory );
90 if ( starpu_node_get_kind( node ) != STARPU_OPENCL_RAM ) {
132 return allocated_memory;
136 psi_free_data_on_node(
void *data_interface,
unsigned node )
142 pastix_starpu_logger;
150 psi_init(
void *data_interface )
156 pastix_starpu_logger;
160 psi_to_pointer(
void *data_interface,
unsigned node )
165 pastix_starpu_logger;
171 psi_get_size( starpu_data_handle_t handle )
174 starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM );
180 if ( interf->
offset == -1 ) {
188 for( ; fblok < lblok; fblok++ ) {
193 size = ncols * nrows;
197 "psi_get_size: The given data is not a pastix interface for starpu." );
204 psi_get_alloc_size( starpu_data_handle_t handle )
207 starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM );
209 pastix_starpu_logger;
213 "psi_get_alloc_size: The given data is not a pastix interface for starpu." );
216 STARPU_ASSERT_MSG( interf->
allocsize != (
size_t)-1,
217 "psi_get_alloc_size: The allocation size needs to be defined" );
222 static inline uint32_t
223 psi_footprint( starpu_data_handle_t handle )
226 starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM );
229 pastix_starpu_logger;
231 return starpu_hash_crc32c_be( cblk->
gcblknum, interf->
offset + 1 );
234 static inline uint32_t
235 psi_alloc_footprint( starpu_data_handle_t handle )
238 starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM );
240 pastix_starpu_logger;
242 return starpu_hash_crc32c_be( interf->
allocsize, 0 );
246 psi_compare(
void *data_interface_a,
void *data_interface_b )
254 pastix_starpu_logger;
257 return ( solva == solvb );
261 psi_alloc_compare(
void *data_interface_a,
void *data_interface_b )
266 pastix_starpu_logger;
273 psi_display( starpu_data_handle_t handle, FILE *f )
280 pastix_starpu_logger;
282 if ( interf->
offset == -1 ) {
283 fprintf( f,
"Cblk%ld", (
long)( cblk->
gcblknum ) );
286 fprintf( f,
"Cblk%ldBlok%ld", (
long)( cblk->
gcblknum ), (
long)( interf->
offset ) );
295 size_t elemsize = pastix_size_of( interf->
flttype );
302 pastix_starpu_logger;
306 for ( i = 0; i < interf->
nbblok; i++, blok++, LRblock++ ) {
316 assert( !( interf->
cblk->
cblktype & CBLK_COMPRESSED ) );
318 pastix_starpu_logger;
336 pastix_starpu_logger;
338 for ( ; j < interf->
nbblok; j++, blok++, LRblock++ ) {
342 case PastixComplex64:
345 case PastixComplex32:
363 assert( !( interf->
cblk->
cblktype & CBLK_COMPRESSED ) );
365 pastix_starpu_logger;
367 memcpy( *ptr, interf->
dataptr, *count );
371 psi_pack_data( starpu_data_handle_t handle,
unsigned node,
void **ptr, starpu_ssize_t *count )
373 STARPU_ASSERT( starpu_data_test_if_allocated_on_node( handle, node ) );
380 pastix_starpu_logger;
382 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
383 *count = psi_compute_size_lr( interf );
386 *count = psi_compute_size_fr( interf );
390 *ptr = (
void *)starpu_malloc_on_node_flags( node, *count, 0 );
392 if ( cblk->
cblktype & CBLK_COMPRESSED ) {
393 psi_pack_lr( interf, ptr );
396 psi_pack_fr( interf, ptr, count );
408 const char *input = ptr;
414 pastix_starpu_logger;
420 count -= interf->
nbblok *
sizeof( int );
423 psi_allocate_data_on_node( interf, node );
429 for ( i=0; i < interf->
nbblok; i++, blok++, LRblock++ ) {
434 case PastixComplex64:
437 case PastixComplex32:
455 pastix_starpu_logger;
458 memcpy( interf->
dataptr, ptr, count );
462 psi_peek_data( starpu_data_handle_t handle,
unsigned node,
void *ptr,
size_t count )
467 STARPU_ASSERT( starpu_data_test_if_allocated_on_node( handle, node ) );
469 pastix_starpu_logger;
472 psi_unpack_lr( interf, node, ptr, count );
475 psi_unpack_fr( interf, ptr, count );
482 psi_unpack_data( starpu_data_handle_t handle,
unsigned node,
void *ptr,
size_t count )
484 pastix_starpu_logger;
486 psi_peek_data( handle, node, ptr, count );
489 starpu_free_on_node_flags( node, (uintptr_t)ptr, count, 0 );
494 static inline starpu_ssize_t
495 psi_describe(
void *data_interface,
char *buf,
size_t size )
500 pastix_starpu_logger;
502 return snprintf( buf, size,
"Cblk%ld", (
long)( cblk->
gcblknum ) );
506 psi_copy_any_to_any(
void *src_interface,
517 pastix_starpu_logger;
519 assert( !( pastix_src->
cblk->
cblktype & CBLK_COMPRESSED ) );
522 if ( starpu_interface_copy( (uintptr_t)pastix_src->
dataptr, 0, src_node,
523 (uintptr_t)pastix_dst->
dataptr, 0, dst_node,
529 starpu_interface_data_copy( src_node, dst_node, pastix_src->
allocsize );
534 static const struct starpu_data_copy_methods psi_copy_methods = {
535 .any_to_any = psi_copy_any_to_any,
546 .register_data_handle = psi_register_data_handle,
547 .allocate_data_on_node = psi_allocate_data_on_node,
548 .free_data_on_node = psi_free_data_on_node,
550 .copy_methods = &psi_copy_methods,
551 .to_pointer = psi_to_pointer,
552 .get_size = psi_get_size,
553 .get_alloc_size = psi_get_alloc_size,
554 .footprint = psi_footprint,
555 .alloc_footprint = psi_alloc_footprint,
556 .compare = psi_compare,
557 .alloc_compare = psi_alloc_compare,
558 .display = psi_display,
559 .describe = psi_describe,
560 .pack_data = psi_pack_data,
561 #if defined( HAVE_STARPU_DATA_PEEK )
562 .peek_data = psi_peek_data,
564 .unpack_data = psi_unpack_data,
565 .interfaceid = STARPU_UNKNOWN_INTERFACE_ID,
568 .name =
"PASTIX_STARPU_INTERFACE"
665 pastix_starpu_logger;
672 if ( !( cblk->
cblktype & CBLK_COMPRESSED ) ) {
675 if ( interf.
dataptr != NULL ) {
676 home_node = STARPU_MAIN_RAM;
684 home_node = STARPU_MAIN_RAM;
687 if ( (cblk->
cblktype & CBLK_FANIN ) &&
694 interf.
nbblok = lblok - fblok;
697 #if defined(PASTIX_STARPU_INTERFACE_DEBUG)
699 "cblk (%9s, size=%8zu, nbblok=%2ld )\n",
700 cblk->
cblktype & CBLK_COMPRESSED ?
"Low-rank" :
"Full-rank",
708 pastix_starpu_register_ws( starpu_data_handle_t *handleptr,
727 pastix_starpu_logger;
732 interf.
nbblok = lblok - fblok;
735 if ( (cblk->
cblktype & CBLK_COMPRESSED) &&
738 size = M * N * pastix_size_of( flttype );
742 if ( !(cblk->
cblktype & CBLK_COMPRESSED) )
744 size = M * N * pastix_size_of( flttype );
749 #if defined(PASTIX_STARPU_INTERFACE_DEBUG)
751 "cblk (%9s, size=%8zu, nbblok=%2ld )\n",
752 cblk->
cblktype & CBLK_COMPRESSED ?
"Low-rank" :
"Full-rank",
760 pastix_starpu_register_blok( starpu_data_handle_t *handleptr,
780 pastix_starpu_logger;
786 while( (cblok[0].lcblknm == cblok[1].lcblknm) &&
787 (cblok[0].fcblknm == cblok[1].fcblknm) )
800 if ( !( cblk->
cblktype & CBLK_COMPRESSED ) )
802 size = M * N * pastix_size_of( flttype );
805 interf.
nbblok = lblok - fblok;
808 #if defined(PASTIX_STARPU_INTERFACE_DEBUG)
810 "cblk (%9s, size=%8zu, nbblok=%2ld )\n",
811 cblk->
cblktype & CBLK_COMPRESSED ?
"Low-rank" :
"Full-rank",
BEGIN_C_DECLS typedef int pastix_int_t
struct pastix_lrblock_s pastix_lrblock_t
The block low-rank structure to hold a matrix in low-rank form.
The block low-rank structure to hold a matrix in low-rank form.
const char * core_slrunpack2(pastix_int_t M, pastix_int_t N, pastix_lrblock_t *A, const char *input, char **outptr)
Unpack low-rank data and fill the cblk concerned by the computation.
char * core_clrpack(pastix_int_t M, pastix_int_t N, const pastix_lrblock_t *A, char *buffer)
Pack low-rank data by side.
char * core_slrpack(pastix_int_t M, pastix_int_t N, const pastix_lrblock_t *A, char *buffer)
Pack low-rank data by side.
char * core_dlrpack(pastix_int_t M, pastix_int_t N, const pastix_lrblock_t *A, char *buffer)
Pack low-rank data by side.
const char * core_dlrunpack2(pastix_int_t M, pastix_int_t N, pastix_lrblock_t *A, const char *input, char **outptr)
Unpack low-rank data and fill the cblk concerned by the computation.
const char * core_clrunpack2(pastix_int_t M, pastix_int_t N, pastix_lrblock_t *A, const char *input, char **outptr)
Unpack low-rank data and fill the cblk concerned by the computation.
char * core_zlrpack(pastix_int_t M, pastix_int_t N, const pastix_lrblock_t *A, char *buffer)
Pack low-rank data by side.
const char * core_zlrunpack2(pastix_int_t M, pastix_int_t N, pastix_lrblock_t *A, const char *input, char **outptr)
Unpack low-rank data and fill the cblk concerned by the computation.
size_t core_zlrgetsize(pastix_int_t M, pastix_int_t N, pastix_lrblock_t *A)
Compute the size of a block to send in low-rank (LR) form.
spm_coeftype_t pastix_coeftype_t
Arithmetic types.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
enum starpu_data_interface_id id
pastix_coeftype_t flttype
struct starpu_data_interface_ops pastix_starpu_interface_ops
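StarPU data interface operations for the PaStiX interface ("PASTIX_STARPU_INTERFACE"): the table of psi_* callbacks for handle registration, allocation and release on a node, copy, size and footprint queries, comparison, display, and pack/unpack.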
void pastix_starpu_interface_init()
Initialize the interface ID.
void pastix_starpu_interface_fini()
Finalize the interface and reset the ID.
#define PASTIX_STARPU_INTERFACE_ID
Alias to get the Interface id.
struct pastix_starpu_interface_s pastix_starpu_interface_t
Interface data structure to register the pieces of data in StarPU.
void pastix_starpu_register(starpu_data_handle_t *handleptr, const SolverCblk *cblk, pastix_coefside_t side, pastix_coeftype_t flttype)
Register a cblk at the StarPU level.
Interface data structure to register the pieces of data in StarPU.
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
pastix_lrblock_t * LRblock[2]
Solver column block structure.