25 #ifndef DOXYGEN_SHOULD_SKIP_THIS
30 #include "sopalin/sopalin_data.h"
39 struct cl_blok_sscalo_args_s {
41 sopalin_data_t *sopalin_data;
54 #if defined( PASTIX_STARPU_PROFILING )
55 starpu_profile_t blok_sscalo_profile = {
63 void blok_sscalo_profile_register(
void ) __attribute__( ( constructor ) );
65 blok_sscalo_profile_register(
void )
67 profiling_register_cl( &blok_sscalo_profile );
70 static void (*blok_strsmsp_callback)(
void*) = cl_profiling_callback;
82 struct starpu_perfmodel_arch *arch,
85 struct cl_blok_sscalo_args_s *args = (
struct cl_blok_sscalo_args_s *)(task->cl_arg);
92 switch( arch->devices->type ) {
93 case STARPU_CPU_WORKER:
96 case STARPU_CUDA_WORKER:
105 cost = modelsGetCost2Param( coefs, M, N ) * 1e6;
111 static struct starpu_perfmodel starpu_blok_sscalo_model = {
112 #if defined(PASTIX_STARPU_COST_PER_ARCH)
113 .type = STARPU_PER_ARCH,
116 .type = STARPU_HISTORY_BASED,
118 .symbol =
"blok_sscalo",
121 #if !defined(PASTIX_STARPU_SIMULATION)
128 struct cl_blok_sscalo_args_s *args = (
struct cl_blok_sscalo_args_s *)cl_arg;
133 A = pastix_starpu_blok_get_ptr( descr[0] );
134 D = pastix_starpu_blok_get_ptr( descr[1] );
135 B = pastix_starpu_blok_get_ptr( descr[2] );
137 assert( args->cblk->cblktype & CBLK_TASKS_2D );
143 CODELETS_CPU( blok_sscalo, 3 );
175 struct cl_blok_sscalo_args_s *cl_arg = NULL;
177 #if defined(PASTIX_DEBUG_STARPU) || defined(PASTIX_STARPU_PROFILING_LOG)
181 starpu_data_handle_t *handler = (starpu_data_handle_t *)( blok->
handler );
184 pastix_starpu_register_blok( handler+1, cblk, blok, PastixFloat );
186 #if defined(PASTIX_WITH_MPI)
188 int64_t tag_desc = sopalin_data->solvmtx->starpu_desc->mpitag;
189 int64_t tag_cblk = 2 * sopalin_data->solvmtx->gcblknbr;
190 int64_t tag_blok = 2 * (blok - sopalin_data->solvmtx->bloktab) + 1;
192 starpu_mpi_data_register( *(handler + 1),
193 tag_desc + tag_cblk + tag_blok,
201 #if defined(PASTIX_WITH_MPI)
204 if ( cblk->
ownerid == sopalin_data->solvmtx->clustnum ) {
210 if ( !need_submit ) {
220 cl_arg = malloc(
sizeof(
struct cl_blok_sscalo_args_s) );
221 cl_arg->sopalin_data = sopalin_data;
222 #if defined(PASTIX_STARPU_PROFILING)
223 cl_arg->profile_data.measures = blok_sscalo_profile.measures;
224 cl_arg->profile_data.flops = NAN;
226 cl_arg->trans = trans;
228 cl_arg->blok_m = blok_m;
231 #if defined(PASTIX_DEBUG_STARPU) || defined(PASTIX_STARPU_PROFILING_LOG)
233 asprintf( &task_name,
"%s( %ld, %ld )",
234 cl_blok_sscalo_cpu.name,
235 (
long)(cblk - sopalin_data->solvmtx->cblktab),
236 (
long)(blok - sopalin_data->solvmtx->bloktab) );
239 pastix_starpu_insert_task(
241 STARPU_CL_ARGS, cl_arg,
sizeof(
struct cl_blok_sscalo_args_s ),
242 #
if defined(PASTIX_STARPU_PROFILING)
243 STARPU_CALLBACK_WITH_ARG_NFREE, blok_sscalo_callback, cl_arg,
248 #
if defined(PASTIX_DEBUG_STARPU) || defined(PASTIX_STARPU_PROFILING_LOG)
249 STARPU_NAME, task_name,
251 #
if defined(PASTIX_STARPU_HETEROPRIO)
252 STARPU_PRIORITY, BucketScalo,
254 STARPU_PRIORITY, prio,
BEGIN_C_DECLS typedef int pastix_int_t
void cpublok_sscalo(pastix_trans_t trans, const SolverCblk *cblk, pastix_int_t blok_m, const void *dataA, const void *dataD, void *dataB)
Copy the lower terms of the block with scaling for the two-terms algorithm.
enum pastix_trans_e pastix_trans_t
Transposition.
void starpu_task_blok_sscalo(sopalin_data_t *sopalin_data, pastix_trans_t trans, const SolverCblk *cblk, SolverBlok *blok, int prio)
TODO.
static pastix_fixdbl_t fct_blok_sscalo_cost(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
Functions to profile the codelet.
static void fct_blok_sscalo_cpu(void *descr[], void *cl_arg)
StarPU CPU implementation.
Base structure for all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t blok_rownbr_ext(const SolverBlok *blok)
Compute the number of rows of a contiguous block in front of the same cblk.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Solver column block structure.