25#ifndef DOXYGEN_SHOULD_SKIP_THIS
30#include "sopalin/sopalin_data.h"
39struct cl_blok_sscalo_args_s {
41 sopalin_data_t *sopalin_data;
54#if defined( PASTIX_STARPU_PROFILING )
55starpu_profile_t blok_sscalo_profile = {
63void blok_sscalo_profile_register(
void ) __attribute__( ( constructor ) );
65blok_sscalo_profile_register(
void )
67 profiling_register_cl( &blok_sscalo_profile );
70static void (*blok_strsmsp_callback)(
void*) = cl_profiling_callback;
74#if defined(PASTIX_STARPU_COST_PER_ARCH)
82fct_blok_sscalo_cost(
struct starpu_task *task,
83 struct starpu_perfmodel_arch *arch,
86 struct cl_blok_sscalo_args_s *args = (
struct cl_blok_sscalo_args_s *)(task->cl_arg);
93 switch( arch->devices->type ) {
94 case STARPU_CPU_WORKER:
97 case STARPU_CUDA_WORKER:
106 cost = modelsGetCost2Param( coefs, M, N ) * 1e6;
114#if defined(PASTIX_STARPU_COST_PER_ARCH)
115 .type = STARPU_PER_ARCH,
116 .arch_cost_function = fct_blok_sscalo_cost,
118 .type = STARPU_HISTORY_BASED,
120 .symbol =
"blok_sscalo",
123#if !defined(PASTIX_STARPU_SIMULATION)
130 struct cl_blok_sscalo_args_s *args = (
struct cl_blok_sscalo_args_s *)cl_arg;
135 A = pastix_starpu_blok_get_ptr( descr[0] );
136 D = pastix_starpu_blok_get_ptr( descr[1] );
137 B = pastix_starpu_blok_get_ptr( descr[2] );
139 assert( args->cblk->cblktype & CBLK_TASKS_2D );
145CODELETS_CPU( blok_sscalo, 3 );
177 struct cl_blok_sscalo_args_s *cl_arg = NULL;
179#if defined(PASTIX_DEBUG_STARPU) || defined(PASTIX_STARPU_PROFILING_LOG)
183 starpu_data_handle_t *handler = (starpu_data_handle_t *)( blok->
handler );
186 pastix_starpu_register_blok( handler+1, cblk, blok, PastixFloat );
188#if defined(PASTIX_WITH_MPI)
190 int64_t tag_desc = sopalin_data->solvmtx->starpu_desc->mpitag;
191 int64_t tag_cblk = 2 * sopalin_data->solvmtx->gcblknbr;
192 int64_t tag_blok = 2 * (blok - sopalin_data->solvmtx->bloktab) + 1;
194 starpu_mpi_data_register( *(handler + 1),
195 tag_desc + tag_cblk + tag_blok,
203#if defined(PASTIX_WITH_MPI)
206 if ( cblk->
ownerid == sopalin_data->solvmtx->clustnum ) {
212 if ( !need_submit ) {
222 cl_arg = malloc(
sizeof(
struct cl_blok_sscalo_args_s) );
223 cl_arg->sopalin_data = sopalin_data;
224#if defined(PASTIX_STARPU_PROFILING)
225 cl_arg->profile_data.measures = blok_sscalo_profile.measures;
226 cl_arg->profile_data.flops = NAN;
228 cl_arg->trans = trans;
230 cl_arg->blok_m = blok_m;
233#if defined(PASTIX_DEBUG_STARPU) || defined(PASTIX_STARPU_PROFILING_LOG)
235 asprintf( &task_name,
"%s( %ld, %ld )",
236 cl_blok_sscalo_cpu.name,
237 (
long)(cblk - sopalin_data->solvmtx->cblktab),
238 (
long)(blok - sopalin_data->solvmtx->bloktab) );
241 pastix_starpu_insert_task(
243 STARPU_CL_ARGS, cl_arg,
sizeof(
struct cl_blok_sscalo_args_s ),
244#
if defined(PASTIX_STARPU_PROFILING)
245 STARPU_CALLBACK_WITH_ARG_NFREE, blok_sscalo_callback, cl_arg,
250#
if defined(PASTIX_DEBUG_STARPU) || defined(PASTIX_STARPU_PROFILING_LOG)
251 STARPU_NAME, task_name,
253#
if defined(PASTIX_STARPU_HETEROPRIO)
254 STARPU_PRIORITY, BucketScalo,
256 STARPU_PRIORITY, prio,
BEGIN_C_DECLS typedef int pastix_int_t
void cpublok_sscalo(pastix_trans_t trans, const SolverCblk *cblk, pastix_int_t blok_m, const void *dataA, const void *dataD, void *dataB)
Copy the lower terms of the block with scaling for the two-terms algorithm.
enum pastix_trans_e pastix_trans_t
Transposition.
static struct starpu_perfmodel starpu_blok_sscalo_model
Functions to profile the codelet.
void starpu_task_blok_sscalo(sopalin_data_t *sopalin_data, pastix_trans_t trans, const SolverCblk *cblk, SolverBlok *blok, int prio)
Insert the blok_sscalo task for one block into StarPU (detailed description still TODO).
static void fct_blok_sscalo_cpu(void *descr[], void *cl_arg)
StarPU CPU implementation.
Base structure for all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t blok_rownbr_ext(const SolverBlok *blok)
Compute the number of rows of a contiguous block in front of the same cblk.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Solver column block structure.