23 #ifndef DOXYGEN_SHOULD_SKIP_THIS
28 #include "sopalin/sopalin_data.h"
37 struct cl_cblk_dgetrfsp_args_s {
39 sopalin_data_t *sopalin_data;
43 #if defined(PASTIX_STARPU_PROFILING)
51 starpu_profile_t cblk_dgetrfsp_profile = {
53 .name =
"cblk_dgetrfsp"
59 void cblk_dgetrfsp_profile_register(
void ) __attribute__( ( constructor ) );
61 cblk_dgetrfsp_profile_register(
void )
63 profiling_register_cl( &cblk_dgetrfsp_profile );
66 #ifndef DOXYGEN_SHOULD_SKIP_THIS
67 #if defined(PASTIX_STARPU_PROFILING_LOG)
69 cl_profiling_cb_cblk_dgetrfsp(
void *callback_arg )
71 cl_profiling_callback( callback_arg );
73 struct starpu_task *task = starpu_task_get_current();
74 struct starpu_profiling_task_info *info = task->profiling_info;
81 struct cl_cblk_dgetrfsp_args_s *args = (
struct cl_cblk_dgetrfsp_args_s *) callback_arg;
83 pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
90 cl_profiling_log_register( task->name,
"cblk_dgetrfsp", M, N, 0, flops, speed );
94 #if defined(PASTIX_STARPU_PROFILING_LOG)
95 static void (*cblk_dgetrfsp_callback)(
void*) = cl_profiling_cb_cblk_dgetrfsp;
97 static void (*cblk_dgetrfsp_callback)(
void*) = cl_profiling_callback;
129 struct starpu_perfmodel_arch *arch,
132 struct cl_cblk_dgetrfsp_args_s *args = (
struct cl_cblk_dgetrfsp_args_s *)(task->cl_arg);
140 switch( arch->devices->type ) {
141 case STARPU_CPU_WORKER:
142 coefs1 = &(args->sopalin_data->cpu_models->coefficients[PastixDouble-2][
PastixKernelGETRF][0]);
145 case STARPU_CUDA_WORKER:
146 coefs1 = &(args->sopalin_data->gpu_models->coefficients[PastixDouble-2][
PastixKernelGETRF][0]);
155 cost = modelsGetCost1Param( coefs1, N );
156 cost += modelsGetCost2Param( coefs2, M, N ) * 2.;
162 #ifndef DOXYGEN_SHOULD_SKIP_THIS
163 static struct starpu_perfmodel starpu_cblk_dgetrfsp_model = {
164 #if defined( PASTIX_STARPU_COST_PER_ARCH )
165 .type = STARPU_PER_ARCH,
166 .arch_cost_function = cblk_getrf_cost,
168 .type = STARPU_HISTORY_BASED,
170 .symbol =
"cblk_dgetrfsp",
173 #if !defined(PASTIX_STARPU_SIMULATION)
189 fct_cblk_dgetrfsp_cpu(
void *descr[],
void *cl_arg )
191 struct cl_cblk_dgetrfsp_args_s *args = (
struct cl_cblk_dgetrfsp_args_s *)cl_arg;
195 L = pastix_starpu_cblk_get_ptr( descr[0] );
196 U = pastix_starpu_cblk_get_ptr( descr[1] );
202 CODELETS_CPU( cblk_dgetrfsp, 2 );
227 struct cl_cblk_dgetrfsp_args_s *cl_arg = NULL;
229 #if defined(PASTIX_DEBUG_STARPU)
236 #if defined(PASTIX_WITH_MPI)
239 if ( cblk->
ownerid == sopalin_data->solvmtx->clustnum ) {
245 if ( starpu_mpi_cached_receive( cblk->
handler[0] ) ) {
248 if ( starpu_mpi_cached_receive( cblk->
handler[1] ) ) {
251 if ( !need_submit ) {
261 cl_arg = malloc(
sizeof(
struct cl_cblk_dgetrfsp_args_s) );
262 cl_arg->sopalin_data = sopalin_data;
263 #if defined(PASTIX_STARPU_PROFILING)
264 cl_arg->profile_data.measures = cblk_dgetrfsp_profile.measures;
265 cl_arg->profile_data.flops = NAN;
270 #if defined(PASTIX_DEBUG_STARPU)
272 asprintf( &task_name,
"%s( %ld )",
273 cl_cblk_dgetrfsp_cpu.name,
274 (
long)(cblk - sopalin_data->solvmtx->cblktab) );
277 pastix_starpu_insert_task(
278 &cl_cblk_dgetrfsp_cpu,
279 STARPU_CL_ARGS, cl_arg,
sizeof(
struct cl_cblk_dgetrfsp_args_s ),
280 #
if defined(PASTIX_STARPU_PROFILING)
281 STARPU_CALLBACK_WITH_ARG_NFREE, cblk_dgetrfsp_callback, cl_arg,
285 #
if defined(PASTIX_DEBUG_STARPU)
286 STARPU_NAME, task_name,
288 #
if defined(PASTIX_STARPU_HETEROPRIO)
289 STARPU_PRIORITY, BucketFacto1D,
291 STARPU_PRIORITY, prio,
BEGIN_C_DECLS typedef int pastix_int_t
int cpucblk_dgetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *U)
Compute the LU factorization of one panel.
static pastix_fixdbl_t fct_cblk_dgetrfsp_cost(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
Cost model function.
void starpu_task_cblk_dgetrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
TODO.
Base structure for all codelet arguments that include the profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Solver column block structure.