24 #ifndef DOXYGEN_SHOULD_SKIP_THIS
29 #include "sopalin/sopalin_data.h"
38 struct cl_cblk_csytrfsp_args_s {
40 sopalin_data_t *sopalin_data;
44 #if defined(PASTIX_STARPU_PROFILING)
52 starpu_profile_t cblk_csytrfsp_profile = {
54 .name =
"cblk_csytrfsp"
60 void cblk_csytrfsp_profile_register(
void ) __attribute__( ( constructor ) );
62 cblk_csytrfsp_profile_register(
void )
64 profiling_register_cl( &cblk_csytrfsp_profile );
67 #ifndef DOXYGEN_SHOULD_SKIP_THIS
68 #if defined(PASTIX_STARPU_PROFILING_LOG)
70 cl_profiling_cb_cblk_csytrfsp(
void *callback_arg )
72 cl_profiling_callback( callback_arg );
74 struct starpu_task *task = starpu_task_get_current();
75 struct starpu_profiling_task_info *info = task->profiling_info;
82 struct cl_cblk_csytrfsp_args_s *args = (
struct cl_cblk_csytrfsp_args_s *) callback_arg;
84 pastix_fixdbl_t duration = starpu_timing_timespec_delay_us( &info->start_time, &info->end_time );
91 cl_profiling_log_register( task->name,
"cblk_csytrfsp", M, N, 0, flops, speed );
95 #if defined(PASTIX_STARPU_PROFILING_LOG)
96 static void (*cblk_csytrfsp_callback)(
void*) = cl_profiling_cb_cblk_csytrfsp;
98 static void (*cblk_csytrfsp_callback)(
void*) = cl_profiling_callback;
130 struct starpu_perfmodel_arch *arch,
133 struct cl_cblk_csytrfsp_args_s *args = (
struct cl_cblk_csytrfsp_args_s *)(task->cl_arg);
141 switch( arch->devices->type ) {
142 case STARPU_CPU_WORKER:
143 coefs1 = &(args->sopalin_data->cpu_models->coefficients[PastixComplex32-2][
PastixKernelSYTRF][0]);
146 case STARPU_CUDA_WORKER:
147 coefs1 = &(args->sopalin_data->gpu_models->coefficients[PastixComplex32-2][
PastixKernelSYTRF][0]);
156 cost = modelsGetCost1Param( coefs1, N );
157 cost += modelsGetCost2Param( coefs2, M, N );
163 #ifndef DOXYGEN_SHOULD_SKIP_THIS
164 static struct starpu_perfmodel starpu_cblk_csytrfsp_model = {
165 #if defined( PASTIX_STARPU_COST_PER_ARCH )
166 .type = STARPU_PER_ARCH,
167 .arch_cost_function = cblk_sytrf_cost,
169 .type = STARPU_HISTORY_BASED,
171 .symbol =
"cblk_csytrfsp",
174 #if !defined(PASTIX_STARPU_SIMULATION)
190 fct_cblk_csytrfsp_cpu(
void *descr[],
void *cl_arg )
192 struct cl_cblk_csytrfsp_args_s *args = (
struct cl_cblk_csytrfsp_args_s *)cl_arg;
196 L = pastix_starpu_cblk_get_ptr( descr[0] );
197 DL = pastix_starpu_cblk_get_ptr( descr[1] );
199 if ( (args->cblk->cblktype & CBLK_COMPRESSED) && (DL != NULL) ) {
201 ws += (args->cblk[1].fblokptr - args->cblk[0].fblokptr) *
sizeof(
pastix_lrblock_t );
208 CODELETS_CPU( cblk_csytrfsp, 2 );
233 struct cl_cblk_csytrfsp_args_s *cl_arg = NULL;
235 #if defined(PASTIX_DEBUG_STARPU)
239 starpu_data_handle_t *handler = (starpu_data_handle_t *)( cblk->
handler );
241 pastix_starpu_register_ws( handler + 1, cblk, PastixComplex32 );
243 #if defined(PASTIX_WITH_MPI)
245 int64_t tag_desc = sopalin_data->solvmtx->starpu_desc->mpitag;
246 int64_t tag_cblk = 2 * cblk->
gcblknum + 1;
248 starpu_mpi_data_register( *(handler + 1),
257 #if defined(PASTIX_WITH_MPI)
260 if ( cblk->
ownerid == sopalin_data->solvmtx->clustnum ) {
266 if ( starpu_mpi_cached_receive( cblk->
handler[0] ) ) {
269 if ( !need_submit ) {
279 cl_arg = malloc(
sizeof(
struct cl_cblk_csytrfsp_args_s) );
280 cl_arg->sopalin_data = sopalin_data;
281 #if defined(PASTIX_STARPU_PROFILING)
282 cl_arg->profile_data.measures = cblk_csytrfsp_profile.measures;
283 cl_arg->profile_data.flops = NAN;
288 #if defined(PASTIX_DEBUG_STARPU)
290 asprintf( &task_name,
"%s( %ld )",
291 cl_cblk_csytrfsp_cpu.name,
292 (
long)(cblk - sopalin_data->solvmtx->cblktab) );
295 pastix_starpu_insert_task(
296 &cl_cblk_csytrfsp_cpu,
297 STARPU_CL_ARGS, cl_arg,
sizeof(
struct cl_cblk_csytrfsp_args_s ),
298 #
if defined(PASTIX_STARPU_PROFILING)
299 STARPU_CALLBACK_WITH_ARG_NFREE, cblk_csytrfsp_callback, cl_arg,
303 #
if defined(PASTIX_DEBUG_STARPU)
304 STARPU_NAME, task_name,
306 #
if defined(PASTIX_STARPU_HETEROPRIO)
307 STARPU_PRIORITY, BucketFacto1D,
309 STARPU_PRIORITY, prio,
BEGIN_C_DECLS typedef int pastix_int_t
float _Complex pastix_complex32_t
void cpucblk_calloc_lrws(const SolverCblk *cblk, pastix_lrblock_t *lrblok, pastix_complex32_t *ws)
Initialize the lrblock structures of all blocks of the associated cblk from a workspace.
int cpucblk_csytrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *DLt)
Compute the LDL^t factorization of one panel.
The block low-rank structure to hold a matrix in low-rank form.
static pastix_fixdbl_t fct_cblk_csytrfsp_cost(struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl)
Cost model function.
void starpu_task_cblk_csytrfsp(sopalin_data_t *sopalin_data, SolverCblk *cblk, int prio)
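Insert the StarPU task that runs the cblk_csytrfsp codelet on a column block. TODO: complete this description.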
Base structure for all codelet arguments that include profiling data.
static double cost(symbol_cblk_t *cblk)
Computes the cost of a cblk.
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Solver column block structure.