PaStiX Handbook  6.3.2
codelet_solve_dgemm.c
Go to the documentation of this file.
1 /**
2  *
3  * @file codelet_solve_dgemm.c
4  *
5  * StarPU codelet for gemm function
6  *
7  * @copyright 2016-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.2
11  * @author Vincent Bridonneau
12  * @author Mathieu Faverge
13  * @author Pierre Ramet
14  * @date 2023-07-21
15  *
16  * @generated from /builds/solverstack/pastix/sopalin/starpu/codelet_solve_zgemm.c, normal z -> d, Wed Dec 13 12:09:28 2023
17  *
18  * @addtogroup pastix_starpu
19  * @{
20  *
21  **/
22 #ifndef DOXYGEN_SHOULD_SKIP_THIS
23 #define _GNU_SOURCE
24 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
25 #include "common.h"
26 #include "blend/solver.h"
27 #include "sopalin/sopalin_data.h"
28 #include "pastix_dcores.h"
29 #include "pastix_starpu.h"
30 #include "pastix_dstarpu.h"
31 #include "codelets.h"
32 
33 #if defined( PASTIX_STARPU_PROFILING )
34 /**
35  * @brief Block version
36  */
37 starpu_profile_t solve_blok_dgemm_profile = {
38  .next = NULL,
39  .name = "solve_blok_dgemm"
40 };
41 
42 /**
43  * @brief Profiling registration function
44  */
45 void solve_blok_dgemm_profile_register( void ) __attribute__( ( constructor ) );
46 void
47 solve_blok_dgemm_profile_register( void )
48 {
49  profiling_register_cl( &solve_blok_dgemm_profile );
50 }
51 #endif
52 
53 #ifndef DOXYGEN_SHOULD_SKIP_THIS
54 struct cl_solve_blok_dgemm_args_s {
55  profile_data_t profile_data;
56  pastix_side_t side;
57  pastix_trans_t trans;
58  const SolverCblk *cblk;
59  const SolverBlok *blok;
60  SolverCblk *fcbk;
61 };
62 
63 static struct starpu_perfmodel starpu_solve_blok_dgemm_model = {
64  .type = STARPU_HISTORY_BASED,
65  .symbol = "solve_blok_dgemm",
66 };
67 
68 #if !defined(PASTIX_STARPU_SIMULATION)
69 static void
70 fct_solve_blok_dgemm_cpu( void *descr[], void *cl_arg )
71 {
72  const void *dataA = NULL;
73  const pastix_lrblock_t *lrA;
74  const double *A;
75  double *B, *C;
76  pastix_int_t nrhs, ldb, ldc;
77  struct cl_solve_blok_dgemm_args_s *args = (struct cl_solve_blok_dgemm_args_s *)cl_arg;
78 
79  dataA = pastix_starpu_cblk_get_ptr( descr[0] );
80  B = (double *)STARPU_MATRIX_GET_PTR( descr[1] );
81  ldb = (pastix_int_t) STARPU_MATRIX_GET_LD( descr[1] );
82  nrhs = (pastix_int_t) STARPU_MATRIX_GET_NY( descr[1] );
83  C = (double *)STARPU_MATRIX_GET_PTR( descr[2] );
84  ldc = (pastix_int_t) STARPU_MATRIX_GET_LD( descr[2] );
85 
86  /*
87  * Make sure we get the correct pointer to the lrA, or to the right position in [lu]coeftab
88  */
89  if ( (args->side == PastixLeft) && (args->cblk->cblktype & CBLK_COMPRESSED) ) {
90  lrA = dataA;
91  lrA += (args->blok - args->cblk->fblokptr);
92  dataA = lrA;
93  }
94  else if ( (args->side == PastixRight) && (args->fcbk->cblktype & CBLK_COMPRESSED) ) {
95  lrA = dataA;
96  lrA += (args->blok - args->fcbk->fblokptr);
97  dataA = lrA;
98  }
99  else {
100  A = dataA;
101  A += args->blok->coefind;
102  dataA = A;
103  }
104 
105  solve_blok_dgemm( args->side, args->trans, nrhs,
106  args->cblk, args->blok, args->fcbk, dataA, B, ldb, C, ldc );
107 }
108 #endif /* !defined(PASTIX_STARPU_SIMULATION) */
109 
110 CODELETS_CPU( solve_blok_dgemm, 3 );
111 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
112 
113 /**
114  *******************************************************************************
115  *
116  * @brief Submit a task to perform a gemm.
117  *
118  *******************************************************************************
119  *
120  * @param[in] coef
121  * Specify whether the computation are made with the L part, or the U
122  * part of A. It has to be either PastixLCoef, or PastixUCoef.
123  *
124  * @param[in] side
125  * Specify the side parameter of the TRSM.
126  *
127  * @param[in] trans
128  * Specify the transposition used for the matrix A in the
129  * computation. It has to be either PastixTrans or PastixTrans.
130  *
131  * @param[in] cblk
132  * The cblk structure that corresponds to the A and B matrix.
133  *
134  * @param[in] blok
135  * The blok structure that corresponds to the A matrix, and that
136  * belongs either to cblk or fcbk depending on the side parameter.
137  *
138  * @param[inout] fcbk
139  * The cblk structure that corresponds to the C matrix.
140 
141  *
142  * @param[in] sopalin_data
143  * The data that provide the SolverMatrix structure from PaStiX, and
144  * descriptor of b (providing nrhs, b and ldb).
145 
146  * @param[in] prio
147  * The priority of the task in th DAG.
148  *
149  *******************************************************************************/
150 void
151 starpu_stask_blok_dgemm( sopalin_data_t *sopalin_data,
152  pastix_coefside_t coef,
153  pastix_side_t side,
154  pastix_trans_t trans,
155  const SolverCblk *cblk,
156  const SolverBlok *blok,
157  SolverCblk *fcbk,
158  pastix_int_t prio )
159 {
160  struct cl_solve_blok_dgemm_args_s *cl_arg;
161  SolverMatrix *solvmtx = sopalin_data->solvmtx;
162  pastix_int_t cblknum = cblk - solvmtx->cblktab;
163  pastix_int_t fcbknum = fcbk - solvmtx->cblktab;
164  starpu_data_handle_t handle;
165 #if defined(PASTIX_DEBUG_STARPU)
166  char *task_name;
167 #endif
168 
169  /*
170  * Check if it needs to be submitted
171  */
172 #if defined(PASTIX_WITH_MPI)
173  {
174  int need_submit = 0;
175  if ( cblk->ownerid == sopalin_data->solvmtx->clustnum ) {
176  need_submit = 1;
177  }
178  if ( fcbk->ownerid == sopalin_data->solvmtx->clustnum ) {
179  need_submit = 1;
180  }
181  if ( starpu_mpi_cached_receive( solvmtx->starpu_desc_rhs->handletab[fcbknum] ) ) {
182  need_submit = 1;
183  }
184  if ( !need_submit ) {
185  return;
186  }
187  }
188 #endif
189 
190  /*
191  * Create the arguments array
192  */
193  cl_arg = malloc( sizeof(struct cl_solve_blok_dgemm_args_s) );
194 #if defined(PASTIX_STARPU_PROFILING)
195  cl_arg->profile_data.measures = solve_blok_dgemm_profile.measures;
196  cl_arg->profile_data.flops = NAN;
197 #endif
198  cl_arg->side = side;
199  cl_arg->trans = trans;
200  cl_arg->cblk = cblk;
201  cl_arg->blok = blok;
202  cl_arg->fcbk = fcbk;
203 
204 #if defined(PASTIX_DEBUG_STARPU)
205  asprintf( &task_name, "%s( %ld, %ld, %ld )",
206  cl_solve_blok_dgemm_cpu.name,
207  (long)( ( side == PastixRight ) ? fcbknum : cblknum ),
208  (long)cblknum,
209  (long)fcbknum );
210 #endif
211 
212  if ( side == PastixRight ) {
213  handle = fcbk->handler[coef];
214  }
215  else {
216  handle = cblk->handler[coef];
217  }
218 
219  pastix_starpu_insert_task(
220  &cl_solve_blok_dgemm_cpu,
221  STARPU_CL_ARGS, cl_arg, sizeof( struct cl_solve_blok_dgemm_args_s ),
222 #if defined(PASTIX_STARPU_PROFILING)
223  STARPU_CALLBACK_WITH_ARG_NFREE, cl_profiling_callback, cl_arg,
224 #endif
225  STARPU_R, handle,
226  STARPU_R, solvmtx->starpu_desc_rhs->handletab[cblknum],
227  STARPU_RW, solvmtx->starpu_desc_rhs->handletab[fcbknum],
228 #if defined(PASTIX_DEBUG_STARPU)
229  STARPU_NAME, task_name,
230 #endif
231 #if defined(PASTIX_STARPU_HETEROPRIO)
232  STARPU_PRIORITY, BucketSolveGEMM,
233 #endif
234  0);
235  (void)prio;
236 }
237 
238 /**
239  * @}
240  */
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
The block low-rank structure to hold a matrix in low-rank form.
void solve_blok_dgemm(pastix_side_t side, pastix_trans_t trans, pastix_int_t nrhs, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcbk, const void *dataA, const double *B, pastix_int_t ldb, double *C, pastix_int_t ldc)
Apply a solve gemm update related to a single block of the matrix A.
enum pastix_side_e pastix_side_t
Side of the operation.
enum pastix_trans_e pastix_trans_t
Transposition.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
@ PastixRight
Definition: api.h:496
@ PastixLeft
Definition: api.h:495
void starpu_stask_blok_dgemm(sopalin_data_t *sopalin_data, pastix_coefside_t coef, pastix_side_t side, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcbk, pastix_int_t prio)
Submit a task to perform a gemm.
Base structure to all codelet arguments that include the profiling data.
SolverCblk *restrict cblktab
Definition: solver.h:222
void * handler[2]
Definition: solver.h:173
int ownerid
Definition: solver.h:175
Solver block structure.
Definition: solver.h:137
Solver column block structure.
Definition: solver.h:156
Solver column block structure.
Definition: solver.h:200