PaStiX Handbook  6.2.1
starpu.c
Go to the documentation of this file.
1 /**
2  *
3  * @file starpu.c
4  *
5  * @copyright 2017-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
6  * Univ. Bordeaux. All rights reserved.
7  *
8  * @version 6.2.1
9  * @author Pierre Ramet
10  * @author Mathieu Faverge
11  * @date 2021-06-21
12  *
13  * @addtogroup pastix_starpu
14  * @{
15  *
16  **/
17 #include "common.h"
18 #if !defined(PASTIX_WITH_STARPU)
19 #error "This file should not be compiled if Starpu is not enabled"
20 #endif
21 #include <stdio.h>
22 #include "pastix_starpu.h"
23 
24 #if defined(PASTIX_STARPU_HETEROPRIO)
25 #include <starpu_heteroprio.h>
26 /**
27  *******************************************************************************
28  *
29  * @brief Inits the heteroprio priorities, mappings and accelerations.
30  *
31  * This function initializes the heteroprio system in an arbitrary manner for now.
32  * Used as a callback by starpu directly.
33  *
34  ******************************************************************************/
35 void
36 init_heteroprio( unsigned ctx )
37 {
38  unsigned idx;
39  /* CPU uses 4 buckets and visits them in the natural order */
40  starpu_heteroprio_set_nb_prios( ctx, STARPU_CPU_IDX, BucketNumber );
41  /* It uses direct mapping idx => idx */
42  for ( idx = 0; idx < BucketNumber; ++idx ) {
43  starpu_heteroprio_set_mapping( ctx, STARPU_CPU_IDX, idx, idx );
44  /* If there are no CUDA workers, we must tell that CPU is faster */
45  starpu_heteroprio_set_faster_arch( ctx, STARPU_CPU_IDX, idx );
46  }
47  if ( starpu_cuda_worker_get_count() ) {
48  const int cuda_matching[] = { BucketGEMM2D, BucketTRSM2D, BucketGEMM1D };
49  const float cuda_factor = 125.0f / starpu_cpu_worker_get_count();
50  const float cuda_factors[] = { cuda_factor, cuda_factor, cuda_factor };
51  /* CUDA is enabled and uses 2 buckets */
52  starpu_heteroprio_set_nb_prios( ctx, STARPU_CUDA_IDX, 3 );
53 
54  for ( idx = 0; idx < 3; ++idx ) {
55  /* CUDA has its own mapping */
56  starpu_heteroprio_set_mapping( ctx, STARPU_CUDA_IDX, idx, cuda_matching[idx] );
57  /* For its buckets, CUDA is the fastest */
58  starpu_heteroprio_set_faster_arch( ctx, STARPU_CUDA_IDX, cuda_matching[idx] );
59  /* And CPU is slower by a factor dependant on the bucket */
60  starpu_heteroprio_set_arch_slow_factor(
61  ctx, STARPU_CPU_IDX, cuda_matching[idx], cuda_factors[idx] );
62  }
63  }
64 }
65 #endif
66 
67 /**
68  * Set the tag sizes
69  */
70 #if defined(PASTIX_WITH_MPI)
71 
72 /* A tag is starpu_tag_width bits wide; pastix_starpu_get_tag() shifts its counter by starpu_tag_sep,
73  leaving the low bits at zero. Both values are set by pastix_starpu_tag_init() (63 and 24 before fitting MPI_TAG_UB). */
74 #define TAG_WIDTH_MIN 20
75 static int starpu_tag_width = 63;
76 static int starpu_tag_sep = 31;
77 static volatile int32_t starpu_tag_counter = 0;
78 static int _tag_mpi_initialized_ = 0;
79 
80 /**
81  *******************************************************************************
82  *
83  * @brief Init a new StarPU tag.
84  *
85  * This function initializes the StarPU tags thanks to global values
86  *
87  *******************************************************************************
88  *
89  * @param[inout] pastix
90  * The main pastix_data structure.
91  *
92  ******************************************************************************/
93 static int
94 pastix_starpu_tag_init( pastix_data_t *pastix )
95 {
96  if (!_tag_mpi_initialized_) {
97  int ok = 0;
98  uintptr_t tag_ub;
99 
100  void *tag_ub_p = NULL;
101 
102  starpu_tag_width = 63;
103  starpu_tag_sep = 24;
104 
105  starpu_mpi_comm_get_attr( pastix->inter_node_comm, STARPU_MPI_TAG_UB, &tag_ub_p, &ok );
106  tag_ub = (uintptr_t)tag_ub_p;
107 
108  if ( !ok ) {
109  pastix_print_error("pastix_starpu_tag_init: MPI_TAG_UB not known by StarPU\n");
110  }
111 
112  while ( ((uintptr_t)((1UL<<starpu_tag_width) - 1) > tag_ub ) &&
113  (starpu_tag_width >= TAG_WIDTH_MIN) )
114  {
115  starpu_tag_width--;
116  starpu_tag_sep = starpu_tag_width / 2;
117  }
118 
119  if ( starpu_tag_width < TAG_WIDTH_MIN ) {
120  pastix_print_error("pastix_starpu_tag_init: MPI_TAG_UB may be too small to identify all the pieces of data\n");
121  return PASTIX_ERR_INTERNAL;
122  }
123 
124  _tag_mpi_initialized_ = 1;
125  return PASTIX_SUCCESS;
126  }
127  else {
128  return PASTIX_ERR_INTERNAL;
129  }
130 }
131 
132 /**
133  *******************************************************************************
134  *
135  * @brief Get the StarPU unique current tag.
136  *
137  * This function returns a new tag for the StarPU distributed version thanks to
138  * global variables.
139  *
140  ******************************************************************************/
141 int64_t
143 {
144  return ((int64_t)pastix_atomic_inc_32b( &starpu_tag_counter )) << starpu_tag_sep;
145 }
146 #else /* defined(PASTIX_WITH_MPI) */
/* Without MPI no tag is needed: always return the same constant value. */
int64_t
pastix_starpu_get_tag() {
    return 1;
}
151 #endif
152 
153 /**
154  *******************************************************************************
155  *
156  * @brief Startup the StarPU runtime system.
157  *
158  * This function initialize and startup the StarPU runtime system with PaStix
159  * configuration variables
160  *
161  *******************************************************************************
162  *
163  * @param[inout] pastix
164  * The main pastix_data structure.
165  *
166  * @param[inout] argc
167  * The number of arguments of the main program.
168  *
169  * @param[inout] argv
170  * The list of argument given to the main program.
171  *
172  * @param[in] bindtab
173  * The binding array of size the number of threads if a specific
174  * binding is required, NULL otherwise.
175  *
176  ******************************************************************************/
177 void
178 pastix_starpu_init( pastix_data_t *pastix,
179  int *argc, char **argv[],
180  const int *bindtab )
181 {
182  struct starpu_conf *conf;
183  pastix_int_t *iparm = pastix->iparm;
184  int rc;
185 
186  if ( pastix->starpu != NULL )
187  return;
188 
189  pastix->starpu = malloc(sizeof(struct starpu_conf));
190  starpu_conf_init( pastix->starpu );
191 
192  /* Force no GPUs if CUDA has not been enabled in PaStiX */
193 #if !defined(PASTIX_WITH_CUDA)
194  iparm[IPARM_GPU_NBR] = 0;
195 #endif
196 
197  conf = pastix->starpu;
198  conf->ncpus = pastix_imax( 1, (iparm[IPARM_THREAD_NBR] - iparm[IPARM_GPU_NBR] - 1) );
199  conf->ncuda = iparm[IPARM_GPU_NBR];
200  conf->nopencl = 0;
201 
202 #if defined(PASTIX_STARPU_HETEROPRIO)
203  /*
204  * Set scheduling to heteroprio in any case if requested at compilation
205  */
206  conf->sched_policy_name = "heteroprio";
207  conf->sched_policy_init = &init_heteroprio;
208 #else /* PASTIX_STARPU_HETEROPRIO */
209 
210  if (conf->ncuda > 0) {
211 #if defined(PASTIX_GENERATE_MODEL)
212  pastix_print( pastix->procnum, 0,
213  "WARNING: PaStiX compiled with -DPASTIX_GENERATE_MODEL forces:\n"
214  " - a single event per stream\n"
215  " - a single stream per GPU\n"
216  " - restore the automatic detection of the number of threads\n" );
217 
218  conf->ncpus = -1;
219 
220  pastix_setenv( "STARPU_NWORKER_PER_CUDA", "1", 1 );
221  pastix_setenv( "STARPU_CUDA_PIPELINE", "0", 1 );
222 #endif
223 
224  conf->sched_policy_name = "dmdas";
225  }
226  else {
227  /*
228  * Set scheduling to "ws"/"lws" if no cuda devices used because it
229  * behaves better on homogneneous architectures. If the user wants
230  * to use another scheduling strategy, he can set STARPU_SCHED
231  * env. var. to whatever he wants
232  */
233 #if (STARPU_MAJOR_VERSION > 1) || ((STARPU_MAJOR_VERSION == 1) && (STARPU_MINOR_VERSION >= 2))
234  conf->sched_policy_name = "lws";
235 #else
236  conf->sched_policy_name = "ws";
237 #endif
238  }
239 #endif /* PASTIX_STARPU_HETEROPRIO */
240 
241  if ( bindtab != NULL ) {
242  int i;
243 
244  assert( iparm[IPARM_THREAD_NBR] < STARPU_NMAXWORKERS );
245  conf->use_explicit_workers_bindid = 1;
246 
247  for(i=0; i < pastix_imin( iparm[IPARM_THREAD_NBR], STARPU_NMAXWORKERS ); i++) {
248  conf->workers_bindid[i] = bindtab[i];
249  }
250  }
251 #if defined(STARPU_USE_FXT)
252  starpu_fxt_autostart_profiling( 0 ); /* FxT starts profiling upon explicit call only */
253 #endif
254  rc = starpu_init( conf );
255 
256  starpu_malloc_on_node_set_default_flags( STARPU_MAIN_RAM,
257  STARPU_MALLOC_PINNED
258  | STARPU_MALLOC_COUNT
259 #if defined(PASTIX_STARPU_SIMULATION)
260  | STARPU_MALLOC_SIMULATION_FOLDED
261 #endif
262  );
263 
264 #if defined(PASTIX_WITH_MPI)
265 #if defined(PASTIX_DEBUG_MPI) && !defined(PASTIX_STARPU_SIMULATION)
266  {
267  int flag = 0;
268  MPI_Initialized( &flag );
269  assert( flag );
270  }
271 #endif
272  starpu_mpi_init_comm( argc, argv, 0, pastix->inter_node_comm );
273  pastix_starpu_tag_init( pastix );
274 #endif
275 
276 #if defined(PASTIX_WITH_CUDA) && !defined(PASTIX_STARPU_SIMULATION)
277  starpu_cublas_init();
278 #endif
279 
280  /* Suspend threads until we need them */
281  starpu_pause();
282 
283  assert( pastix->starpu != NULL );
284 
285  (void)argc; (void)argv;
286  (void)rc;
287 }
288 
289 /**
290  *******************************************************************************
291  *
292  * @brief Finalize the StarPU runtime system.
293  *
294  * This function stop the StarPU runtime system.
295  *
296  *******************************************************************************
297  *
298  * @param[inout] pastix
299  * The main pastix_data structure.
300  *
301  ******************************************************************************/
302 void
304 {
305  if (pastix->starpu != NULL) {
306  starpu_resume();
307 
308 #if defined(PASTIX_WITH_MPI)
309  starpu_mpi_shutdown();
310 #endif
311 #if defined(PASTIX_WITH_CUDA) && !defined(PASTIX_STARPU_SIMULATION)
312  starpu_cublas_shutdown();
313 #endif
314  starpu_shutdown();
315 
316  free( pastix->starpu );
317  pastix->starpu = NULL;
318  }
319 }
320 
321 /**
322  * @}
323  */
pastix_starpu_finalize
void pastix_starpu_finalize(pastix_data_t *pastix)
Finalize the StarPU runtime system.
Definition: starpu.c:303
IPARM_THREAD_NBR
@ IPARM_THREAD_NBR
Definition: api.h:117
pastix_starpu_get_tag
int64_t pastix_starpu_get_tag()
Definition: starpu.c:148
pastix_starpu_init
void pastix_starpu_init(pastix_data_t *pastix, int *argc, char **argv[], const int *bindtab)
Startup the StarPU runtime system.
Definition: starpu.c:178
PASTIX_SUCCESS
@ PASTIX_SUCCESS
Definition: api.h:344
IPARM_GPU_NBR
@ IPARM_GPU_NBR
Definition: api.h:121
pastix_starpu.h
pastix
BEGIN_C_DECLS int pastix(pastix_data_t **pastix_data, PASTIX_Comm pastix_comm, pastix_int_t n, pastix_int_t *colptr, pastix_int_t *row, void *avals, pastix_int_t *perm, pastix_int_t *invp, void *b, pastix_int_t nrhs, pastix_int_t *iparm, double *dparm)
Main function for compatibility with former releases.
Definition: pastix.c:103
PASTIX_ERR_INTERNAL
@ PASTIX_ERR_INTERNAL
Definition: api.h:350