PaStiX Handbook  6.4.0
cpucblk_dmpi_rhs_bwd.c
Go to the documentation of this file.
1 /**
2  *
3  * @file cpucblk_dmpi_rhs_bwd.c
4  *
5  * Precision dependent routines to manag communications for the solve part.
6  *
7  * @copyright 2015-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.4.0
11  * @author Mathieu Faverge
12  * @author Alycia Lisito
13  * @date 2024-07-05
14  *
15  * @generated from /builds/solverstack/pastix/kernels/cpucblk_zmpi_rhs_bwd.c, normal z -> d, Thu Aug 29 14:20:22 2024
16  *
17  **/
18 #include "common/common.h"
19 #include "common/pastixdata.h"
20 #include <lapacke.h>
21 #include "blend/solver.h"
23 #include "kernels.h"
24 #include "pastix_dcores.h"
25 #include "pastix_dlrcores.h"
26 
27 #if defined( PASTIX_WITH_MPI )
28 /**
29  *******************************************************************************
30  *
31  * @brief Asynchronously sends the rhs associated to a cblk->lcolidx to the remote node.
32  *
33  *******************************************************************************
34  *
35  * @param[in] solvmtx
36  * The solver matrix holding the communicator.
37  *
38  * @param[in] rhsb
39  * The pointer to the rhs data structure that holds the vectors of the
40  * right hand side.
41  *
42  * @param[in] cblk
43  * The cblk which defines the part to sent.
44  *
45  *******************************************************************************/
46 void
47 cpucblk_disend_rhs_bwd( SolverMatrix *solvmtx,
48  pastix_rhs_t rhsb,
49  SolverCblk *cblk )
50 {
51  MPI_Request request;
52  double *b;
53  pastix_int_t colnbr = cblk_colnbr(cblk);
54  pastix_int_t size = colnbr * rhsb->n;
55  pastix_int_t idx = - cblk->bcscnum - 1;
56  int rc;
57 
58  assert( colnbr <= solvmtx->colmax );
59  assert( cblk->cblktype & CBLK_RECV );
60 
61 #if defined (PASTIX_DEBUG_MPI)
62  fprintf( stderr, "[%2d] RHS Bwd: Post Isend cblk %ld to %2d at index %ld of size %ld\n",
63  solvmtx->clustnum, (long)cblk->gcblknum, cblk->ownerid,
64  (long)cblk->lcolidx, (long)colnbr );
65 #endif
66 
67  /* Get the pointer to the right hand side */
68  b = rhsb->b;
69  b += cblk->lcolidx;
70 
71  /* Pack the data into a temporary buffer if non-contiguous */
72  assert( rhsb->cblkb[ idx ] == NULL );
73  if ( rhsb->n > 1 ) {
74  rhsb->cblkb[ idx ] = malloc( size * sizeof(double) );
75 
76  rc = LAPACKE_dlacpy_work( LAPACK_COL_MAJOR, 'A', colnbr, rhsb->n,
77  b, rhsb->ld, rhsb->cblkb[ idx ], colnbr );
78  assert( rc == 0 );
79 
80  b = rhsb->cblkb[ idx ];
81  }
82 
83  rc = MPI_Isend( b, size, PASTIX_MPI_DOUBLE,
84  cblk->ownerid, cblk->gcblknum, solvmtx->solv_comm, &request );
85  assert( rc == MPI_SUCCESS );
86 
87  solverCommMatrixAdd( solvmtx, cblk->ownerid, size * sizeof(double) );
88 
89  /* Register the request to make it progress */
90  pastix_atomic_lock( &(solvmtx->reqlock) );
91 
92  assert( solvmtx->reqidx[ solvmtx->reqnum ] == -1 );
93  assert( solvmtx->reqnum >= 0 );
94  assert( solvmtx->reqnum < solvmtx->reqnbr );
95 
96  solvmtx->reqtab[ solvmtx->reqnum ] = request;
97  solvmtx->reqidx[ solvmtx->reqnum ] = cblk - solvmtx->cblktab;
98  solvmtx->reqnum++;
99 
100  pastix_atomic_unlock( &(solvmtx->reqlock) );
101 
102  (void)rc;
103 }
104 
105 /**
106  *******************************************************************************
107  *
108  * @brief Handle a finished request on a fanin
109  *
110  *******************************************************************************
111  *
112  * @param[in] enums
113  * Enums needed for the solve.
114  *
115  * @param[in] solvmtx
116  * The solver matrix structure.
117  *
118  * @param[in] rhsb
119  * The pointer to the rhs data structure that holds the vectors of the
120  * right hand side.
121  *
122  * @param[inout] cblk
123  * The cblk concerned by the computation.
124  *
125  *******************************************************************************/
126 void
127 cpucblk_drequest_rhs_bwd_handle_send( const args_solve_t *enums,
128  SolverMatrix *solvmtx,
129  pastix_rhs_t rhsb,
130  const SolverCblk *cblk )
131 {
132  pastix_int_t idx = - cblk->bcscnum - 1;
133 
134  assert( cblk->cblktype & CBLK_RECV );
135  assert( enums->solve_step == PastixSolveBackward );
136 
137 #if defined(PASTIX_DEBUG_MPI)
138  {
139  size_t cblksize = cblk_colnbr( cblk ) * rhsb->n * sizeof(double);
140 
141  fprintf( stderr, "[%2d] RHS Bwd: Isend for cblk %ld toward %2d ( %ld Bytes ) (DONE)\n",
142  solvmtx->clustnum, (long)cblk->gcblknum, cblk->ownerid, (long)cblksize );
143  }
144 #endif
145 
146  if ( rhsb->cblkb[ idx ] ) {
147  memFree_null( rhsb->cblkb[ idx ] );
148  }
149  (void)solvmtx;
150  (void)enums;
151 }
152 
153 /**
154  *******************************************************************************
155  *
156  * @brief Handle a finished request on a recv cblk.
157  *
158  *******************************************************************************
159  *
160  * @param[in] enums
161  * Enums needed for the solve.
162  *
163  * @param[inout] solvmtx
164  * The solver matrix structure.
165  *
166  * @param[in] rhsb
167  * The pointer to the rhs data structure that holds the vectors of the
168  * right hand side.
169  *
170  * @param[inout] threadid
171  * Id of the thread calling this method.
172  *
173  * @param[inout] status
174  * Statuses of the completed request.
175  *
176  * @param[inout] recvbuf
177  * Reception buffer of the completed request.
178  *
179  *******************************************************************************/
180 static inline void
181 cpucblk_drequest_rhs_bwd_handle_recv( const args_solve_t *enums,
182  SolverMatrix *solvmtx,
183  pastix_rhs_t rhsb,
184  int threadid,
185  const MPI_Status *status,
186  double *recvbuf )
187 {
188  SolverCblk *cblk;
189  int src = status->MPI_SOURCE;
190  int tag = status->MPI_TAG;
191  pastix_int_t idx;
192 
193  assert( ( 0 <= src ) && ( src < solvmtx->clustnbr ) );
194  assert( ( 0 <= tag ) && ( tag < solvmtx->gcblknbr ) );
195 
196  /*
197  * Let's look for the local cblk
198  */
199  cblk = solvmtx->cblktab + solvmtx->gcbl2loc[ tag ];
200  assert( cblk->cblktype & CBLK_FANIN );
201 
202 #if defined(PASTIX_DEBUG_MPI)
203  {
204  pastix_int_t colnbr;
205  pastix_int_t size;
206  int rc;
207  int count = 0;
208 
209  colnbr = cblk_colnbr( cblk );
210  size = colnbr * rhsb->n * sizeof(double);
211 
212  rc = MPI_Get_count( status, MPI_CHAR, &count );
213  assert( rc == MPI_SUCCESS );
214  assert( count == size );
215 
216  /* We can't know the sender easily, so we don't print it */
217  fprintf( stderr, "[%2d] RHS Bwd : recv of size %d/%ld for cblk %ld (DONE)\n",
218  solvmtx->clustnum, count, (long)size, (long)cblk->gcblknum );
219  }
220 #endif
221 
222  idx = - cblk->bcscnum - 1;
223  rhsb->cblkb[ idx ] = recvbuf;
224 
225  cblk->threadid = threadid;
226  if ( solvmtx->computeQueue ) {
227  pastix_queue_t *queue = solvmtx->computeQueue[ cblk->threadid ];
228  const SolverBlok *blok = solvmtx->bloktab + solvmtx->browtab[ cblk[1].brownum-1 ];
229  const SolverCblk *fcbk = solvmtx->cblktab + blok->lcblknm;
230 
231  pqueuePush1( queue, cblk - solvmtx->cblktab, - fcbk->priority );
232  }
233  else {
234  solve_cblk_dtrsmsp_backward( enums, solvmtx, cblk, rhsb );
235  /* Check it has been freed */
236  assert( rhsb->cblkb[ idx ] == NULL );
237  }
238 
239  (void)src;
240 }
241 
242 /**
243  *******************************************************************************
244  *
245  * @brief Handle a finished request.
246  *
247  * If cblktype & CBLK_FANIN : Will deallocate the coeftab
248  * If cblktype & CBLK_RECV : Will add cblk and deallocate the coeftab
249  *
250  *******************************************************************************
251  *
252  * @param[in] enums
253  * Enums needed for the solve.
254  *
255  * @param[inout] solvmtx
256  * The solver matrix structure.
257  *
258  * @param[in] rhsb
259  * The pointer to the rhs data structure that holds the vectors of the
260  * right hand side.
261  *
262  * @param[in] threadid
263  * Id of the thread calling this method.
264  *
265  * @param[in] outcount
266  * Amount of finshed requests.
267  *
268  * @param[in] indexes
269  * Array of completed requests.
270  *
271  * @param[in] statuses
272  * Array of statuses for the completed requests.
273  *
274  *******************************************************************************
275  *
276  * @retval The amount of finished requests updated.
277  *
278  *******************************************************************************/
279 static inline int
280 cpucblk_drequest_rhs_bwd_handle( const args_solve_t *enums,
281  SolverMatrix *solvmtx,
282  pastix_rhs_t rhsb,
283  int threadid,
284  int outcount,
285  const int *indexes,
286  const MPI_Status *statuses )
287 {
288  pastix_int_t i, reqid;
289  int nbrequest = outcount;
290 
291  for ( i = 0; i < outcount; i++ ) {
292  reqid = indexes[i];
293 
294  /*
295  * Handle the reception
296  */
297  if ( solvmtx->reqidx[reqid] == -1 ) {
298  /* We're on a cblk recv, copy datas and restart communications */
299  double *recvbuf;
300  MPI_Status status;
301  int size;
302 
303  memcpy( &status, statuses + i, sizeof(MPI_Status) );
304  MPI_Get_count( &status, PASTIX_MPI_DOUBLE, &size );
305 
306  MALLOC_INTERN( recvbuf, size, double );
307  memcpy( recvbuf, solvmtx->rcoeftab, size * sizeof(double) );
308 
309  solvmtx->fanincnt--;
310 
311  /* Let's restart the communication */
312  assert( solvmtx->fanincnt >= 0 );
313  if ( solvmtx->fanincnt > 0 ) {
314  MPI_Start( solvmtx->reqtab + reqid );
315  nbrequest--;
316  }
317  else {
318  MPI_Request_free( solvmtx->reqtab + reqid );
319  solvmtx->reqtab[reqid] = MPI_REQUEST_NULL;
320  }
321 
322  cpucblk_drequest_rhs_bwd_handle_recv( enums, solvmtx, rhsb,
323  threadid, &status, recvbuf );
324  }
325  /*
326  * Handle the emission
327  */
328  else {
329  SolverCblk *cblk = solvmtx->cblktab + solvmtx->reqidx[ reqid ];
330  assert( cblk->cblktype & CBLK_RECV );
331 
332  cpucblk_drequest_rhs_bwd_handle_send( enums, solvmtx, rhsb, cblk );
333 
334 #if !defined(NDEBUG)
335  solvmtx->reqidx[ reqid ] = -1;
336 #endif
337  solvmtx->recvcnt--;
338  }
339  }
340 
341  return nbrequest;
342 }
343 
344 /**
345  *******************************************************************************
346  *
347  * @ingroup kernel_fact
348  * @brief Progress communications for one process
349  *
350  * If a communication is completed, it will be treated.
351  * If cblktype & CBLK_FANIN : Will deallocate coeftab
352  * If cblktype & CBLK_RECV : Will add cblk to fcblk
353  *
354  *******************************************************************************
355  *
356  * @param[in] enums
357  * Enums needed for the solve.
358  *
359  * @param[inout] solvmtx
360  * The solver matrix structure.
361  *
362  * @param[in] rhsb
363  * The pointer to the rhs data structure that holds the vectors of the
364  * right hand side.
365  *
366  * @param[in] threadid
367  * Id of the thread calling this method.
368  *
369  *******************************************************************************/
370 void
371 cpucblk_dmpi_rhs_bwd_progress( const args_solve_t *enums,
372  SolverMatrix *solvmtx,
373  pastix_rhs_t rhsb,
374  int threadid )
375 {
376  pthread_t tid = pthread_self();
377  int outcount = 1;
378  int nbrequest, nbfree;
379  int indexes[ solvmtx->reqnbr ];
380  MPI_Status statuses[ solvmtx->reqnbr ];
381 
382  /* Check if someone is already communicating or not */
383  pthread_mutex_lock( &pastix_comm_lock );
384  if ( pastix_comm_tid == (pthread_t)-1 ) {
385  pastix_comm_tid = tid;
386  }
387  pthread_mutex_unlock( &pastix_comm_lock );
388 
389  if ( tid != pastix_comm_tid ) {
390  pastix_yield();
391  return;
392  }
393 
394  /*
395  * Let's register the number of active requests.
396  * We now suppose that the current thread is working on the first nbrequest
397  * active in the reqtab array. Additional requests can be posted during this
398  * progression, but it will be with a larger index. Thus, we do not need to
399  * protect every changes in these requests.
400  * When this is done, the requests arrays is locked to be packed, and the
401  * number of requests is updated for the next round.
402  */
403  pastix_atomic_lock( &(solvmtx->reqlock) );
404  nbrequest = solvmtx->reqnum;
405  pastix_atomic_unlock( &(solvmtx->reqlock) );
406 
407  while( (outcount > 0) && (nbrequest > 0) )
408  {
409  MPI_Testsome( nbrequest, solvmtx->reqtab, &outcount, indexes, statuses );
410  nbfree = 0;
411 
412  /* Handle all the completed requests */
413  if ( outcount > 0 ) {
414  nbfree = cpucblk_drequest_rhs_bwd_handle( enums, solvmtx, rhsb, threadid,
415  outcount, indexes, statuses );
416  }
417 
418  /*
419  * Pack the request arrays, and update the number of active requests by
420  * removing the completed ones
421  */
422  pastix_atomic_lock( &(solvmtx->reqlock) );
423  if ( nbfree > 0 ) {
424  cpucblk_dupdate_reqtab( solvmtx );
425  }
426  nbrequest = solvmtx->reqnum;
427  pastix_atomic_unlock( &(solvmtx->reqlock) );
428  }
429 
430  pastix_comm_tid = (pthread_t)-1;
431  pastix_yield();
432 }
433 #endif /* defined(PASTIX_WITH_MPI) */
434 
435 /**
436  *******************************************************************************
437  *
438  * @brief Wait for incoming dependencies, and return when cblk->ctrbcnt has
439  * reached 0.
440  *
441  *******************************************************************************
442  *
443  * @param[in] rank
444  * The rank of the current thread.
445  *
446  * @param[in] enums
447  * Enums needed for the solve.
448  *
449  * @param[inout] solvmtx
450  * The solver matrix structure.
451  *
452  * @param[inout] cblk
453  * The column block that contribute to fcblk.
454  *
455  * @param[in] rhsb
456  * The pointer to the rhs data structure that holds the vectors of the
457  * right hand side.
458  *
459  *******************************************************************************
460  *
461  * @return 1 if the cblk is a fanin, 0 otherwise
462  *
463  *******************************************************************************/
464 int
466  const args_solve_t *enums,
467  SolverMatrix *solvmtx,
468  SolverCblk *cblk,
469  pastix_rhs_t rhsb )
470 {
471 #if defined(PASTIX_WITH_MPI)
472  if ( cblk->cblktype & CBLK_FANIN ) {
473  /*
474  * We are in the sequential case, we progress on communications and
475  * return if nothing.
476  */
477  //cpucblk_dtestsome( side, solvmtx );
478  return 1;
479  }
480 
481  if ( cblk->cblktype & CBLK_RECV ) {
482  return 1;
483  }
484 
485  /* Make sure we receive every contribution */
486  while( cblk->ctrbcnt > 0 ) {
487  cpucblk_dmpi_rhs_bwd_progress( enums, solvmtx, rhsb, rank );
488  }
489 #else
490  assert( !(cblk->cblktype & (CBLK_FANIN | CBLK_RECV)) );
491  do { pastix_yield(); } while( cblk->ctrbcnt > 0 );
492 #endif
493 
494  (void)rank;
495  (void)enums;
496  (void)solvmtx;
497  (void)rhsb;
498 
499  return 0;
500 }
501 
502 /**
503  *******************************************************************************
504  *
505  * @brief Release the dependencies of the given cblk after an update.
506  *
507  *******************************************************************************
508  *
509  * @param[in] enums
510  * Enums needed for the solve.
511  *
512  * @param[inout] solvmtx
513  * The solver matrix structure.
514  *
515  * @param[in] rhsb
516  * The pointer to the rhs data structure that holds the vectors of the
517  * right hand side.
518  *
519  * @param[in] cblk
520  * The column block that contribute to fcblk.
521  *
522  * @param[inout] fcbk
523  * The facing column block that is updated by cblk.
524  *
525  *******************************************************************************/
526 void
528  SolverMatrix *solvmtx,
529  pastix_rhs_t rhsb,
530  const SolverCblk *cblk,
531  SolverCblk *fcbk )
532 {
533  int32_t ctrbcnt;
534  if ( enums->sched == PastixSchedSequential ) {
535  return;
536  }
537  ctrbcnt = pastix_atomic_dec_32b( &(fcbk->ctrbcnt) );
538  if ( !ctrbcnt ) {
539 #if defined(PASTIX_WITH_MPI)
540  if ( fcbk->cblktype & CBLK_RECV ) {
541  cpucblk_disend_rhs_bwd( solvmtx, rhsb, fcbk );
542  return;
543  }
544 #else
545  (void)rhsb;
546 #endif
547  if ( solvmtx->computeQueue ) {
548  pastix_queue_t *queue = solvmtx->computeQueue[ cblk->threadid ];
549  assert( fcbk->priority != -1 );
550  pqueuePush1( queue, fcbk - solvmtx->cblktab, - fcbk->priority );
551  }
552  }
553  (void)enums;
554 }
555 
556 /**
557  *******************************************************************************
558  *
559  * @brief Waitall routine for current cblk request
560  *
561  * It may be possible that some cblk will not be deallocated with the static
562  * scheduler. So a cleanup may be necessary.
563  *
564  *******************************************************************************
565  *
566  * @param[in] enums
567  * Enums needed for the solve.
568  *
569  * @param[in] sched
570  * Define which sched is used
571  * @arg PastixSchedSequential if sequential
572  * @arg PastixSchedStatic if multi-threaded static scheduler
573  * @arg PastixSchedDynamic if multi-threaded dynamic scheduler
574  * No other scheduler is supported.
575  *
576  * @param[inout] solvmtx
577  * The solver matrix structure.
578  *
579  * @param[in] rhsb
580  * The pointer to the rhs data structure that holds the vectors of the
581  * right hand side.
582  *
583  *******************************************************************************/
584 void
586  pastix_int_t sched,
587  SolverMatrix *solvmtx,
588  pastix_rhs_t rhsb )
589 {
590  if ( (sched != PastixSchedSequential) &&
591  (sched != PastixSchedStatic) &&
592  (sched != PastixSchedDynamic) )
593  {
594  return;
595  }
596 #if defined(PASTIX_WITH_MPI)
597  pastix_int_t i;
598  int rc;
599  SolverCblk *cblk;
600  int reqnbr = solvmtx->reqnum;
601  MPI_Status status;
602 
603 #if defined(PASTIX_DEBUG_MPI)
604  fprintf( stderr, "[%2d] Wait for all pending communications\n",
605  solvmtx->clustnum );
606 #endif
607 
608  for ( i=0; i<reqnbr; i++ ) {
609  if ( solvmtx->reqtab[i] == MPI_REQUEST_NULL ) {
610  assert( 0 /* MPI_REQUEST_NULL should have been pushed to the end */ );
611  solvmtx->reqnum--;
612  continue;
613  }
614 
615  /* Make sure that we don't have an already cleaned request in dynamic */
616  assert( solvmtx->reqidx[i] != -1 );
617 
618  rc = MPI_Wait( solvmtx->reqtab + i, &status );
619  assert( rc == MPI_SUCCESS );
620 
621  cblk = solvmtx->cblktab + solvmtx->reqidx[i];
622 
623  /* We should wait only for recv */
624  assert( cblk->cblktype & CBLK_RECV );
625 
626  cpucblk_drequest_rhs_bwd_handle_send( enums, solvmtx, rhsb, cblk );
627 
628  solvmtx->reqnum--;
629  }
630  assert( solvmtx->reqnum == 0 );
631  (void)rc;
632 #else
633  (void)enums;
634  (void)solvmtx;
635  (void)rhsb;
636 #endif
637 }
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition: queue.h:64
Queue structure.
Definition: queue.h:38
void cpucblk_drequest_rhs_bwd_cleanup(const args_solve_t *enums, pastix_int_t sched, SolverMatrix *solvmtx, pastix_rhs_t rhsb)
Waitall routine for current cblk request.
int cpucblk_dincoming_rhs_bwd_deps(int rank, const args_solve_t *enums, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
void cpucblk_drelease_rhs_bwd_deps(const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void solve_cblk_dtrsmsp_backward(const args_solve_t *enums, SolverMatrix *datacode, SolverCblk *cblk, pastix_rhs_t rhsb)
Apply a backward solve related to one cblk to all the right hand side.
@ PastixSchedStatic
Definition: api.h:335
@ PastixSchedDynamic
Definition: api.h:338
@ PastixSchedSequential
Definition: api.h:334
void ** cblkb
Definition: pastixdata.h:162
pastix_int_t ld
Definition: pastixdata.h:160
pastix_int_t n
Definition: pastixdata.h:159
Main PaStiX RHS structure.
Definition: pastixdata.h:155
pastix_int_t reqnbr
Definition: solver.h:271
pastix_atomic_lock_t reqlock
Definition: solver.h:273
pastix_int_t brownum
Definition: solver.h:171
pastix_int_t priority
Definition: solver.h:183
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:329
MPI_Request * reqtab
Definition: solver.h:269
pastix_int_t recvcnt
Definition: solver.h:217
void * rcoeftab
Definition: solver.h:274
pastix_int_t * gcbl2loc
Definition: solver.h:234
SolverBlok *restrict bloktab
Definition: solver.h:229
pastix_int_t fanincnt
Definition: solver.h:214
volatile int32_t ctrbcnt
Definition: solver.h:163
pastix_int_t *restrict browtab
Definition: solver.h:230
volatile int32_t reqnum
Definition: solver.h:272
pastix_int_t lcblknm
Definition: solver.h:143
pastix_int_t gcblknum
Definition: solver.h:174
int threadid
Definition: solver.h:182
pastix_int_t lcolidx
Definition: solver.h:170
pastix_int_t bcscnum
Definition: solver.h:175
SolverCblk *restrict cblktab
Definition: solver.h:228
int8_t cblktype
Definition: solver.h:164
pastix_int_t * reqidx
Definition: solver.h:270
int ownerid
Definition: solver.h:181
Arguments for the solve.
Definition: solver.h:88
Solver block structure.
Definition: solver.h:141
Solver column block structure.
Definition: solver.h:161
Solver column block structure.
Definition: solver.h:203