PaStiX Handbook  6.3.1
cpucblk_zmpi_rhs_bwd.c
Go to the documentation of this file.
1 /**
2  *
3  * @file cpucblk_zmpi_rhs_bwd.c
4  *
5  * Precision dependent routines to manage communications for the solve part.
6  *
7  * @copyright 2015-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.1
11  * @author Mathieu Faverge
12  * @author Alycia Lisito
13  * @date 2023-09-20
14  *
15  * @generated from /builds/solverstack/pastix/kernels/cpucblk_zmpi_rhs_bwd.c, normal z -> z, Thu Nov 23 09:55:48 2023
16  *
17  **/
18 #include "common/common.h"
19 #include "common/pastixdata.h"
20 #include <lapacke.h>
21 #include "blend/solver.h"
23 #include "kernels.h"
24 #include "pastix_zcores.h"
25 #include "pastix_zlrcores.h"
26 
27 #if defined( PASTIX_WITH_MPI )
28 /**
29  *******************************************************************************
30  *
31  * @brief Asynchronously sends the rhs associated to a cblk->lcolidx to the remote node.
32  *
33  *******************************************************************************
34  *
35  * @param[in] solvmtx
36  * The solver matrix holding the communicator.
37  *
38  * @param[in] rhsb
39  * The pointer to the rhs data structure that holds the vectors of the
40  * right hand side.
41  *
42  * @param[in] cblk
43  * The cblk which defines the part to sent.
44  *
45  *******************************************************************************/
46 void
47 cpucblk_zisend_rhs_bwd( SolverMatrix *solvmtx,
48  pastix_rhs_t rhsb,
49  SolverCblk *cblk )
50 {
51  MPI_Request request;
52  pastix_complex64_t *b;
53  pastix_int_t colnbr = cblk_colnbr(cblk);
54  pastix_int_t size = colnbr * rhsb->n;
55  pastix_int_t idx = - cblk->bcscnum - 1;
56  int rc;
57 
58  assert( colnbr <= solvmtx->colmax );
59  assert( cblk->cblktype & CBLK_RECV );
60 
61 #if defined (PASTIX_DEBUG_MPI)
62  fprintf( stderr, "[%2d] RHS Bwd: Post Isend cblk %ld to %2d at index %ld of size %ld\n",
63  solvmtx->clustnum, (long)cblk->gcblknum, cblk->ownerid,
64  (long)cblk->lcolidx, (long)colnbr );
65 #endif
66 
67  /* Get the pointer to the right hand side */
68  b = rhsb->b;
69  b += cblk->lcolidx;
70 
71  /* Pack the data into a temporary buffer if non-contiguous */
72  assert( rhsb->cblkb[ idx ] == NULL );
73  if ( rhsb->n > 1 ) {
74  rhsb->cblkb[ idx ] = malloc( size * sizeof(pastix_complex64_t) );
75 
76  rc = LAPACKE_zlacpy_work( LAPACK_COL_MAJOR, 'A', colnbr, rhsb->n,
77  b, rhsb->ld, rhsb->cblkb[ idx ], colnbr );
78  assert( rc == 0 );
79 
80  b = rhsb->cblkb[ idx ];
81  }
82 
83  rc = MPI_Isend( b, size, PASTIX_MPI_COMPLEX64,
84  cblk->ownerid, cblk->gcblknum, solvmtx->solv_comm, &request );
85  assert( rc == MPI_SUCCESS );
86 
87  solverCommMatrixAdd( solvmtx, cblk->ownerid, size * sizeof(pastix_complex64_t) );
88 
89  /* Register the request to make it progress */
90  pastix_atomic_lock( &(solvmtx->reqlock) );
91 
92  assert( solvmtx->reqidx[ solvmtx->reqnum ] == -1 );
93  assert( solvmtx->reqnum >= 0 );
94  assert( solvmtx->reqnum < solvmtx->reqnbr );
95 
96  solvmtx->reqtab[ solvmtx->reqnum ] = request;
97  solvmtx->reqidx[ solvmtx->reqnum ] = cblk - solvmtx->cblktab;
98  solvmtx->reqnum++;
99 
100  pastix_atomic_unlock( &(solvmtx->reqlock) );
101 
102  (void)rc;
103 }
104 
105 /**
106  *******************************************************************************
107  *
108  * @brief Handle a finished request on a fanin
109  *
110  *******************************************************************************
111  *
112  * @param[in] enums
113  * Enums needed for the solve.
114  *
115  * @param[in] solvmtx
116  * The solver matrix structure.
117  *
118  * @param[in] rhsb
119  * The pointer to the rhs data structure that holds the vectors of the
120  * right hand side.
121  *
122  * @param[inout] cblk
123  * The cblk concerned by the computation.
124  *
125  *******************************************************************************/
126 void
127 cpucblk_zrequest_rhs_bwd_handle_send( const args_solve_t *enums,
128  SolverMatrix *solvmtx,
129  pastix_rhs_t rhsb,
130  const SolverCblk *cblk )
131 {
132  pastix_int_t idx = - cblk->bcscnum - 1;
133 
134  assert( cblk->cblktype & CBLK_RECV );
135  assert( enums->solve_step == PastixSolveBackward );
136 
137 #if defined(PASTIX_DEBUG_MPI)
138  {
139  size_t cblksize = cblk_colnbr( cblk ) * rhsb->n * sizeof(pastix_complex64_t);
140 
141  fprintf( stderr, "[%2d] RHS Bwd: Isend for cblk %ld toward %2d ( %ld Bytes ) (DONE)\n",
142  solvmtx->clustnum, (long)cblk->gcblknum, cblk->ownerid, (long)cblksize );
143  }
144 #endif
145 
146  if ( rhsb->cblkb[ idx ] ) {
147  memFree_null( rhsb->cblkb[ idx ] );
148  }
149  (void)solvmtx;
150  (void)enums;
151 }
152 
/**
 *******************************************************************************
 *
 * @brief Handle a finished reception request on a fanin cblk: attach the
 *        received buffer to the rhs structure and schedule (or directly run)
 *        the associated backward-solve computation.
 *
 *******************************************************************************
 *
 * @param[in] enums
 *          Enums needed for the solve.
 *
 * @param[inout] solvmtx
 *          The solver matrix structure.
 *
 * @param[in] rhsb
 *          The pointer to the rhs data structure that holds the vectors of the
 *          right hand side.
 *
 * @param[inout] threadid
 *          Id of the thread calling this method.
 *
 * @param[inout] status
 *          Status of the completed request; provides the source rank and the
 *          tag (which encodes the global cblk index).
 *
 * @param[inout] recvbuf
 *          Reception buffer of the completed request. Ownership is transferred
 *          to rhsb->cblkb[]; the solve kernel is expected to free it.
 *
 *******************************************************************************/
static inline void
cpucblk_zrequest_rhs_bwd_handle_recv( const args_solve_t *enums,
                                      SolverMatrix       *solvmtx,
                                      pastix_rhs_t        rhsb,
                                      int                 threadid,
                                      const MPI_Status   *status,
                                      pastix_complex64_t *recvbuf )
{
    SolverCblk  *cblk;
    int src = status->MPI_SOURCE; /* only used by the asserts below */
    int tag = status->MPI_TAG;    /* tag == global cblk index (see MPI_Isend) */
    pastix_int_t idx;

    assert( ( 0 <= src ) && ( src < solvmtx->clustnbr ) );
    assert( ( 0 <= tag ) && ( tag < solvmtx->gcblknbr ) );

    /*
     * Let's look for the local cblk: gcbl2loc maps the global index carried by
     * the tag to the local cblktab entry.
     */
    cblk = solvmtx->cblktab + solvmtx->gcbl2loc[ tag ];
    assert( cblk->cblktype & CBLK_FANIN );

#if defined(PASTIX_DEBUG_MPI)
    {
        pastix_int_t colnbr;
        pastix_int_t size;
        int rc;
        int count = 0;

        colnbr = cblk_colnbr( cblk );
        size   = colnbr * rhsb->n * sizeof(pastix_complex64_t);

        /* Sanity check: the received byte count must match the cblk size */
        rc = MPI_Get_count( status, MPI_CHAR, &count );
        assert( rc == MPI_SUCCESS );
        assert( count == size );

        /* We can't know the sender easily, so we don't print it */
        fprintf( stderr, "[%2d] RHS Bwd : recv of size %d/%ld for cblk %ld (DONE)\n",
                 solvmtx->clustnum, count, (long)size, (long)cblk->gcblknum );
    }
#endif

    /* Hand the received buffer over to the rhs structure (bcscnum < 0 for fanin) */
    idx = - cblk->bcscnum - 1;
    rhsb->cblkb[ idx ] = recvbuf;

    cblk->threadid = threadid;
    if ( solvmtx->computeQueue ) {
        pastix_queue_t   *queue = solvmtx->computeQueue[ cblk->threadid ];
        /*
         * cblk[1].brownum-1 indexes the last brow entry of this cblk;
         * the facing cblk of that blok provides the scheduling priority.
         * NOTE(review): confirm this brow/priority convention against the
         * scheduler documentation.
         */
        const SolverBlok *blok  = solvmtx->bloktab + solvmtx->browtab[ cblk[1].brownum-1 ];
        const SolverCblk *fcbk  = solvmtx->cblktab + blok->lcblknm;

        /* Dynamic scheduler: defer the computation to the owning thread */
        pqueuePush1( queue, cblk - solvmtx->cblktab, - fcbk->priority );
    }
    else {
        /* No compute queue: perform the backward solve on this cblk now */
        solve_cblk_ztrsmsp_backward( enums, solvmtx, cblk, rhsb );
        /* Check it has been freed */
        assert( rhsb->cblkb[ idx ] == NULL );
    }
}
239 
/**
 *******************************************************************************
 *
 * @brief Handle a set of completed requests during the backward solve.
 *
 * Completed receptions (reqidx == -1) are copied out of the shared reception
 * buffer and dispatched to the computation; completed emissions release the
 * packing buffer of the sending cblk.
 *
 *******************************************************************************
 *
 * @param[in] enums
 *          Enums needed for the solve.
 *
 * @param[inout] solvmtx
 *          The solver matrix structure.
 *
 * @param[in] rhsb
 *          The pointer to the rhs data structure that holds the vectors of the
 *          right hand side.
 *
 * @param[in] threadid
 *          Id of the thread calling this method.
 *
 * @param[in] outcount
 *          Amount of finished requests.
 *
 * @param[in] indexes
 *          Array of completed requests.
 *
 * @param[in] statuses
 *          Array of statuses for the completed requests.
 *
 *******************************************************************************
 *
 * @retval The amount of finished requests updated.
 *
 *******************************************************************************/
static inline int
cpucblk_zrequest_rhs_bwd_handle( const args_solve_t *enums,
                                 SolverMatrix       *solvmtx,
                                 pastix_rhs_t        rhsb,
                                 int                 threadid,
                                 int                 outcount,
                                 const int          *indexes,
                                 const MPI_Status   *statuses )
{
    pastix_int_t i, reqid;
    int nbrequest = outcount;

    for ( i = 0; i < outcount; i++ ) {
        reqid = indexes[i];

        /*
         * Handle the reception
         */
        if ( solvmtx->reqidx[reqid] == -1 ) {
            /* We're on a cblk recv, copy datas and restart communications */
            pastix_complex64_t *recvbuf;
            MPI_Status status;
            int size;

            memcpy( &status, statuses + i, sizeof(MPI_Status) );
            MPI_Get_count( &status, PASTIX_MPI_COMPLEX64, &size );

            /* Copy out of the shared persistent buffer so it can be reused */
            MALLOC_INTERN( recvbuf, size, pastix_complex64_t );
            memcpy( recvbuf, solvmtx->rcoeftab, size * sizeof(pastix_complex64_t) );

            /* One less fanin contribution expected on this node */
            solvmtx->fanincnt--;

            /* Let's restart the communication */
            assert( solvmtx->fanincnt >= 0 );
            if ( solvmtx->fanincnt > 0 ) {
                /* More receptions expected: restart the persistent request */
                MPI_Start( solvmtx->reqtab + reqid );
                nbrequest--;
            }
            else {
                /* Last expected reception: release the persistent request */
                MPI_Request_free( solvmtx->reqtab + reqid );
                solvmtx->reqtab[reqid] = MPI_REQUEST_NULL;
            }

            cpucblk_zrequest_rhs_bwd_handle_recv( enums, solvmtx, rhsb,
                                                  threadid, &status, recvbuf );
        }
        /*
         * Handle the emission
         */
        else {
            SolverCblk *cblk = solvmtx->cblktab + solvmtx->reqidx[ reqid ];
            assert( cblk->cblktype & CBLK_RECV );

            cpucblk_zrequest_rhs_bwd_handle_send( enums, solvmtx, rhsb, cblk );

#if !defined(NDEBUG)
            /* Mark the slot free so the registration assert holds in debug */
            solvmtx->reqidx[ reqid ] = -1;
#endif
            solvmtx->recvcnt--;
        }
    }

    return nbrequest;
}
341 
/**
 *******************************************************************************
 *
 * @ingroup kernel_fact
 * @brief Progress communications for one process
 *
 * Only one thread at a time is allowed to drive the MPI progression; other
 * callers yield and return immediately. Completed requests are treated, and
 * the request array is packed under the request lock.
 *
 *******************************************************************************
 *
 * @param[in] enums
 *          Enums needed for the solve.
 *
 * @param[inout] solvmtx
 *          The solver matrix structure.
 *
 * @param[in] rhsb
 *          The pointer to the rhs data structure that holds the vectors of the
 *          right hand side.
 *
 * @param[in] threadid
 *          Id of the thread calling this method.
 *
 *******************************************************************************/
void
cpucblk_zmpi_rhs_bwd_progress( const args_solve_t *enums,
                               SolverMatrix       *solvmtx,
                               pastix_rhs_t        rhsb,
                               int                 threadid )
{
    pthread_t  tid = pthread_self();
    int        outcount = 1;
    int        nbrequest, nbfree;
    int        indexes[ solvmtx->reqnbr ];   /* VLAs sized by the max request count */
    MPI_Status statuses[ solvmtx->reqnbr ];

    /* Check if someone is already communicating or not */
    pthread_mutex_lock( &pastix_comm_lock );
    if ( pastix_comm_tid == (pthread_t)-1 ) {
        pastix_comm_tid = tid;
    }
    pthread_mutex_unlock( &pastix_comm_lock );

    /* Another thread owns the communications: nothing to do here */
    if ( tid != pastix_comm_tid ) {
        pastix_yield();
        return;
    }

    /*
     * Let's register the number of active requests.
     * We now suppose that the current thread is working on the first nbrequest
     * active in the reqtab array. Additional requests can be posted during this
     * progression, but it will be with a larger index. Thus, we do not need to
     * protect every changes in these requests.
     * When this is done, the requests arrays is locked to be packed, and the
     * number of requests is updated for the next round.
     */
    pastix_atomic_lock( &(solvmtx->reqlock) );
    nbrequest = solvmtx->reqnum;
    pastix_atomic_unlock( &(solvmtx->reqlock) );

    while( (outcount > 0) && (nbrequest > 0) )
    {
        MPI_Testsome( nbrequest, solvmtx->reqtab, &outcount, indexes, statuses );
        nbfree = 0;

        /* Handle all the completed requests */
        if ( outcount > 0 ) {
            nbfree = cpucblk_zrequest_rhs_bwd_handle( enums, solvmtx, rhsb, threadid,
                                                      outcount, indexes, statuses );
        }

        /*
         * Pack the request arrays, and update the number of active requests by
         * removing the completed ones
         */
        pastix_atomic_lock( &(solvmtx->reqlock) );
        if ( nbfree > 0 ) {
            cpucblk_zupdate_reqtab( solvmtx );
        }
        nbrequest = solvmtx->reqnum;
        pastix_atomic_unlock( &(solvmtx->reqlock) );
    }

    /* Release the communication ownership for the next caller */
    pastix_comm_tid = (pthread_t)-1;
    pastix_yield();
}
431 #endif /* defined(PASTIX_WITH_MPI) */
432 
433 /**
434  *******************************************************************************
435  *
436  * @brief Wait for incoming dependencies, and return when cblk->ctrbcnt has
437  * reached 0.
438  *
439  *******************************************************************************
440  *
441  * @param[in] rank
442  * The rank of the current thread.
443  *
444  * @param[in] enums
445  * Enums needed for the solve.
446  *
447  * @param[inout] solvmtx
448  * The solver matrix structure.
449  *
450  * @param[inout] cblk
451  * The column block that contribute to fcblk.
452  *
453  * @param[in] rhsb
454  * The pointer to the rhs data structure that holds the vectors of the
455  * right hand side.
456  *
457  *******************************************************************************
458  *
459  * @return 1 if the cblk is a fanin, 0 otherwise
460  *
461  *******************************************************************************/
462 int
464  const args_solve_t *enums,
465  SolverMatrix *solvmtx,
466  SolverCblk *cblk,
467  pastix_rhs_t rhsb )
468 {
469 #if defined(PASTIX_WITH_MPI)
470  if ( cblk->cblktype & CBLK_FANIN ) {
471  /*
472  * We are in the sequential case, we progress on communications and
473  * return if nothing.
474  */
475  //cpucblk_ztestsome( side, solvmtx );
476  return 1;
477  }
478 
479  if ( cblk->cblktype & CBLK_RECV ) {
480  return 1;
481  }
482 
483  /* Make sure we receive every contribution */
484  while( cblk->ctrbcnt > 0 ) {
485  cpucblk_zmpi_rhs_bwd_progress( enums, solvmtx, rhsb, rank );
486  }
487 #else
488  assert( !(cblk->cblktype & (CBLK_FANIN | CBLK_RECV)) );
489  do { pastix_yield(); } while( cblk->ctrbcnt > 0 );
490 #endif
491 
492  (void)rank;
493  (void)enums;
494  (void)solvmtx;
495  (void)rhsb;
496 
497  return 0;
498 }
499 
500 /**
501  *******************************************************************************
502  *
503  * @brief Release the dependencies of the given cblk after an update.
504  *
505  *******************************************************************************
506  *
507  * @param[in] enums
508  * Enums needed for the solve.
509  *
510  * @param[inout] solvmtx
511  * The solver matrix structure.
512  *
513  * @param[in] rhsb
514  * The pointer to the rhs data structure that holds the vectors of the
515  * right hand side.
516  *
517  * @param[in] cblk
518  * The column block that contribute to fcblk.
519  *
520  * @param[inout] fcbk
521  * The facing column block that is updated by cblk.
522  *
523  *******************************************************************************/
524 void
526  SolverMatrix *solvmtx,
527  pastix_rhs_t rhsb,
528  const SolverCblk *cblk,
529  SolverCblk *fcbk )
530 {
531  int32_t ctrbcnt;
532  ctrbcnt = pastix_atomic_dec_32b( &(fcbk->ctrbcnt) );
533  if ( !ctrbcnt ) {
534 #if defined(PASTIX_WITH_MPI)
535  if ( fcbk->cblktype & CBLK_RECV ) {
536  cpucblk_zisend_rhs_bwd( solvmtx, rhsb, fcbk );
537  return;
538  }
539 #else
540  (void)rhsb;
541 #endif
542  if ( solvmtx->computeQueue ) {
543  pastix_queue_t *queue = solvmtx->computeQueue[ cblk->threadid ];
544  assert( fcbk->priority != -1 );
545  pqueuePush1( queue, fcbk - solvmtx->cblktab, - fcbk->priority );
546  }
547  }
548  (void)enums;
549 }
550 
551 /**
552  *******************************************************************************
553  *
554  * @brief Waitall routine for current cblk request
555  *
556  * It may be possible that some cblk will not be deallocated with the static
557  * scheduler. So a cleanup may be necessary.
558  *
559  *******************************************************************************
560  *
561  * @param[in] enums
562  * Enums needed for the solve.
563  *
564  * @param[in] sched
565  * Define which sched is used
566  * @arg PastixSchedSequential if sequential
567  * @arg PastixSchedStatic if multi-threaded static scheduler
568  * @arg PastixSchedDynamic if multi-threaded dynamic scheduler
569  * No other scheduler is supported.
570  *
571  * @param[inout] solvmtx
572  * The solver matrix structure.
573  *
574  * @param[in] rhsb
575  * The pointer to the rhs data structure that holds the vectors of the
576  * right hand side.
577  *
578  *******************************************************************************/
579 void
581  pastix_int_t sched,
582  SolverMatrix *solvmtx,
583  pastix_rhs_t rhsb )
584 {
585  if ( (sched != PastixSchedSequential) &&
586  (sched != PastixSchedStatic) &&
587  (sched != PastixSchedDynamic) )
588  {
589  return;
590  }
591 #if defined(PASTIX_WITH_MPI)
592  pastix_int_t i;
593  int rc;
594  SolverCblk *cblk;
595  int reqnbr = solvmtx->reqnum;
596  MPI_Status status;
597 
598 #if defined(PASTIX_DEBUG_MPI)
599  fprintf( stderr, "[%2d] Wait for all pending communications\n",
600  solvmtx->clustnum );
601 #endif
602 
603  for ( i=0; i<reqnbr; i++ ) {
604  if ( solvmtx->reqtab[i] == MPI_REQUEST_NULL ) {
605  assert( 0 /* MPI_REQUEST_NULL should have been pushed to the end */ );
606  solvmtx->reqnum--;
607  continue;
608  }
609 
610  /* Make sure that we don't have an already cleaned request in dynamic */
611  assert( solvmtx->reqidx[i] != -1 );
612 
613  rc = MPI_Wait( solvmtx->reqtab + i, &status );
614  assert( rc == MPI_SUCCESS );
615 
616  cblk = solvmtx->cblktab + solvmtx->reqidx[i];
617 
618  /* We should wait only for recv */
619  assert( cblk->cblktype & CBLK_RECV );
620 
621  cpucblk_zrequest_rhs_bwd_handle_send( enums, solvmtx, rhsb, cblk );
622 
623  solvmtx->reqnum--;
624  }
625  assert( solvmtx->reqnum == 0 );
626  (void)rc;
627 #else
628  (void)enums;
629  (void)solvmtx;
630  (void)rhsb;
631 #endif
632 }
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition: queue.h:64
Queue structure.
Definition: queue.h:38
void cpucblk_zrelease_rhs_bwd_deps(const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void cpucblk_zrequest_rhs_bwd_cleanup(const args_solve_t *enums, pastix_int_t sched, SolverMatrix *solvmtx, pastix_rhs_t rhsb)
Waitall routine for current cblk request.
int cpucblk_zincoming_rhs_bwd_deps(int rank, const args_solve_t *enums, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
void solve_cblk_ztrsmsp_backward(const args_solve_t *enums, SolverMatrix *datacode, SolverCblk *cblk, pastix_rhs_t rhsb)
Apply a backward solve related to one cblk to all the right hand side.
@ PastixSchedStatic
Definition: api.h:335
@ PastixSchedDynamic
Definition: api.h:338
@ PastixSchedSequential
Definition: api.h:334
void ** cblkb
Definition: pastixdata.h:157
pastix_int_t ld
Definition: pastixdata.h:155
pastix_int_t n
Definition: pastixdata.h:154
Main PaStiX RHS structure.
Definition: pastixdata.h:150
pastix_int_t reqnbr
Definition: solver.h:266
pastix_atomic_lock_t reqlock
Definition: solver.h:268
pastix_int_t brownum
Definition: solver.h:166
pastix_int_t priority
Definition: solver.h:177
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:324
MPI_Request * reqtab
Definition: solver.h:264
pastix_int_t recvcnt
Definition: solver.h:213
void * rcoeftab
Definition: solver.h:269
pastix_int_t * gcbl2loc
Definition: solver.h:228
SolverBlok *restrict bloktab
Definition: solver.h:223
pastix_int_t fanincnt
Definition: solver.h:210
volatile int32_t ctrbcnt
Definition: solver.h:158
pastix_int_t *restrict browtab
Definition: solver.h:224
volatile int32_t reqnum
Definition: solver.h:267
pastix_int_t lcblknm
Definition: solver.h:139
pastix_int_t gcblknum
Definition: solver.h:169
int threadid
Definition: solver.h:176
pastix_int_t lcolidx
Definition: solver.h:165
pastix_int_t bcscnum
Definition: solver.h:170
SolverCblk *restrict cblktab
Definition: solver.h:222
int8_t cblktype
Definition: solver.h:159
pastix_int_t * reqidx
Definition: solver.h:265
int ownerid
Definition: solver.h:175
Arguments for the solve.
Definition: solver.h:85
Solver block structure.
Definition: solver.h:137
Solver column block structure.
Definition: solver.h:156
Solver column block structure.
Definition: solver.h:200