PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
cpucblk_cmpi_rhs_bwd.c
Go to the documentation of this file.
1/**
2 *
3 * @file cpucblk_cmpi_rhs_bwd.c
4 *
5 * Precision dependent routines to manag communications for the solve part.
6 *
7 * @copyright 2015-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Mathieu Faverge
12 * @author Alycia Lisito
13 * @date 2024-07-05
14 *
15 * @generated from /builds/2mk6rsew/0/solverstack/pastix/kernels/cpucblk_zmpi_rhs_bwd.c, normal z -> c, Tue Feb 25 14:34:59 2025
16 *
17 **/
18#include "common/common.h"
19#include "common/pastixdata.h"
20#include <lapacke.h>
21#include "blend/solver.h"
23#include "kernels.h"
24#include "pastix_ccores.h"
25#include "pastix_clrcores.h"
26
27#if defined( PASTIX_WITH_MPI )
28/**
29 *******************************************************************************
30 *
31 * @brief Asynchronously sends the rhs associated to a cblk->lcolidx to the remote node.
32 *
33 *******************************************************************************
34 *
35 * @param[in] solvmtx
36 * The solver matrix holding the communicator.
37 *
38 * @param[in] rhsb
39 * The pointer to the rhs data structure that holds the vectors of the
40 * right hand side.
41 *
42 * @param[in] cblk
43 * The cblk which defines the part to sent.
44 *
45 *******************************************************************************/
46void
47cpucblk_cisend_rhs_bwd( SolverMatrix *solvmtx,
48 pastix_rhs_t rhsb,
49 SolverCblk *cblk )
50{
51 MPI_Request request;
53 pastix_int_t colnbr = cblk_colnbr(cblk);
54 pastix_int_t size = colnbr * rhsb->n;
55 pastix_int_t idx = - cblk->bcscnum - 1;
56 int rc;
57
58 assert( colnbr <= solvmtx->colmax );
59 assert( cblk->cblktype & CBLK_RECV );
60
61#if defined (PASTIX_DEBUG_MPI)
62 fprintf( stderr, "[%2d] RHS Bwd: Post Isend cblk %ld to %2d at index %ld of size %ld\n",
63 solvmtx->clustnum, (long)cblk->gcblknum, cblk->ownerid,
64 (long)cblk->lcolidx, (long)colnbr );
65#endif
66
67 /* Get the pointer to the right hand side */
68 b = rhsb->b;
69 b += cblk->lcolidx;
70
71 /* Pack the data into a temporary buffer if non-contiguous */
72 assert( rhsb->cblkb[ idx ] == NULL );
73 if ( rhsb->n > 1 ) {
74 rhsb->cblkb[ idx ] = malloc( size * sizeof(pastix_complex32_t) );
75
76 rc = LAPACKE_clacpy_work( LAPACK_COL_MAJOR, 'A', colnbr, rhsb->n,
77 b, rhsb->ld, rhsb->cblkb[ idx ], colnbr );
78 assert( rc == 0 );
79
80 b = rhsb->cblkb[ idx ];
81 }
82
83 rc = MPI_Isend( b, size, PASTIX_MPI_COMPLEX32,
84 cblk->ownerid, cblk->gcblknum, solvmtx->solv_comm, &request );
85 assert( rc == MPI_SUCCESS );
86
87 solverCommMatrixAdd( solvmtx, cblk->ownerid, size * sizeof(pastix_complex32_t) );
88
89 /* Register the request to make it progress */
90 pastix_atomic_lock( &(solvmtx->reqlock) );
91
92 assert( solvmtx->reqidx[ solvmtx->reqnum ] == -1 );
93 assert( solvmtx->reqnum >= 0 );
94 assert( solvmtx->reqnum < solvmtx->reqnbr );
95
96 solvmtx->reqtab[ solvmtx->reqnum ] = request;
97 solvmtx->reqidx[ solvmtx->reqnum ] = cblk - solvmtx->cblktab;
98 solvmtx->reqnum++;
99
100 pastix_atomic_unlock( &(solvmtx->reqlock) );
101
102 (void)rc;
103}
104
105/**
106 *******************************************************************************
107 *
108 * @brief Handle a finished request on a fanin
109 *
110 *******************************************************************************
111 *
112 * @param[in] enums
113 * Enums needed for the solve.
114 *
115 * @param[in] solvmtx
116 * The solver matrix structure.
117 *
118 * @param[in] rhsb
119 * The pointer to the rhs data structure that holds the vectors of the
120 * right hand side.
121 *
122 * @param[inout] cblk
123 * The cblk concerned by the computation.
124 *
125 *******************************************************************************/
126void
127cpucblk_crequest_rhs_bwd_handle_send( const args_solve_t *enums,
128 SolverMatrix *solvmtx,
129 pastix_rhs_t rhsb,
130 const SolverCblk *cblk )
131{
132 pastix_int_t idx = - cblk->bcscnum - 1;
133
134 assert( cblk->cblktype & CBLK_RECV );
135 assert( enums->solve_step == PastixSolveBackward );
136
137#if defined(PASTIX_DEBUG_MPI)
138 {
139 size_t cblksize = cblk_colnbr( cblk ) * rhsb->n * sizeof(pastix_complex32_t);
140
141 fprintf( stderr, "[%2d] RHS Bwd: Isend for cblk %ld toward %2d ( %ld Bytes ) (DONE)\n",
142 solvmtx->clustnum, (long)cblk->gcblknum, cblk->ownerid, (long)cblksize );
143 }
144#endif
145
146 if ( rhsb->cblkb[ idx ] ) {
147 memFree_null( rhsb->cblkb[ idx ] );
148 }
149 (void)solvmtx;
150 (void)enums;
151}
152
153/**
154 *******************************************************************************
155 *
156 * @brief Handle a finished request on a recv cblk.
157 *
158 *******************************************************************************
159 *
160 * @param[in] enums
161 * Enums needed for the solve.
162 *
163 * @param[inout] solvmtx
164 * The solver matrix structure.
165 *
166 * @param[in] rhsb
167 * The pointer to the rhs data structure that holds the vectors of the
168 * right hand side.
169 *
170 * @param[inout] threadid
171 * Id of the thread calling this method.
172 *
173 * @param[inout] status
174 * Statuses of the completed request.
175 *
176 * @param[inout] recvbuf
177 * Reception buffer of the completed request.
178 *
179 *******************************************************************************/
180static inline void
181cpucblk_crequest_rhs_bwd_handle_recv( const args_solve_t *enums,
182 SolverMatrix *solvmtx,
183 pastix_rhs_t rhsb,
184 int threadid,
185 const MPI_Status *status,
186 pastix_complex32_t *recvbuf )
187{
188 SolverCblk *cblk;
189 int src = status->MPI_SOURCE;
190 int tag = status->MPI_TAG;
191 pastix_int_t idx;
192
193 assert( ( 0 <= src ) && ( src < solvmtx->clustnbr ) );
194 assert( ( 0 <= tag ) && ( tag < solvmtx->gcblknbr ) );
195
196 /*
197 * Let's look for the local cblk
198 */
199 cblk = solvmtx->cblktab + solvmtx->gcbl2loc[ tag ];
200 assert( cblk->cblktype & CBLK_FANIN );
201
202#if defined(PASTIX_DEBUG_MPI)
203 {
204 pastix_int_t colnbr;
205 pastix_int_t size;
206 int rc;
207 int count = 0;
208
209 colnbr = cblk_colnbr( cblk );
210 size = colnbr * rhsb->n * sizeof(pastix_complex32_t);
211
212 rc = MPI_Get_count( status, MPI_CHAR, &count );
213 assert( rc == MPI_SUCCESS );
214 assert( count == size );
215
216 /* We can't know the sender easily, so we don't print it */
217 fprintf( stderr, "[%2d] RHS Bwd : recv of size %d/%ld for cblk %ld (DONE)\n",
218 solvmtx->clustnum, count, (long)size, (long)cblk->gcblknum );
219 }
220#endif
221
222 idx = - cblk->bcscnum - 1;
223 rhsb->cblkb[ idx ] = recvbuf;
224
225 cblk->threadid = threadid;
226 if ( solvmtx->computeQueue ) {
227 pastix_queue_t *queue = solvmtx->computeQueue[ cblk->threadid ];
228 const SolverBlok *blok = solvmtx->bloktab + solvmtx->browtab[ cblk[1].brownum-1 ];
229 const SolverCblk *fcbk = solvmtx->cblktab + blok->lcblknm;
230
231 pqueuePush1( queue, cblk - solvmtx->cblktab, - fcbk->priority );
232 }
233 else {
234 solve_cblk_ctrsmsp_backward( enums, solvmtx, cblk, rhsb );
235 /* Check it has been freed */
236 assert( rhsb->cblkb[ idx ] == NULL );
237 }
238
239 (void)src;
240}
241
242/**
243 *******************************************************************************
244 *
245 * @brief Handle a finished request.
246 *
247 * If cblktype & CBLK_FANIN : Will deallocate the coeftab
248 * If cblktype & CBLK_RECV : Will add cblk and deallocate the coeftab
249 *
250 *******************************************************************************
251 *
252 * @param[in] enums
253 * Enums needed for the solve.
254 *
255 * @param[inout] solvmtx
256 * The solver matrix structure.
257 *
258 * @param[in] rhsb
259 * The pointer to the rhs data structure that holds the vectors of the
260 * right hand side.
261 *
262 * @param[in] threadid
263 * Id of the thread calling this method.
264 *
265 * @param[in] outcount
266 * Amount of finshed requests.
267 *
268 * @param[in] indexes
269 * Array of completed requests.
270 *
271 * @param[in] statuses
272 * Array of statuses for the completed requests.
273 *
274 *******************************************************************************
275 *
276 * @retval The amount of finished requests updated.
277 *
278 *******************************************************************************/
279static inline int
280cpucblk_crequest_rhs_bwd_handle( const args_solve_t *enums,
281 SolverMatrix *solvmtx,
282 pastix_rhs_t rhsb,
283 int threadid,
284 int outcount,
285 const int *indexes,
286 const MPI_Status *statuses )
287{
288 pastix_int_t i, reqid;
289 int nbrequest = outcount;
290
291 for ( i = 0; i < outcount; i++ ) {
292 reqid = indexes[i];
293
294 /*
295 * Handle the reception
296 */
297 if ( solvmtx->reqidx[reqid] == -1 ) {
298 /* We're on a cblk recv, copy datas and restart communications */
299 pastix_complex32_t *recvbuf;
300 MPI_Status status;
301 int size;
302
303 memcpy( &status, statuses + i, sizeof(MPI_Status) );
304 MPI_Get_count( &status, PASTIX_MPI_COMPLEX32, &size );
305
306 MALLOC_INTERN( recvbuf, size, pastix_complex32_t );
307 memcpy( recvbuf, solvmtx->rcoeftab, size * sizeof(pastix_complex32_t) );
308
309 solvmtx->fanincnt--;
310
311 /* Let's restart the communication */
312 assert( solvmtx->fanincnt >= 0 );
313 if ( solvmtx->fanincnt > 0 ) {
314 MPI_Start( solvmtx->reqtab + reqid );
315 nbrequest--;
316 }
317 else {
318 MPI_Request_free( solvmtx->reqtab + reqid );
319 solvmtx->reqtab[reqid] = MPI_REQUEST_NULL;
320 }
321
322 cpucblk_crequest_rhs_bwd_handle_recv( enums, solvmtx, rhsb,
323 threadid, &status, recvbuf );
324 }
325 /*
326 * Handle the emission
327 */
328 else {
329 SolverCblk *cblk = solvmtx->cblktab + solvmtx->reqidx[ reqid ];
330 assert( cblk->cblktype & CBLK_RECV );
331
332 cpucblk_crequest_rhs_bwd_handle_send( enums, solvmtx, rhsb, cblk );
333
334#if !defined(NDEBUG)
335 solvmtx->reqidx[ reqid ] = -1;
336#endif
337 solvmtx->recvcnt--;
338 }
339 }
340
341 return nbrequest;
342}
343
344/**
345 *******************************************************************************
346 *
347 * @ingroup kernel_fact
348 * @brief Progress communications for one process
349 *
350 * If a communication is completed, it will be treated.
351 * If cblktype & CBLK_FANIN : Will deallocate coeftab
352 * If cblktype & CBLK_RECV : Will add cblk to fcblk
353 *
354 *******************************************************************************
355 *
356 * @param[in] enums
357 * Enums needed for the solve.
358 *
359 * @param[inout] solvmtx
360 * The solver matrix structure.
361 *
362 * @param[in] rhsb
363 * The pointer to the rhs data structure that holds the vectors of the
364 * right hand side.
365 *
366 * @param[in] threadid
367 * Id of the thread calling this method.
368 *
369 *******************************************************************************/
370void
371cpucblk_cmpi_rhs_bwd_progress( const args_solve_t *enums,
372 SolverMatrix *solvmtx,
373 pastix_rhs_t rhsb,
374 int threadid )
375{
376 pthread_t tid = pthread_self();
377 int outcount = 1;
378 int nbrequest, nbfree;
379 int indexes[ solvmtx->reqnbr ];
380 MPI_Status statuses[ solvmtx->reqnbr ];
381
382 /* Check if someone is already communicating or not */
383 pthread_mutex_lock( &pastix_comm_lock );
384 if ( pastix_comm_tid == (pthread_t)-1 ) {
385 pastix_comm_tid = tid;
386 }
387 pthread_mutex_unlock( &pastix_comm_lock );
388
389 if ( tid != pastix_comm_tid ) {
390 pastix_yield();
391 return;
392 }
393
394 /*
395 * Let's register the number of active requests.
396 * We now suppose that the current thread is working on the first nbrequest
397 * active in the reqtab array. Additional requests can be posted during this
398 * progression, but it will be with a larger index. Thus, we do not need to
399 * protect every changes in these requests.
400 * When this is done, the requests arrays is locked to be packed, and the
401 * number of requests is updated for the next round.
402 */
403 pastix_atomic_lock( &(solvmtx->reqlock) );
404 nbrequest = solvmtx->reqnum;
405 pastix_atomic_unlock( &(solvmtx->reqlock) );
406
407 while( (outcount > 0) && (nbrequest > 0) )
408 {
409 MPI_Testsome( nbrequest, solvmtx->reqtab, &outcount, indexes, statuses );
410 nbfree = 0;
411
412 /* Handle all the completed requests */
413 if ( outcount > 0 ) {
414 nbfree = cpucblk_crequest_rhs_bwd_handle( enums, solvmtx, rhsb, threadid,
415 outcount, indexes, statuses );
416 }
417
418 /*
419 * Pack the request arrays, and update the number of active requests by
420 * removing the completed ones
421 */
422 pastix_atomic_lock( &(solvmtx->reqlock) );
423 if ( nbfree > 0 ) {
424 cpucblk_cupdate_reqtab( solvmtx );
425 }
426 nbrequest = solvmtx->reqnum;
427 pastix_atomic_unlock( &(solvmtx->reqlock) );
428 }
429
430 pastix_comm_tid = (pthread_t)-1;
431 pastix_yield();
432}
433#endif /* defined(PASTIX_WITH_MPI) */
434
435/**
436 *******************************************************************************
437 *
438 * @brief Wait for incoming dependencies, and return when cblk->ctrbcnt has
439 * reached 0.
440 *
441 *******************************************************************************
442 *
443 * @param[in] rank
444 * The rank of the current thread.
445 *
446 * @param[in] enums
447 * Enums needed for the solve.
448 *
449 * @param[inout] solvmtx
450 * The solver matrix structure.
451 *
452 * @param[inout] cblk
453 * The column block that contribute to fcblk.
454 *
455 * @param[in] rhsb
456 * The pointer to the rhs data structure that holds the vectors of the
457 * right hand side.
458 *
459 *******************************************************************************
460 *
461 * @return 1 if the cblk is a fanin, 0 otherwise
462 *
463 *******************************************************************************/
464int
466 const args_solve_t *enums,
467 SolverMatrix *solvmtx,
468 SolverCblk *cblk,
469 pastix_rhs_t rhsb )
470{
471#if defined(PASTIX_WITH_MPI)
472 if ( cblk->cblktype & CBLK_FANIN ) {
473 /*
474 * We are in the sequential case, we progress on communications and
475 * return if nothing.
476 */
477 //cpucblk_ctestsome( side, solvmtx );
478 return 1;
479 }
480
481 if ( cblk->cblktype & CBLK_RECV ) {
482 return 1;
483 }
484
485 /* Make sure we receive every contribution */
486 while( cblk->ctrbcnt > 0 ) {
487 cpucblk_cmpi_rhs_bwd_progress( enums, solvmtx, rhsb, rank );
488 }
489#else
490 assert( !(cblk->cblktype & (CBLK_FANIN | CBLK_RECV)) );
491 do { pastix_yield(); } while( cblk->ctrbcnt > 0 );
492#endif
493
494 (void)rank;
495 (void)enums;
496 (void)solvmtx;
497 (void)rhsb;
498
499 return 0;
500}
501
502/**
503 *******************************************************************************
504 *
505 * @brief Release the dependencies of the given cblk after an update.
506 *
507 *******************************************************************************
508 *
509 * @param[in] enums
510 * Enums needed for the solve.
511 *
512 * @param[inout] solvmtx
513 * The solver matrix structure.
514 *
515 * @param[in] rhsb
516 * The pointer to the rhs data structure that holds the vectors of the
517 * right hand side.
518 *
519 * @param[in] cblk
520 * The column block that contribute to fcblk.
521 *
522 * @param[inout] fcbk
523 * The facing column block that is updated by cblk.
524 *
525 *******************************************************************************/
526void
528 SolverMatrix *solvmtx,
529 pastix_rhs_t rhsb,
530 const SolverCblk *cblk,
531 SolverCblk *fcbk )
532{
533 int32_t ctrbcnt;
534 if ( enums->sched == PastixSchedSequential ) {
535 return;
536 }
537 ctrbcnt = pastix_atomic_dec_32b( &(fcbk->ctrbcnt) );
538 if ( !ctrbcnt ) {
539#if defined(PASTIX_WITH_MPI)
540 if ( fcbk->cblktype & CBLK_RECV ) {
541 cpucblk_cisend_rhs_bwd( solvmtx, rhsb, fcbk );
542 return;
543 }
544#else
545 (void)rhsb;
546#endif
547 if ( solvmtx->computeQueue ) {
548 pastix_queue_t *queue = solvmtx->computeQueue[ cblk->threadid ];
549 assert( fcbk->priority != -1 );
550 pqueuePush1( queue, fcbk - solvmtx->cblktab, - fcbk->priority );
551 }
552 }
553 (void)enums;
554}
555
556/**
557 *******************************************************************************
558 *
559 * @brief Waitall routine for current cblk request
560 *
561 * It may be possible that some cblk will not be deallocated with the static
562 * scheduler. So a cleanup may be necessary.
563 *
564 *******************************************************************************
565 *
566 * @param[in] enums
567 * Enums needed for the solve.
568 *
569 * @param[in] sched
570 * Define which sched is used
571 * @arg PastixSchedSequential if sequential
572 * @arg PastixSchedStatic if multi-threaded static scheduler
573 * @arg PastixSchedDynamic if multi-threaded dynamic scheduler
574 * No other scheduler is supported.
575 *
576 * @param[inout] solvmtx
577 * The solver matrix structure.
578 *
579 * @param[in] rhsb
580 * The pointer to the rhs data structure that holds the vectors of the
581 * right hand side.
582 *
583 *******************************************************************************/
584void
586 pastix_int_t sched,
587 SolverMatrix *solvmtx,
588 pastix_rhs_t rhsb )
589{
590 if ( (sched != PastixSchedSequential) &&
591 (sched != PastixSchedStatic) &&
592 (sched != PastixSchedDynamic) )
593 {
594 return;
595 }
596#if defined(PASTIX_WITH_MPI)
597 pastix_int_t i;
598 int rc;
599 SolverCblk *cblk;
600 int reqnbr = solvmtx->reqnum;
601 MPI_Status status;
602
603#if defined(PASTIX_DEBUG_MPI)
604 fprintf( stderr, "[%2d] Wait for all pending communications\n",
605 solvmtx->clustnum );
606#endif
607
608 for ( i=0; i<reqnbr; i++ ) {
609 if ( solvmtx->reqtab[i] == MPI_REQUEST_NULL ) {
610 assert( 0 /* MPI_REQUEST_NULL should have been pushed to the end */ );
611 solvmtx->reqnum--;
612 continue;
613 }
614
615 /* Make sure that we don't have an already cleaned request in dynamic */
616 assert( solvmtx->reqidx[i] != -1 );
617
618 rc = MPI_Wait( solvmtx->reqtab + i, &status );
619 assert( rc == MPI_SUCCESS );
620
621 cblk = solvmtx->cblktab + solvmtx->reqidx[i];
622
623 /* We should wait only for recv */
624 assert( cblk->cblktype & CBLK_RECV );
625
626 cpucblk_crequest_rhs_bwd_handle_send( enums, solvmtx, rhsb, cblk );
627
628 solvmtx->reqnum--;
629 }
630 assert( solvmtx->reqnum == 0 );
631 (void)rc;
632#else
633 (void)enums;
634 (void)solvmtx;
635 (void)rhsb;
636#endif
637}
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
float _Complex pastix_complex32_t
Definition datatypes.h:76
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition queue.h:64
Queue structure.
Definition queue.h:38
int cpucblk_cincoming_rhs_bwd_deps(int rank, const args_solve_t *enums, SolverMatrix *solvmtx, SolverCblk *cblk, pastix_rhs_t rhsb)
Wait for incoming dependencies, and return when cblk->ctrbcnt has reached 0.
void cpucblk_crequest_rhs_bwd_cleanup(const args_solve_t *enums, pastix_int_t sched, SolverMatrix *solvmtx, pastix_rhs_t rhsb)
Waitall routine for current cblk request.
void cpucblk_crelease_rhs_bwd_deps(const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void solve_cblk_ctrsmsp_backward(const args_solve_t *enums, SolverMatrix *datacode, SolverCblk *cblk, pastix_rhs_t rhsb)
Apply a backward solve related to one cblk to all the right hand side.
@ PastixSchedStatic
Definition api.h:335
@ PastixSchedDynamic
Definition api.h:338
@ PastixSchedSequential
Definition api.h:334
void ** cblkb
Definition pastixdata.h:162
pastix_int_t ld
Definition pastixdata.h:160
pastix_int_t n
Definition pastixdata.h:159
Main PaStiX RHS structure.
Definition pastixdata.h:155
pastix_int_t reqnbr
Definition solver.h:271
pastix_atomic_lock_t reqlock
Definition solver.h:273
pastix_int_t brownum
Definition solver.h:171
pastix_int_t priority
Definition solver.h:183
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition solver.h:329
MPI_Request * reqtab
Definition solver.h:269
pastix_int_t recvcnt
Definition solver.h:217
void * rcoeftab
Definition solver.h:274
pastix_int_t * gcbl2loc
Definition solver.h:234
SolverBlok *restrict bloktab
Definition solver.h:229
pastix_int_t fanincnt
Definition solver.h:214
volatile int32_t ctrbcnt
Definition solver.h:163
pastix_int_t *restrict browtab
Definition solver.h:230
volatile int32_t reqnum
Definition solver.h:272
pastix_int_t lcblknm
Definition solver.h:143
pastix_int_t gcblknum
Definition solver.h:174
pastix_int_t lcolidx
Definition solver.h:170
pastix_int_t bcscnum
Definition solver.h:175
SolverCblk *restrict cblktab
Definition solver.h:228
int8_t cblktype
Definition solver.h:164
pastix_int_t * reqidx
Definition solver.h:270
Arguments for the solve.
Definition solver.h:88
Solver block structure.
Definition solver.h:141
Solver column block structure.
Definition solver.h:161
Solver column block structure.
Definition solver.h:203