PaStiX Handbook  6.3.2
solve_dtrsmsp.c
Go to the documentation of this file.
1 /**
2  *
3  * @file solve_dtrsmsp.c
4  *
5  * PaStiX solve kernels routines
6  *
7  * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.2
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Xavier Lacoste
14  * @author Tony Delarue
15  * @author Vincent Bridonneau
16  * @author Alycia Lisito
17  * @author Nolan Bredel
18  * @date 2023-11-10
19  * @generated from /builds/solverstack/pastix/kernels/solve_ztrsmsp.c, normal z -> d, Wed Dec 13 12:09:18 2023
20  *
21  **/
22 #include "common.h"
23 #include "cblas.h"
24 #include "blend/solver.h"
25 #include "kernels_trace.h"
26 #include "pastix_dcores.h"
27 #include "pastix_dlrcores.h"
28 
#ifndef DOXYGEN_SHOULD_SKIP_THIS
/*
 * Scalar constants handed to the BLAS calls of this file as alpha/beta
 * coefficients.
 */
static double dzero =  0.0;
static double done  =  1.0;
static double mdone = -1.0;
#endif /* DOXYGEN_SHOULD_SKIP_THIS */
34 
35 /**
36  *******************************************************************************
37  *
38  * @brief Apply a solve trsm update related to a diagonal block of the matrix A.
39  *
40  *******************************************************************************
41  *
42  * @param[in] side
43  * Specify the side parameter of the TRSM.
44  *
45  * @param[in] uplo
46  * Specify the uplo parameter of the TRSM.
47  *
48  * @param[in] trans
49  * Specify the transposition used for the matrix A in the
50  * computation. It has to be either PastixTrans or PastixTrans.
51  *
52  * @param[in] diag
53  * Specify if the off-diagonal blocks are unit triangular. It has to be
54  * either PastixUnit or PastixNonUnit.
55  *
56  * @param[in] cblk
57  * The cblk structure that corresponds to the A and B matrix.
58  *
59  * @param[in] nrhs
60  * The number of right hand side.
61  *
62  * @param[in] dataA
63  * The pointer to the correct representation of the data of A.
64  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
65  * - pastix_lr_block if the block is compressed.
66  *
67  * @param[inout] b
68  * The pointer to the matrix B, that is a portion of the right hand
69  * side to solve.
70  *
71  * @param[in] ldb
72  * The leading dimension of B.
73  *
74  *******************************************************************************/
75 void
77  pastix_uplo_t uplo,
78  pastix_trans_t trans,
79  pastix_diag_t diag,
80  const SolverCblk *cblk,
81  int nrhs,
82  const void *dataA,
83  double *b,
84  int ldb )
85 {
86  const double *A;
87  pastix_int_t n;
88  pastix_int_t lda;
89 
90  n = cblk_colnbr( cblk );
91 
92  if ( cblk->cblktype & CBLK_COMPRESSED ) {
93  const pastix_lrblock_t *lrA = (const pastix_lrblock_t *)dataA;
94  assert( lrA->rk == -1 );
95  A = lrA->u;
96  lda = n;
97  }
98  else {
99  A = (const double *)dataA;
100  lda = (cblk->cblktype & CBLK_LAYOUT_2D) ? n : cblk->stride;
101  }
102 
103  cblas_dtrsm(
104  CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
105  (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
106  n, nrhs,
107  (done), A, lda,
108  b, ldb );
109 }
110 
111 /**
112  *******************************************************************************
113  *
114  * @brief Apply a solve gemm update related to a single block of the matrix A.
115  *
116  *******************************************************************************
117  *
118  * @param[in] side
119  * Specify whether the blok parameter belongs to cblk (PastixLeft), or
120  * to fcbk (PastixRight).
121  *
122  * @param[in] trans
123  * Specify the transposition used for the matrix A in the
124  * computation. It has to be either PastixTrans or PastixTrans.
125  *
126  * @param[in] nrhs
127  * The number of right hand side.
128  *
129  * @param[in] cblk
130  * The cblk structure that corresponds to the B matrix.
131  *
132  * @param[in] blok
133  * The blok structure that corresponds to the A matrix, and that
134  * belongs either to cblk or fcbk depending on the side parameter.
135  *
136  * @param[inout] fcbk
137  * The cblk structure that corresponds to the C matrix.
138  *
139  * @param[in] dataA
140  * The pointer to the correct representation of the data of A.
141  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
142  * - pastix_lr_block if the block is compressed.
143  *
144  * @param[in] B
145  * The pointer to the matrix B, that is a portion of the right hand
146  * side.
147  *
148  * @param[in] ldb
149  * The leading dimension of B.
150  *
151  * @param[inout] C
152  * The pointer to the matrix C, that is the updated portion of the
153  * right hand side.
154  *
155  * @param[in] ldc
156  * The leading dimension of C.
157  *
158  *******************************************************************************/
159 void
161  pastix_trans_t trans,
162  pastix_int_t nrhs,
163  const SolverCblk *cblk,
164  const SolverBlok *blok,
165  SolverCblk *fcbk,
166  const void *dataA,
167  const double *B,
168  pastix_int_t ldb,
169  double *C,
170  pastix_int_t ldc )
171 {
172  pastix_int_t m, n, lda;
173  pastix_int_t offB, offC;
174  const SolverCblk *bowner;
175 
176  if ( side == PastixLeft ) {
177  /*
178  * Blok should belong to cblk
179  */
180  bowner = cblk;
181 
182  m = blok_rownbr( blok );
183  n = cblk_colnbr( cblk );
184  lda = m;
185 
186  offB = 0;
187  offC = blok->frownum - fcbk->fcolnum;
188  }
189  else {
190  /*
191  * Blok should belong to fcbk
192  */
193  bowner = fcbk;
194 
195  m = cblk_colnbr( fcbk );
196  n = blok_rownbr( blok );
197  lda = n;
198 
199  offB = blok->frownum - cblk->fcolnum;
200  offC = 0;
201  }
202 
203  assert( (blok > bowner[0].fblokptr) &&
204  (blok < bowner[1].fblokptr) );
205 
206  if ( bowner->cblktype & CBLK_COMPRESSED ) {
207  const pastix_lrblock_t *lrA = dataA;
208  double *tmp;
209 
210  switch (lrA->rk){
211  case 0:
212  break;
213  case -1:
214  pastix_cblk_lock( fcbk );
215  cblas_dgemm(
216  CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
217  m, nrhs, n,
218  (mdone), lrA->u, lda,
219  B + offB, ldb,
220  (done), C + offC, ldc );
221  pastix_cblk_unlock( fcbk );
222  break;
223  default:
224  MALLOC_INTERN( tmp, lrA->rk * nrhs, double);
225  if (trans == PastixNoTrans) {
226  cblas_dgemm(
227  CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
228  lrA->rk, nrhs, n,
229  (done), lrA->v, lrA->rkmax,
230  B + offB, ldb,
231  (dzero), tmp, lrA->rk );
232 
233  pastix_cblk_lock( fcbk );
234  cblas_dgemm(
235  CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
236  m, nrhs, lrA->rk,
237  (mdone), lrA->u, lda,
238  tmp, lrA->rk,
239  (done), C + offC, ldc );
240  pastix_cblk_unlock( fcbk );
241  }
242  else {
243  cblas_dgemm(
244  CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
245  lrA->rk, nrhs, n,
246  (done), lrA->u, lda,
247  B + offB, ldb,
248  (dzero), tmp, lrA->rk );
249 
250  pastix_cblk_lock( fcbk );
251  cblas_dgemm(
252  CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
253  m, nrhs, lrA->rk,
254  (mdone), lrA->v, lrA->rkmax,
255  tmp, lrA->rk,
256  (done), C + offC, ldc );
257  pastix_cblk_unlock( fcbk );
258  }
259  memFree_null(tmp);
260  break;
261  }
262  }
263  else{
264  const double *A = dataA;
265  lda = (bowner->cblktype & CBLK_LAYOUT_2D) ? lda : bowner->stride;
266 
267  pastix_cblk_lock( fcbk );
268  cblas_dgemm(
269  CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
270  m, nrhs, n,
271  (mdone), A, lda,
272  B + offB, ldb,
273  (done), C + offC, ldc );
274  pastix_cblk_unlock( fcbk );
275  }
276 }
277 
278 /**
279  *******************************************************************************
280  *
281  * @brief Apply a forward solve related to one cblk to all the right hand side.
282  *
283  *******************************************************************************
284  *
285  * @param[in] enums
286  * Enums needed for the solve.
287  *
288  * @param[in] datacode
289  * The SolverMatrix structure from PaStiX.
290  *
291  * @param[in] cblk
292  * The cblk structure to which block belongs to. The A and B pointers
293  * must be the coeftab of this column block.
294  * Next column blok must be accessible through cblk[1].
295  *
296  * @param[inout] rhsb
297  * The pointer to the rhs data structure that holds the vectors of the
298  * right hand side.
299  *
300  *******************************************************************************/
301 void
303  SolverMatrix *datacode,
304  const SolverCblk *cblk,
305  pastix_rhs_t rhsb )
306 {
307  SolverCblk *fcbk;
308  const SolverBlok *blok;
309  pastix_trans_t tA;
311  const void *dataA = NULL;
312  const pastix_lrblock_t *lrA;
313  const double *A;
314  double *B, *C;
315  pastix_int_t ldb, ldc, k;
316  pastix_fixdbl_t time;
317  pastix_fixdbl_t flops_lvl1 = 0;
318  pastix_fixdbl_t flops_lvl2 = 0;
319  pastix_side_t side = enums->side;
320  pastix_uplo_t uplo = enums->uplo;
321  pastix_trans_t trans = enums->trans;
322  pastix_diag_t diag = enums->diag;
323  pastix_solv_mode_t mode = enums->mode;
324 
326 
327  if ( (side == PastixRight) && (uplo == PastixUpper) && (trans == PastixNoTrans) ) {
328  /* We store U^t, so we swap uplo and trans */
329  tA = PastixTrans;
330  cs = PastixUCoef;
331 
332  /* Right is not handled yet */
333  assert( 0 );
334  }
335  else if ( (side == PastixRight) && (uplo == PastixLower) && (trans != PastixNoTrans) ) {
336  tA = trans;
337  cs = PastixLCoef;
338 
339  /* Right is not handled yet */
340  assert( 0 );
341  }
342  else if ( (side == PastixLeft) && (uplo == PastixUpper) && (trans != PastixNoTrans) ) {
343  /* We store U^t, so we swap uplo and trans */
344  tA = PastixNoTrans;
345  cs = PastixUCoef;
346 
347  /* We do not handle conjtrans in real as we store U^t */
348 #if defined(PRECISION_z) || defined(PRECISION_c)
349  assert( trans != PastixTrans );
350 #endif
351  }
352  else if ( (side == PastixLeft) && (uplo == PastixLower) && (trans == PastixNoTrans) ) {
353  tA = trans;
354  cs = PastixLCoef;
355  }
356  else {
357  /* This correspond to case treated in backward trsm */
358  assert(0);
359  return;
360  }
361 
362  assert( !( cblk->cblktype & (CBLK_FANIN|CBLK_RECV) ) );
363 
364  if ( (cblk->cblktype & CBLK_IN_SCHUR) && (mode != PastixSolvModeSchur) ) {
365  return;
366  }
367 
368  B = rhsb->b;
369  B = B + cblk->lcolidx;
370  ldb = rhsb->ld;
371  k = cblk_colnbr( cblk );
372 
373  /* Solve the diagonal block */
374  flops_lvl2 = FLOPS_DTRSM( side, k, rhsb->n );
375  kernel_trace_start_lvl2( PastixKernelLvl2_FR_TRSM );
377  tA, diag, cblk, rhsb->n,
378  cblk_getdata( cblk, cs ),
379  B, ldb );
380  kernel_trace_stop_lvl2( flops_lvl2 );
381  flops_lvl1 += flops_lvl2;
382 
383  /* Apply the update */
384  for (blok = cblk[0].fblokptr+1; blok < cblk[1].fblokptr; blok++ ) {
385  fcbk = datacode->cblktab + blok->fcblknm;
386 
387  if ( (fcbk->cblktype & CBLK_IN_SCHUR) && (mode == PastixSolvModeLocal) ) {
388  return;
389  }
390  assert( !(fcbk->cblktype & CBLK_RECV) );
391 
392  /*
393  * Make sure we get the correct pointer to the lrA, or to the right position in [lu]coeftab
394  */
395  dataA = cblk_getdata( cblk, cs );
396  if ( cblk->cblktype & CBLK_COMPRESSED ) {
397  lrA = dataA;
398  lrA += (blok - cblk->fblokptr);
399  dataA = lrA;
400  }
401  else {
402  A = dataA;
403  A += blok->coefind;
404  dataA = A;
405  }
406 
407  /*
408  * Make sure we get the correct pointer for the C matrix.
409  */
410  if ( fcbk->cblktype & CBLK_FANIN ) {
411  C = rhsb->cblkb[ - fcbk->bcscnum - 1 ];
412  ldc = cblk_colnbr( fcbk );
413  if ( C == NULL ) {
414  C = calloc( ldc * rhsb->n, sizeof( double ) );
415  if ( !pastix_atomic_cas_xxb( &(rhsb->cblkb[ - fcbk->bcscnum - 1 ]),
416  (uint64_t)NULL, (uint64_t)C, sizeof(void*) ) )
417  {
418  free( C );
419  C = rhsb->cblkb[ - fcbk->bcscnum - 1 ];
420  }
421  }
422  }
423  else {
424  C = rhsb->b;
425  C = C + fcbk->lcolidx;
426  ldc = rhsb->ld;
427  }
428 
429  flops_lvl2 = FLOPS_DGEMM( blok_rownbr( blok ), rhsb->n, k );
430  kernel_trace_start_lvl2( PastixKernelLvl2_FR_GEMM );
431  solve_blok_dgemm( PastixLeft, tA, rhsb->n,
432  cblk, blok, fcbk,
433  dataA, B, ldb, C, ldc );
434  kernel_trace_stop_lvl2( flops_lvl2 );
435  flops_lvl1 += flops_lvl2;
436 
437  cpucblk_drelease_rhs_fwd_deps( enums, datacode,
438  rhsb, cblk, fcbk );
439  }
441  cblk_rownbr(cblk), rhsb->n, k, flops_lvl1, time );
442 }
443 
444 /**
445  *******************************************************************************
446  *
447  * @brief Apply a backward solve related to one cblk to all the right hand side.
448  *
449  *******************************************************************************
450  *
451  * @param[in] enums
452  * Enums needed for the solve.
453  *
454  * @param[in] datacode
455  * The SolverMatrix structure from PaStiX.
456  *
457  * @param[in] cblk
458  * The cblk structure to which block belongs to. The A and B pointers
459  * must be the coeftab of this column block.
460  * Next column blok must be accessible through cblk[1].
461  *
462  * @param[inout] rhsb
463  * The pointer to the rhs data structure that holds the vectors of the
464  * right hand side.
465  *
466  *******************************************************************************/
467 void
469  SolverMatrix *datacode,
470  SolverCblk *cblk,
471  pastix_rhs_t rhsb )
472 {
473  SolverCblk *fcbk;
474  const SolverBlok *blok;
475  pastix_int_t j;
476  pastix_trans_t tA;
478  const void *dataA = NULL;
479  const pastix_lrblock_t *lrA;
480  const double *A;
481  double *B, *C;
482  pastix_int_t ldb, ldc, k;
483  pastix_fixdbl_t time;
484  pastix_fixdbl_t flops_lvl1 = 0;
485  pastix_fixdbl_t flops_lvl2 = 0;
486  pastix_side_t side = enums->side;
487  pastix_uplo_t uplo = enums->uplo;
488  pastix_trans_t trans = enums->trans;
489  pastix_diag_t diag = enums->diag;
490  pastix_solv_mode_t mode = enums->mode;
491 
493  /*
494  * Left / Upper / NoTrans (Backward)
495  */
496  if ( (side == PastixLeft) && (uplo == PastixUpper) && (trans == PastixNoTrans) ) {
497  /* We store U^t, so we swap uplo and trans */
498  tA = PastixTrans;
499  cs = PastixUCoef;
500  }
501  else if ( (side == PastixLeft) && (uplo == PastixLower) && (trans != PastixNoTrans) ) {
502  tA = trans;
503  cs = PastixLCoef;
504  }
505  else if ( (side == PastixRight) && (uplo == PastixUpper) && (trans != PastixNoTrans) ) {
506  /* We store U^t, so we swap uplo and trans */
507  tA = PastixNoTrans;
508  cs = PastixUCoef;
509 
510  /* Right is not handled yet */
511  assert( 0 );
512 
513  /* We do not handle conjtrans in real as we store U^t */
514  assert( trans != PastixTrans );
515  }
516  else if ( (side == PastixRight) && (uplo == PastixLower) && (trans == PastixNoTrans) ) {
517  tA = trans;
518  cs = PastixLCoef;
519 
520  /* Right is not handled yet */
521  assert( 0 );
522  }
523  else {
524  /* This correspond to case treated in forward trsm */
525  assert(0);
526  return;
527  }
528 
529  /*
530  * If cblk is in the schur complement, all brow blocks are in
531  * the interface. Thus, it doesn't generate any update in local
532  * mode, and we know that we are at least in interface mode
533  * after this test.
534  */
535  if ( (cblk->cblktype & CBLK_IN_SCHUR) && (mode == PastixSolvModeLocal) ) {
536  for (j = cblk[0].brownum; j < cblk[1].brownum; j++ ) {
537  blok = datacode->bloktab + datacode->browtab[j];
538  fcbk = datacode->cblktab + blok->lcblknm;
539 
540  if ( fcbk->cblktype & CBLK_IN_SCHUR ) {
541  break;
542  }
543  cpucblk_drelease_rhs_bwd_deps( enums, datacode,
544  rhsb, cblk, fcbk );
545  }
546  return;
547  }
548 
549  /*
550  * Make sure we get the correct pointer for the B matrix.
551  */
552  assert( !(cblk->cblktype & CBLK_RECV) );
553  if ( cblk->cblktype & CBLK_FANIN ) {
554  B = rhsb->cblkb[ - cblk->bcscnum - 1 ];
555  ldb = cblk_colnbr( cblk );
556  }
557  else {
558  B = rhsb->b;
559  B = B + cblk->lcolidx;
560  ldb = rhsb->ld;
561  }
562  k = cblk_colnbr( cblk );
563 
564  if ( !(cblk->cblktype & (CBLK_FANIN|CBLK_RECV) ) &&
565  (!(cblk->cblktype & CBLK_IN_SCHUR) || (mode == PastixSolvModeSchur)) )
566  {
567  /* Solve the diagonal block */
568  flops_lvl2 = FLOPS_DTRSM( side, k, rhsb->n );
569  kernel_trace_start_lvl2( PastixKernelLvl2_FR_TRSM );
570  solve_blok_dtrsm( side, PastixLower, tA, diag,
571  cblk, rhsb->n,
572  cblk_getdata( cblk, cs ),
573  B, ldb );
574  kernel_trace_stop_lvl2( flops_lvl2 );
575  flops_lvl1 += flops_lvl2;
576  }
577 
578  /* Apply the update */
579  for (j = cblk[1].brownum-1; j>=cblk[0].brownum; j-- ) {
580  blok = datacode->bloktab + datacode->browtab[j];
581  fcbk = datacode->cblktab + blok->lcblknm;
582 
583  if ( (fcbk->cblktype & CBLK_IN_SCHUR) && (mode == PastixSolvModeInterface) ) {
584  continue;
585  }
586 
587  if ( fcbk->cblktype & CBLK_RECV ) {
588 #if defined( PASTIX_WITH_MPI )
589  if ( datacode->reqtab != NULL ) {
590  cpucblk_disend_rhs_bwd( datacode, rhsb, fcbk );
591  }
592 #endif
593  continue;
594  }
595  assert( !(fcbk->cblktype & CBLK_FANIN) );
596 
597  /*
598  * Make sure we get the correct pointer to the lrA, or to the right position in [lu]coeftab
599  */
600  dataA = cblk_getdata( fcbk, cs );
601  if ( fcbk->cblktype & CBLK_COMPRESSED ) {
602  lrA = dataA;
603  lrA += (blok - fcbk->fblokptr);
604  dataA = lrA;
605  }
606  else {
607  A = dataA;
608  A += blok->coefind;
609  dataA = A;
610  }
611 
612  /*
613  * Make sure we get the correct pointer for the C matrix.
614  */
615  C = rhsb->b;
616  C = C + fcbk->lcolidx;
617  ldc = rhsb->ld;
618 
619  flops_lvl2 = FLOPS_DGEMM( blok_rownbr( blok ), rhsb->n, k );
620  kernel_trace_start_lvl2( PastixKernelLvl2_FR_GEMM );
621  solve_blok_dgemm( PastixRight, tA, rhsb->n,
622  cblk, blok, fcbk,
623  dataA, B, ldb, C, ldc );
624  kernel_trace_stop_lvl2( flops_lvl2 );
625  flops_lvl1 += flops_lvl2;
626 
627  cpucblk_drelease_rhs_bwd_deps( enums, datacode,
628  rhsb, cblk, fcbk );
629  }
630 
631  if ( cblk->cblktype & CBLK_FANIN ) {
632  memFree_null( rhsb->cblkb[ - cblk->bcscnum - 1 ] );
633  }
634  kernel_trace_stop( cblk->fblokptr->inlast, PastixKernelTRSMBack, cblk_rownbr( cblk ), rhsb->n, k, flops_lvl1, time );
635 }
636 
637 /**
638  *******************************************************************************
639  *
640  * @brief Apply the diagonal solve related to one cblk to all the right hand side.
641  *
642  *******************************************************************************
643  *
644  * @param[in] cblk
645  * The cblk structure to which diagonal block belongs to.
646  *
647  * @param[in] nrhs
648  * The number of right hand side
649  *
650  * @param[inout] b
651  * The pointer to vectors of the right hand side
652  *
653  * @param[in] ldb
654  * The leading dimension of b
655  *
656  * @param[inout] work
657  * Workspace to temporarily store the diagonal when multiple RHS are
658  * involved. Might be set to NULL for internal allocation on need.
659  *
660  *******************************************************************************/
661 void
663  const void *dataA,
664  int nrhs,
665  double *b,
666  int ldb,
667  double *work )
668 {
669  const double *A;
670  double *tmp;
671  pastix_int_t k, j, tempn, lda;
672 
673  tempn = cblk->lcolnum - cblk->fcolnum + 1;
674  lda = (cblk->cblktype & CBLK_LAYOUT_2D) ? tempn : cblk->stride;
675  assert( blok_rownbr( cblk->fblokptr ) == tempn );
676 
677  if ( cblk->cblktype & CBLK_COMPRESSED ) {
678  const pastix_lrblock_t *lrA = (const pastix_lrblock_t*)dataA;
679  A = lrA->u;
680  assert( lrA->rkmax == lda );
681  }
682  else {
683  A = (const double*)dataA;
684  }
685 
686  /* Add shift for diagonal elements */
687  lda++;
688 
689  if( nrhs == 1 ) {
690  for (j=0; j<tempn; j++, b++, A+=lda) {
691  *b = (*b) / (*A);
692  }
693  }
694  else {
695  /* Copy the diagonal to a temporary buffer */
696  tmp = work;
697  if ( work == NULL ) {
698  MALLOC_INTERN( tmp, tempn, double );
699  }
700  cblas_dcopy( tempn, A, lda, tmp, 1 );
701 
702  /* Compute */
703  for (k=0; k<nrhs; k++, b+=ldb)
704  {
705  for (j=0; j<tempn; j++) {
706  b[j] /= tmp[j];
707  }
708  }
709 
710  if ( work == NULL ) {
711  memFree_null(tmp);
712  }
713  }
714 }
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
double pastix_fixdbl_t
Definition: datatypes.h:65
static void kernel_trace_stop(int8_t inlast, pastix_ktype_t ktype, int m, int n, int k, double flops, double starttime)
Stop the trace of a single kernel.
static double kernel_trace_start(pastix_ktype_t ktype)
Start the trace of a single kernel.
Definition: kernels_trace.h:87
@ PastixKernelTRSMBack
Definition: kernels_enums.h:56
@ PastixKernelTRSMForw
Definition: kernels_enums.h:55
void cpucblk_drelease_rhs_fwd_deps(const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
void cpucblk_drelease_rhs_bwd_deps(const args_solve_t *enums, SolverMatrix *solvmtx, pastix_rhs_t rhsb, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
The block low-rank structure to hold a matrix in low-rank form.
void solve_blok_dgemm(pastix_side_t side, pastix_trans_t trans, pastix_int_t nrhs, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcbk, const void *dataA, const double *B, pastix_int_t ldb, double *C, pastix_int_t ldc)
Apply a solve gemm update related to a single block of the matrix A.
void solve_cblk_dtrsmsp_forward(const args_solve_t *enums, SolverMatrix *datacode, const SolverCblk *cblk, pastix_rhs_t rhsb)
Apply a forward solve related to one cblk to all the right hand side.
void solve_blok_dtrsm(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, int nrhs, const void *dataA, double *b, int ldb)
Apply a solve trsm update related to a diagonal block of the matrix A.
Definition: solve_dtrsmsp.c:76
void solve_cblk_dtrsmsp_backward(const args_solve_t *enums, SolverMatrix *datacode, SolverCblk *cblk, pastix_rhs_t rhsb)
Apply a backward solve related to one cblk to all the right hand side.
void solve_cblk_ddiag(const SolverCblk *cblk, const void *dataA, int nrhs, double *b, int ldb, double *work)
Apply the diagonal solve related to one cblk to all the right hand side.
enum pastix_diag_e pastix_diag_t
Diagonal.
enum pastix_solv_mode_e pastix_solv_mode_t
Solve Schur modes.
enum pastix_uplo_e pastix_uplo_t
Upper/Lower part.
enum pastix_side_e pastix_side_t
Side of the operation.
enum pastix_trans_e pastix_trans_t
Transposition.
enum pastix_coefside_e pastix_coefside_t
Data blocks used in the kernel.
@ PastixLCoef
Definition: api.h:478
@ PastixUCoef
Definition: api.h:479
@ PastixUpper
Definition: api.h:466
@ PastixLower
Definition: api.h:467
@ PastixRight
Definition: api.h:496
@ PastixLeft
Definition: api.h:495
@ PastixNoTrans
Definition: api.h:445
@ PastixTrans
Definition: api.h:446
void ** cblkb
Definition: pastixdata.h:157
pastix_int_t ld
Definition: pastixdata.h:155
pastix_int_t n
Definition: pastixdata.h:154
Main PaStiX RHS structure.
Definition: pastixdata.h:150
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
Definition: solver.h:390
pastix_int_t brownum
Definition: solver.h:166
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:324
pastix_int_t fcblknm
Definition: solver.h:140
MPI_Request * reqtab
Definition: solver.h:264
SolverBlok *restrict bloktab
Definition: solver.h:223
pastix_int_t frownum
Definition: solver.h:142
static pastix_int_t cblk_rownbr(const SolverCblk *cblk)
Compute the number of rows of a column block.
Definition: solver.h:444
static void * cblk_getdata(const SolverCblk *cblk, pastix_coefside_t side)
Get the pointer to the data associated to the side part of the cblk.
Definition: solver.h:364
pastix_int_t coefind
Definition: solver.h:144
SolverBlok * fblokptr
Definition: solver.h:163
pastix_int_t *restrict browtab
Definition: solver.h:224
pastix_int_t lcblknm
Definition: solver.h:139
pastix_int_t lcolidx
Definition: solver.h:165
int8_t inlast
Definition: solver.h:146
pastix_int_t bcscnum
Definition: solver.h:170
SolverCblk *restrict cblktab
Definition: solver.h:222
pastix_int_t stride
Definition: solver.h:164
int8_t cblktype
Definition: solver.h:159
pastix_int_t lcolnum
Definition: solver.h:162
pastix_int_t fcolnum
Definition: solver.h:161
Arguments for the solve.
Definition: solver.h:85
Solver block structure.
Definition: solver.h:137
Solver column block structure.
Definition: solver.h:156
Solver column block structure.
Definition: solver.h:200