PaStiX Handbook  6.4.0
core_strsmsp.c
Go to the documentation of this file.
1 /**
2  *
3  * @file core_strsmsp.c
4  *
5  * PaStiX kernel routines
6  *
7  * @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.4.0
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Xavier Lacoste
14  * @author Esragul Korkmaz
15  * @author Gregoire Pichon
16  * @date 2024-07-05
17  * @generated from /builds/solverstack/pastix/kernels/core_ztrsmsp.c, normal z -> s, Thu Aug 29 14:20:20 2024
18  *
19  **/
20 #include "common.h"
21 #include "cblas.h"
22 #include "blend/solver.h"
23 #include "kernels_trace.h"
24 #include "pastix_scores.h"
25 #include "pastix_slrcores.h"
26 
#ifndef DOXYGEN_SHOULD_SKIP_THIS
/* Unit scaling factor passed as alpha to every cblas_strsm call in this file. */
static const float sone = 1.0f;
#endif /* DOXYGEN_SHOULD_SKIP_THIS */
30 
31 /**
32  *******************************************************************************
33  *
34  * @ingroup kernel_fact_null
35  *
36  * @brief Apply all the trsm updates on a panel stored in 1D layout.
37  *
38  *******************************************************************************
39  *
40  * @param[in] side
41  * Specify whether the A matrix appears on the left or right in the
42  * equation. It has to be either PastixLeft or PastixRight.
43  *
44  * @param[in] uplo
45  * Specify whether the A matrix is upper or lower triangular. It has to
46  * be either PastixUpper or PastixLower.
47  *
48  * @param[in] trans
49  * Specify the transposition used for the A matrix. It has to be either
50  * PastixNoTrans or PastixTrans.
51  *
52  * @param[in] diag
53  * Specify if the A matrix is unit triangular. It has to be either
54  * PastixUnit or PastixNonUnit.
55  *
56  * @param[in] cblk
57  * The cblk structure to which block belongs to. The A and C pointers
58  * must be the coeftab of this column block.
59  * Next column blok must be accessible through cblk[1].
60  *
61  * @param[in] A
62  * The pointer to the coeftab of the cblk.lcoeftab matrix storing the
63  * coefficients of the panel when the Lower part is computed,
64  * cblk.ucoeftab otherwise. Must be of size cblk.stride -by- cblk.width
65  *
66  * @param[inout] C
67  * The pointer to the fcblk.lcoeftab if the lower part is computed,
68  * fcblk.ucoeftab otherwise.
69  *
70  *******************************************************************************/
71 static inline void
73  pastix_uplo_t uplo,
74  pastix_trans_t trans,
75  pastix_diag_t diag,
76  const SolverCblk *cblk,
77  const float *A,
78  float *C )
79 {
80  SolverBlok *fblok;
81  pastix_int_t M, N, lda;
82 
83  N = cblk->lcolnum - cblk->fcolnum + 1;
84  lda = cblk->stride;
85  fblok = cblk->fblokptr; /* The diagonal block */
86 
87  /* vertical dimension */
88  M = lda - N;
89 
90  /* if there is an extra-diagonal bloc in column block */
91  assert( fblok + 1 < cblk[1].fblokptr );
92  assert( blok_rownbr( fblok) == N );
93  assert(!(cblk->cblktype & CBLK_LAYOUT_2D));
94 
95  /* first extra-diagonal bloc in column block address */
96  C = C + fblok[1].coefind;
97 
98  kernel_trace_start_lvl2( PastixKernelLvl2_FR_TRSM );
99  cblas_strsm(CblasColMajor,
100  (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
101  M, N,
102  (sone), A, lda,
103  C, lda);
104  kernel_trace_stop_lvl2( FLOPS_STRSM( side, M, N ) );
105 }
106 
107 /**
108  *******************************************************************************
109  *
110  * @ingroup kernel_fact_null
111  *
112  * @brief Compute the updates associated to one off-diagonal block between two
113  * cblk stored in 2D.
114  *
115  *******************************************************************************
116  *
117  * @param[in] side
118  * Specify whether the A matrix appears on the left or right in the
119  * equation. It has to be either PastixLeft or PastixRight.
120  *
121  * @param[in] uplo
122  * Specify whether the A matrix is upper or lower triangular. It has to
123  * be either PastixUpper or PastixLower.
124  *
125  * @param[in] trans
126  * Specify the transposition used for the A matrix. It has to be either
127  * PastixNoTrans or PastixTrans.
128  *
129  * @param[in] diag
130  * Specify if the A matrix is unit triangular. It has to be either
131  * PastixUnit or PastixNonUnit.
132  *
133  * @param[in] cblk
134  * The cblk structure to which block belongs to. The A and C pointers
135  * must be the coeftab of this column block.
136  * Next column blok must be accessible through cblk[1].
137  *
138  * @param[in] A
139  * The pointer to the coeftab of the cblk.lcoeftab matrix storing the
140  * coefficients of the panel when the Lower part is computed,
141  * cblk.ucoeftab otherwise. Must be of size cblk.stride -by- cblk.width
142  *
143  * @param[inout] C
144  * The pointer to the fcblk.lcoeftab if the lower part is computed,
145  * fcblk.ucoeftab otherwise.
146  *
147  *******************************************************************************/
148 static inline void
150  pastix_uplo_t uplo,
151  pastix_trans_t trans,
152  pastix_diag_t diag,
153  const SolverCblk *cblk,
154  const float *A,
155  float *C )
156 {
157  const SolverBlok *fblok, *lblok, *blok;
158  pastix_int_t M, N, lda, ldc;
159  float *blokC;
160 
161  N = cblk->lcolnum - cblk->fcolnum + 1;
162  fblok = cblk[0].fblokptr; /* The diagonal block */
163  lblok = cblk[1].fblokptr; /* The diagonal block of the next cblk */
164  lda = blok_rownbr( fblok );
165 
166  assert( blok_rownbr(fblok) == N );
167  assert( cblk->cblktype & CBLK_LAYOUT_2D );
168 
169  for (blok=fblok+1; blok<lblok; blok++) {
170 
171  blokC = C + blok->coefind;
172  M = blok_rownbr(blok);
173  ldc = M;
174 
175  kernel_trace_start_lvl2( PastixKernelLvl2_FR_TRSM );
176  cblas_strsm(CblasColMajor,
177  (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
178  M, N,
179  (sone), A, lda,
180  blokC, ldc);
181  kernel_trace_stop_lvl2( FLOPS_STRSM( side, M, N ) );
182  }
183 }
184 
185 /**
186  *******************************************************************************
187  *
188  * @ingroup kernel_fact_null
189  *
190  * @brief Computes the updates associated to one off-diagonal block between two
191  * cblk stored in low-rank format.
192  *
193  *******************************************************************************
194  *
195  * @param[in] side
196  * Specify whether the off-diagonal blocks appear on the left or right in the
197  * equation. It has to be either PastixLeft or PastixRight.
198  *
199  * @param[in] uplo
200  * Specify whether the off-diagonal blocks are upper or lower
201  * triangular. It has to be either PastixUpper or PastixLower.
202  *
203  * @param[in] trans
204  * Specify the transposition used for the off-diagonal blocks. It has
205  * to be either PastixNoTrans or PastixTrans.
206  *
207  * @param[in] diag
208  * Specify if the off-diagonal blocks are unit triangular. It has to be
209  * either PastixUnit or PastixNonUnit.
210  *
211  * @param[in] cblk
212  * The cblk structure to which block belongs to. The A and C pointers
213  * must be the coeftab of this column block.
214  * Next column blok must be accessible through cblk[1].
215  *
216  * @param[in] lrA
217  * Pointer to the low-rank representation of the block A.
218  * Must be followed by the low-rank representation of the following blocks.
219  *
220  * @param[inout] lrC
221  * Pointer to the low-rank representation of the block C.
222  * Must be followed by the low-rank representation of the following blocks.
223  *
224  * @param[in] lowrank
225  * The structure with low-rank parameters.
226  *
227  *******************************************************************************
228  *
229  * @return The number of flops performed
230  *
231  *******************************************************************************/
232 static inline pastix_fixdbl_t
234  pastix_uplo_t uplo,
235  pastix_trans_t trans,
236  pastix_diag_t diag,
237  const SolverCblk *cblk,
238  const pastix_lrblock_t *lrA,
239  pastix_lrblock_t *lrC,
240  const pastix_lr_t *lowrank )
241 {
242  SolverBlok *fblok, *lblok, *blok;
243  pastix_int_t M, N, lda;
244  float *A;
245 
246  pastix_fixdbl_t flops = 0.0;
247  pastix_fixdbl_t flops_lr, flops_c;
248 
249  N = cblk->lcolnum - cblk->fcolnum + 1;
250  fblok = cblk[0].fblokptr; /* The diagonal block */
251  lblok = cblk[1].fblokptr; /* The diagonal block of the next cblk */
252 
253  A = lrA->u;
254  lda = lrA->rkmax;
255 
256  assert( lrA->rk == -1 );
257  assert( blok_rownbr(fblok) == N );
258  assert( cblk->cblktype & CBLK_COMPRESSED );
259  assert( cblk->cblktype & CBLK_LAYOUT_2D );
260 
261  lrC++; /* Skip diagonal block */
262  for (blok=fblok+1; blok<lblok; blok++, lrC++) {
263 
264  M = blok_rownbr(blok);
265  flops_lr = 0.;
266  flops_c = 0.;
267 
268  /* Check the size of the block */
269  if ( ( N >= lowrank->compress_min_width ) &&
270  ( M >= lowrank->compress_min_height ) )
271  {
272  int is_preselected = ( blok->iluklvl <= lowrank->ilu_lvl );
273 
274  /*
275  * Try to compress the block: 2 cases
276  * - Non preselected blocks are always compressed
277  * - Preselected blocks are compressed if compress_preselect
278  */
279  if ( lowrank->compress_preselect || (!is_preselected) )
280  {
281  flops_lr = cpublok_scompress( lowrank, M, N, lrC );
282  }
283  }
284 
285  if ( lrC->rk != 0 ) {
286  if ( lrC->rk != -1 ) {
287  kernel_trace_start_lvl2( PastixKernelLvl2_LR_TRSM );
288  cblas_strsm(CblasColMajor,
289  (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
290  lrC->rk, N,
291  (sone), A, lda,
292  lrC->v, lrC->rkmax);
293  flops_c = FLOPS_STRSM( side, lrC->rk, N );
294  kernel_trace_stop_lvl2( flops_c );
295  }
296  else {
297  kernel_trace_start_lvl2( PastixKernelLvl2_FR_TRSM );
298  cblas_strsm(CblasColMajor,
299  (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
300  M, N,
301  (sone), A, lda,
302  lrC->u, lrC->rkmax);
303  flops_c = FLOPS_STRSM( side, M, N );
304  kernel_trace_stop_lvl2( flops_c );
305  }
306  }
307 
308  flops += flops_lr + flops_c;
309  }
310  return flops;
311 }
312 
313 /**
314  *******************************************************************************
315  *
316  * @brief Compute the updates associated to a column of off-diagonal blocks.
317  *
318  *******************************************************************************
319  *
320  * @param[in] side
321  * Specify whether the A matrix appears on the left or right in the
322  * equation. It has to be either PastixLeft or PastixRight.
323  *
324  * @param[in] uplo
325  * Specify whether the A matrix is upper or lower triangular. It has to
326  * be either PastixUpper or PastixLower.
327  *
328  * @param[in] trans
329  * Specify the transposition used for the A matrix. It has to be either
330  * PastixNoTrans or PastixTrans.
331  *
332  * @param[in] diag
333  * Specify if the A matrix is unit triangular. It has to be either
334  * PastixUnit or PastixNonUnit.
335  *
336  * @param[in] cblk
337  * The cblk structure to which block belongs to. The A and B pointers
338  * must be the coeftab of this column block.
339  * Next column blok must be accessible through cblk[1].
340  *
341  * @param[in] A
342  * The pointer to the correct representation of A.
343  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
344  * - pastix_lr_block if the block is compressed.
345  *
346  * @param[inout] C
347  * The pointer to the correct representation of C.
348  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
349  * - pastix_lr_block if the block is compressed.
350  *
351  * @param[in] lowrank
352  * The structure with low-rank parameters.
353  *
354  *******************************************************************************/
355 void
357  pastix_uplo_t uplo,
358  pastix_trans_t trans,
359  pastix_diag_t diag,
360  const SolverCblk *cblk,
361  const void *A,
362  void *C,
363  const pastix_lr_t *lowrank )
364 {
365  if ( cblk[0].fblokptr + 1 < cblk[1].fblokptr )
366  {
367  pastix_ktype_t ktype = PastixKernelLvl1Nbr;
368  pastix_fixdbl_t time, flops = 0.0;
369  pastix_int_t n = cblk_colnbr( cblk );
370  pastix_int_t m = cblk->stride - n;
371 
372  if ( cblk->cblktype & CBLK_COMPRESSED ) {
373  ktype = PastixKernelTRSMCblkLR;
374  time = kernel_trace_start( ktype );
375 
376  flops = core_strsmsp_lr( side, uplo, trans, diag,
377  cblk, A, C, lowrank );
378  }
379  else {
380  if ( cblk->cblktype & CBLK_LAYOUT_2D ) {
381  ktype = PastixKernelTRSMCblk2d;
382  time = kernel_trace_start( ktype );
383 
384  core_strsmsp_2d( side, uplo, trans, diag,
385  cblk, A, C );
386  }
387  else {
388  ktype = PastixKernelTRSMCblk1d;
389  time = kernel_trace_start( ktype );
390 
391  core_strsmsp_1d( side, uplo, trans, diag,
392  cblk, A, C );
393  }
394  flops = FLOPS_STRSM( PastixRight, m, n );
395  }
396 
397  kernel_trace_stop( cblk->fblokptr->inlast, ktype, m, n, 0, flops, time );
398  }
399 }
400 
401 /**
402  *******************************************************************************
403  *
404  * @ingroup kernel_fact_null
405  *
406  * @brief Compute the updates associated to one off-diagonal block between two
407  * cblk stored in 2D.
408  *
409  *******************************************************************************
410  *
411  * @param[in] side
412  * Specify whether the A matrix appears on the left or right in the
413  * equation. It has to be either PastixLeft or PastixRight.
414  *
415  * @param[in] uplo
416  * Specify whether the A matrix is upper or lower triangular. It has to
417  * be either PastixUpper or PastixLower.
418  *
419  * @param[in] trans
420  * Specify the transposition used for the A matrix. It has to be either
421  * PastixNoTrans or PastixTrans.
422  *
423  * @param[in] diag
424  * Specify if the A matrix is unit triangular. It has to be either
425  * PastixUnit or PastixNonUnit.
426  *
427  * @param[in] cblk
428  * The cblk structure to which block belongs to. The A and C pointers
429  * must be the coeftab of this column block.
430  * Next column blok must be accessible through cblk[1].
431  *
432  * @param[in] blok_m
433  * Index of the first off-diagonal block in cblk that is solved. The
434  * TRSM is also applied to all the following blocks which are facing the
435  * same diagonal block
436  *
437  * @param[in] A
438  * The pointer to the coeftab of the cblk.lcoeftab matrix storing the
439  * coefficients of the panel when the Lower part is computed,
440  * cblk.ucoeftab otherwise. Must be of size cblk.stride -by- cblk.width
441  *
442  * @param[inout] C
443  * The pointer to the fcblk.lcoeftab if the lower part is computed,
444  * fcblk.ucoeftab otherwise.
445  *
446  *******************************************************************************
447  *
448  * @return The number of flops performed
449  *
450  *******************************************************************************/
451 static inline pastix_fixdbl_t
453  pastix_uplo_t uplo,
454  pastix_trans_t trans,
455  pastix_diag_t diag,
456  const SolverCblk *cblk,
457  pastix_int_t blok_m,
458  const float *A,
459  float *C )
460 {
461  const SolverBlok *fblok, *lblok, *blok;
462  pastix_int_t M, N, lda, ldc, offset, cblk_m, full_m;
463  float *Cptr;
464  pastix_fixdbl_t flops = 0.0;
466 
467  N = cblk->lcolnum - cblk->fcolnum + 1;
468  fblok = cblk[0].fblokptr; /* The diagonal block */
469  lblok = cblk[1].fblokptr; /* The diagonal block of the next cblk */
470  lda = blok_rownbr( fblok );
471 
472  assert( blok_rownbr(fblok) == N );
473  assert( cblk->cblktype & CBLK_LAYOUT_2D );
474 
475  blok = fblok + blok_m;
476  offset = blok->coefind;
477  cblk_m = blok->fcblknm;
478  full_m = 0;
479 
480  for (; (blok < lblok) && (blok->fcblknm == cblk_m); blok++) {
481 
482  Cptr = C + blok->coefind - offset;
483  M = blok_rownbr(blok);
484  ldc = M;
485 
486  cblas_strsm( CblasColMajor,
487  (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
488  M, N,
489  (sone), A, lda,
490  Cptr, ldc );
491 
492  flops += FLOPS_STRSM( side, M, N );
493  full_m += M;
494  }
495 
497  full_m, N, 0, flops, time );
498  return flops;
499 }
500 
501 /**
502  *******************************************************************************
503  *
504  * @ingroup kernel_fact_null
505  *
506  * @brief Compute the updates associated to one off-diagonal block between two
507  * cblk stored in low-rank format.
508  *
509  *******************************************************************************
510  *
511  * @param[in] side
512  * Specify whether the off-diagonal blocks appear on the left or right in the
513  * equation. It has to be either PastixLeft or PastixRight.
514  *
515  * @param[in] uplo
516  * Specify whether the off-diagonal blocks are upper or lower
517  * triangular. It has to be either PastixUpper or PastixLower.
518  *
519  * @param[in] trans
520  * Specify the transposition used for the off-diagonal blocks. It has
521  * to be either PastixNoTrans or PastixTrans.
522  *
523  * @param[in] diag
524  * Specify if the off-diagonal blocks are unit triangular. It has to be
525  * either PastixUnit or PastixNonUnit.
526  *
527  * @param[in] cblk
528  * The cblk structure to which block belongs to. The A and C pointers
529  * must be the coeftab of this column block.
530  * Next column blok must be accessible through cblk[1].
531  *
532  * @param[in] blok_m
533  * Index of the first off-diagonal block in cblk that is solved. The
534  * TRSM is also applied to all the following blocks which are facing the
535  * same diagonal block
536  *
537  * @param[in] lrA
538  * Pointer to the low-rank representation of the block A.
539  * Must be followed by the low-rank representation of the following blocks.
540  *
541  * @param[inout] lrC
542  * Pointer to the low-rank representation of the block C.
543  * Must be followed by the low-rank representation of the following blocks.
544  *
545  * @param[in] lowrank
546  * The structure with low-rank parameters.
547  *
548  *******************************************************************************
549  *
550  * @return The number of flops performed
551  *
552  *******************************************************************************/
553 static inline pastix_fixdbl_t
555  pastix_uplo_t uplo,
556  pastix_trans_t trans,
557  pastix_diag_t diag,
558  const SolverCblk *cblk,
559  pastix_int_t blok_m,
560  const pastix_lrblock_t *lrA,
561  pastix_lrblock_t *lrC,
562  const pastix_lr_t *lowrank )
563 {
564  SolverBlok *fblok, *lblok, *blok;
565  pastix_int_t M, N, lda, cblk_m, full_m, full_n;
566  float *A;
567  pastix_fixdbl_t flops = 0.0;
569 
570  N = cblk->lcolnum - cblk->fcolnum + 1;
571  fblok = cblk[0].fblokptr; /* The diagonal block */
572  lblok = cblk[1].fblokptr; /* The diagonal block of the next cblk */
573 
574  A = lrA->u;
575  lda = lrA->rkmax;
576 
577  assert( cblk->cblktype & CBLK_COMPRESSED );
578  assert( cblk->cblktype & CBLK_LAYOUT_2D );
579 
580  assert( blok_rownbr(fblok) == N );
581  assert( lrA->rk == -1 );
582 
583  blok = fblok + blok_m;
584  cblk_m = blok->fcblknm;
585  full_m = 0;
586  full_n = 0;
587 
588  for (; (blok < lblok) && (blok->fcblknm == cblk_m); blok++, lrC++) {
589 
590  M = blok_rownbr(blok);
591 
592  if ( ( N >= lowrank->compress_min_width ) &&
593  ( M >= lowrank->compress_min_height ) )
594  {
595  int is_preselected = ( blok->iluklvl <= lowrank->ilu_lvl );
596 
597  /*
598  * Try to compress the block: 2 cases
599  * - Non preselected blocks are always compressed
600  * - Preselected blocks are compressed if compress_preselect
601  */
602  if ( lowrank->compress_preselect || (!is_preselected) )
603  {
604  flops = cpublok_scompress( lowrank, M, N, lrC );
605  }
606  }
607 
608  if ( lrC->rk != 0 ) {
609  if ( lrC->rk != -1 ) {
610  cblas_strsm(CblasColMajor,
611  (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
612  lrC->rk, N,
613  (sone), A, lda,
614  lrC->v, lrC->rkmax);
615 
616  flops += FLOPS_STRSM( side, lrC->rk, N );
617  full_n += lrC->rk;
618  }
619  else {
620  cblas_strsm(CblasColMajor,
621  (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
622  M, N,
623  (sone), A, lda,
624  lrC->u, lrC->rkmax);
625 
626  flops += FLOPS_STRSM( side, M, N );
627  full_n += M;
628  }
629  }
630  full_m += M;
631  }
632 
634  full_m, N, full_n, flops, time );
635  return flops;
636 }
637 
638 /**
639  *******************************************************************************
640  *
641  * @brief Compute the updates associated to one off-diagonal block.
642  *
643  *******************************************************************************
644  *
645  * @param[in] side
646  * Specify whether the A matrix appears on the left or right in the
647  * equation. It has to be either PastixLeft or PastixRight.
648  *
649  * @param[in] uplo
650  * Specify whether the A matrix is upper or lower triangular. It has to
651  * be either PastixUpper or PastixLower.
652  *
653  * @param[in] trans
654  * Specify the transposition used for the A matrix. It has to be either
655  * PastixNoTrans or PastixTrans.
656  *
657  * @param[in] diag
658  * Specify if the A matrix is unit triangular. It has to be either
659  * PastixUnit or PastixNonUnit.
660  *
661  * @param[in] cblk
662  * The cblk structure to which block belongs to. The A and B pointers
663  * must be the coeftab of this column block.
664  * Next column blok must be accessible through cblk[1].
665  *
666  * @param[in] blok_m
667  * Index of the first off-diagonal block in cblk that is solved. The
668  * TRSM is also applied to all the following blocks which are facing the
669  * same diagonal block
670  *
671  * @param[in] A
672  * The pointer to the correct representation of A.
673  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
674  * - pastix_lr_block if the block is compressed.
675  *
676  * @param[inout] C
677  * The pointer to the correct representation of C.
678  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
679  * - pastix_lr_block if the block is compressed.
680  *
681  * @param[in] lowrank
682  * The structure with low-rank parameters.
683  *
684  *******************************************************************************
685  *
686  * @return The number of flops performed
687  *
688  *******************************************************************************/
691  pastix_uplo_t uplo,
692  pastix_trans_t trans,
693  pastix_diag_t diag,
694  const SolverCblk *cblk,
695  pastix_int_t blok_m,
696  const void *A,
697  void *C,
698  const pastix_lr_t *lowrank )
699 {
700  if ( cblk->cblktype & CBLK_COMPRESSED ) {
701  return core_strsmsp_lrsub( side, uplo, trans, diag,
702  cblk, blok_m, A, C, lowrank );
703  }
704  else {
705  return core_strsmsp_2dsub( side, uplo, trans, diag,
706  cblk, blok_m, A, C );
707  }
708 }
BEGIN_C_DECLS typedef int pastix_int_t
Definition: datatypes.h:51
double pastix_fixdbl_t
Definition: datatypes.h:65
enum pastix_ktype_e pastix_ktype_t
List of the Level 1 events that may be traced in PaStiX.
static void kernel_trace_stop(int8_t inlast, pastix_ktype_t ktype, int m, int n, int k, double flops, double starttime)
Stop the trace of a single kernel.
static double kernel_trace_start(pastix_ktype_t ktype)
Start the trace of a single kernel.
Definition: kernels_trace.h:87
@ PastixKernelTRSMBlokLR
Definition: kernels_enums.h:61
@ PastixKernelTRSMCblk2d
Definition: kernels_enums.h:58
@ PastixKernelTRSMCblk1d
Definition: kernels_enums.h:57
@ PastixKernelTRSMBlok2d
Definition: kernels_enums.h:60
@ PastixKernelTRSMCblkLR
Definition: kernels_enums.h:59
static pastix_fixdbl_t core_strsmsp_lr(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const pastix_lrblock_t *lrA, pastix_lrblock_t *lrC, const pastix_lr_t *lowrank)
Computes the updates associated to one off-diagonal block between two cblk stored in low-rank format.
Definition: core_strsmsp.c:233
static void core_strsmsp_2d(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const float *A, float *C)
Compute the updates associated to one off-diagonal block between two cblk stored in 2D.
Definition: core_strsmsp.c:149
static pastix_fixdbl_t core_strsmsp_2dsub(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, pastix_int_t blok_m, const float *A, float *C)
Compute the updates associated to one off-diagonal block between two cblk stored in 2D.
Definition: core_strsmsp.c:452
static pastix_fixdbl_t core_strsmsp_lrsub(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, pastix_int_t blok_m, const pastix_lrblock_t *lrA, pastix_lrblock_t *lrC, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block between two cblk stored in low-rank format.
Definition: core_strsmsp.c:554
static void core_strsmsp_1d(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const float *A, float *C)
Apply all the trsm updates on a panel stored in 1D layout.
Definition: core_strsmsp.c:72
void cpucblk_strsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
Definition: core_strsmsp.c:356
pastix_fixdbl_t cpublok_strsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, pastix_int_t blok_m, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
Definition: core_strsmsp.c:690
pastix_fixdbl_t cpublok_scompress(const pastix_lr_t *lowrank, pastix_int_t M, pastix_int_t N, pastix_lrblock_t *lrA)
Compress a single block from full-rank to low-rank format.
int compress_preselect
pastix_int_t compress_min_width
pastix_int_t compress_min_height
Structure to define the type of function to use for the low-rank kernels and their parameters.
The block low-rank structure to hold a matrix in low-rank form.
enum pastix_diag_e pastix_diag_t
Diagonal.
enum pastix_uplo_e pastix_uplo_t
Upper/Lower part.
enum pastix_side_e pastix_side_t
Side of the operation.
enum pastix_trans_e pastix_trans_t
Transposition.
@ PastixRight
Definition: api.h:496
static pastix_int_t blok_rownbr(const SolverBlok *blok)
Compute the number of rows of a block.
Definition: solver.h:395
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition: solver.h:329
int iluklvl
Definition: solver.h:152
pastix_int_t fcblknm
Definition: solver.h:144
pastix_int_t coefind
Definition: solver.h:149
SolverBlok * fblokptr
Definition: solver.h:168
int8_t inlast
Definition: solver.h:151
pastix_int_t stride
Definition: solver.h:169
int8_t cblktype
Definition: solver.h:164
pastix_int_t lcolnum
Definition: solver.h:167
pastix_int_t fcolnum
Definition: solver.h:166
Solver block structure.
Definition: solver.h:141
Solver column block structure.
Definition: solver.h:161