PaStiX Handbook  6.2.1
core_spotrfsp.c
Go to the documentation of this file.
1 /**
2  *
3  * @file core_spotrfsp.c
4  *
5  * PaStiX kernel routines for Cholesky factorization.
6  *
7  * @copyright 2011-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.2.1
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Xavier Lacoste
14  * @author Gregoire Pichon
15  * @author Nolan Bredel
16  * @date 2021-06-16
17  * @generated from /builds/solverstack/pastix/kernels/core_zpotrfsp.c, normal z -> s, Tue Apr 12 09:38:36 2022
18  *
19  **/
20 #include "common.h"
21 #include "cblas.h"
22 #include "blend/solver.h"
23 #include "pastix_scores.h"
24 #include "kernels_trace.h"
25 
26 #ifndef DOXYGEN_SHOULD_SKIP_THIS
27 #define MAXSIZEOFBLOCKS 64
28 static float sone = 1.0;
29 static float msone = -1.0;
30 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
31 
32 /**
33  *******************************************************************************
34  *
35  * @ingroup kernel_blas_lapack_null
36  *
37  * @brief Compute the sequential static pivoting Cholesky factorization of the
38  * matrix n-by-n A = L * L^t .
39  *
40  *******************************************************************************
41  *
42  * @param[in] n
43  * The number of rows and columns of the matrix A.
44  *
45  * @param[inout] A
46  * The matrix A to factorize with Cholesky factorization. The matrix
47  * is of size lda -by- n.
48  *
49  * @param[in] lda
50  * The leading dimension of the matrix A.
51  *
52  * @param[inout] nbpivots
53  * Pointer to the number of piovting operations made during
54  * factorization. It is updated during this call
55  *
56  * @param[in] criterion
57  * Threshold use for static pivoting. If diagonal value is under this
58  * threshold, its value is replaced by the threshold and the number of
59  * pivots is incremented.
60  *
61  *******************************************************************************
62  *
63  * @warning This routine will fail if it discovers a null or negative value on
64  * the diagonal during factorization.
65  *
66  *******************************************************************************/
67 static inline void
68 core_spotf2sp( pastix_int_t n,
69  float *A,
70  pastix_int_t lda,
71  pastix_int_t *nbpivots,
72  float criterion )
73 {
74  pastix_int_t k;
75  float *Akk = A; /* A [k ][k] */
76  float *Amk = A+1; /* A [k+1][k] */
77  float alpha;
78 
79  for (k=0; k<n; k++){
80  if ( fabsf(*Akk) < criterion ) {
81  (*Akk) = (float)criterion;
82  (*nbpivots)++;
83  }
84 
85  /* Symmetric matrices, so imaginary part should be 0 */
86  if ( (*Akk) < 0.0 )
87  {
88  pastix_print_error( "Negative diagonal term\n" );
89  }
90 
91  *Akk = sqrtf(*Akk);
92  alpha = 1.0 / (*Akk);
93 
94  /* Scale the diagonal to compute L((k+1):n,k) */
95  cblas_sscal(n-k-1, ( alpha ), Amk, 1 );
96 
97  /* Move to next Akk */
98  Akk += (lda+1);
99 
100  cblas_ssyr(CblasColMajor, CblasLower,
101  n-k-1, -1.0,
102  Amk, 1,
103  Akk, lda);
104 
105  /* Move to next Amk */
106  Amk = Akk+1;
107  }
108 }
109 
110 /**
111  *******************************************************************************
112  *
113  * @brief Compute the block static pivoting Cholesky factorization of the matrix
114  * n-by-n A = L * L^t .
115  *
116  *******************************************************************************
117  *
118  * @param[in] n
119  * The number of rows and columns of the matrix A.
120  *
121  * @param[inout] A
122  * The matrix A to factorize with Cholesky factorization. The matrix
123  * is of size lda -by- n.
124  *
125  * @param[in] lda
126  * The leading dimension of the matrix A.
127  *
128  * @param[inout] nbpivots
129  * Pointer to the number of piovting operations made during
130  * factorization. It is updated during this call
131  *
132  * @param[in] criterion
133  * Threshold use for static pivoting. If diagonal value is under this
134  * threshold, its value is replaced by the threshold and the nu,ber of
135  * pivots is incremented.
136  *
137  *******************************************************************************
138  *
139  * @warning This routine will fail if it discovers a null or negative value on
140  * the diagonal during factorization.
141  *
142  *******************************************************************************/
143 void
144 core_spotrfsp( pastix_int_t n,
145  float *A,
146  pastix_int_t lda,
147  pastix_int_t *nbpivots,
148  float criterion )
149 {
150  pastix_int_t k, blocknbr, blocksize, matrixsize;
151  float *tmp,*tmp1,*tmp2;
152 
153  /* diagonal supernode is divided into MAXSIZEOFBLOCK-by-MAXSIZEOFBLOCKS blocks */
154  blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
155 
156  for (k=0; k<blocknbr; k++) {
157 
158  blocksize = pastix_imin(MAXSIZEOFBLOCKS, n-k*MAXSIZEOFBLOCKS);
159  tmp = A+(k*MAXSIZEOFBLOCKS)*(lda+1); /* Lk,k */
160 
161  /* Factorize the diagonal block Akk*/
162  core_spotf2sp(blocksize, tmp, lda, nbpivots, criterion);
163 
164  if ((k*MAXSIZEOFBLOCKS+blocksize) < n) {
165 
166  tmp1 = tmp + blocksize; /* Lk+1,k */
167  tmp2 = tmp1 + blocksize * lda; /* Lk+1,k+1 */
168 
169  matrixsize = n-(k*MAXSIZEOFBLOCKS+blocksize);
170 
171  /* Compute the column L(k+1:n,k) = (L(k,k)D(k,k))^{-1}A(k+1:n,k) */
172  /* 1) Compute A(k+1:n,k) = A(k+1:n,k)L(k,k)^{-T} = D(k,k)L(k+1:n,k) */
173  /* input: L(k,k) in tmp, A(k+1:n,k) in tmp1 */
174  /* output: A(k+1:n,k) in tmp1 */
175  cblas_strsm(CblasColMajor,
176  CblasRight, CblasLower,
177  CblasTrans, CblasNonUnit,
178  matrixsize, blocksize,
179  (sone), tmp, lda,
180  tmp1, lda);
181 
182  /* Update Ak+1k+1 = Ak+1k+1 - Lk+1k * Lk+1kT */
183  cblas_ssyrk(CblasColMajor, CblasLower, CblasNoTrans,
184  matrixsize, blocksize,
185  (float)msone, tmp1, lda,
186  (float)sone, tmp2, lda);
187  }
188  }
189 }
190 
191 /**
192  *******************************************************************************
193  *
194  * @brief Compute the Cholesky factorization of the diagonal block in a panel.
195  *
196  * @warning This routine will fail if it discovers a null or negative value on
197  * the diagonal during factorization.
198  *
199  *******************************************************************************
200  *
201  * @param[in] solvmtx
202  * Solver Matrix structure of the problem
203  *
204  * @param[in] cblk
205  * Pointer to the structure representing the panel to factorize in the
206  * cblktab array. Next column blok must be accessible through cblk[1].
207  *
208  * @param[inout] dataL
209  * The pointer to the correct representation of the lower part of the data.
210  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
211  * - pastix_lr_block if the block is compressed.
212  *
213  *******************************************************************************
214  *
215  * @return The number of static pivoting performed during the diagonal block
216  * factorization.
217  *
218  *******************************************************************************/
219 int
220 cpucblk_spotrfsp1d_potrf( SolverMatrix *solvmtx,
221  SolverCblk *cblk,
222  void *dataL )
223 {
224  pastix_int_t ncols, stride;
225  pastix_int_t nbpivots = 0;
226  pastix_fixdbl_t time, flops;
227  float *L;
228  pastix_lrblock_t *lrL;
229  float criterion = solvmtx->diagthreshold;
230 
231  time = kernel_trace_start( PastixKernelPOTRF );
232 
233  ncols = cblk->lcolnum - cblk->fcolnum + 1;
234  stride = (cblk->cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->stride;
235 
236  /* check if diagonal column block */
237  assert( cblk->fcolnum == cblk->fblokptr->frownum );
238  assert( cblk->lcolnum == cblk->fblokptr->lrownum );
239 
240  if ( cblk->cblktype & CBLK_COMPRESSED ) {
241  /* dataL is a LRblock */
242  lrL = (pastix_lrblock_t *)dataL;
243  L = lrL->u;
244  stride = ncols;
245 
246  assert( lrL->rk == -1 );
247  assert( stride == lrL->rkmax );
248  } else {
249  L = (float *)dataL;
250  }
251 
252  /* Factorize diagonal block */
253  flops = FLOPS_SPOTRF( ncols );
254  kernel_trace_start_lvl2( PastixKernelLvl2POTRF );
255  core_spotrfsp(ncols, L, stride, &nbpivots, criterion );
256  kernel_trace_stop_lvl2( flops );
257 
258  kernel_trace_stop( cblk->fblokptr->inlast, PastixKernelPOTRF, ncols, 0, 0, flops, time );
259 
260  if ( nbpivots ) {
261  pastix_atomic_add_32b( &(solvmtx->nbpivots), nbpivots );
262  }
263  return nbpivots;
264 }
265 
266 /**
267  *******************************************************************************
268  *
269  * @brief Compute the Cholesky factorization of one panel.
270  *
271  *******************************************************************************
272  *
273  * @param[in] solvmtx
274  * Solver Matrix structure of the problem
275  *
276  * @param[in] cblk
277  * Pointer to the structure representing the panel to factorize in the
278  * cblktab array. Next column blok must be accessible through cblk[1].
279  *
280  * @param[inout] L
281  * The pointer to the correct representation of lower part of the data.
282  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
283  * - pastix_lr_block if the block is compressed.
284  *
285  *******************************************************************************
286  *
287  * @return The number of static pivoting during factorization of the diagonal
288  * block.
289  *
290  *******************************************************************************/
291 int
292 cpucblk_spotrfsp1d_panel( SolverMatrix *solvmtx,
293  SolverCblk *cblk,
294  void *L )
295 {
296  pastix_int_t nbpivots;
297  nbpivots = cpucblk_spotrfsp1d_potrf( solvmtx, cblk, L );
298 
301  cblk, L, L, &(solvmtx->lowrank) );
302  return nbpivots;
303 }
304 
305 
306 /**
307  *******************************************************************************
308  *
309  * @brief Perform the Cholesky factorization of a given panel and apply all its
310  * updates.
311  *
312  *******************************************************************************
313  *
314  * @param[in] solvmtx
315  * Solver Matrix structure of the problem
316  *
317  * @param[in] cblk
318  * Pointer to the structure representing the panel to factorize in the
319  * cblktab array. Next column blok must be accessible through cblk[1].
320  *
321  * @param[in] work
322  * Temporary memory buffer.
323  *
324  * @param[in] lwork
325  * Temporary workspace dimension.
326  *
327  *******************************************************************************
328  *
329  * @return The number of static pivoting during factorization of the diagonal
330  * block.
331  *
332  *******************************************************************************/
333 int
334 cpucblk_spotrfsp1d( SolverMatrix *solvmtx,
335  SolverCblk *cblk,
336  float *work,
337  pastix_int_t lwork )
338 {
339  void *L = cblk_getdataL( cblk );
340  SolverCblk *fcblk;
341  SolverBlok *blok, *lblk;
342  pastix_int_t nbpivots;
343 
344  nbpivots = cpucblk_spotrfsp1d_panel( solvmtx, cblk, L );
345 
346  blok = cblk->fblokptr + 1; /* First off-diagonal block */
347  lblk = cblk[1].fblokptr; /* Next diagonal block */
348 
349  /* If there are off-diagonal blocks, perform the updates */
350  for( ; blok < lblk; blok++ )
351  {
352  fcblk = solvmtx->cblktab + blok->fcblknm;
353 
354  if ( fcblk->cblktype & CBLK_FANIN ) {
355  cpucblk_salloc( PastixLCoef, fcblk );
356  }
357 
359  cblk, blok, fcblk,
360  L, L, cblk_getdataL( fcblk ),
361  work, lwork, &(solvmtx->lowrank) );
362 
363  cpucblk_srelease_deps( PastixLCoef, solvmtx, cblk, fcblk );
364  }
365 
366  return nbpivots;
367 }
solver_blok_s::frownum
pastix_int_t frownum
Definition: solver.h:112
solver.h
PastixTrans
@ PastixTrans
Definition: api.h:425
cpucblk_spotrfsp1d_panel
int cpucblk_spotrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Compute the Cholesky factorization of one panel.
Definition: core_spotrfsp.c:292
solver_cblk_s::fblokptr
SolverBlok * fblokptr
Definition: solver.h:134
solver_cblk_s::stride
pastix_int_t stride
Definition: solver.h:135
pastix_lrblock_s::u
void * u
Definition: pastix_lowrank.h:115
solver_cblk_s
Solver column block structure.
Definition: solver.h:127
cpucblk_spotrfsp1d_potrf
int cpucblk_spotrfsp1d_potrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the Cholesky factorization of the diagonal block in a panel.
Definition: core_spotrfsp.c:220
solver_blok_s
Solver block structure.
Definition: solver.h:107
PastixLower
@ PastixLower
Definition: api.h:445
pastix_lrblock_s
The block low-rank structure to hold a matrix in low-rank form.
Definition: pastix_lowrank.h:112
solver_cblk_s::lcolnum
pastix_int_t lcolnum
Definition: solver.h:133
PastixNonUnit
@ PastixNonUnit
Definition: api.h:465
cpucblk_srelease_deps
void cpucblk_srelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
Definition: cpucblk_smpi_coeftab.c:558
core_spotf2sp
static void core_spotf2sp(pastix_int_t n, float *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the sequential static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t .
Definition: core_spotrfsp.c:68
pastix_scores.h
PastixLCoef
@ PastixLCoef
Definition: api.h:456
cpucblk_salloc
void cpucblk_salloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
Definition: cpucblk_sinit.c:264
solver_blok_s::lrownum
pastix_int_t lrownum
Definition: solver.h:113
pastix_lrblock_s::rk
int rk
Definition: pastix_lowrank.h:113
cpucblk_strsmsp
void cpucblk_strsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
Definition: core_strsmsp.c:356
solver_cblk_s::cblktype
int8_t cblktype
Definition: solver.h:130
solver_blok_s::inlast
int8_t inlast
Definition: solver.h:117
solver_cblk_s::fcolnum
pastix_int_t fcolnum
Definition: solver.h:132
cpucblk_spotrfsp1d
int cpucblk_spotrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, float *work, pastix_int_t lwork)
Perform the Cholesky factorization of a given panel and apply all its updates.
Definition: core_spotrfsp.c:334
pastix_lrblock_s::rkmax
int rkmax
Definition: pastix_lowrank.h:114
core_spotrfsp
void core_spotrfsp(pastix_int_t n, float *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting Cholesky factorization of the matrix n-by-n A = L * L^t .
Definition: core_spotrfsp.c:144
cpucblk_sgemmsp
pastix_fixdbl_t cpucblk_sgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, float *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
Definition: core_sgemmsp.c:1512
cblk_getdataL
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
Definition: solver.h:260
solver_blok_s::fcblknm
pastix_int_t fcblknm
Definition: solver.h:110
PastixRight
@ PastixRight
Definition: api.h:474