PaStiX Handbook  6.3.0
core_zgetrfsp.c
Go to the documentation of this file.
1 /**
2  *
3  * @file core_zgetrfsp.c
4  *
5  * PaStiX kernel routines for LU factorization.
6  *
7  * @copyright 2011-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.3.0
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Xavier Lacoste
14  * @author Gregoire Pichon
15  * @date 2023-01-13
16  * @generated from /builds/solverstack/pastix/kernels/core_zgetrfsp.c, normal z -> z, Mon Aug 28 13:40:36 2023
17  *
18  **/
19 #include "common.h"
20 #include "cblas.h"
21 #include "blend/solver.h"
22 #include "pastix_zcores.h"
23 #include "kernels_trace.h"
24 
25 #ifndef DOXYGEN_SHOULD_SKIP_THIS
26 #define MAXSIZEOFBLOCKS 64
27 static pastix_complex64_t zone = 1.0;
28 static pastix_complex64_t mzone = -1.0;
29 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
30 
31 /**
32  *******************************************************************************
33  *
34  * @ingroup kernel_blas_lapack_null
35  *
36  * @brief Compute the sequential static pivoting LU factorization of the matrix
37  * m-by-n A = L * U.
38  *
39  *******************************************************************************
40  *
41  * @param[in] m
42  * The number of rows and columns of the matrix A.
43  *
44  * @param[in] n
45  * The number of rows and columns of the matrix A.
46  *
47  * @param[inout] A
48  * The matrix A to factorize with LU factorization. The matrix
49  * is of size lda -by- n.
50  *
51  * @param[in] lda
52  * The leading dimension of the matrix A.
53  *
54  * @param[inout] nbpivots
55  * Pointer to the number of piovting operations made during
56  * factorization. It is updated during this call
57  *
58  * @param[in] criterion
59  * Threshold use for static pivoting. If diagonal value is under this
60  * threshold, its value is replaced by the threshold and the number of
61  * pivots is incremented.
62  *
63  *******************************************************************************/
64 static inline void
65 core_zgetf2sp( pastix_int_t m,
66  pastix_int_t n,
67  pastix_complex64_t *A,
68  pastix_int_t lda,
69  pastix_int_t *nbpivots,
70  double criterion )
71 {
72  pastix_int_t k, minMN;
73  pastix_complex64_t *Akk, *Aik, alpha;
74 
75  minMN = pastix_imin( m, n );
76 
77  Akk = A;
78  for (k=0; k<minMN; k++) {
79  Aik = Akk + 1;
80 
81  if ( cabs(*Akk) < criterion ) {
82  if ( creal(*Akk) < 0. ) {
83  *Akk = (pastix_complex64_t)(-criterion);
84  }
85  else {
86  *Akk = (pastix_complex64_t)criterion;
87  }
88  (*nbpivots)++;
89  }
90 
91  /* A_ik = A_ik / A_kk, i = k+1 .. n */
92  alpha = 1.0 / (*Akk);
93  cblas_zscal(m-k-1, CBLAS_SADDR( alpha ), Aik, 1 );
94 
95  if ( k+1 < minMN ) {
96 
97  /* A_ij = A_ij - A_ik * A_kj, i,j = k+1..n */
98  cblas_zgeru(CblasColMajor, m-k-1, n-k-1,
99  CBLAS_SADDR(mzone),
100  Aik, 1,
101  Akk+lda, lda,
102  Aik+lda, lda);
103  }
104 
105  Akk += lda+1;
106  }
107 }
108 
109 /**
110  *******************************************************************************
111  *
112  * @brief Compute the block static pivoting LU factorization of the matrix
113  * m-by-n A = L * U.
114  *
115  *******************************************************************************
116  *
117  * @param[in] n
118  * The number of rows and columns of the matrix A.
119  *
120  * @param[inout] A
121  * The matrix A to factorize with LU factorization. The matrix
122  * is of size lda -by- n.
123  *
124  * @param[in] lda
125  * The leading dimension of the matrix A.
126  *
127  * @param[inout] nbpivots
128  * Pointer to the number of piovting operations made during
129  * factorization. It is updated during this call
130  *
131  * @param[in] criterion
132  * Threshold use for static pivoting. If diagonal value is under this
133  * threshold, its value is replaced by the threshold and the number of
134  * pivots is incremented.
135  *
136  *******************************************************************************/
137 void
138 core_zgetrfsp( pastix_int_t n,
139  pastix_complex64_t *A,
140  pastix_int_t lda,
141  pastix_int_t *nbpivots,
142  double criterion )
143 {
144  pastix_int_t k, blocknbr, blocksize, matrixsize, tempm;
145  pastix_complex64_t *Akk, *Lik, *Ukj, *Aij;
146 
147  blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
148 
149  Akk = A; /* Lk,k */
150 
151  for (k=0; k<blocknbr; k++) {
152 
153  tempm = n - k * MAXSIZEOFBLOCKS;
154  blocksize = pastix_imin(MAXSIZEOFBLOCKS, tempm);
155  Lik = Akk + blocksize;
156  Ukj = Akk + blocksize*lda;
157  Aij = Ukj + blocksize;
158 
159  /* Factorize the diagonal block Akk*/
160  core_zgetf2sp( tempm, blocksize, Akk, lda, nbpivots, criterion );
161 
162  matrixsize = tempm - blocksize;
163  if ( matrixsize > 0 ) {
164 
165  /* Compute the column Ukk+1 */
166  cblas_ztrsm(CblasColMajor,
167  CblasLeft, CblasLower,
168  CblasNoTrans, CblasUnit,
169  blocksize, matrixsize,
170  CBLAS_SADDR(zone), Akk, lda,
171  Ukj, lda);
172 
173  /* Update Ak+1,k+1 = Ak+1,k+1 - Lk+1,k*Uk,k+1 */
174  cblas_zgemm(CblasColMajor,
175  CblasNoTrans, CblasNoTrans,
176  matrixsize, matrixsize, blocksize,
177  CBLAS_SADDR(mzone), Lik, lda,
178  Ukj, lda,
179  CBLAS_SADDR(zone), Aij, lda);
180  }
181 
182  Akk += blocksize * (lda+1);
183  }
184 }
185 
186 /**
187  *******************************************************************************
188  *
189  * @brief Compute the LU factorization of the diagonal block in a panel.
190  *
191  *******************************************************************************
192  *
193  * @param[in] solvmtx
194  * Solver Matrix structure of the problem
195  *
196  * @param[in] cblk
197  * Pointer to the structure representing the panel to factorize in the
198  * cblktab array. Next column blok must be accessible through cblk[1].
199  *
200  * @param[inout] dataL
201  * The pointer to the correct representation of the lower part of the data.
202  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
203  * - pastix_lr_block if the block is compressed.
204  *
205  * @param[inout] dataU
206  * The pointer to the correct representation of the upper part of the data.
207  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width
208  * - pastix_lr_block if the block is compressed.
209  *
210  *******************************************************************************
211  *
212  * @return The number of static pivoting performed during the diagonal block
213  * factorization.
214  *
215  *******************************************************************************/
216 int
217 cpucblk_zgetrfsp1d_getrf( SolverMatrix *solvmtx,
218  SolverCblk *cblk,
219  void *dataL,
220  void *dataU )
221 {
222  pastix_int_t ncols, stride;
223  pastix_int_t nbpivots = 0;
224  pastix_fixdbl_t time, flops;
225  pastix_lrblock_t *lrL, *lrU;
226  pastix_complex64_t *L, *U;
227 
228  double criterion = solvmtx->diagthreshold;
229 
230  time = kernel_trace_start( PastixKernelGETRF );
231 
232  ncols = cblk->lcolnum - cblk->fcolnum + 1;
233  stride = (cblk->cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->stride;
234 
235  if ( cblk->cblktype & CBLK_COMPRESSED ) {
236  /* dataL and dataU are LRblock */
237 
238  lrL = (pastix_lrblock_t *)dataL;
239  lrU = (pastix_lrblock_t *)dataU;
240  assert( (lrL->rk == -1) && (lrU->rk == -1) );
241 
242  L = lrL->u;
243  U = lrU->u;
244  stride = ncols;
245 
246  assert( stride == lrL->rkmax );
247  assert( stride == lrU->rkmax );
248  } else {
249  L = (pastix_complex64_t *)dataL;
250  U = (pastix_complex64_t *)dataU;
251  }
252 
253  core_zgeadd( PastixTrans, ncols, ncols,
254  1.0, U, stride,
255  1.0, L, stride );
256 
257  /* Factorize diagonal block */
258  flops = FLOPS_ZGETRF( ncols, ncols );
259  kernel_trace_start_lvl2( PastixKernelLvl2GETRF );
260  core_zgetrfsp(ncols, L, stride, &nbpivots, criterion);
261  kernel_trace_stop_lvl2( flops );
262 
263  /* Transpose Akk in ucoeftab */
264  core_zgetmo( ncols, ncols, L, stride, U, stride );
265 
266  kernel_trace_stop( cblk->fblokptr->inlast, PastixKernelGETRF, ncols, 0, 0, flops, time );
267 
268  if ( nbpivots ) {
269  pastix_atomic_add_32b( &(solvmtx->nbpivots), nbpivots );
270  }
271  return nbpivots;
272 }
273 
274 /**
275  *******************************************************************************
276  *
277  * @brief Compute the LU factorization of one panel.
278  *
279  *******************************************************************************
280  *
281  * @param[in] solvmtx
282  * Solver Matrix structure of the problem
283  *
284  * @param[in] cblk
285  * Pointer to the structure representing the panel to factorize in the
286  * cblktab array. Next column blok must be accessible through cblk[1].
287  *
288  * @param[inout] L
289  * The pointer to the correct representation of the lower part of the data.
290  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
291  * - pastix_lr_block if the block is compressed.
292  *
293  * @param[inout] U
294  * The pointer to the correct representation of the upper part of the data.
295  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
296  * - pastix_lr_block if the block is compressed.
297  *
298  *******************************************************************************
299  *
300  * @return The number of static pivoting performed during the diagonal block
301  * factorization.
302  *
303  *******************************************************************************/
304 int
305 cpucblk_zgetrfsp1d_panel( SolverMatrix *solvmtx,
306  SolverCblk *cblk,
307  void *L,
308  void *U )
309 {
310  pastix_int_t nbpivots;
311  nbpivots = cpucblk_zgetrfsp1d_getrf( solvmtx, cblk, L, U );
312 
313  /*
314  * We exploit the fact that the upper triangle is stored at the top of the L
315  * column, and by transposition the L part of the diagonal block is
316  * similarly stored in the U panel
317  */
320  cblk, L, L, &(solvmtx->lowrank) );
323  cblk, U, U, &(solvmtx->lowrank) );
324  return nbpivots;
325 }
326 
327 /**
328  *******************************************************************************
329  *
330  * @brief Perform the LU factorization of a given panel and apply all its
331  * updates.
332  *
333  *******************************************************************************
334  *
335  * @param[in] solvmtx
336  * Solver Matrix structure of the problem
337  *
338  * @param[in] cblk
339  * Pointer to the structure representing the panel to factorize in the
340  * cblktab array. Next column blok must be accessible through cblk[1].
341  *
342  * @param[in] work
343  * Temporary memory buffer.
344  *
345  * @param[in] lwork
346  * Temporary workspace dimension.
347  *
348  *******************************************************************************
349  *
350  * @return The number of static pivoting during factorization of the diagonal
351  * block.
352  *
353  *******************************************************************************/
354 int
355 cpucblk_zgetrfsp1d( SolverMatrix *solvmtx,
356  SolverCblk *cblk,
357  pastix_complex64_t *work,
358  pastix_int_t lwork )
359 {
360  void *L = cblk_getdataL( cblk );
361  void *U = cblk_getdataU( cblk );
362  SolverCblk *fcblk;
363  SolverBlok *blok, *lblk;
364  pastix_int_t nbpivots;
365 
366  nbpivots = cpucblk_zgetrfsp1d_panel( solvmtx, cblk, L, U );
367 
368  blok = cblk->fblokptr + 1; /* this diagonal block */
369  lblk = cblk[1].fblokptr; /* the next diagonal block */
370 
371  /* if there are off-diagonal supernodes in the column */
372  for( ; blok < lblk; blok++ )
373  {
374  fcblk = solvmtx->cblktab + blok->fcblknm;
375 
376  if ( fcblk->cblktype & CBLK_FANIN ) {
377  cpucblk_zalloc( PastixLUCoef, fcblk );
378  }
379 
380  /* Update on L */
382  cblk, blok, fcblk,
383  L, U, cblk_getdataL( fcblk ),
384  work, lwork, &(solvmtx->lowrank) );
385 
386  /* Update on U */
387  if ( blok+1 < lblk ) {
389  cblk, blok, fcblk,
390  U, L, cblk_getdataU( fcblk ),
391  work, lwork, &(solvmtx->lowrank) );
392  }
393  cpucblk_zrelease_deps( PastixLUCoef, solvmtx, cblk, fcblk );
394  }
395 
396  return nbpivots;
397 }
static void core_zgetf2sp(pastix_int_t m, pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the sequential static pivoting LU factorization of the matrix m-by-n A = L * U.
Definition: core_zgetrfsp.c:65
void core_zgetrfsp(pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the block static pivoting LU factorization of the matrix m-by-n A = L * U.
void core_zgetmo(int m, int n, const pastix_complex64_t *A, int lda, pastix_complex64_t *B, int ldb)
Transposes a m-by-n matrix out of place using an extra workspace of size m-by-n.
Definition: core_zgetmo.c:49
int core_zgeadd(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, pastix_complex64_t alpha, const pastix_complex64_t *A, pastix_int_t LDA, pastix_complex64_t beta, pastix_complex64_t *B, pastix_int_t LDB)
Add two matrices together.
Definition: core_zgeadd.c:78
void cpucblk_ztrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
Definition: core_ztrsmsp.c:356
int cpucblk_zgetrfsp1d_getrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL, void *dataU)
Compute the LU factorization of the diagonal block in a panel.
void cpucblk_zalloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
pastix_fixdbl_t cpucblk_zgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, pastix_complex64_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
int cpucblk_zgetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex64_t *work, pastix_int_t lwork)
Perform the LU factorization of a given panel and apply all its updates.
void cpucblk_zrelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
int cpucblk_zgetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *U)
Compute the LU factorization of one panel.
The block low-rank structure to hold a matrix in low-rank form.
@ PastixLCoef
Definition: api.h:478
@ PastixLUCoef
Definition: api.h:480
@ PastixUCoef
Definition: api.h:479
@ PastixUpper
Definition: api.h:466
@ PastixRight
Definition: api.h:496
@ PastixUnit
Definition: api.h:488
@ PastixNonUnit
Definition: api.h:487
@ PastixNoTrans
Definition: api.h:447
@ PastixTrans
Definition: api.h:448
pastix_int_t fcblknm
Definition: solver.h:140
static void * cblk_getdataU(const SolverCblk *cblk)
Get the pointer to the data associated to the upper part of the cblk.
Definition: solver.h:348
SolverBlok * fblokptr
Definition: solver.h:163
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
Definition: solver.h:336
int8_t inlast
Definition: solver.h:146
pastix_int_t stride
Definition: solver.h:164
int8_t cblktype
Definition: solver.h:159
pastix_int_t lcolnum
Definition: solver.h:162
pastix_int_t fcolnum
Definition: solver.h:161
Solver block structure.
Definition: solver.h:137
Solver column block structure.
Definition: solver.h:156