PaStiX Handbook  6.2.1
core_zpxtrfsp.c
Go to the documentation of this file.
1 /**
2  *
3  * @file core_zpxtrfsp.c
4  *
5  * PaStiX kernel routines for LL^t factorization.
6  *
7  * @copyright 2011-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8  * Univ. Bordeaux. All rights reserved.
9  *
10  * @version 6.0.3
11  * @author Mathieu Faverge
12  * @author Pierre Ramet
13  * @author Xavier Lacoste
14  * @date 2019-12-19
15  * @generated from /builds/solverstack/pastix/kernels/core_zpxtrfsp.c, normal z -> z, Tue Apr 12 09:38:37 2022
16  *
17  **/
18 #include "common.h"
19 #include "cblas.h"
20 #include "blend/solver.h"
21 #include "pastix_zcores.h"
22 #include "kernels_trace.h"
23 
24 #ifndef DOXYGEN_SHOULD_SKIP_THIS
25 #define MAXSIZEOFBLOCKS 64
26 static pastix_complex64_t zone = 1.0;
27 static pastix_complex64_t mzone = -1.0;
28 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
29 
30 /**
31  *******************************************************************************
32  *
33  * @ingroup kernel_blas_lapack_null
34  *
35  * @brief Compute the sequential static pivoting LL^t factorization of the
36  * matrix n-by-n A = L * L^t .
37  *
38  *******************************************************************************
39  *
40  * @param[in] n
41  * The number of rows and columns of the matrix A.
42  *
43  * @param[inout] A
44  * The matrix A to factorize with LL^t factorization. The matrix
45  * is of size lda -by- n.
46  *
47  * @param[in] lda
48  * The leading dimension of the matrix A.
49  *
50  * @param[inout] nbpivots
51  * Pointer to the number of piovting operations made during
52  * factorization. It is updated during this call
53  *
54  * @param[in] criterion
55  * Threshold use for static pivoting. If diagonal value is under this
56  * threshold, its value is replaced by the threshold and the number of
57  * pivots is incremented.
58  *
59  *******************************************************************************
60  *
61  * @warning This routine will fail if it discovers a null or negative value on
62  * the diagonal during factorization.
63  *
64  *******************************************************************************/
65 static inline void
66 core_zpxtf2sp( pastix_int_t n,
67  pastix_complex64_t *A,
68  pastix_int_t lda,
69  pastix_int_t *nbpivots,
70  double criterion )
71 {
72  pastix_int_t k;
73  pastix_complex64_t *Akk = A; /* A [k ][k] */
74  pastix_complex64_t *Amk = A+1; /* A [k+1][k] */
75  pastix_complex64_t alpha;
76 
77  for (k=0; k<n; k++){
78  if ( cabs(*Akk) < criterion ) {
79  (*Akk) = (pastix_complex64_t)criterion;
80  (*nbpivots)++;
81  }
82 
83  *Akk = csqrt(*Akk);
84  alpha = 1.0 / (*Akk);
85 
86  /* Scale the diagonal to compute L((k+1):n,k) */
87  cblas_zscal(n-k-1, CBLAS_SADDR( alpha ), Amk, 1 );
88 
89  /* Move to next Akk */
90  Akk += (lda+1);
91 
92  cblas_zsyrk( CblasColMajor, CblasLower, CblasNoTrans,
93  n-k-1, 1,
94  CBLAS_SADDR( mzone ), Amk, lda,
95  CBLAS_SADDR( zone ), Akk, lda );
96 
97  /* Move to next Amk */
98  Amk = Akk+1;
99  }
100 }
101 
102 /**
103  *******************************************************************************
104  *
105  * @brief Compute the block static pivoting LL^t factorization of the matrix
106  * n-by-n A = L * L^t .
107  *
108  *******************************************************************************
109  *
110  * @param[in] n
111  * The number of rows and columns of the matrix A.
112  *
113  * @param[inout] A
114  * The matrix A to factorize with LL^t factorization. The matrix
115  * is of size lda -by- n.
116  *
117  * @param[in] lda
118  * The leading dimension of the matrix A.
119  *
120  * @param[inout] nbpivots
121  * Pointer to the number of piovting operations made during
122  * factorization. It is updated during this call
123  *
124  * @param[in] criterion
125  * Threshold use for static pivoting. If diagonal value is under this
126  * threshold, its value is replaced by the threshold and the nu,ber of
127  * pivots is incremented.
128  *
129  *******************************************************************************
130  *
131  * @warning This routine will fail if it discovers a null or negative value on
132  * the diagonal during factorization.
133  *
134  *******************************************************************************/
135 void
136 core_zpxtrfsp( pastix_int_t n,
137  pastix_complex64_t *A,
138  pastix_int_t lda,
139  pastix_int_t *nbpivots,
140  double criterion )
141 {
142  pastix_int_t k, blocknbr, blocksize, matrixsize;
143  pastix_complex64_t *tmp,*tmp1,*tmp2;
144 
145  /* diagonal supernode is divided into MAXSIZEOFBLOCK-by-MAXSIZEOFBLOCKS blocks */
146  blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
147 
148  for (k=0; k<blocknbr; k++) {
149 
150  blocksize = pastix_imin(MAXSIZEOFBLOCKS, n-k*MAXSIZEOFBLOCKS);
151  tmp = A+(k*MAXSIZEOFBLOCKS)*(lda+1); /* Lk,k */
152 
153  /* Factorize the diagonal block Akk*/
154  core_zpxtf2sp(blocksize, tmp, lda, nbpivots, criterion);
155 
156  if ((k*MAXSIZEOFBLOCKS+blocksize) < n) {
157 
158  tmp1 = tmp + blocksize; /* Lk+1,k */
159  tmp2 = tmp1 + blocksize * lda; /* Lk+1,k+1 */
160 
161  matrixsize = n-(k*MAXSIZEOFBLOCKS+blocksize);
162 
163  /* Compute the column L(k+1:n,k) = (L(k,k)D(k,k))^{-1}A(k+1:n,k) */
164  /* 1) Compute A(k+1:n,k) = A(k+1:n,k)L(k,k)^{-T} = D(k,k)L(k+1:n,k) */
165  /* input: L(k,k) in tmp, A(k+1:n,k) in tmp1 */
166  /* output: A(k+1:n,k) in tmp1 */
167  cblas_ztrsm(CblasColMajor,
168  CblasRight, CblasLower,
169  CblasTrans, CblasNonUnit,
170  matrixsize, blocksize,
171  CBLAS_SADDR(zone), tmp, lda,
172  tmp1, lda);
173 
174  /* Update Ak+1k+1 = Ak+1k+1 - Lk+1k * Lk+1kT */
175  cblas_zsyrk(CblasColMajor, CblasLower, CblasNoTrans,
176  matrixsize, blocksize,
177  CBLAS_SADDR( mzone ), tmp1, lda,
178  CBLAS_SADDR( zone ), tmp2, lda);
179  }
180  }
181 }
182 
183 /**
184  *******************************************************************************
185  *
186  * @brief Compute the LL^t factorization of the diagonal block in a panel.
187  *
188  * @warning This routine will fail if it discovers a null or negative value on
189  * the diagonal during factorization.
190  *
191  *******************************************************************************
192  *
193  * @param[in] solvmtx
194  * Solver Matrix structure of the problem
195  *
196  * @param[in] cblk
197  * Pointer to the structure representing the panel to factorize in the
198  * cblktab array. Next column blok must be accessible through cblk[1].
199  *
200  * @param[inout] dataL
201  * The pointer to the correct representation of the lower part the data.
202  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
203  * - pastix_lr_block if the block is compressed.
204  *
205  *******************************************************************************
206  *
207  * @return The number of static pivoting performed during the diagonal block
208  * factorization.
209  *
210  *******************************************************************************/
211 int
212 cpucblk_zpxtrfsp1d_pxtrf( SolverMatrix *solvmtx,
213  SolverCblk *cblk,
214  void *dataL )
215 {
216  pastix_int_t ncols, stride;
217  pastix_int_t nbpivots = 0;
218  pastix_fixdbl_t time, flops;
219  pastix_complex64_t *L;
220  pastix_lrblock_t *lrL;
221  double criterion = solvmtx->diagthreshold;
222 
223  time = kernel_trace_start( PastixKernelPXTRF );
224 
225  ncols = cblk->lcolnum - cblk->fcolnum + 1;
226  stride = (cblk->cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->stride;
227 
228  /* check if diagonal column block */
229  assert( cblk->fcolnum == cblk->fblokptr->frownum );
230  assert( cblk->lcolnum == cblk->fblokptr->lrownum );
231 
232  if ( cblk->cblktype & CBLK_COMPRESSED ) {
233  /* dataL is a LRblock */
234  lrL = (pastix_lrblock_t *)dataL;
235  L = lrL->u;
236  stride = ncols;
237 
238  assert( lrL->rk == -1 );
239  assert( stride == lrL->rkmax );
240  } else {
241  L = (pastix_complex64_t *)dataL;
242  }
243 
244  /* Factorize diagonal block */
245  flops = FLOPS_ZPOTRF( ncols );
246  kernel_trace_start_lvl2( PastixKernelLvl2PXTRF );
247  core_zpxtrfsp(ncols, L, stride, &nbpivots, criterion );
248  kernel_trace_stop_lvl2( flops );
249 
250  kernel_trace_stop( cblk->fblokptr->inlast, PastixKernelPXTRF, ncols, 0, 0, flops, time );
251 
252  if ( nbpivots ) {
253  pastix_atomic_add_32b( &(solvmtx->nbpivots), nbpivots );
254  }
255  return nbpivots;
256 }
257 
258 /**
259  *******************************************************************************
260  *
261  * @brief Compute the LL^t factorization of one panel.
262  *
263  *******************************************************************************
264  *
265  * @param[in] solvmtx
266  * Solver Matrix structure of the problem
267  *
268  * @param[in] cblk
269  * Pointer to the structure representing the panel to factorize in the
270  * cblktab array. Next column blok must be accessible through cblk[1].
271  *
272  * @param[inout] L
273  * The pointer to the correct representation of the lower part the data.
274  * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
275  * - pastix_lr_block if the block is compressed.
276  *
277  *******************************************************************************
278  *
279  * @return The number of static pivoting during factorization of the diagonal
280  * block.
281  *
282  *******************************************************************************/
283 int
284 cpucblk_zpxtrfsp1d_panel( SolverMatrix *solvmtx,
285  SolverCblk *cblk,
286  void *L )
287 {
288  pastix_int_t nbpivots;
289  nbpivots = cpucblk_zpxtrfsp1d_pxtrf( solvmtx, cblk, L );
290 
293  cblk, L, L, &(solvmtx->lowrank) );
294  return nbpivots;
295 }
296 
297 
298 /**
299  *******************************************************************************
300  *
301  * @brief Perform the LL^t factorization of a given panel and apply all its
302  * updates.
303  *
304  *******************************************************************************
305  *
306  * @param[in] solvmtx
307  * Solver Matrix structure of the problem
308  *
309  * @param[in] cblk
310  * Pointer to the structure representing the panel to factorize in the
311  * cblktab array. Next column blok must be accessible through cblk[1].
312  *
313  * @param[in] work
314  * Temporary memory buffer.
315  *
316  * @param[in] lwork
317  * Temporary workspace dimension.
318  *
319  *******************************************************************************
320  *
321  * @return The number of static pivoting during factorization of the diagonal
322  * block.
323  *
324  *******************************************************************************/
325 int
326 cpucblk_zpxtrfsp1d( SolverMatrix *solvmtx,
327  SolverCblk *cblk,
328  pastix_complex64_t *work,
329  pastix_int_t lwork )
330 {
331  void *L = cblk_getdataL( cblk );
332  SolverCblk *fcblk;
333  SolverBlok *blok, *lblk;
334  pastix_int_t nbpivots;
335 
336  nbpivots = cpucblk_zpxtrfsp1d_panel( solvmtx, cblk, L );
337 
338  blok = cblk->fblokptr + 1; /* First off-diagonal block */
339  lblk = cblk[1].fblokptr; /* Next diagonal block */
340 
341  /* If there are off-diagonal blocks, perform the updates */
342  for( ; blok < lblk; blok++ )
343  {
344  fcblk = (solvmtx->cblktab + blok->fcblknm);
345 
346  if ( fcblk->cblktype & CBLK_FANIN ) {
347  cpucblk_zalloc( PastixLCoef, fcblk );
348  }
349 
351  cblk, blok, fcblk,
352  L, L, cblk_getdataL( fcblk ),
353  work, lwork, &(solvmtx->lowrank) );
354 
355  cpucblk_zrelease_deps( PastixLCoef, solvmtx, cblk, fcblk );
356  }
357 
358  return nbpivots;
359 }
core_zpxtrfsp
void core_zpxtrfsp(pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the block static pivoting LL^t factorization of the matrix n-by-n A = L * L^t .
Definition: core_zpxtrfsp.c:136
solver_blok_s::frownum
pastix_int_t frownum
Definition: solver.h:112
solver.h
core_zpxtf2sp
static void core_zpxtf2sp(pastix_int_t n, pastix_complex64_t *A, pastix_int_t lda, pastix_int_t *nbpivots, double criterion)
Compute the sequential static pivoting LL^t factorization of the matrix n-by-n A = L * L^t .
Definition: core_zpxtrfsp.c:66
PastixTrans
@ PastixTrans
Definition: api.h:425
solver_cblk_s::fblokptr
SolverBlok * fblokptr
Definition: solver.h:134
solver_cblk_s::stride
pastix_int_t stride
Definition: solver.h:135
pastix_lrblock_s::u
void * u
Definition: pastix_lowrank.h:115
solver_cblk_s
Solver column block structure.
Definition: solver.h:127
solver_blok_s
Solver block structure.
Definition: solver.h:107
pastix_zcores.h
PastixLower
@ PastixLower
Definition: api.h:445
pastix_lrblock_s
The block low-rank structure to hold a matrix in low-rank form.
Definition: pastix_lowrank.h:112
cpucblk_zpxtrfsp1d_pxtrf
int cpucblk_zpxtrfsp1d_pxtrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL)
Compute the LL^t factorization of the diagonal block in a panel.
Definition: core_zpxtrfsp.c:212
cpucblk_zpxtrfsp1d_panel
int cpucblk_zpxtrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L)
Compute the LL^t factorization of one panel.
Definition: core_zpxtrfsp.c:284
cpucblk_zgemmsp
pastix_fixdbl_t cpucblk_zgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, pastix_complex64_t *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
Definition: core_zgemmsp.c:1512
solver_cblk_s::lcolnum
pastix_int_t lcolnum
Definition: solver.h:133
PastixNonUnit
@ PastixNonUnit
Definition: api.h:465
PastixLCoef
@ PastixLCoef
Definition: api.h:456
solver_blok_s::lrownum
pastix_int_t lrownum
Definition: solver.h:113
cpucblk_zalloc
void cpucblk_zalloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
Definition: cpucblk_zinit.c:264
pastix_lrblock_s::rk
int rk
Definition: pastix_lowrank.h:113
cpucblk_zpxtrfsp1d
int cpucblk_zpxtrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, pastix_complex64_t *work, pastix_int_t lwork)
Perform the LL^t factorization of a given panel and apply all its updates.
Definition: core_zpxtrfsp.c:326
solver_cblk_s::cblktype
int8_t cblktype
Definition: solver.h:130
solver_blok_s::inlast
int8_t inlast
Definition: solver.h:117
solver_cblk_s::fcolnum
pastix_int_t fcolnum
Definition: solver.h:132
cpucblk_zrelease_deps
void cpucblk_zrelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
Definition: cpucblk_zmpi_coeftab.c:558
cpucblk_ztrsmsp
void cpucblk_ztrsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
Definition: core_ztrsmsp.c:356
pastix_lrblock_s::rkmax
int rkmax
Definition: pastix_lowrank.h:114
cblk_getdataL
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
Definition: solver.h:260
solver_blok_s::fcblknm
pastix_int_t fcblknm
Definition: solver.h:110
PastixRight
@ PastixRight
Definition: api.h:474