PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
core_sgetrfsp.c
Go to the documentation of this file.
1/**
2 *
3 * @file core_sgetrfsp.c
4 *
5 * PaStiX kernel routines for LU factorization.
6 *
7 * @copyright 2011-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8 * Univ. Bordeaux. All rights reserved.
9 *
10 * @version 6.4.0
11 * @author Mathieu Faverge
12 * @author Pierre Ramet
13 * @author Xavier Lacoste
14 * @author Gregoire Pichon
15 * @date 2024-07-05
16 * @generated from /builds/2mk6rsew/0/solverstack/pastix/kernels/core_zgetrfsp.c, normal z -> s, Tue Feb 25 14:34:56 2025
17 *
18 **/
19#include "common.h"
20#include "cblas.h"
21#include "blend/solver.h"
22#include "pastix_scores.h"
23#include "kernels_trace.h"
24
25#ifndef DOXYGEN_SHOULD_SKIP_THIS
26#define MAXSIZEOFBLOCKS 64
27static float sone = 1.0;
28static float msone = -1.0;
29#endif /* DOXYGEN_SHOULD_SKIP_THIS */
30
31/**
32 *******************************************************************************
33 *
34 * @ingroup kernel_blas_lapack_null
35 *
36 * @brief Compute the sequential static pivoting LU factorization of the matrix
37 * m-by-n A = L * U.
38 *
39 *******************************************************************************
40 *
41 * @param[in] m
42 * The number of rows and columns of the matrix A.
43 *
44 * @param[in] n
45 * The number of rows and columns of the matrix A.
46 *
47 * @param[inout] A
48 * The matrix A to factorize with LU factorization. The matrix
49 * is of size lda -by- n.
50 *
51 * @param[in] lda
52 * The leading dimension of the matrix A.
53 *
54 * @param[inout] nbpivots
55 * Pointer to the number of piovting operations made during
56 * factorization. It is updated during this call
57 *
58 * @param[in] criterion
59 * Threshold use for static pivoting. If diagonal value is under this
60 * threshold, its value is replaced by the threshold and the number of
61 * pivots is incremented.
62 *
63 *******************************************************************************/
64static inline void
67 float *A,
68 pastix_int_t lda,
69 pastix_int_t *nbpivots,
70 float criterion )
71{
72 pastix_int_t k, minMN;
73 float *Akk, *Aik, alpha;
74
75 minMN = pastix_imin( m, n );
76
77 Akk = A;
78 for (k=0; k<minMN; k++) {
79 Aik = Akk + 1;
80
81 if ( fabsf(*Akk) < criterion ) {
82 if ( (*Akk) < 0. ) {
83 *Akk = (float)(-criterion);
84 }
85 else {
86 *Akk = (float)criterion;
87 }
88 (*nbpivots)++;
89 }
90
91 /* A_ik = A_ik / A_kk, i = k+1 .. n */
92 alpha = 1.0 / (*Akk);
93 cblas_sscal(m-k-1, ( alpha ), Aik, 1 );
94
95 if ( k+1 < minMN ) {
96
97 /* A_ij = A_ij - A_ik * A_kj, i,j = k+1..n */
98 cblas_sger(CblasColMajor, m-k-1, n-k-1,
99 (msone),
100 Aik, 1,
101 Akk+lda, lda,
102 Aik+lda, lda);
103 }
104
105 Akk += lda+1;
106 }
107}
108
109/**
110 *******************************************************************************
111 *
112 * @brief Compute the block static pivoting LU factorization of the matrix
113 * m-by-n A = L * U.
114 *
115 *******************************************************************************
116 *
117 * @param[in] n
118 * The number of rows and columns of the matrix A.
119 *
120 * @param[inout] A
121 * The matrix A to factorize with LU factorization. The matrix
122 * is of size lda -by- n.
123 *
124 * @param[in] lda
125 * The leading dimension of the matrix A.
126 *
127 * @param[inout] nbpivots
128 * Pointer to the number of piovting operations made during
129 * factorization. It is updated during this call
130 *
131 * @param[in] criterion
132 * Threshold use for static pivoting. If diagonal value is under this
133 * threshold, its value is replaced by the threshold and the number of
134 * pivots is incremented.
135 *
136 *******************************************************************************/
137void
139 float *A,
140 pastix_int_t lda,
141 pastix_int_t *nbpivots,
142 float criterion )
143{
144 pastix_int_t k, blocknbr, blocksize, matrixsize, tempm;
145 float *Akk, *Lik, *Ukj, *Aij;
146
147 blocknbr = pastix_iceil( n, MAXSIZEOFBLOCKS );
148
149 Akk = A; /* Lk,k */
150
151 for (k=0; k<blocknbr; k++) {
152
153 tempm = n - k * MAXSIZEOFBLOCKS;
154 blocksize = pastix_imin(MAXSIZEOFBLOCKS, tempm);
155 Lik = Akk + blocksize;
156 Ukj = Akk + blocksize*lda;
157 Aij = Ukj + blocksize;
158
159 /* Factorize the diagonal block Akk*/
160 core_sgetf2sp( tempm, blocksize, Akk, lda, nbpivots, criterion );
161
162 matrixsize = tempm - blocksize;
163 if ( matrixsize > 0 ) {
164
165 /* Compute the column Ukk+1 */
166 cblas_strsm(CblasColMajor,
167 CblasLeft, CblasLower,
168 CblasNoTrans, CblasUnit,
169 blocksize, matrixsize,
170 (sone), Akk, lda,
171 Ukj, lda);
172
173 /* Update Ak+1,k+1 = Ak+1,k+1 - Lk+1,k*Uk,k+1 */
174 cblas_sgemm(CblasColMajor,
175 CblasNoTrans, CblasNoTrans,
176 matrixsize, matrixsize, blocksize,
177 (msone), Lik, lda,
178 Ukj, lda,
179 (sone), Aij, lda);
180 }
181
182 Akk += blocksize * (lda+1);
183 }
184}
185
186/**
187 *******************************************************************************
188 *
189 * @brief Compute the LU factorization of the diagonal block in a panel.
190 *
191 *******************************************************************************
192 *
193 * @param[in] solvmtx
194 * Solver Matrix structure of the problem
195 *
196 * @param[in] cblk
197 * Pointer to the structure representing the panel to factorize in the
198 * cblktab array. Next column blok must be accessible through cblk[1].
199 *
200 * @param[inout] dataL
201 * The pointer to the correct representation of the lower part of the data.
202 * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
203 * - pastix_lr_block if the block is compressed.
204 *
205 * @param[inout] dataU
206 * The pointer to the correct representation of the upper part of the data.
207 * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width
208 * - pastix_lr_block if the block is compressed.
209 *
210 *******************************************************************************
211 *
212 * @return The number of static pivoting performed during the diagonal block
213 * factorization.
214 *
215 *******************************************************************************/
216int
218 SolverCblk *cblk,
219 void *dataL,
220 void *dataU )
221{
222 pastix_int_t ncols, stride;
223 pastix_int_t nbpivots = 0;
224 pastix_fixdbl_t time, flops;
225 pastix_lrblock_t *lrL, *lrU;
226 float *L, *U;
227
228 float criterion = solvmtx->diagthreshold;
229
231
232 ncols = cblk->lcolnum - cblk->fcolnum + 1;
233 stride = (cblk->cblktype & CBLK_LAYOUT_2D) ? ncols : cblk->stride;
234
235 if ( cblk->cblktype & CBLK_COMPRESSED ) {
236 /* dataL and dataU are LRblock */
237
238 lrL = (pastix_lrblock_t *)dataL;
239 lrU = (pastix_lrblock_t *)dataU;
240 assert( (lrL->rk == -1) && (lrU->rk == -1) );
241
242 L = lrL->u;
243 U = lrU->u;
244 stride = ncols;
245
246 assert( stride == lrL->rkmax );
247 assert( stride == lrU->rkmax );
248 } else {
249 L = (float *)dataL;
250 U = (float *)dataU;
251 }
252
253 core_sgeadd( PastixTrans, ncols, ncols,
254 1.0, U, stride,
255 1.0, L, stride );
256
257 /* Factorize diagonal block */
258 flops = FLOPS_SGETRF( ncols, ncols );
259 kernel_trace_start_lvl2( PastixKernelLvl2GETRF );
260 core_sgetrfsp(ncols, L, stride, &nbpivots, criterion);
261 kernel_trace_stop_lvl2( flops );
262
263 /* Transpose Akk in ucoeftab */
264 core_sgetmo( ncols, ncols, L, stride, U, stride );
265
266 kernel_trace_stop( cblk->fblokptr->inlast, PastixKernelGETRF, ncols, 0, 0, flops, time );
267
268 if ( nbpivots ) {
269 pastix_atomic_add_32b( &(solvmtx->nbpivots), nbpivots );
270 }
271 return nbpivots;
272}
273
274/**
275 *******************************************************************************
276 *
277 * @brief Compute the LU factorization of one panel.
278 *
279 *******************************************************************************
280 *
281 * @param[in] solvmtx
282 * Solver Matrix structure of the problem
283 *
284 * @param[in] cblk
285 * Pointer to the structure representing the panel to factorize in the
286 * cblktab array. Next column blok must be accessible through cblk[1].
287 *
288 * @param[inout] L
289 * The pointer to the correct representation of the lower part of the data.
290 * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
291 * - pastix_lr_block if the block is compressed.
292 *
293 * @param[inout] U
294 * The pointer to the correct representation of the upper part of the data.
295 * - coeftab if the block is in full rank. Must be of size cblk.stride -by- cblk.width.
296 * - pastix_lr_block if the block is compressed.
297 *
298 *******************************************************************************
299 *
300 * @return The number of static pivoting performed during the diagonal block
301 * factorization.
302 *
303 *******************************************************************************/
304int
306 SolverCblk *cblk,
307 void *L,
308 void *U )
309{
310 pastix_int_t nbpivots;
311 nbpivots = cpucblk_sgetrfsp1d_getrf( solvmtx, cblk, L, U );
312
313 /*
314 * We exploit the fact that the upper triangle is stored at the top of the L
315 * column, and by transposition the L part of the diagonal block is
316 * similarly stored in the U panel
317 */
320 cblk, L, L, &(solvmtx->lowrank) );
323 cblk, U, U, &(solvmtx->lowrank) );
324 return nbpivots;
325}
326
327/**
328 *******************************************************************************
329 *
330 * @brief Perform the LU factorization of a given panel and apply all its
331 * updates.
332 *
333 *******************************************************************************
334 *
335 * @param[in] solvmtx
336 * Solver Matrix structure of the problem
337 *
338 * @param[in] cblk
339 * Pointer to the structure representing the panel to factorize in the
340 * cblktab array. Next column blok must be accessible through cblk[1].
341 *
342 * @param[in] work
343 * Temporary memory buffer.
344 *
345 * @param[in] lwork
346 * Temporary workspace dimension.
347 *
348 *******************************************************************************
349 *
350 * @return The number of static pivoting during factorization of the diagonal
351 * block.
352 *
353 *******************************************************************************/
354int
356 SolverCblk *cblk,
357 float *work,
358 pastix_int_t lwork )
359{
360 void *L = cblk_getdataL( cblk );
361 void *U = cblk_getdataU( cblk );
362 SolverCblk *fcblk;
363 SolverBlok *blok, *lblk;
364 pastix_int_t nbpivots;
365
366 nbpivots = cpucblk_sgetrfsp1d_panel( solvmtx, cblk, L, U );
367
368 blok = cblk->fblokptr + 1; /* this diagonal block */
369 lblk = cblk[1].fblokptr; /* the next diagonal block */
370
371 /* if there are off-diagonal supernodes in the column */
372 for( ; blok < lblk; blok++ )
373 {
374 fcblk = solvmtx->cblktab + blok->fcblknm;
375
376 if ( fcblk->cblktype & CBLK_FANIN ) {
378 }
379
380 /* Update on L */
382 cblk, blok, fcblk,
383 L, U, cblk_getdataL( fcblk ),
384 work, lwork, &(solvmtx->lowrank) );
385
386 /* Update on U */
387 if ( blok+1 < lblk ) {
389 cblk, blok, fcblk,
390 U, L, cblk_getdataU( fcblk ),
391 work, lwork, &(solvmtx->lowrank) );
392 }
393 cpucblk_srelease_deps( PastixLUCoef, solvmtx, cblk, fcblk );
394 }
395
396 return nbpivots;
397}
398
399/**
400 *******************************************************************************
401 *
402 * @brief Perform the LU factorization of a given panel and submit tasks for the
403 * subsequent updates.
404 *
405 *******************************************************************************
406 *
407 * @param[in] solvmtx
408 * Solver Matrix structure of the problem
409 *
410 * @param[in] cblk
411 * Pointer to the structure representing the panel to factorize in the
412 * cblktab array. Next column blok must be accessible through cblk[1].
413 *
414 *******************************************************************************
415 *
416 * @return The number of static pivoting during factorization of the diagonal
417 * block.
418 *
419 *******************************************************************************/
420int
422 SolverCblk *cblk )
423{
424 void *L = cblk_getdataL( cblk );
425 void *U = cblk_getdataU( cblk );
426 SolverBlok *blok, *lblk;
427 pastix_int_t i, nbpivots;
428 pastix_queue_t *queue = solvmtx->computeQueue[ cblk->threadid ];
429
430 assert( cblk->cblktype & CBLK_TASKS_2D );
431 nbpivots = cpucblk_sgetrfsp1d_panel( solvmtx, cblk, L, U );
432
433 blok = cblk->fblokptr + 1; /* this diagonal block */
434 lblk = cblk[1].fblokptr; /* the next diagonal block */
435
436 /* if there are off-diagonal supernodes in the column */
437 for( i=0; blok < lblk; i++, blok++ )
438 {
439 assert( !((solvmtx->cblktab + blok->fcblknm)->cblktype & CBLK_RECV) );
440 pqueuePush1( queue, - (blok - solvmtx->bloktab) - 1, cblk->priority + i );
441
442 /* Skip blocks facing the same cblk */
443 while ( ( blok < lblk ) &&
444 ( blok[0].fcblknm == blok[1].fcblknm ) &&
445 ( blok[0].lcblknm == blok[1].lcblknm ) )
446 {
447 blok++;
448 }
449 }
450
451 return nbpivots;
452}
453
454/**
455 *******************************************************************************
456 *
457 * @brief Apply the updates of the LU factorisation of a given panel.
458 *
459 *******************************************************************************
460 *
461 * @param[in] solvmtx
462 * Solver Matrix structure of the problem
463 *
464 * @param[in] blok
465 * Pointer to the blok where the update start.
466 *
467 * @param[in] work
468 * Temporary memory buffer.
469 *
470 * @param[in] lwork
471 * Temporary workspace dimension.
472 *
473 *******************************************************************************/
474void
476 SolverBlok *blok,
477 float *work,
478 pastix_int_t lwork )
479{
480 SolverCblk *cblk = solvmtx->cblktab + blok->lcblknm;
481 SolverCblk *fcbk = solvmtx->cblktab + blok->fcblknm;
482 SolverBlok *lblk = cblk[1].fblokptr; /* the next diagonal block */
483 void *L = cblk_getdataL( cblk );
484 void *U = cblk_getdataU( cblk );
485
486 if ( fcbk->cblktype & CBLK_FANIN ) {
488 }
489
490 do
491 {
492 /* Update on L */
494 cblk, blok, fcbk,
495 L, U, cblk_getdataL( fcbk ),
496 work, lwork, &(solvmtx->lowrank) );
497
498 /* Update on U */
499 if ( blok+1 < lblk ) {
501 cblk, blok, fcbk,
502 U, L, cblk_getdataU( fcbk ),
503 work, lwork, &(solvmtx->lowrank) );
504 }
505 cpucblk_srelease_deps( PastixLUCoef, solvmtx, cblk, fcbk );
506 blok++;
507 }
508 while ( ( blok < lblk ) &&
509 ( blok[-1].fcblknm == blok[0].fcblknm ) &&
510 ( blok[-1].lcblknm == blok[0].lcblknm ) );
511}
static void core_sgetf2sp(pastix_int_t m, pastix_int_t n, float *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the sequential static pivoting LU factorization of the matrix m-by-n A = L * U.
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
double pastix_fixdbl_t
Definition datatypes.h:65
static void pqueuePush1(pastix_queue_t *q, pastix_int_t elt, double key1)
Push an element with a single key.
Definition queue.h:64
Queue structure.
Definition queue.h:38
static void kernel_trace_stop(int8_t inlast, pastix_ktype_t ktype, int m, int n, int k, double flops, double starttime)
Stop the trace of a single kernel.
static double kernel_trace_start(pastix_ktype_t ktype)
Start the trace of a single kernel.
@ PastixKernelLvl2GETRF
@ PastixKernelGETRF
int core_sgeadd(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, float alpha, const float *A, pastix_int_t LDA, float beta, float *B, pastix_int_t LDB)
Add two matrices together.
Definition core_sgeadd.c:78
void core_sgetmo(int m, int n, const float *A, int lda, float *B, int ldb)
Transposes a m-by-n matrix out of place using an extra workspace of size m-by-n.
Definition core_sgetmo.c:49
void core_sgetrfsp(pastix_int_t n, float *A, pastix_int_t lda, pastix_int_t *nbpivots, float criterion)
Compute the block static pivoting LU factorization of the matrix m-by-n A = L * U.
void cpucblk_sgetrfsp1dplus_update(SolverMatrix *solvmtx, SolverBlok *blok, float *work, pastix_int_t lwork)
Apply the updates of the LU factorisation of a given panel.
int cpucblk_sgetrfsp1dplus(SolverMatrix *solvmtx, SolverCblk *cblk)
Perform the LU factorization of a given panel and submit tasks for the subsequent updates.
pastix_fixdbl_t cpucblk_sgemmsp(pastix_coefside_t sideA, pastix_trans_t trans, const SolverCblk *cblk, const SolverBlok *blok, SolverCblk *fcblk, const void *A, const void *B, void *C, float *work, pastix_int_t lwork, const pastix_lr_t *lowrank)
Compute the updates associated to one off-diagonal block.
int cpucblk_sgetrfsp1d(SolverMatrix *solvmtx, SolverCblk *cblk, float *work, pastix_int_t lwork)
Perform the LU factorization of a given panel and apply all its updates.
void cpucblk_strsmsp(pastix_side_t side, pastix_uplo_t uplo, pastix_trans_t trans, pastix_diag_t diag, const SolverCblk *cblk, const void *A, void *C, const pastix_lr_t *lowrank)
Compute the updates associated to a column of off-diagonal blocks.
void cpucblk_salloc(pastix_coefside_t side, SolverCblk *cblk)
Allocate the cblk structure to store the coefficient.
int cpucblk_sgetrfsp1d_panel(SolverMatrix *solvmtx, SolverCblk *cblk, void *L, void *U)
Compute the LU factorization of one panel.
int cpucblk_sgetrfsp1d_getrf(SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL, void *dataU)
Compute the LU factorization of the diagonal block in a panel.
void cpucblk_srelease_deps(pastix_coefside_t side, SolverMatrix *solvmtx, const SolverCblk *cblk, SolverCblk *fcbk)
Release the dependencies of the given cblk after an update.
The block low-rank structure to hold a matrix in low-rank form.
@ PastixLCoef
Definition api.h:478
@ PastixLUCoef
Definition api.h:480
@ PastixUCoef
Definition api.h:479
@ PastixUpper
Definition api.h:466
@ PastixRight
Definition api.h:496
@ PastixUnit
Definition api.h:488
@ PastixNonUnit
Definition api.h:487
@ PastixNoTrans
Definition api.h:445
@ PastixTrans
Definition api.h:446
pastix_lr_t lowrank
Definition solver.h:236
pastix_int_t priority
Definition solver.h:183
static void * cblk_getdataU(const SolverCblk *cblk)
Get the pointer to the data associated to the upper part of the cblk.
Definition solver.h:354
pastix_int_t fcblknm
Definition solver.h:144
SolverBlok *restrict bloktab
Definition solver.h:229
SolverBlok * fblokptr
Definition solver.h:168
static void * cblk_getdataL(const SolverCblk *cblk)
Get the pointer to the data associated to the lower part of the cblk.
Definition solver.h:342
pastix_int_t lcblknm
Definition solver.h:143
int8_t inlast
Definition solver.h:151
SolverCblk *restrict cblktab
Definition solver.h:228
pastix_int_t stride
Definition solver.h:169
int8_t cblktype
Definition solver.h:164
pastix_int_t lcolnum
Definition solver.h:167
double diagthreshold
Definition solver.h:238
volatile int32_t nbpivots
Definition solver.h:239
pastix_int_t fcolnum
Definition solver.h:166
Solver block structure.
Definition solver.h:141
Solver column block structure.
Definition solver.h:161
Solver column block structure.
Definition solver.h:203