29 #ifndef DOXYGEN_SHOULD_SKIP_THIS
30 static float sone = 1.0;
31 static float szero = 0.0;
64 u = malloc( M * N *
sizeof(
float) );
65 memset( u, 0, M * N *
sizeof(
float) );
71 else if ( rkmax == 0 ) {
79 rkmax = pastix_imin( rkmax, rk );
81 #if defined(PASTIX_DEBUG_LR)
82 u = malloc( M * rkmax *
sizeof(
float) );
83 v = malloc( N * rkmax *
sizeof(
float) );
86 memset(u, 0, M * rkmax *
sizeof(
float));
87 memset(v, 0, N * rkmax *
sizeof(
float));
89 u = malloc( (M+N) * rkmax *
sizeof(
float));
92 memset(u, 0, (M+N) * rkmax *
sizeof(
float));
124 #if defined(PASTIX_DEBUG_LR)
185 newrkmax = (newrkmax == -1) ? newrk : newrkmax;
186 newrkmax = pastix_imax( newrkmax, newrk );
191 if ( (newrk > rklimit) || (newrk == -1) )
193 A->
u = realloc( A->
u, M * N *
sizeof(
float) );
194 #if defined(PASTIX_DEBUG_LR)
205 else if (newrkmax == 0)
211 #if defined(PASTIX_DEBUG_LR)
227 if ( ( A->
rk == -1 ) ||
228 (( A->
rk != -1 ) && (newrkmax != A->
rkmax)) )
230 #if defined(PASTIX_DEBUG_LR)
231 u = malloc( M * newrkmax *
sizeof(
float) );
232 v = malloc( N * newrkmax *
sizeof(
float) );
234 u = malloc( (M+N) * newrkmax *
sizeof(
float) );
235 v = u + M * newrkmax;
238 assert( A->
rk != -1 );
239 ret = LAPACKE_slacpy_work( LAPACK_COL_MAJOR,
'A', M, newrk,
242 ret = LAPACKE_slacpy_work( LAPACK_COL_MAJOR,
'A', newrk, N,
243 A->
v, A->
rkmax, v, newrkmax );
247 #if defined(PASTIX_DEBUG_LR)
318 if (Alr == NULL || Alr->
rk > Alr->
rkmax) {
326 if ( Alr->
rk == -1 ) {
327 if (Alr->
u == NULL || Alr->
v != NULL || (Alr->
rkmax < m))
332 else if ( Alr->
rk != 0){
333 if (Alr->
u == NULL || Alr->
v == NULL) {
340 if ( Alr->
rk == -1 ) {
341 ret = LAPACKE_slacpy_work( LAPACK_COL_MAJOR,
'A', m, n,
342 Alr->
u, Alr->
rkmax, A, lda );
345 else if ( Alr->
rk == 0 ) {
346 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'A', m, n,
351 cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
359 if ( Alr->
rk == -1 ) {
362 else if ( Alr->
rk == 0 ) {
363 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'A', n, m,
368 cblas_sgemm(CblasColMajor, CblasTrans, CblasTrans,
370 (sone), Alr->
v, Alr->
rkmax,
439 assert( (M1 + offx) <= M2 );
440 assert( (N1 + offy) <= N2 );
442 ldau = (A->
rk == -1) ? A->
rkmax : M1;
453 if ( (M1 != M2) || (N1 != N2) ) {
454 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'A', M2, N2,
460 0.0, u + M2 * offy + offx, M2 );
465 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'A', M2, B->
rk,
469 ret = LAPACKE_slacpy_work( LAPACK_COL_MAJOR,
'A', M1, A->
rk,
475 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'A', B->
rk, N2,
476 0.0, 0.0, v, B->
rkmax );
487 float *work = malloc( M2 * N2 *
sizeof(
float) );
548 rank = (A->
rk == -1) ? pastix_imin(M1, N1) : A->
rk;
551 ldau = (A->
rk == -1) ? A->
rkmax : M1;
554 ret = LAPACKE_slacpy_work( LAPACK_COL_MAJOR,
'A', M2, B->
rk,
555 B->
u, ldbu, u1u2, M2 );
558 tmp = u1u2 + B->
rk * M2;
566 memset(tmp, 0, M2 * M1 *
sizeof(
float));
570 for (i=0; i<M1; i++, tmp += M2+1) {
576 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'A', M2, M1,
586 memset(tmp, 0, M2 * N1 *
sizeof(
float));
588 ret = LAPACKE_slacpy_work( LAPACK_COL_MAJOR,
'A', M1, N1,
589 A->
u, ldau, tmp + offx, M2 );
598 memset(tmp, 0, M2 * A->
rk *
sizeof(
float));
600 ret = LAPACKE_slacpy_work( LAPACK_COL_MAJOR,
'A', M1, A->
rk,
601 A->
u, ldau, tmp + offx, M2 );
661 rank = (A->
rk == -1) ? pastix_imin(M1, N1) : A->
rk;
664 ldau = (A->
rk == -1) ? A->
rkmax : M1;
668 ret = LAPACKE_slacpy_work( LAPACK_COL_MAJOR,
'A', B->
rk, N2,
669 B->
v, ldbv, v1v2, rank );
680 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'A', M1, N2,
681 0.0, 0.0, tmp, rank );
686 0.0, tmp + offy * rank, rank );
694 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'A', N1, N2,
695 0.0, 0.0, tmp, rank );
700 for (i=0; i<N1; i++, tmp += rank+1) {
706 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'A', N1, N2,
707 0.0, alpha, tmp + offy * rank, rank );
717 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'A', A->
rk, N2,
718 0.0, 0.0, tmp, rank );
723 0.0, tmp + offy * rank, rank );
790 float *A = (
float*)Avoid;
793 float *work, *tau, zzsize;
800 float norm = LAPACKE_slange_work( LAPACK_COL_MAJOR,
'f', m, n,
803 if ( (norm == 0.) && (tol >= 0.)) {
813 else if ( use_reltol ) {
817 ret = rrqrfct( tol, rklimit, 0, nb,
831 #if defined(PASTIX_DEBUG_LR)
833 Acpy = malloc( m * n *
sizeof(
float) );
834 tau = malloc( n *
sizeof(
float) );
835 work = malloc( lwork *
sizeof(
float) );
836 rwork = malloc( rsize *
sizeof(
float) );
838 zwork = malloc( zsize *
sizeof(
float) + rsize *
sizeof(
float) );
842 rwork = (
float*)(work + lwork);
850 ret = LAPACKE_slacpy_work( LAPACK_COL_MAJOR,
'A', m, n,
854 newrk = rrqrfct( tol, rklimit, 0, nb,
857 work, lwork, rwork );
859 flops = FLOPS_SGEQRF( m, n );
862 flops = FLOPS_SGEQRF( m, newrk ) + FLOPS_SORMQR( m, n-newrk, newrk,
PastixLeft );
872 ret = LAPACKE_slacpy_work( LAPACK_COL_MAJOR,
'A', m, n,
873 A, lda, Alr->
u, Alr->
rkmax );
876 else if ( newrk > 0 ) {
887 ret = LAPACKE_slacpy_work( LAPACK_COL_MAJOR,
'A', m, Alr->
rk,
891 ret = LAPACKE_sorgqr_work( LAPACK_COL_MAJOR, m, Alr->
rk, Alr->
rk,
892 U, m, tau, work, lwork );
894 flops += FLOPS_SORGQR( m, Alr->
rk, Alr->
rk );
897 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'L', Alr->
rk-1, Alr->
rk-1,
898 0.0, 0.0, Acpy + 1, m );
901 memcpy( V + jpvt[i] * Alr->
rk,
903 Alr->
rk *
sizeof(
float) );
907 #if defined(PASTIX_DEBUG_LR)
911 fprintf(stderr,
"Failed to compress a matrix and generate an orthogonal u\n" );
918 #if defined(PASTIX_DEBUG_LR)
989 float *A = (
float*)Avoid;
992 float *work, *tau, *B, *tau_b, zzsize;
999 #if defined(PRECISION_c) || defined(PRECISION_z)
1005 float norm = LAPACKE_slange_work( LAPACK_COL_MAJOR,
'f', m, n,
1008 if ( (norm == 0.) && (tol >= 0.)) {
1018 else if ( use_reltol ) {
1022 ret = rrqrfct( tol, rklimit, nb,
1026 &zzsize, -1, norm );
1030 bsize = n * rklimit;
1038 #if defined(PASTIX_DEBUG_LR)
1040 Acpy = malloc( m * n *
sizeof(
float) );
1041 tau = malloc( n *
sizeof(
float) );
1042 B = malloc( bsize *
sizeof(
float) );
1043 tau_b = malloc( n *
sizeof(
float) );
1044 work = malloc( lwork *
sizeof(
float) );
1046 zwork = malloc( zsize *
sizeof(
float) );
1059 ret = LAPACKE_slacpy_work( LAPACK_COL_MAJOR,
'A', m, n,
1063 newrk = rrqrfct( tol, rklimit, nb,
1067 work, lwork, norm );
1069 flops = FLOPS_SGEQRF( m, n );
1072 flops = FLOPS_SGEQRF( m, newrk ) + FLOPS_SORMQR( m, n-newrk, newrk,
PastixLeft );
1081 if ( newrk == -1 ) {
1082 ret = LAPACKE_slacpy_work( LAPACK_COL_MAJOR,
'A', m, n,
1083 A, lda, Alr->
u, Alr->
rkmax );
1086 else if ( newrk > 0 ) {
1097 ret = LAPACKE_slacpy_work( LAPACK_COL_MAJOR,
'A', m, Alr->
rk,
1101 ret = LAPACKE_sorgqr_work( LAPACK_COL_MAJOR, m, Alr->
rk, Alr->
rk,
1102 U, m, tau, work, lwork );
1104 flops += FLOPS_SORGQR( m, Alr->
rk, Alr->
rk );
1107 ret = LAPACKE_slacpy_work( LAPACK_COL_MAJOR,
'U', Alr->
rk, n,
1108 Acpy, m, V, Alr->
rk );
1110 ret = LAPACKE_slaset_work( LAPACK_COL_MAJOR,
'L', Alr->
rk-1, Alr->
rk-1,
1111 0.0, 0.0, V + 1, Alr->
rk );
1120 rk = (Alr->
rk / nb) * nb;
1122 d = pastix_imin( nb, Alr->
rk - rk );
1123 ret = LAPACKE_sormqr_work( LAPACK_COL_MAJOR,
'R', trans,
1124 Alr->
rk - rk, n - rk, d,
1125 B + rk * n + rk, n, tau_b + rk,
1126 V + rk * Alr->
rk + rk, Alr->
rk,
1134 #if defined(PASTIX_DEBUG_LR)
1135 if ( Alr->
rk > 0 ) {
1138 fprintf(stderr,
"Failed to compress a matrix and generate an orthogonal u\n" );
1145 #if defined(PASTIX_DEBUG_LR)
1221 const void *alphaptr,
1234 float *u1u2, *v1v2, *u;
1243 float *zwork, zzsize;
1245 float alpha = *((
float*)alphaptr);
1248 #if defined(PASTIX_DEBUG_LR)
1252 fprintf(stderr,
"Failed to have B->u orthogonal in entry of rradd\n" );
1257 rankA = (A->
rk == -1) ? pastix_imin(M1, N1) : A->
rk;
1258 rank = rankA + B->
rk;
1259 M = pastix_imax(M2, M1);
1260 N = pastix_imax(N2, N1);
1262 minV = pastix_imin(N, rank);
1264 assert(M2 == M && N2 == N);
1265 assert(B->
rk != -1);
1270 if ( ((M1 + offx) > M2) ||
1271 ((N1 + offy) > N2) )
1273 pastix_print_error(
"Dimensions are not correct" );
1291 M1, N1, A, M2, N2, B,
1299 if ( rank > pastix_imin( M, N ) ) {
1306 ldau = (A->
rk == -1) ? A->
rkmax : M1;
1317 wzsize = (M+N) * rank;
1323 rrqrfct( tol, rklimit, 1, nb,
1326 &zzsize, -1, NULL );
1330 #if defined(PASTIX_DEBUG_LR)
1332 u1u2 = malloc( ldu * rank *
sizeof(
float) );
1333 v1v2 = malloc( ldv * N *
sizeof(
float) );
1334 tauV = malloc( rank *
sizeof(
float) );
1335 zwork = malloc( lwork *
sizeof(
float) );
1337 rwork = malloc( 2 * pastix_imax( rank, N ) *
sizeof(
float) );
1339 zbuf = malloc( wzsize *
sizeof(
float) + 2 * pastix_imax(rank, N) *
sizeof(
float) );
1342 v1v2 = u1u2 + ldu * rank;
1343 tauV = v1v2 + ldv * N;
1344 zwork = tauV + rank;
1346 rwork = (
float*)(zwork + lwork);
1383 u1u2, ldu, v1v2, ldv );
1388 u1u2, ldu, v1v2, ldv );
1394 u1u2, ldu, v1v2, ldv );
1396 kernel_trace_stop_lvl2( flops );
1398 total_flops += flops;
1401 rank = B->
rk + rankA;
1408 ret = LAPACKE_slacpy_work( LAPACK_COL_MAJOR,
'A', M, B->
rk, u1u2, ldu, B->
u, ldbu );
1410 ret = LAPACKE_slacpy_work( LAPACK_COL_MAJOR,
'A', B->
rk, N, v1v2, ldv, B->
v, ldbv );
1414 #if defined(PASTIX_DEBUG_LR)
1424 MALLOC_INTERN( jpvt, pastix_imax(rank, N),
pastix_int_t );
1452 tol = tol * ( fabsf(alpha) * normA + normB );
1459 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_rradd_recompression );
1460 rklimit = pastix_imin( rklimit, rank );
1461 new_rank = rrqrfct( tol, rklimit, 1, nb,
1464 zwork, lwork, rwork );
1465 flops = (new_rank == -1) ? FLOPS_SGEQRF( rank, N )
1466 : (FLOPS_SGEQRF( rank, new_rank ) +
1467 FLOPS_SORMQR( rank, N-new_rank, new_rank,
PastixLeft ));
1468 kernel_trace_stop_lvl2_rank( flops, new_rank );
1469 total_flops += flops;
1474 if ( (new_rank > rklimit) ||
1483 flops = FLOPS_SGEMM( M, N, Bbackup.
rk );
1484 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_uncompress );
1485 cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
1487 (sone), Bbackup.
u, ldbu,
1490 kernel_trace_stop_lvl2( flops );
1491 total_flops += flops;
1494 if ( A->
rk == -1 ) {
1495 flops = 2 * M1 * N1;
1496 kernel_trace_start_lvl2( PastixKernelLvl2_FR_GEMM );
1499 sone, u + offy * M + offx, M);
1500 kernel_trace_stop_lvl2( flops );
1503 flops = FLOPS_SGEMM( M1, N1, A->
rk );
1504 kernel_trace_start_lvl2( PastixKernelLvl2_FR_GEMM );
1505 cblas_sgemm(CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)transA1,
1507 (alpha), A->
u, ldau,
1509 (sone), u + offy * M + offx, M);
1510 kernel_trace_stop_lvl2( flops );
1512 total_flops += flops;
1516 #if defined(PASTIX_DEBUG_LR)
1525 else if ( new_rank == 0 ) {
1529 #if defined(PASTIX_DEBUG_LR)
1543 ret =
core_slrsze( 0, M, N, B, new_rank, -1, -1 );
1544 assert( ret != -1 );
1545 assert( B->
rkmax >= new_rank );
1555 memset(B->
v, 0, N * ldbv *
sizeof(
float));
1557 for (i=0; i<N; i++){
1558 lm = pastix_imin( new_rank, i+1 );
1559 memcpy(tmpV + jpvt[i] * ldbv,
1561 lm *
sizeof(
float));
1567 flops = FLOPS_SORGQR( rank, new_rank, new_rank )
1568 + FLOPS_SGEMM( M, new_rank, rank );
1570 kernel_trace_start_lvl2( PastixKernelLvl2_LR_add2C_rradd_computeNewU );
1571 ret = LAPACKE_sorgqr_work( LAPACK_COL_MAJOR, rank, new_rank, new_rank,
1572 v1v2, ldv, tauV, zwork, lwork );
1575 cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
1579 (szero), B->
u, ldbu);
1580 kernel_trace_stop_lvl2( flops );
1581 total_flops += flops;
1587 #if defined(PASTIX_DEBUG_LR)
1625 if ( A->
rk != -1 ) {
1626 return A->
rk * ( M + N );
1664 int rkmax = A->
rkmax;
1670 memcpy( buffer, &rk,
sizeof(
int ) );
1671 buffer +=
sizeof( int );
1675 memcpy( buffer, u, rk * M *
sizeof(
float ) );
1676 buffer += rk * M *
sizeof( float );
1679 if ( rk == rkmax ) {
1680 memcpy( buffer, v, rk * N *
sizeof(
float ) );
1681 buffer += rk * N *
sizeof( float );
1684 ret = LAPACKE_slacpy_work( LAPACK_COL_MAJOR,
'A', rk, N, v, rkmax,
1685 (
float *)buffer, rk );
1687 buffer += rk * N *
sizeof( float );
1691 memcpy( buffer, u, M * N *
sizeof(
float ) );
1692 buffer += M * N *
sizeof( float );
1731 memcpy( &rk, buffer,
sizeof(
int ) );
1732 buffer +=
sizeof( int );
1739 memcpy( A->
u, buffer, M * rk *
sizeof(
float ) );
1740 buffer += M * rk *
sizeof( float );
1743 memcpy( A->
v, buffer, N * rk *
sizeof(
float ) );
1744 buffer += N * rk *
sizeof( float );
1748 memcpy( A->
u, buffer, M * N *
sizeof(
float ) );
1749 buffer += M * N *
sizeof( float );
1789 char *output = *outptr;
1793 rk = *((
int *)input);
1794 input +=
sizeof( int );
1801 size = M * rk *
sizeof( float );
1804 memcpy( A->
u, input, size );
1809 size = N * rk *
sizeof( float );
1812 memcpy( A->
v, input, size );
1822 size = M * N *
sizeof( float );
1825 memcpy( A->
u, input, size );
BEGIN_C_DECLS typedef int pastix_int_t
@ PastixKernelLvl2_LR_add2C_rradd_orthogonalize
pastix_fixdbl_t core_slrorthu_fullqr(pastix_int_t M, pastix_int_t N, pastix_int_t rank, float *U, pastix_int_t ldu, float *V, pastix_int_t ldv)
Try to orthognalize the u part of the low-rank form, and update the v part accordingly using full QR.
pastix_fixdbl_t core_slrorthu_cgs(pastix_int_t M1, pastix_int_t N1, pastix_int_t M2, pastix_int_t N2, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, float *U, pastix_int_t ldu, float *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using CGS.
int core_sgeadd(pastix_trans_t trans, pastix_int_t M, pastix_int_t N, float alpha, const float *A, pastix_int_t LDA, float beta, float *B, pastix_int_t LDB)
Add two matrices together.
void core_sgetmo(int m, int n, const float *A, int lda, float *B, int ldb)
Transposes a m-by-n matrix out of place using an extra workspace of size m-by-n.
pastix_fixdbl_t core_slrorthu_partialqr(pastix_int_t M, pastix_int_t N, pastix_int_t r1, pastix_int_t *r2ptr, pastix_int_t offx, pastix_int_t offy, float *U, pastix_int_t ldu, float *V, pastix_int_t ldv)
Try to orthognalize the U part of the low-rank form, and update the V part accordingly using partial ...
int core_slrdbg_check_orthogonality(pastix_int_t M, pastix_int_t N, const float *A, pastix_int_t lda)
Check the orthogonality of the matrix A.
pastix_int_t pastix_lr_ortho
Define the orthogonalization method.
pastix_int_t(* core_get_rklimit)(pastix_int_t, pastix_int_t)
Compute the maximal rank accepted for a given matrix size. The pointer is set according to the low-ra...
Structure to define the type of function to use for the low-rank kernels and their parameters.
The block low-rank structure to hold a matrix in low-rank form.
pastix_fixdbl_t core_sge2lr_qrrt(core_srrqr_rt_t rrqrfct, int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit, pastix_int_t m, pastix_int_t n, const void *Avoid, pastix_int_t lda, pastix_lrblock_t *Alr)
Template to convert a full rank matrix into a low rank matrix through QR decompositions.
pastix_fixdbl_t core_sge2lr_qrcp(core_srrqr_cp_t rrqrfct, int use_reltol, pastix_fixdbl_t tol, pastix_int_t rklimit, pastix_int_t m, pastix_int_t n, const void *Avoid, pastix_int_t lda, pastix_lrblock_t *Alr)
Template to convert a full rank matrix into a low rank matrix through QR decompositions.
int(* core_srrqr_rt_t)(float tol, pastix_int_t maxrank, pastix_int_t nb, pastix_int_t m, pastix_int_t n, float *A, pastix_int_t lda, float *tau, float *B, pastix_int_t ldb, float *tau_b, float *work, pastix_int_t lwork, float normA)
TODO.
int(* core_srrqr_cp_t)(float tol, pastix_int_t maxrank, int refine, pastix_int_t nb, pastix_int_t m, pastix_int_t n, float *A, pastix_int_t lda, pastix_int_t *jpvt, float *tau, float *work, pastix_int_t lwork, float *rwork)
TODO.
pastix_fixdbl_t core_srradd_qr(core_srrqr_cp_t rrqrfct, const pastix_lr_t *lowrank, pastix_trans_t transA1, const void *alphaptr, pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A, pastix_int_t M2, pastix_int_t N2, pastix_lrblock_t *B, pastix_int_t offx, pastix_int_t offy)
Template to perform the addition of two low-rank structures with compression kernel based on QR decom...
int core_slr2ge(pastix_trans_t trans, pastix_int_t m, pastix_int_t n, const pastix_lrblock_t *Alr, float *A, pastix_int_t lda)
Convert a low rank matrix into a dense matrix.
int core_slrsze(int copy, pastix_int_t M, pastix_int_t N, pastix_lrblock_t *A, pastix_int_t newrk, pastix_int_t newrkmax, pastix_int_t rklimit)
Resize a low-rank matrix.
const char * core_slrunpack2(pastix_int_t M, pastix_int_t N, pastix_lrblock_t *A, const char *input, char **outptr)
Unpack low rank data and fill the cblk concerned by the computation.
char * core_slrpack(pastix_int_t M, pastix_int_t N, const pastix_lrblock_t *A, char *buffer)
Pack low-rank data by side.
void core_slralloc(pastix_int_t M, pastix_int_t N, pastix_int_t rkmax, pastix_lrblock_t *A)
Allocate a low-rank matrix.
void core_slrconcatenate_v(pastix_trans_t transA1, float alpha, pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A, pastix_int_t N2, pastix_lrblock_t *B, pastix_int_t offy, float *v1v2)
Concatenate right parts of two low-rank matrices.
void core_slrcpy(const pastix_lr_t *lowrank, pastix_trans_t transAv, float alpha, pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A, pastix_int_t M2, pastix_int_t N2, pastix_lrblock_t *B, pastix_int_t offx, pastix_int_t offy)
Copy a small low-rank structure into a large one.
size_t core_slrgetsize(pastix_int_t M, pastix_int_t N, pastix_lrblock_t *A)
Compute the size of a block to send in LR.
void core_slrconcatenate_u(float alpha, pastix_int_t M1, pastix_int_t N1, const pastix_lrblock_t *A, pastix_int_t M2, pastix_lrblock_t *B, pastix_int_t offx, float *u1u2)
Concatenate left parts of two low-rank matrices.
void core_slrfree(pastix_lrblock_t *A)
Free a low-rank matrix.
float core_slrnrm(pastix_normtype_t ntype, int transV, pastix_int_t M, pastix_int_t N, const pastix_lrblock_t *A)
Compute the norm of a low-rank matrix.
char * core_slrunpack(pastix_int_t M, pastix_int_t N, pastix_lrblock_t *A, char *buffer)
Unpack low rank data and fill the cblk concerned by the computation.
enum pastix_trans_e pastix_trans_t
Transpostion.
@ PastixCompressOrthoPartialQR
Manage nancheck for lowrank kernels. This header describes all the LAPACKE functions used for low-ran...