Line data Source code
1 : /**
2 : *
3 : * @file pstpqrt_param.c
4 : *
5 : * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
6 : * Univ. Bordeaux. All rights reserved.
7 : * @copyright 2016-2020 KAUST. All rights reserved.
8 : *
9 : ***
10 : *
11 : * @brief Chameleon computational routines
12 : *
13 : * @version 1.3.0
14 : * @author Mathieu Faverge
15 : * @date 2025-01-24
16 : * @generated from /builds/solverstack/chameleon/compute/pztpqrt_param.c, normal z -> s, Thu Sep 18 20:32:51 2025
17 : *
18 : */
19 : #include "control/common.h"
20 : #include <stdlib.h>
21 : #include <stdio.h>
22 : #include "libhqr.h"
23 :
/*
 * Tile-argument shorthands. Each macro expands to the three-token argument
 * list "descriptor, (row), (column)". The leading token is a plain identifier,
 * so it is resolved against whatever variable of that name (ATop, A, T or D)
 * is in scope at the point where the macro is used.
 */
24 : #define ATop(m,n) ATop, (m), (n)
25 : #define A(m,n) A, (m), (n)
26 : #define T(m,n) T, (m), (n)
27 : #define D(m,n) D, (m), (n)
28 :
29 : /**
30 : * Parallel tile QR matrix reduction - Equivalent to tpqrt kernel for matrices.
31 : *
32 : * @param[in] genD
33 : * Indicate if copies of the geqrt tiles must be done to speedup
34 : * computations in updates. genD is considered only if D is not NULL.
35 : *
36 : * @param[in] uplo
37 : * - ChamUpper: This corresponds to the former TTQRT kernel. Only the upper
38 : * trapezoidal part of A is factorized.
39 : * - ChamLower, or ChamUpperLower: This corresponds to the former TSQRT
40 : * kernel. The full A is factorized.
41 : */
42 360 : void chameleon_pstpqrt_param( int genD, cham_uplo_t uplo, int K,
43 : const libhqr_tree_t *qrtree, CHAM_desc_t *ATop, CHAM_desc_t *A,
44 : CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D,
45 : RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
46 : {
47 360 : CHAM_context_t *chamctxt;
48 360 : RUNTIME_option_t options;
49 360 : size_t ws_worker = 0;
50 360 : size_t ws_host = 0;
51 :
52 360 : int k, n, p;
53 360 : int ib, *tiles;
54 :
55 360 : chamctxt = chameleon_context_self();
56 360 : if (sequence->status != CHAMELEON_SUCCESS) {
57 0 : return;
58 : }
59 360 : RUNTIME_options_init(&options, chamctxt, sequence, request);
60 :
61 360 : ib = CHAMELEON_IB;
62 :
63 360 : if ( (genD == 0) || (D == NULL) ) {
64 0 : D = A;
65 0 : genD = 0;
66 : }
67 :
68 : /*
69 : * sgeqrt = A->nb * (ib+1)
70 : * sormqr = A->nb * ib
71 : * stpqrt = A->nb * (ib+1)
72 : * stpmqrt = A->nb * ib
73 : */
74 360 : ws_worker = A->nb * (ib+1);
75 :
76 : /* Allocation of temporary (scratch) working space */
77 : #if defined(CHAMELEON_USE_CUDA)
78 : /*
79 : * sormqr = A->nb * ib
80 : * stpmqrt = 3 * A->nb * ib
81 : */
82 : ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
83 : #endif
84 :
85 360 : ws_worker *= sizeof(float);
86 360 : ws_host *= sizeof(float);
87 :
88 360 : RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
89 :
90 : /* Initialisation of temporary tiles array */
91 360 : tiles = (int*)calloc(qrtree->mt, sizeof(int));
92 :
93 1128 : for (k = 0; k < K; k++) {
94 768 : RUNTIME_iteration_push(chamctxt, k);
95 :
96 768 : p = chameleon_psgeqrf_param_step( genD, uplo, k, ib, qrtree, tiles,
97 : A, TS, TT, D, &options, sequence );
98 :
99 : /* Combine with ATop and A by merging last pivot with A(k,k) */
100 : {
101 768 : CHAM_desc_t *T;
102 768 : int temppm = ATop->get_blkdim( ATop, p, DIM_m, ATop->m );
103 768 : int tempkn = ATop->get_blkdim( ATop, k, DIM_n, ATop->n );
104 768 : int L, node, tempnn;
105 :
106 768 : T = TT;
107 768 : L = temppm;
108 :
109 768 : node = A->get_rankof( A, p, k );
110 768 : RUNTIME_data_migrate( sequence, ATop(k, k), node );
111 768 : RUNTIME_data_migrate( sequence, A( p, k), node );
112 :
113 768 : INSERT_TASK_stpqrt(
114 : &options,
115 : temppm, tempkn, chameleon_min(L, tempkn), ib, T->nb,
116 : ATop(k, k),
117 : A(p, k),
118 : T(p, k));
119 :
120 1482 : for (n = k+1; n < A->nt; n++) {
121 714 : tempnn = A->get_blkdim( A, n, DIM_n, A->n );
122 :
123 714 : node = A->get_rankof( A, p, n );
124 714 : RUNTIME_data_migrate( sequence, ATop(k, n), node );
125 714 : RUNTIME_data_migrate( sequence, A( p, n), node );
126 :
127 714 : INSERT_TASK_stpmqrt(
128 : &options,
129 : ChamLeft, ChamTrans,
130 : temppm, tempnn, A->nb, L, ib, T->nb,
131 : A(p, k),
132 : T(p, k),
133 : ATop(k, n),
134 : A(p, n));
135 : }
136 :
137 768 : chameleon_data_flush( sequence, A(p, k), request->flush );
138 768 : chameleon_data_flush( sequence, T(p, k), request->flush );
139 : }
140 :
141 : /* Restore the original location of the tiles */
142 3018 : for (n = k; n < ATop->nt; n++) {
143 1482 : RUNTIME_data_migrate( sequence, ATop(k, n),
144 1482 : ATop->get_rankof( ATop, k, n ) );
145 : }
146 :
147 768 : RUNTIME_iteration_pop(chamctxt);
148 : }
149 :
150 360 : free(tiles);
151 360 : RUNTIME_options_ws_free(&options);
152 360 : RUNTIME_options_finalize(&options, chamctxt);
153 : }
|