LCOV - code coverage report
Current view: top level - build-starpu/compute - pstpqrt_param.c (source / functions) Coverage Total Hit
Test: chameleon.lcov Lines: 93.8 % 48 45
Test Date: 2025-09-18 21:10:10 Functions: 100.0 % 1 1

            Line data    Source code
       1              : /**
       2              :  *
       3              :  * @file pstpqrt_param.c
       4              :  *
       5              :  * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
       6              :  *                      Univ. Bordeaux. All rights reserved.
       7              :  * @copyright 2016-2020 KAUST. All rights reserved.
       8              :  *
       9              :  ***
      10              :  *
      11              :  * @brief Chameleon computational routines
      12              :  *
      13              :  * @version 1.3.0
      14              :  * @author Mathieu Faverge
      15              :  * @date 2025-01-24
      16              :  * @generated from /builds/solverstack/chameleon/compute/pztpqrt_param.c, normal z -> s, Thu Sep 18 20:32:51 2025
      17              :  *
      18              :  */
      19              : #include "control/common.h"
      20              : #include <stdlib.h>
      21              : #include <stdio.h>
      22              : #include "libhqr.h"
      23              : 
      24              : #define ATop(m,n) ATop, (m), (n)
      25              : #define A(m,n) A, (m), (n)
      26              : #define T(m,n) T, (m), (n)
      27              : #define D(m,n) D, (m), (n)
      28              : 
      29              : /**
      30              :  *  Parallel tile QR matrix reduction - Equivalent to tpqrt kernel for matrices.
      31              :  *
      32              :  * @param[in] genD
      33              :  *         Indicates whether copies of the geqrt tiles must be made to speed up
      34              :  *         computations in updates. genD is considered only if D is not NULL.
      35              :  *
      36              :  * @param[in] uplo
      37              :  *         - ChamUpper: This corresponds to the former TTQRT kernel. Only the upper
      38              :  *           trapezoidal part of A is factorized.
      39              :  *         - ChamLower, or ChamUpperLower: This corresponds to the former TSQRT
      40              :  *           kernel. The full A is factorized.
      41              :  */
      42          360 : void chameleon_pstpqrt_param( int genD, cham_uplo_t uplo, int K,
      43              :                               const libhqr_tree_t *qrtree, CHAM_desc_t *ATop, CHAM_desc_t *A,
      44              :                               CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D,
      45              :                               RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
      46              : {
      47          360 :     CHAM_context_t *chamctxt;
      48          360 :     RUNTIME_option_t options;
      49          360 :     size_t ws_worker = 0;
      50          360 :     size_t ws_host = 0;
      51              : 
      52          360 :     int k, n, p;
      53          360 :     int ib, *tiles;
      54              : 
      55          360 :     chamctxt = chameleon_context_self();
      56          360 :     if (sequence->status != CHAMELEON_SUCCESS) {
      57            0 :         return;
      58              :     }
      59          360 :     RUNTIME_options_init(&options, chamctxt, sequence, request);
      60              : 
      61          360 :     ib = CHAMELEON_IB;
      62              : 
      63          360 :     if ( (genD == 0) || (D == NULL) ) {
      64            0 :         D    = A;
      65            0 :         genD = 0;
      66              :     }
      67              : 
      68              :     /*
      69              :      * sgeqrt  = A->nb * (ib+1)
      70              :      * sormqr  = A->nb * ib
      71              :      * stpqrt  = A->nb * (ib+1)
      72              :      * stpmqrt = A->nb * ib
      73              :      */
      74          360 :     ws_worker = A->nb * (ib+1);
      75              : 
      76              :     /* Allocation of temporary (scratch) working space */
      77              : #if defined(CHAMELEON_USE_CUDA)
      78              :     /*
      79              :      * sormqr  =     A->nb * ib
      80              :      * stpmqrt = 3 * A->nb * ib
      81              :      */
      82              :     ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
      83              : #endif
      84              : 
      85          360 :     ws_worker *= sizeof(float);
      86          360 :     ws_host   *= sizeof(float);
      87              : 
      88          360 :     RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
      89              : 
      90              :     /* Initialisation of temporary tiles array */
      91          360 :     tiles = (int*)calloc(qrtree->mt, sizeof(int));
      92              : 
      93         1128 :     for (k = 0; k < K; k++) {
      94          768 :         RUNTIME_iteration_push(chamctxt, k);
      95              : 
      96          768 :         p = chameleon_psgeqrf_param_step( genD, uplo, k, ib, qrtree, tiles,
      97              :                                           A, TS, TT, D, &options, sequence );
      98              : 
      99              :         /* Combine with ATop and A by merging last pivot with A(k,k) */
     100              :         {
     101          768 :             CHAM_desc_t *T;
     102          768 :             int temppm = ATop->get_blkdim( ATop, p, DIM_m, ATop->m );
     103          768 :             int tempkn = ATop->get_blkdim( ATop, k, DIM_n, ATop->n );
     104          768 :             int L, node, tempnn;
     105              : 
     106          768 :             T = TT;
     107          768 :             L = temppm;
     108              : 
     109          768 :             node = A->get_rankof( A, p, k );
     110          768 :             RUNTIME_data_migrate( sequence, ATop(k, k), node );
     111          768 :             RUNTIME_data_migrate( sequence, A(   p, k), node );
     112              : 
     113          768 :             INSERT_TASK_stpqrt(
     114              :                 &options,
     115              :                 temppm, tempkn, chameleon_min(L, tempkn), ib, T->nb,
     116              :                 ATop(k, k),
     117              :                 A(p, k),
     118              :                 T(p, k));
     119              : 
     120         1482 :             for (n = k+1; n < A->nt; n++) {
     121          714 :                 tempnn = A->get_blkdim( A, n, DIM_n, A->n );
     122              : 
     123          714 :                 node = A->get_rankof( A, p, n );
     124          714 :                 RUNTIME_data_migrate( sequence, ATop(k, n), node );
     125          714 :                 RUNTIME_data_migrate( sequence, A(   p, n), node );
     126              : 
     127          714 :                 INSERT_TASK_stpmqrt(
     128              :                     &options,
     129              :                     ChamLeft, ChamTrans,
     130              :                     temppm, tempnn, A->nb, L, ib, T->nb,
     131              :                     A(p, k),
     132              :                     T(p, k),
     133              :                     ATop(k, n),
     134              :                     A(p, n));
     135              :             }
     136              : 
     137          768 :             chameleon_data_flush( sequence, A(p, k), request->flush );
     138          768 :             chameleon_data_flush( sequence, T(p, k), request->flush );
     139              :         }
     140              : 
     141              :         /* Restore the original location of the tiles */
     142         3018 :         for (n = k; n < ATop->nt; n++) {
     143         1482 :             RUNTIME_data_migrate( sequence, ATop(k, n),
     144         1482 :                                   ATop->get_rankof( ATop, k, n ) );
     145              :         }
     146              : 
     147          768 :         RUNTIME_iteration_pop(chamctxt);
     148              :     }
     149              : 
     150          360 :     free(tiles);
     151          360 :     RUNTIME_options_ws_free(&options);
     152          360 :     RUNTIME_options_finalize(&options, chamctxt);
     153              : }
        

Generated by: LCOV version 2.0-1