14 #ifndef _pastix_cuda_h_
15 #define _pastix_cuda_h_
21 #define MAX_BATCH_COUNT 16
36 pastix_zgemm_vbatched_nt(
39 cuDoubleComplex alpha,
46 pastix_cgemm_vbatched_nt(
56 pastix_dgemm_vbatched_nt(
66 pastix_sgemm_vbatched_nt(
78 char TRANSA,
char TRANSB,
int m ,
int n ,
int k ,
79 cuDoubleComplex alpha,
const cuDoubleComplex *gpu_A,
int lda,
80 const cuDoubleComplex *gpu_B,
int ldb,
81 cuDoubleComplex beta, cuDoubleComplex *gpu_C,
int ldc,
82 int blocknbr,
const int *blocktab,
int fblocknbr,
const int *fblocktab,
83 cudaStream_t stream );
87 char TRANSA,
char TRANSB,
int m ,
int n ,
int k ,
88 cuFloatComplex alpha,
const cuFloatComplex *gpu_A,
int lda,
89 const cuFloatComplex *gpu_B,
int ldb,
90 cuFloatComplex beta, cuFloatComplex *gpu_C,
int ldc,
91 int blocknbr,
const int *blocktab,
int fblocknbr,
const int *fblocktab,
92 cudaStream_t stream );
96 char TRANSA,
char TRANSB,
int m ,
int n ,
int k ,
97 double alpha,
const double *gpu_A,
int lda,
98 const double *gpu_B,
int ldb,
99 double beta,
double *gpu_C,
int ldc,
100 int blocknbr,
const int *blocktab,
int fblocknbr,
const int *fblocktab,
101 cudaStream_t stream );
104 pastix_fermi_sgemmsp(
105 char TRANSA,
char TRANSB,
int m ,
int n ,
int k ,
106 float alpha,
const float *gpu_A,
int lda,
107 const float *gpu_B,
int ldb,
108 float beta,
float *gpu_C,
int ldc,
109 int blocknbr,
const int *blocktab,
int fblocknbr,
const int *fblocktab,
110 cudaStream_t stream );
BEGIN_C_DECLS typedef int pastix_int_t
enum pastix_trans_e pastix_trans_t
Transposition.