53 struct s_solver solver;
55 float *x = (
float*)(xp->
b);
56 float *b = (
float*)(bp->
b);
63 #if defined(PASTIX_DEBUG_GMRES)
64 float *dbg_x, *dbg_r, *dbg_G;
68 float eps, resid, resid_b;
69 float norm, normb, normx;
76 memset( &solver, 0,
sizeof(
struct s_solver) );
85 n = pastix_data->
bcsc->n;
92 if ( !(pastix_data->
steps & STEP_NUMFACT) ) {
96 if ((!precond) || savemem ) {
100 gmcos = (
float *)solver.malloc(im *
sizeof(
float));
101 gmsin = (
float *)solver.malloc(im *
sizeof(
float));
102 gmG = (
float *)solver.malloc(im1 *
sizeof(
float));
114 gmH = (
float *)solver.malloc(im * im1 *
sizeof(
float));
115 gmV = (
float *)solver.malloc(n * im1 *
sizeof(
float));
116 if (precond && (!savemem) ) {
117 gmW = (
float *)solver.malloc(n * im *
sizeof(
float));
120 gmW = (
float *)solver.malloc(n *
sizeof(
float));
122 memset( gmH, 0, im * im1 *
sizeof(
float) );
124 #if defined(PASTIX_DEBUG_GMRES)
125 dbg_x = (
float *)solver.malloc(n *
sizeof(
float));
126 dbg_r = (
float *)solver.malloc(n *
sizeof(
float));
127 dbg_G = (
float *)solver.malloc(im1 *
sizeof(
float));
128 solver.copy( pastix_data, n, x, dbg_x );
131 normb = solver.norm( pastix_data, n, b );
135 normx = solver.norm( pastix_data, n, x );
140 sgmWi = solver.malloc( n *
sizeof(
float) );
143 clockInit(refine_clk);
144 clockStart(refine_clk);
159 solver.copy( pastix_data, n, b, gmVi );
165 resid = solver.norm( pastix_data, n, gmVi );
166 resid_b = resid / normb;
169 if ( resid_b <= eps )
176 tmp = (float)( 1.0 / resid );
177 solver.scal( pastix_data, n, tmp, gmVi );
179 gmG[0] = (float)resid;
187 clockStop( refine_clk );
197 solver.copy( pastix_data, n, gmVi, gmWi );
201 solver.spsv( pastix_data, gmWi, sgmWi );
206 solver.spmv( pastix_data,
PastixNoTrans, 1.0, gmWi, 0., gmVi );
212 gmHi[j] = solver.dot( pastix_data, n, gmVi, gmV + j * n );
215 solver.axpy( pastix_data, n, -1. * gmHi[j], gmV + j * n, gmVi );
219 norm = solver.norm( pastix_data, n, gmVi );
225 tmp = (float)(1.0 / norm);
226 solver.scal( pastix_data, n, tmp, gmVi );
237 gmHi[j] = gmcos[j] * tmp + gmsin[j] * gmHi[j+1];
238 gmHi[j+1] = gmcos[j] * gmHi[j+1] - (gmsin[j]) * tmp;
249 tmp = sqrtf( gmHi[i] * gmHi[i] +
250 gmHi[i+1] * gmHi[i+1] );
252 if ( fabsf(tmp) <= eps ) {
255 gmcos[i] = gmHi[i] / tmp;
256 gmsin[i] = gmHi[i+1] / tmp;
260 gmG[i+1] = -gmsin[i] * gmG[i];
261 gmG[i] = gmcos[i] * gmG[i];
264 gmHi[i] = gmcos[i] * gmHi[i] + gmsin[i] * gmHi[i+1];
267 resid = fabsf( gmG[i+1] );
269 resid_b = resid / normb;
278 clockStop((refine_clk));
281 ( pastix_data->
procnum == 0 ) ) {
282 solver.output_oneiter( t0, t3, resid_b, iters );
284 #if defined(PASTIX_DEBUG_GMRES)
289 memcpy( dbg_G, gmG, im1 *
sizeof(
float) );
290 cblas_strsv( CblasColMajor, CblasUpper, CblasNoTrans, CblasNonUnit,
291 i+1, gmH, im1, dbg_G, 1 );
293 solver.copy( pastix_data, n, b, dbg_r );
294 solver.copy( pastix_data, n, x, dbg_x );
297 solver.gemv( pastix_data, n, i+1, 1.0, (precond ? gmW : gmV), n, dbg_G, 1.0, dbg_x );
300 solver.spmv( pastix_data,
PastixNoTrans, -1., dbg_x, 1., dbg_r );
302 normr2 = solver.norm( pastix_data, n, dbg_r );
303 fprintf(stdout, OUT_ITERREFINE_ERR, normr2 / normb );
310 cblas_strsv( CblasColMajor, CblasUpper, CblasNoTrans, CblasNonUnit,
311 i+1, gmH, im1, gmG, 1 );
317 if (precond && savemem) {
325 solver.gemv( pastix_data, n, i+1, 1.0, gmV, n, gmG, 0., gmW );
326 solver.spsv( pastix_data, gmW, sgmWi );
327 solver.axpy( pastix_data, n, 1., gmW, x );
336 gmWi = precond ? gmW : gmV;
337 solver.gemv( pastix_data, n, i+1, 1.0, gmWi, n, gmG, 1.0, x );
346 if (iters >= itermax)
352 clockStop( refine_clk );
355 solver.output_final( pastix_data, resid_b, iters, t3, x, x );
364 #if defined(PASTIX_DEBUG_GMRES)
BEGIN_C_DECLS typedef int pastix_int_t
@ DPARM_EPSILON_REFINEMENT
void s_refine_init(struct s_solver *, pastix_data_t *)
Initiate functions pointers to define basic operations.
pastix_int_t s_gmres_smp(pastix_data_t *pastix_data, pastix_rhs_t xp, pastix_rhs_t bp)
Main PaStiX data structure.
Main PaStiX RHS structure.