PaStiX Handbook  6.4.0
bcsc_dinit.c
1 /**
2  *
3  * @file bcsc_dinit.c
4  *
5  * @copyright 2004-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
6  * Univ. Bordeaux. All rights reserved.
7  *
8  * @version 6.4.0
9  * @author Mathieu Faverge
10  * @author Pierre Ramet
11  * @author Xavier Lacoste
12  * @author Theophile Terraz
13  * @author Tony Delarue
14  * @author Vincent Bridonneau
15  * @author Alycia Lisito
16  * @date 2024-07-09
17  *
18  * @generated from /builds/solverstack/pastix/bcsc/bcsc_zinit.c, normal z -> d, Tue Oct 8 14:17:55 2024
19  *
20  **/
21 #include "common.h"
22 #include "pastix/order.h"
23 #include <spm.h>
24 #include "blend/solver.h"
25 #include "bcsc/bcsc.h"
26 #include "bcsc_d.h"
27 
28 #ifndef DOXYGEN_SHOULD_SKIP_THIS
29 static inline double
30 __fct_id( double val ) {
31  return val;
32 }
33 
34 static inline double
35 __fct_conj( double val ) {
36 #if defined(PRECISION_c) || defined(PRECISION_z)
37  return ( val );
38 #else
39  /* This function should not be called in this case. */
40  assert(0);
41  return val;
42 #endif
43 }
44 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
45 
46 #if defined(PASTIX_WITH_MPI)
47 /**
48  *******************************************************************************
49  *
50  * @brief Stores the data the current processor has to send to the other processors
51  * in the bcsc_comm structure.
52  *
53  * There are two cases:
54  *
55  * - If the matrix is general: the full columns and rows of the blocks are stored
56  * in Lvalues and Uvalues.
57  * The local data of the current process which are in remote blocks after
58  * the permutation need to be sent to the owner process. The data is stored
59  * in sendA if it is sent for the column only, in sendAt if it is sent for
60  * the row only and in sendAAt if it is sent for the row and column.
61  *
62  * - If the matrix is symmetric: only the full columns of the blocks
63  * are stored in Lvalues (and Uvalues = Lvalues). Only the lower (or
64  * upper) triangular half of the spm is stored, therefore we need to
65  * duplicate the lower (or upper) data to fill the upper (or lower)
66  * half of the matrix in the blocks.
67  * The local data of the current process which are in remote blocks after
68  * the permutation need to be sent to the owner process. The data is stored
69  * in sendA if it is sent for the lower (or upper) half of the column, in
70  * sendAt if it is sent for the upper (or lower) half of the column and in
71  * sendAAt if it is sent for both the lower and upper halves of the column.
72  * The diagonal values are stored in sendA only.
73  *
74  *******************************************************************************
75  *
76  * @param[in] spm
77  * The initial sparse matrix in the spm format.
78  *
79  * @param[in] ord
80  * The ordering that needs to be applied on the spm to generate the
81  * block csc.
82  *
83  * @param[in] col2cblk
84  * The array of matching columns with cblk indexes.
85  *
86  * @param[inout] bcsc_comm
87  * The structure in which the data is stored.
88  *
89  *******************************************************************************/
90 void
91 bcsc_dstore_data( const spmatrix_t *spm,
92  const pastix_order_t *ord,
93  const pastix_int_t *col2cblk,
94  bcsc_handle_comm_t *bcsc_comm )
95 {
96  double *values_c;
97  double *values = (double*)(spm->values);
98  pastix_int_t *colptr = spm->colptr;
99  pastix_int_t *rowptr = spm->rowptr;
100  pastix_int_t *loc2glob = spm->loc2glob;
101  pastix_int_t *dofs = spm->dofs;
102  pastix_int_t dof = spm->dof;
103  bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
104  pastix_int_t clustnbr = bcsc_comm->clustnbr;
105  pastix_int_t il, jl, ig, jg, igp, jgp, igpe, jgpe;
106  pastix_int_t ownerj, owneri, baseval;
107  pastix_int_t dofi, dofj, frow, lrow;
108  bcsc_exch_comm_t *data_sendA, *data_sendAt, *data_sendAAt;
109  bcsc_data_amount_t *data_cntA, *data_cntAt, *data_cntAAt;
110 #if defined(PRECISION_z) || defined(PRECISION_c)
111  int sym = (spm->mtxtype == SpmSymmetric) || (spm->mtxtype == SpmSymmetric);
112 #else
113  int sym = (spm->mtxtype == SpmSymmetric);
114 #endif
115 
116  /* Allocates the sending indexes and values buffers. */
117  bcsc_allocate_buf( bcsc_comm, PastixTagMemSend );
118 
119  /*
120  * Allocates and initialises the counters used to fill the values
121  * and indexes buffers.
122  */
123  MALLOC_INTERN( data_cntA, 3 * clustnbr, bcsc_data_amount_t );
124  memset( data_cntA, 0, 3 * clustnbr * sizeof(bcsc_data_amount_t) );
125  data_cntAt = data_cntA + clustnbr;
126  data_cntAAt = data_cntA + clustnbr * 2;
127 
128  baseval = spm->baseval;
129 
130  /* Goes through the columns of spm. */
131  for ( jl = 0; jl < spm->n; jl++, colptr++, loc2glob++ ) {
132  jg = *loc2glob - baseval;
133  jgp = ord->permtab[jg];
134  jgpe = ( dof > 0 ) ? jgp * dof : dofs[ jg ] - baseval;
135  dofj = ( dof > 0 ) ? dof : dofs[ jg+1 ] - dofs[ jg ];
136 
137  frow = colptr[0] - baseval;
138  lrow = colptr[1] - baseval;
139  assert( (lrow - frow) >= 0 );
140 
141  ownerj = col2cblk[ jgpe ];
142 
143  /* The column jp belongs to another process. */
144  if ( ownerj < 0 ) {
145  ownerj = - ownerj - 1;
146  data_comm = bcsc_comm->data_comm + ownerj;
147  data_sendA = &( data_comm->sendA );
148 
149  /* Goes through the rows of jl. */
150  for ( il = frow; il < lrow; il++ ) {
151  ig = rowptr[il] - baseval;
152  igp = ord->permtab[ig];
153  igpe = ( dof > 0 ) ? igp * dof : dofs[ ig ] - baseval;
154  dofi = ( dof > 0 ) ? dof : dofs[ ig+1 ] - dofs[ ig ];
155  owneri = col2cblk[ igpe ];
156 
157  /*
158  * The diagonal values (ip, jp) belong to the same process.
159  * They are sent to owneri in the sym case for A only.
160  */
161  if ( sym && ( ig == jg ) ) {
162  data_sendA->idxbuf[data_cntA[ownerj].idxcnt ] = igp;
163  data_sendA->idxbuf[data_cntA[ownerj].idxcnt+1] = jgp;
164  data_cntA[ownerj].idxcnt += 2;
165 
166  values_c = ((double*)(data_sendA->valbuf)) + data_cntA[ownerj].valcnt;
167  memcpy( values_c, values, dofi * dofj * sizeof(double) );
168  data_cntA[ownerj].valcnt += dofi * dofj;
169 
170  values += dofi * dofj;
171  continue;
172  }
173 
174  /* The row ip belongs to another process. */
175  if ( owneri < 0 ) {
176  owneri = - owneri - 1;
177  data_comm = bcsc_comm->data_comm + owneri;
178 
179  /*
180  * The values (ip, jp) belong to the same process.
181  * They are sent to owneri for AAt in the general case.
182  */
183  if ( owneri == ownerj ) {
184  data_sendAAt = &( data_comm->sendAAt );
185 
186  data_sendAAt->idxbuf[data_cntAAt[ownerj].idxcnt ] = igp;
187  data_sendAAt->idxbuf[data_cntAAt[ownerj].idxcnt+1] = jgp;
188  data_cntAAt[ownerj].idxcnt += 2;
189 
190  values_c = ((double*)(data_sendAAt->valbuf)) + data_cntAAt[ownerj].valcnt;
191  memcpy( values_c, values, dofi * dofj * sizeof(double) );
192  data_cntAAt[ownerj].valcnt += dofi * dofj;
193  }
194  /*
195  * The values (ip, jp) belong to different processes.
196  * They are sent to owneri for At and to ownerj for A.
197  */
198  else {
199  data_sendAt = &( data_comm->sendAt );
200 
201  data_sendAt->idxbuf[data_cntAt[owneri].idxcnt ] = igp;
202  data_sendAt->idxbuf[data_cntAt[owneri].idxcnt+1] = jgp;
203  data_cntAt[owneri].idxcnt += 2;
204 
205  values_c = ((double*)(data_sendAt->valbuf)) + data_cntAt[owneri].valcnt;
206  memcpy( values_c, values, dofi * dofj * sizeof(double) );
207  data_cntAt[owneri].valcnt += dofi * dofj;
208 
209  data_sendA->idxbuf[data_cntA[ownerj].idxcnt ] = igp;
210  data_sendA->idxbuf[data_cntA[ownerj].idxcnt+1] = jgp;
211  data_cntA[ownerj].idxcnt += 2;
212 
213  values_c = ((double*)(data_sendA->valbuf)) + data_cntA[ownerj].valcnt;
214  memcpy( values_c, values, dofi * dofj * sizeof(double) );
215  data_cntA[ownerj].valcnt += dofi * dofj;
216  }
217  }
218  /* The row ip is local. */
219  else {
220  /*
221  * The values (ip, jp) belong to ownerj.
222  * They are sent to ownerj for A.
223  */
224  data_sendA->idxbuf[data_cntA[ownerj].idxcnt ] = igp;
225  data_sendA->idxbuf[data_cntA[ownerj].idxcnt+1] = jgp;
226  data_cntA[ownerj].idxcnt += 2;
227 
228  values_c = ((double*)(data_sendA->valbuf)) + data_cntA[ownerj].valcnt;
229  memcpy( values_c, values, dofi * dofj * sizeof(double) );
230  data_cntA[ownerj].valcnt += dofi * dofj;
231  }
232  values += dofi * dofj;
233  }
234  }
235  /* The column jp is local. */
236  else {
237  /* Goes through the rows of j. */
238  for ( il = frow; il < lrow; il++ ) {
239  ig = rowptr[il] - baseval;
240  igp = ord->permtab[ig];
241  igpe = ( dof > 0 ) ? igp * dof : dofs[ ig ] - baseval;
242  dofi = ( dof > 0 ) ? dof : dofs[ ig+1 ] - dofs[ ig ];
243  owneri = col2cblk[ igpe ];
244 
245  /*
246  * The diagonal values (ip, jp) have already been
247  * added to globcol in the sym case.
248  */
249  if ( sym && ( ig == jg ) ) {
250  values += dofi * dofj;
251  continue;
252  }
253 
254  /* The row ip belongs to another process. */
255  if ( owneri < 0 ) {
256  owneri = - owneri - 1;
257  data_comm = bcsc_comm->data_comm + owneri;
258  data_sendAt = &( data_comm->sendAt );
259 
260  /*
261  * The values (ip, jp) belong to owneri.
262  * They are sent to owneri for At.
263  */
264  data_sendAt->idxbuf[data_cntAt[owneri].idxcnt ] = igp;
265  data_sendAt->idxbuf[data_cntAt[owneri].idxcnt+1] = jgp;
266  data_cntAt[owneri].idxcnt += 2;
267 
268  values_c = ((double*)(data_sendAt->valbuf)) + data_cntAt[owneri].valcnt;
269  memcpy( values_c, values, dofi * dofj * sizeof(double) );
270  data_cntAt[owneri].valcnt += dofi * dofj;
271  }
272  values += dofi * dofj;
273  }
274  }
275  }
276 
277  memFree_null( data_cntA );
278 }
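
The owner tests above rely on the sign convention of col2cblk: a non-negative entry is the index of the local cblk that owns the column, while a negative entry encodes the rank of the remote owner as -(owner + 1). A minimal standalone sketch of that decoding, with hypothetical values and a helper name (col2cblk_owner) that is not part of PaStiX:

#include <assert.h>
#include <stdio.h>

/* Decodes a col2cblk entry: returns the remote owner rank, or -1 if the column is local. */
static int col2cblk_owner( int entry )
{
    return ( entry < 0 ) ? -entry - 1 : -1;
}

int main( void )
{
    int col2cblk[3] = { 2, -1, -3 };               /* hypothetical entries   */
    assert( col2cblk_owner( col2cblk[0] ) == -1 ); /* local column, cblk 2   */
    assert( col2cblk_owner( col2cblk[1] ) ==  0 ); /* column owned by rank 0 */
    assert( col2cblk_owner( col2cblk[2] ) ==  2 ); /* column owned by rank 2 */
    printf( "owner decoding ok\n" );
    return 0;
}
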
279 
280 /**
281  *******************************************************************************
282  *
283  * @ingroup bcsc_internal
284  *
285  * @brief Adds the remote data of A to the bcsc structure.
286  *
287  *******************************************************************************
288  *
289  * @param[in] spm
290  * The initial sparse matrix in the spm format.
291  *
292  * @param[in] ord
293  * The ordering that needs to be applied on the spm to generate the
294  * block csc.
295  *
296  * @param[in] solvmtx
297  * The solver matrix structure which describes the data distribution.
298  *
299  * @param[inout] bcsc
300  * On entry, the pointer to an allocated bcsc.
301  * On exit, the bcsc fields are updated.
302  *
303  * @param[in] values_buf
304  * The array containing the remote values.
305  *
306  * @param[in] idx_cnt
307  * The index for the beginning of the indexes array buffer
308  * corresponding to the values_buf.
309  *
310  * @param[in] idx_size
311  * The number of indexes corresponding to the values_buf.
312  *
313  * @param[in] AAt
314  * Flag selecting the index buffer: the sendA indexes if AAt == 0, the sendAAt indexes otherwise.
315  *
316  *******************************************************************************/
317 static inline pastix_int_t
318 bcsc_dhandle_recv_A( const spmatrix_t *spm,
319  const pastix_order_t *ord,
320  const SolverMatrix *solvmtx,
321  pastix_bcsc_t *bcsc,
322  double *values_buf,
323  pastix_int_t idx_cnt,
324  pastix_int_t idx_size,
325  pastix_int_t AAt )
326 {
327  bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
328  pastix_int_t *dofs = spm->dofs;
329  pastix_int_t dof = spm->dof;
330  SolverCblk *cblk;
331  pastix_int_t *coltab;
332  pastix_int_t k, igp, jgp, ig, jg, igpe, jgpe;
333  pastix_int_t itercblk;
334  pastix_int_t idofi, idofj, dofi, dofj;
335  pastix_int_t colidx, rowidx, pos;
336  pastix_int_t clustnum = bcsc_comm->clustnum;
337  double *Values = bcsc->Lvalues;
338  pastix_int_t *indexes = ( AAt == 0 ) ? bcsc_comm->data_comm[clustnum].sendA.idxbuf :
339  bcsc_comm->data_comm[clustnum].sendAAt.idxbuf;
340  pastix_int_t nbelt = 0;
341 
342  /*
343  * Goes through the received indexes in order to add the data
344  * received.
345  */
346  indexes += idx_cnt;
347  for ( k = 0; k < idx_size; k+=2, indexes+=2 ) {
348  /* Gets received indexes igp and jgp. */
349  igp = indexes[0];
350  jgp = indexes[1];
351 
352  ig = ord->peritab[igp];
353  jg = ord->peritab[jgp];
354  igpe = (dof > 0) ? igp * dof : dofs[ ig ] - spm->baseval;
355  jgpe = (dof > 0) ? jgp * dof : dofs[ jg ] - spm->baseval;
356  dofi = (dof > 0) ? dof : dofs[ig+1] - dofs[ig];
357  dofj = (dof > 0) ? dof : dofs[jg+1] - dofs[jg];
358 
359  itercblk = bcsc->col2cblk[ jgpe ];
360  assert( itercblk >= 0 );
361 
362  /* Gets the block on which the data received will be added. */
363  cblk = solvmtx->cblktab + itercblk;
364  coltab = bcsc->cscftab[cblk->bcscnum].coltab;
365 
366  /* Goes through the values at (igp, jgp). */
367  colidx = jgpe - cblk->fcolnum;
368  for ( idofj = 0; idofj < dofj; idofj++, colidx++ ) {
369  rowidx = igpe;
370  pos = coltab[ colidx ];
371  for ( idofi = 0; idofi < dofi; idofi++, rowidx++, pos++, values_buf++ ) {
372  /* Adds the row igp. */
373  assert( rowidx >= 0 );
374  assert( rowidx < spm->gNexp );
375  bcsc->rowtab[ pos ] = rowidx;
376  /* Adds the data at row igp and column jgp. */
377  Values[ pos ] = *values_buf;
378  nbelt++;
379  }
380  coltab[ colidx ] += dofi;
381  assert( coltab[colidx] <= coltab[colidx+1] );
382  }
383  }
384 
385  return nbelt;
386 }
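
The scatter above uses coltab as a fill cursor: pos = coltab[colidx] is the next free slot of the column and the counter is advanced after each copy, which is why bcsc_restore_coltab must later put the column pointers back. A minimal standalone sketch of this pattern, with hypothetical data and a hand-written restore loop that only illustrates the idea:

#include <stdio.h>

int main( void )
{
    int coltab[3] = { 0, 2, 4 };          /* two columns with two slots each */
    int rowtab[4];
    int cols[4]   = { 0, 1, 0, 1 };       /* entries to scatter (col, row)   */
    int rows[4]   = { 5, 7, 6, 8 };
    int k, col;

    /* Scatter: each write goes to the current cursor of its column. */
    for ( k = 0; k < 4; k++ ) {
        col = cols[k];
        rowtab[ coltab[col] ] = rows[k];
        coltab[col]++;                    /* fill cursor moves forward */
    }

    /* Restore the column pointers (coltab[col] now equals the old coltab[col+1]). */
    for ( col = 2; col > 0; col-- ) {
        coltab[col] = coltab[col-1];
    }
    coltab[0] = 0;

    printf( "rowtab = { %d, %d, %d, %d }\n", rowtab[0], rowtab[1], rowtab[2], rowtab[3] );
    printf( "coltab = { %d, %d, %d }\n", coltab[0], coltab[1], coltab[2] );
    return 0;
}
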
387 
388 /**
389  *******************************************************************************
390  *
391  * @ingroup bcsc_internal
392  *
393  * @brief Adds the remote data of At to the bcsc structure.
394  *
395  *******************************************************************************
396  *
397  * @param[in] spm
398  * The initial sparse matrix in the spm format.
399  *
400  * @param[in] ord
401  * The ordering that needs to be applied on the spm to generate the
402  * block csc.
403  *
404  * @param[in] solvmtx
405  * The solver matrix structure which describes the data distribution.
406  *
407  * @param[inout] rowtab
408  * The row tab of the bcsc or the row tab associated to At.
409  *
410  * @param[inout] bcsc
411  * On entry, the pointer to an allocated bcsc.
412  * On exit, the bcsc fields are updated.
413  *
414  * @param[in] values_buf
415  * The array containing the remote values.
416  *
417  * @param[in] idx_cnt
418  * The index for the beginning of the indexes array buffer
419  * corresponding to the values_buf.
420  *
421  * @param[in] idx_size
422  * The number of indexes corresponding to the values_buf.
423  *
424  * @param[in] AAt
425  * Flag selecting the index buffer: the sendAt indexes if AAt == 0, the sendAAt indexes otherwise.
426  *
427  *******************************************************************************/
428 static inline pastix_int_t
429 bcsc_dhandle_recv_At( const spmatrix_t *spm,
430  const pastix_order_t *ord,
431  const SolverMatrix *solvmtx,
432  pastix_int_t *rowtab,
433  pastix_bcsc_t *bcsc,
434  double *values_buf,
435  pastix_int_t idx_cnt,
436  pastix_int_t idx_size,
437  pastix_int_t AAt )
438 {
439  bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
440  pastix_int_t *dofs = spm->dofs;
441  pastix_int_t dof = spm->dof;
442  SolverCblk *cblk;
443  pastix_int_t *coltab;
444  pastix_int_t k, igp, jgp, ig, jg, igpe, jgpe;
445  pastix_int_t itercblk;
446  pastix_int_t idofi, idofj, dofi, dofj;
447  pastix_int_t colidx, rowidx, pos;
448  pastix_int_t clustnum = bcsc_comm->clustnum;
449  pastix_int_t *indexes = ( AAt == 0 ) ? bcsc_comm->data_comm[clustnum].sendAt.idxbuf :
450  bcsc_comm->data_comm[clustnum].sendAAt.idxbuf;
451  double *Values;
452  double (*_bcsc_conj)(double) = __fct_id;
453  pastix_int_t nbelt = 0;
454 
455  /* Gets the Uvalues in the general case and the Lvalues in the sym case. */
456  if ( spm->mtxtype == SpmGeneral ) {
457  _bcsc_conj = __fct_id;
458  Values = (double*)(bcsc->Uvalues);
459  }
460  else {
461  if( spm->mtxtype == SpmSymmetric ) {
462  _bcsc_conj = __fct_conj;
463  }
464  if( spm->mtxtype == SpmSymmetric ) {
465  _bcsc_conj = __fct_id;
466  }
467  Values = (double*)(bcsc->Lvalues);
468  }
469 
470  /*
471  * Goes through the received indexes in order to add the data
472  * received.
473  */
474  indexes += idx_cnt;
475  for ( k = 0; k < idx_size; k+=2, indexes+=2 ) {
476  /* Gets received indexes igp and jgp. */
477  igp = indexes[0];
478  jgp = indexes[1];
479 
480  ig = ord->peritab[igp];
481  jg = ord->peritab[jgp];
482  igpe = (dof > 0) ? igp * dof : dofs[ ig ] - spm->baseval;
483  jgpe = (dof > 0) ? jgp * dof : dofs[ jg ] - spm->baseval;
484  dofi = (dof > 0) ? dof : dofs[ig+1] - dofs[ig];
485  dofj = (dof > 0) ? dof : dofs[jg+1] - dofs[jg];
486 
487  itercblk = bcsc->col2cblk[ igpe ];
488  assert( itercblk >= 0 );
489 
490  /* Gets the block on which the data received will be added. */
491  cblk = solvmtx->cblktab + itercblk;
492  coltab = bcsc->cscftab[cblk->bcscnum].coltab;
493 
494  /* Goes through the values at (igp, jgp). */
495  for ( idofj = 0; idofj < dofj; idofj++ ) {
496  rowidx = jgpe + idofj;
497  colidx = igpe - cblk->fcolnum;
498  for ( idofi = 0; idofi < dofi; idofi++, colidx++, values_buf++ ) {
499  pos = coltab[ colidx ];
500  /* Adds the row jgp. */
501  assert( rowidx >= 0 );
502  assert( rowidx < spm->gNexp );
503  rowtab[ pos ] = rowidx;
504 
505  /* Adds the data at row jgp and column igp. */
506  Values[ pos ] = _bcsc_conj( *values_buf );
507  coltab[ colidx ] ++;
508  assert( coltab[colidx] <= coltab[colidx+1] );
509  nbelt++;
510  }
511  }
512  }
513  return nbelt;
514 }
515 
516 /**
517  *******************************************************************************
518  *
519  * @ingroup bcsc_internal
520  *
521  * @brief Adds the remote data of A and At to the bcsc structure.
522  *
523  *******************************************************************************
524  *
525  * @param[in] spm
526  * The initial sparse matrix in the spm format.
527  *
528  * @param[in] ord
529  * The ordering that needs to be applied on the spm to generate the
530  * block csc.
531  *
532  * @param[in] solvmtx
533  * The solver matrix structure which describes the data distribution.
534  *
535  * @param[inout] rowtab
536  * The row tab of the bcsc or the row tab associated to At.
537  *
538  * @param[inout] bcsc
539  * On entry, the pointer to an allocated bcsc.
540  * On exit, the bcsc fields are updated.
541  *
542  * @param[in] values_buf
543  * The array containing the remote values.
544  *
545  * @param[in] idx_cnt
546  * The index for the beginning of the indexes array buffer
547  * corresponding to the values_buf.
548  *
549  * @param[in] idx_size
550  * The number of indexes corresponding to the values_buf.
551  *
552  *******************************************************************************/
553 static inline pastix_int_t
554 bcsc_dhandle_recv_AAt( const spmatrix_t *spm,
555  const pastix_order_t *ord,
556  const SolverMatrix *solvmtx,
557  pastix_int_t *rowtab,
558  pastix_bcsc_t *bcsc,
559  double *values_buf,
560  pastix_int_t idx_cnt,
561  pastix_int_t idx_size )
562 {
563  bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
564  pastix_int_t *dofs = spm->dofs;
565  pastix_int_t dof = spm->dof;
566  SolverCblk *cblki, *cblkj;
567  pastix_int_t *coltabi, *coltabj;
568  pastix_int_t k, igp, jgp, ig, jg, igpe, jgpe;
569  pastix_int_t itercblki, itercblkj;
570  pastix_int_t idofi, idofj, dofi, dofj;
571  pastix_int_t colj, rowj, posj;
572  pastix_int_t coli, rowi, posi;
573  pastix_int_t clustnum = bcsc_comm->clustnum;
574  pastix_int_t *indexes = bcsc_comm->data_comm[clustnum].sendAAt.idxbuf;
575  double *ValuesA, *ValuesAt;
576  double (*_bcsc_conj)(double) = __fct_id;
577  pastix_int_t nbelt = 0;
578 
579  /* Gets the Uvalues in the general case and the Lvalues in the sym case. */
580  if ( spm->mtxtype == SpmGeneral ) {
581  _bcsc_conj = __fct_id;
582  ValuesAt = (double*)(bcsc->Uvalues);
583  }
584  else {
585  if( spm->mtxtype == SpmSymmetric ) {
586  _bcsc_conj = __fct_conj;
587  }
588  if( spm->mtxtype == SpmSymmetric ) {
589  _bcsc_conj = __fct_id;
590  }
591  ValuesAt = (double*)(bcsc->Lvalues);
592  }
593  ValuesA = (double*)(bcsc->Lvalues);
594 
595  /*
596  * Goes through the received indexes in order to add the data
597  * received.
598  */
599  indexes += idx_cnt;
600  for ( k = 0; k < idx_size; k+=2, indexes+=2 ) {
601  /* Gets received indexes igp and jgp. */
602  igp = indexes[0];
603  jgp = indexes[1];
604 
605  ig = ord->peritab[igp];
606  jg = ord->peritab[jgp];
607  igpe = (dof > 0) ? igp * dof : dofs[ ig ] - spm->baseval;
608  jgpe = (dof > 0) ? jgp * dof : dofs[ jg ] - spm->baseval;
609  dofi = (dof > 0) ? dof : dofs[ig+1] - dofs[ig];
610  dofj = (dof > 0) ? dof : dofs[jg+1] - dofs[jg];
611 
612  itercblki = bcsc->col2cblk[ igpe ];
613  itercblkj = bcsc->col2cblk[ jgpe ];
614  assert( itercblki >= 0 );
615  assert( itercblkj >= 0 );
616 
617  /* Gets the block on which the data received will be added. */
618  cblki = solvmtx->cblktab + itercblki;
619  cblkj = solvmtx->cblktab + itercblkj;
620  coltabi = bcsc->cscftab[cblki->bcscnum].coltab;
621  coltabj = bcsc->cscftab[cblkj->bcscnum].coltab;
622 
623  /* Goes through the values at (igp, jgp). */
624  colj = jgpe - cblkj->fcolnum;
625  for ( idofj = 0; idofj < dofj; idofj++ ) {
626  rowj = igpe;
627  posj = coltabj[ colj ];
628  rowi = jgpe + idofj;
629  coli = igpe - cblki->fcolnum;
630  for ( idofi = 0; idofi < dofi; idofi++, coli++, rowj++, posj++, values_buf++ ) {
631  posi = coltabi[ coli ];
632  /* Adds the rows jgp and igp. */
633  assert( rowi >= 0 );
634  assert( rowj >= 0 );
635  assert( rowi < spm->gNexp );
636  assert( rowj < spm->gNexp );
637 
638  rowtab[ posi ] = rowi;
639  bcsc->rowtab[ posj ] = rowj;
640 
641  /* Adds the data at row jgp and column igp. */
642  ValuesAt[ posi ] = _bcsc_conj( *values_buf );
643  ValuesA [ posj ] = *values_buf;
644  coltabi[ coli ] ++;
645  assert( coltabi[coli] <= coltabi[coli+1] );
646  nbelt++;
647  }
648  coltabj[ colj ] += dofi;
649  assert( coltabj[colj] <= coltabj[colj+1] );
650  }
651  }
652 
653  return nbelt;
654 }
655 
656 /**
657  *******************************************************************************
658  *
659  * @ingroup bcsc_internal
660  *
661  * @brief Exchanges and stores the remote values of A.
662  *
663  *******************************************************************************
664  *
665  * @param[in] spm
666  * The initial sparse matrix in the spm format.
667  *
668  * @param[in] ord
669  * The ordering which needs to be applied on the spm to generate the
670  * block csc.
671  *
672  * @param[in] solvmtx
673  * The solver matrix structure which describes the data distribution.
674  *
675  * @param[inout] bcsc
676  * On entry, the pointer to an allocated bcsc.
677  * On exit, the bcsc fields are updated with the remote values of A.
678  *
679  *******************************************************************************/
680 static inline pastix_int_t
681 bcsc_dexchange_values_A( const spmatrix_t *spm,
682  const pastix_order_t *ord,
683  const SolverMatrix *solvmtx,
684  pastix_bcsc_t *bcsc )
685 {
686  bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
687  pastix_int_t clustnbr = bcsc_comm->clustnbr;
688  pastix_int_t clustnum = bcsc_comm->clustnum;
689  bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
690  bcsc_proc_comm_t *data_send = NULL;
691  bcsc_proc_comm_t *data_recv = NULL;
692  double *val_buf = NULL;
693  pastix_int_t idx_size = 0;
694  pastix_int_t counter_req = 0;
695  pastix_int_t nbelt_recv = 0;
696  pastix_int_t cnt = 0;
697  pastix_int_t idx_cnt[clustnbr];
698  MPI_Status statuses[clustnbr-1];
699  MPI_Request requests[clustnbr-1];
700  bcsc_data_amount_t *sends, *recvs;
701  pastix_int_t c_send, c_recv, k;
702 
703  /* Allocates the receiving indexes and values buffers. */
704  if ( bcsc_comm->max_idx != 0 ) {
705  MALLOC_INTERN( val_buf, bcsc_comm->max_val, double );
706  }
707 
708  for ( k = 0; k < clustnbr; k++ ) {
709  if ( k == clustnum ) {
710  idx_cnt[k] = 0;
711  continue;
712  }
713  idx_cnt[ k ] = cnt;
714  cnt += data_comm[k].recvA.idxcnt;
715  }
716 
717  /* Sends the values. */
718  c_send = (clustnum+1) % clustnbr;
719  for ( k = 0; k < clustnbr-1; k++ ) {
720  data_send = data_comm + c_send;
721  sends = &( data_send->sendA.size );
722  if ( c_send == clustnum ) {
723  continue;
724  }
725 
726  /* Posts the emissions of the values. */
727  if ( sends->valcnt != 0 ) {
728  MPI_Isend( data_send->sendA.valbuf, sends->valcnt, PASTIX_MPI_DOUBLE,
729  c_send, PastixTagValuesA, bcsc_comm->comm, &requests[counter_req++] );
730  }
731  c_send = (c_send+1) % clustnbr;
732  }
733 
734  /* Receives the values. */
735  c_recv = (clustnum-1+clustnbr) % clustnbr;
736  for ( k = 0; k < clustnbr-1; k++ ) {
737  data_recv = data_comm + c_recv;
738  recvs = &( data_recv->recvA );
739  if ( c_recv == clustnum ) {
740  continue;
741  }
742 
743  /* Posts the receptions of the values. */
744  if ( recvs->valcnt != 0 ) {
745  MPI_Recv( val_buf, recvs->valcnt, PASTIX_MPI_DOUBLE,
746  c_recv, PastixTagValuesA, bcsc_comm->comm, MPI_STATUS_IGNORE );
747  idx_size = recvs->idxcnt;
748  nbelt_recv += bcsc_dhandle_recv_A( spm, ord, solvmtx, bcsc,
749  val_buf, idx_cnt[c_recv], idx_size, 0 );
750  }
751  c_recv = (c_recv-1+clustnbr) % clustnbr;
752  }
753 
754  MPI_Waitall( counter_req, requests, statuses );
755  free( val_buf );
756  assert( nbelt_recv == bcsc_comm->data_comm[clustnum].recvA.valcnt );
757  bcsc_free_buf( bcsc_comm, PastixTagMemSendValA );
758  bcsc_free_buf( bcsc_comm, PastixTagMemRecvIdxA );
759  return nbelt_recv;
760 }
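
The exchange above walks the other processes in a ring: emissions are posted towards clustnum+1, clustnum+2, ..., while receptions are processed from clustnum-1, clustnum-2, ..., so each peer is handled exactly once in each direction. A standalone sketch of that peer ordering, assuming clustnbr = 4 and clustnum = 1 (not PaStiX code):

#include <stdio.h>

int main( void )
{
    int clustnbr = 4;   /* assumed number of MPI processes */
    int clustnum = 1;   /* assumed rank of this process    */
    int k, c_send, c_recv;

    c_send = ( clustnum + 1 ) % clustnbr;
    c_recv = ( clustnum - 1 + clustnbr ) % clustnbr;
    for ( k = 0; k < clustnbr - 1; k++ ) {
        printf( "step %d: send to %d, receive from %d\n", k, c_send, c_recv );
        c_send = ( c_send + 1 ) % clustnbr;
        c_recv = ( c_recv - 1 + clustnbr ) % clustnbr;
    }
    return 0;
}
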
761 
762 /**
763  *******************************************************************************
764  *
765  * @ingroup bcsc_internal
766  *
767  * @brief Exchanges and stores the remote values of At.
768  *
769  *******************************************************************************
770  *
771  * @param[in] spm
772  * The initial sparse matrix in the spm format.
773  *
774  * @param[in] ord
775  * The ordering which needs to be applied on the spm to generate the
776  * block csc.
777  *
778  * @param[in] solvmtx
779  * The solver matrix structure which describes the data distribution.
780  *
781  * @param[inout] rowtab
782  * The row tab of the bcsc or the row tab associated to At.
783  *
784  * @param[inout] bcsc
785  * On entry, the pointer to an allocated bcsc.
786  * On exit, the bcsc fields are updated with the remote values of At.
787  *
788  *******************************************************************************/
789 static inline pastix_int_t
790 bcsc_dexchange_values_At( const spmatrix_t *spm,
791  const pastix_order_t *ord,
792  const SolverMatrix *solvmtx,
793  pastix_int_t *rowtab,
794  pastix_bcsc_t *bcsc )
795 {
796  bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
797  pastix_int_t clustnbr = bcsc_comm->clustnbr;
798  pastix_int_t clustnum = bcsc_comm->clustnum;
799  bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
800  bcsc_proc_comm_t *data_send = NULL;
801  bcsc_proc_comm_t *data_recv = NULL;
802  double *val_buf = NULL;
803  pastix_int_t idx_size = 0;
804  pastix_int_t counter_req = 0;
805  pastix_int_t nbelt_recv = 0;
806  pastix_int_t cnt = 0;
807  pastix_int_t idx_cnt[clustnbr];
808  MPI_Status statuses[clustnbr-1];
809  MPI_Request requests[clustnbr-1];
810  bcsc_data_amount_t *sends, *recvs;
811  pastix_int_t c_send, c_recv, k;
812 
813  /* Allocates the receiving indexes and values buffers. */
814  if ( bcsc_comm->max_idx != 0 ) {
815  MALLOC_INTERN( val_buf, bcsc_comm->max_val, double );
816  }
817 
818  for ( k = 0; k < clustnbr; k++ ) {
819  if ( k == clustnum ) {
820  idx_cnt[k] = 0;
821  continue;
822  }
823  idx_cnt[ k ] = cnt;
824  cnt += data_comm[k].recvAt.idxcnt;
825  }
826 
827  /* Sends the values. */
828  c_send = (clustnum+1) % clustnbr;
829  for ( k = 0; k < clustnbr-1; k++ ) {
830  data_send = data_comm + c_send;
831  sends = &( data_send->sendAt.size );
832  if ( c_send == clustnum ) {
833  continue;
834  }
835 
836  /* Posts the emissions of the values. */
837  if ( sends->valcnt != 0 ) {
838  MPI_Isend( data_send->sendAt.valbuf, sends->valcnt, PASTIX_MPI_DOUBLE,
839  c_send, PastixTagValuesAt, bcsc_comm->comm, &requests[counter_req++] );
840  }
841  c_send = (c_send+1) % clustnbr;
842  }
843 
844  /* Receives the values. */
845  c_recv = (clustnum-1+clustnbr) % clustnbr;
846  for ( k = 0; k < clustnbr-1; k++ ) {
847  data_recv = data_comm + c_recv;
848  recvs = &( data_recv->recvAt );
849  if ( c_recv == clustnum ) {
850  continue;
851  }
852 
853  /* Posts the receptions of the values. */
854  if ( recvs->valcnt != 0 ) {
855  MPI_Recv( val_buf, recvs->valcnt, PASTIX_MPI_DOUBLE,
856  c_recv, PastixTagValuesAt, bcsc_comm->comm, MPI_STATUS_IGNORE );
857  idx_size = recvs->idxcnt;
858  nbelt_recv += bcsc_dhandle_recv_At( spm, ord, solvmtx, rowtab, bcsc,
859  val_buf, idx_cnt[c_recv], idx_size, 0 );
860  }
861  c_recv = (c_recv-1+clustnbr) % clustnbr;
862  }
863 
864  MPI_Waitall( counter_req, requests, statuses );
865  free( val_buf );
866  assert( nbelt_recv == bcsc_comm->data_comm[clustnum].recvAt.valcnt );
867  bcsc_free_buf( bcsc_comm, PastixTagMemSendValAt );
868  bcsc_free_buf( bcsc_comm, PastixTagMemRecvIdxAt );
869  return nbelt_recv;
870 }
871 
872 /**
873  *******************************************************************************
874  *
875  * @ingroup bcsc_internal
876  *
877  * @brief Exchanges and stores the remote values of A and At.
878  *
879  *******************************************************************************
880  *
881  * @param[in] spm
882  * The initial sparse matrix in the spm format.
883  *
884  * @param[in] ord
885  * The ordering which needs to be applied on the spm to generate the
886  * block csc.
887  *
888  * @param[in] solvmtx
889  * The solver matrix structure which describes the data distribution.
890  *
891  * @param[inout] rowtabAt
892  * The row tab of the bcsc or the row tab associated to At.
893  *
894  * @param[inout] bcsc
895  * On entry, the pointer to an allocated bcsc.
896  * On exit, the bcsc fields are updated with the remote values of A and At.
897  *
898  *******************************************************************************/
899 static inline pastix_int_t
900 bcsc_dexchange_values_AAt( const spmatrix_t *spm,
901  const pastix_order_t *ord,
902  const SolverMatrix *solvmtx,
903  pastix_int_t *rowtabAt,
904  pastix_bcsc_t *bcsc )
905 {
906  bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
907  pastix_int_t clustnbr = bcsc_comm->clustnbr;
908  pastix_int_t clustnum = bcsc_comm->clustnum;
909  bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
910  bcsc_proc_comm_t *data_send = NULL;
911  bcsc_proc_comm_t *data_recv = NULL;
912  double *val_buf = NULL;
913  pastix_int_t idx_size = 0;
914  pastix_int_t counter_req = 0;
915  pastix_int_t nbelt_recv = 0;
916  pastix_int_t cnt = 0;
917  pastix_int_t idx_cnt[clustnbr];
918  MPI_Status statuses[clustnbr-1];
919  MPI_Request requests[clustnbr-1];
920  bcsc_data_amount_t *sends;
921  bcsc_exch_comm_t *recvs;
922  pastix_int_t c_send, c_recv, k;
923 
924  /* Allocates the receiving indexes and values buffers. */
925  if ( bcsc_comm->max_idx != 0 ) {
926  if ( spm->mtxtype != SpmGeneral ) {
927  MALLOC_INTERN( val_buf, bcsc_comm->max_val, double );
928  }
929  else {
930  if ( rowtabAt == bcsc->rowtab ) {
931  bcsc_allocate_buf( bcsc_comm, PastixTagMemRecvValAAt);
932  }
933  }
934  }
935 
936  for ( k = 0; k < clustnbr; k++ ) {
937  if ( k == clustnum ) {
938  idx_cnt[k] = 0;
939  continue;
940  }
941  idx_cnt[ k ] = cnt;
942  cnt += data_comm[k].recvAAt.size.idxcnt;
943  }
944 
945  /* Sends the values. */
946  c_send = (clustnum+1) % clustnbr;
947  for ( k = 0; k < clustnbr-1; k++ ) {
948  if ( rowtabAt != bcsc->rowtab ) {
949  break;
950  }
951  data_send = data_comm + c_send;
952  sends = &( data_send->sendAAt.size );
953  if ( c_send == clustnum ) {
954  continue;
955  }
956 
957  /* Posts the emissions of the values. */
958  if ( sends->valcnt != 0 ) {
959  MPI_Isend( data_send->sendAAt.valbuf, sends->valcnt, PASTIX_MPI_DOUBLE,
960  c_send, PastixTagValuesAAt, bcsc_comm->comm, &requests[counter_req++] );
961  }
962  c_send = (c_send+1) % clustnbr;
963  }
964 
965  /* Receives the values. */
966  c_recv = (clustnum-1+clustnbr) % clustnbr;
967  for ( k = 0; k < clustnbr-1; k++ ) {
968  data_recv = data_comm + c_recv;
969  recvs = &( data_recv->recvAAt );
970  if ( c_recv == clustnum ) {
971  continue;
972  }
973 
974  /* Posts the receptions of the values. */
975  if ( recvs->size.valcnt != 0 ) {
976  if ( spm->mtxtype == SpmGeneral ) {
977  val_buf = recvs->valbuf;
978  }
979  if ( rowtabAt == bcsc->rowtab ) {
980  MPI_Recv( val_buf, recvs->size.valcnt, PASTIX_MPI_DOUBLE,
981  c_recv, PastixTagValuesAAt, bcsc_comm->comm, MPI_STATUS_IGNORE );
982  }
983  idx_size = recvs->size.idxcnt;
984 
985  if ( spm->mtxtype != SpmGeneral ) {
986  nbelt_recv += bcsc_dhandle_recv_AAt( spm, ord, solvmtx, rowtabAt, bcsc,
987  val_buf, idx_cnt[c_recv], idx_size );
988  }
989  else {
990  if ( rowtabAt == bcsc->rowtab ) {
991  nbelt_recv += bcsc_dhandle_recv_A( spm, ord, solvmtx, bcsc, val_buf,
992  idx_cnt[c_recv], idx_size, 1 );
993  }
994  else {
995  nbelt_recv += bcsc_dhandle_recv_At( spm, ord, solvmtx, rowtabAt, bcsc,
996  val_buf, idx_cnt[c_recv], idx_size, 1 );
997  }
998  }
999  }
1000  c_recv = (c_recv-1+clustnbr) % clustnbr;
1001  }
1002 
1003  if ( rowtabAt == bcsc->rowtab ) {
1004  MPI_Waitall( counter_req, requests, statuses );
1005  }
1006  if ( spm->mtxtype != SpmGeneral ) {
1007  free( val_buf );
1008  }
1009  assert( nbelt_recv == bcsc_comm->data_comm[clustnum].recvAAt.size.valcnt );
1010  if ( ( spm->mtxtype != SpmGeneral ) || ( rowtabAt != bcsc->rowtab ) ) {
1011  bcsc_free_buf( bcsc_comm, PastixTagMemRecvIdxAAt );
1012  bcsc_free_buf( bcsc_comm, PastixTagMemSendValAAt );
1013  }
1014  if ( ( spm->mtxtype == SpmGeneral ) && ( rowtabAt != bcsc->rowtab ) ) {
1015  bcsc_free_buf( bcsc_comm, PastixTagMemRecvAAt );
1016  }
1017  return nbelt_recv;
1018 }
1019 #endif
1020 
1021 /**
1022  *******************************************************************************
1023  *
1024  * @ingroup bcsc_internal
1025  *
1026  * @brief Initializes the A values of the block csc stored in the given spm.
1027  *
1028  *******************************************************************************
1029  *
1030  * @param[in] spm
1031  * The initial sparse matrix in the spm format.
1032  *
1033  * @param[in] ord
1034  * The ordering which needs to be applied on the spm to generate the
1035  * block csc.
1036  *
1037  * @param[in] solvmtx
1038  * The solver matrix structure which describes the data distribution.
1039  *
1040  * @param[inout] bcsc
1041  * On entry, the pointer to an allocated bcsc.
1042  * On exit, the bcsc fields are updated.
1043  *
1044  *******************************************************************************
1045  *
1046  * @retval The number of values added to the bcsc structure.
1047  *
1048  *******************************************************************************/
1049 static inline pastix_int_t
1050 bcsc_dinit_A( const spmatrix_t *spm,
1051  const pastix_order_t *ord,
1052  const SolverMatrix *solvmtx,
1053  pastix_bcsc_t *bcsc )
1054 {
1055  double *values = (double*)(spm->values);
1056  double *Lvalues = (double*)(bcsc->Lvalues);
1057  pastix_int_t *loc2glob = spm->loc2glob;
1058  pastix_int_t *colptr = spm->colptr;
1059  pastix_int_t *rowptr = spm->rowptr;
1060  pastix_int_t *dofs = spm->dofs;
1061  pastix_int_t dof = spm->dof;
1062  pastix_int_t nbelt = 0;
1063  SolverCblk *cblk;
1064  pastix_int_t *coltab;
1065  pastix_int_t k, j, ig, jg, igp, jgp, igpe, jgpe;
1066  pastix_int_t itercblk, baseval;
1067  pastix_int_t idofi, idofj, dofi, dofj;
1068  pastix_int_t colidx, rowidx, pos;
1069 
1070  baseval = spm->baseval;
1071  /*
1072  * Initializes the values of the matrix A in the blocked csc format. This
1073  * applies the permutation to the values array.
1074  *
1075  * Goes through the columns of the spm.
1076  * j is the initial local column index.
1077  * jg is the initial global column index.
1078  * jgpis the "new" jg index -> the one resulting from the permutation.
1079  */
1080  for ( j = 0; j < spm->n; j++, colptr++, loc2glob++ ) {
1081  jg = ( spm->loc2glob == NULL ) ? j : *loc2glob - baseval;
1082  jgp = ord->permtab[jg];
1083  jgpe = ( dof > 0 ) ? jgp * dof : dofs[ jg ] - baseval;
1084  dofj = ( dof > 0 ) ? dof : dofs[ jg+1 ] - dofs[ jg ];
1085 
1086  itercblk = bcsc->col2cblk[ jgpe ];
1087 
1088  /*
1089  * If MPI is used in shared memory, the block can belong to another
1090  * processor, in this case the column is skipped.
1091  */
1092  /* The block of the column jgp belongs to the current processor. */
1093  if ( itercblk >= 0 ) {
1094  /* Gets the block in which the data will be added. */
1095  cblk = solvmtx->cblktab + itercblk;
1096  coltab = bcsc->cscftab[cblk->bcscnum].coltab;
1097  /*
1098  * Goes through the row of the column j.
1099  * ig is the initial global row index (for the row index local = global).
1100  * igp is the "new" ig index -> the one resulting from the permutation.
1101  */
1102  for ( k = colptr[0]; k < colptr[1]; k++, rowptr++ ) {
1103  ig = *rowptr - baseval;
1104  igp = ord->permtab[ig];
1105  igpe = ( dof > 0 ) ? igp * dof : dofs[ ig ] - baseval;
1106  dofi = ( dof > 0 ) ? dof : dofs[ ig+1 ] - dofs[ ig ];
1107 
1108  /* Copies the values from element (ig, jg) to position (igpe, jgpe) of the expanded bcsc. */
1109  colidx = jgpe - cblk->fcolnum;
1110  for ( idofj = 0; idofj < dofj; idofj++, colidx++ ) {
1111  rowidx = igpe;
1112  pos = coltab[ colidx ];
1113  for ( idofi = 0; idofi < dofi; idofi++, rowidx++, pos++, values++ ) {
1114  assert( rowidx >= 0 );
1115  assert( rowidx < spm->gNexp );
1116  bcsc->rowtab[ pos ] = rowidx;
1117  Lvalues[ pos ] = *values;
1118  nbelt++;
1119  }
1120  coltab[ colidx ] += dofi;
1121  assert( coltab[ colidx ] <= coltab[ colidx+1 ] );
1122  }
1123  }
1124  }
1125  /* The block of the column jgp belongs to another processor. */
1126  else {
1127  for ( k = colptr[0]; k < colptr[1]; k++, rowptr++ ) {
1128  ig = *rowptr - baseval;
1129  dofi = ( dof > 0 ) ? dof : dofs[ ig+1 ] - dofs[ ig ];
1130 
1131  values += dofi * dofj;
1132  }
1133  }
1134  }
1135 
1136  return nbelt;
1137 }
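
The loops above convert spm indexes into expanded bcsc indexes: with a constant dof the expanded index of a permuted column jgp is jgp * dof and its width is dof, while with variable dofs both are read from the cumulative dofs[] array. A minimal standalone sketch of these two formulas, with hypothetical sizes and helper names that are not part of PaStiX:

#include <stdio.h>

/* Expanded index of a column: constant dof, or read from the cumulative dofs[] array. */
static int expanded_index( int jgp, int jg, int dof, const int *dofs, int baseval )
{
    return ( dof > 0 ) ? jgp * dof : dofs[jg] - baseval;
}

/* Number of scalar unknowns attached to a column. */
static int column_width( int jg, int dof, const int *dofs )
{
    return ( dof > 0 ) ? dof : dofs[jg+1] - dofs[jg];
}

int main( void )
{
    int dofs[4] = { 0, 2, 3, 6 };   /* variable dofs: widths 2, 1, 3 */

    /* Constant dof = 2: column jgp = 3 starts at expanded index 6. */
    printf( "constant: index %d, width %d\n",
            expanded_index( 3, 3, 2, NULL, 0 ), column_width( 3, 2, NULL ) );

    /* Variable dofs: column jg = 1 starts at expanded index 2, width 1. */
    printf( "variable: index %d, width %d\n",
            expanded_index( 1, 1, -1, dofs, 0 ), column_width( 1, -1, dofs ) );
    return 0;
}
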
1138 
1139 /**
1140  *******************************************************************************
1141  *
1142  * @ingroup bcsc_internal
1143  *
1144  * @brief Initializes the At values in the block csc stored in the given spm.
1145  *
1146  * This routine initializes either:
1147  * The symmetric upper part (L^t)
1148  * The hermitian upper part (L^h)
1149  * The transpose part of A (A^t -> U)
1150  *
1151  *******************************************************************************
1152  *
1153  * @param[in] spm
1154  * The initial sparse matrix in the spm format.
1155  *
1156  * @param[in] ord
1157  * The ordering which needs to be applied on the spm to generate the
1158  * block csc.
1159  *
1160  * @param[in] solvmtx
1161  * The solver matrix structure which describes the data distribution.
1162  *
1163  * @param[inout] rowtab
1164  * The row tab of the bcsc or the row tab associated to At.
1165  *
1166  * @param[inout] bcsc
1167  * On entry, the pointer to an allocated bcsc.
1168  * On exit, the bcsc fields are updated.
1169  *
1170  *******************************************************************************
1171  *
1172  * @retval The number of values added to the bcsc structure.
1173  *
1174  *******************************************************************************/
1175 static inline pastix_int_t
1176 bcsc_dinit_At( const spmatrix_t *spm,
1177  const pastix_order_t *ord,
1178  const SolverMatrix *solvmtx,
1179  pastix_int_t *rowtab,
1180  pastix_bcsc_t *bcsc )
1181 {
1182  double *values = (double*)(spm->values);
1183  double *Uvalues;
1184  pastix_int_t *loc2glob = spm->loc2glob;
1185  pastix_int_t *colptr = spm->colptr;
1186  pastix_int_t *rowptr = spm->rowptr;
1187  pastix_int_t *dofs = spm->dofs;
1188  pastix_int_t dof = spm->dof;
1189  pastix_int_t nbelt = 0;
1190  SolverCblk *cblk;
1191  pastix_int_t *coltab;
1192  pastix_int_t k, j, ig, jg, igp, jgp, igpe, jgpe;
1193  pastix_int_t itercblk, baseval;
1194  pastix_int_t idofi, idofj, dofi, dofj;
1195  pastix_int_t colidx, rowidx, pos;
1196  double (*_bcsc_conj)(double) = NULL;
1197 
1198  /* We're working on U. */
1199  if ( spm->mtxtype == SpmGeneral ) {
1200  _bcsc_conj = __fct_id;
1201  Uvalues = (double*)(bcsc->Uvalues);
1202  }
1203  /* L^[t|h] part of the matrix. */
1204  else {
1205  /*
1206  * precision_generator/sub.py will change SpmHermitian to SpmSymmetric
1207  * Don't use else or else if.
1208  */
1209  if( spm->mtxtype == SpmSymmetric ) {
1210  _bcsc_conj = __fct_conj;
1211  }
1212  if( spm->mtxtype == SpmSymmetric ) {
1213  _bcsc_conj = __fct_id;
1214  }
1215  Uvalues = (double*)(bcsc->Lvalues);
1216  }
1217  assert( _bcsc_conj != NULL );
1218 
1219  baseval = spm->baseval;
1220  /*
1221  * Initializes the values of the matrix A^t in the blocked csc format. This
1222  * applies the permutation to the values array.
1223  *
1224  * Goes through the columns of the spm.
1225  * j is the initial local column index.
1226  * jg is the initial global column index.
1227  * jgp is the "new" jg index -> the one resulting from the permutation.
1228  */
1229  for ( j = 0; j < spm->n; j++, colptr++, loc2glob++ ) {
1230  jg = ( spm->loc2glob == NULL ) ? j : *loc2glob - baseval;
1231  jgp = ord->permtab[jg];
1232  jgpe = ( dof > 0 ) ? jgp * dof : dofs[ jg ] - baseval;
1233  dofj = ( dof > 0 ) ? dof : dofs[ jg+1 ] - dofs[ jg ];
1234 
1235  /*
1236  * Goes through the row of the column j.
1237  * ig is the initial global row index (for the row index local = global).
1238  * igp is the "new" ig index -> the one resulting from the permutation.
1239  */
1240  for ( k = colptr[0]; k < colptr[1]; k++, rowptr++ ) {
1241  ig = *rowptr - baseval;
1242  igp = ord->permtab[ig];
1243  igpe = ( dof > 0 ) ? igp * dof : dofs[ ig ] - baseval;
1244  dofi = ( dof > 0 ) ? dof : dofs[ ig+1 ] - dofs[ ig ];
1245 
1246  /* Diagonal block of a symmetric matrix. */
1247  if ( ( ig == jg ) && ( spm->mtxtype != SpmGeneral ) ) {
1248  values += dofi * dofj;
1249  continue;
1250  }
1251  itercblk = bcsc->col2cblk[ igpe ];
1252 
1253  /*
1254  * If MPI is used in shared memory, the block can belong to another
1255  * processor, in this case the row is skipped.
1256  */
1257  /* The block of the row igp belongs to the current processor. */
1258  if ( itercblk >= 0 ) {
1259  /* Gets the block in which the data will be added. */
1260  cblk = solvmtx->cblktab + itercblk;
1261  coltab = bcsc->cscftab[cblk->bcscnum].coltab;
1262 
1263  /* Copies the values from element (ig, jg) to position (igpe, jgpe) of the expanded bcsc. */
1264  for ( idofj = 0; idofj < dofj; idofj++ ) {
1265  rowidx = jgpe + idofj;
1266  colidx = igpe - cblk->fcolnum;
1267  for ( idofi = 0; idofi < dofi; idofi++, colidx++, values++ ) {
1268  pos = coltab[ colidx ];
1269 
1270  /* Copies the index/value */
1271  assert( rowidx >= 0 );
1272  assert( rowidx < spm->gNexp );
1273  rowtab[ pos ] = rowidx;
1274  Uvalues[ pos ] = _bcsc_conj( *values );
1275 
1276  coltab[ colidx ]++;
1277  nbelt++;
1278  }
1279  }
1280  }
1281  /* The block of the row igp belongs to another processor. */
1282  else {
1283  values += dofi * dofj;
1284  continue;
1285  }
1286  }
1287  }
1288  return nbelt;
1289 }
1290 
1291 /**
1292  *******************************************************************************
1293  *
1294  * @ingroup bcsc_internal
1295  *
1296  * @brief Sorts the block csc subarray associated to each column block.
1297  *
1298  *******************************************************************************
1299  *
1300  * @param[in] bcsc
1301  * On entry, the pointer to an allocated bcsc.
1302  *
1303  * @param[in] rowtab
1304  * The row indexes array of the bcsc to be sorted.
1305  *
1306  * @param[in] valtab
1307  * The values array of the bcsc, to be sorted together with the
1308  * rowtab array.
1309  *
1310  *******************************************************************************/
1311 static inline void
1312 bcsc_dsort( const pastix_bcsc_t *bcsc,
1313  pastix_int_t *rowtab,
1314  double *valtab )
1315 {
1316  bcsc_cblk_t *block;
1317  pastix_int_t block_num, col, len_col_block, block_nbr, col_nbr;
1318  void *sortptr[2];
1319 
1320  block_nbr = bcsc->cscfnbr;
1321  block = bcsc->cscftab;
1322  for ( block_num = 0; block_num < block_nbr; block_num++, block++ ) {
1323 
1324  col_nbr = block->colnbr;
1325  for ( col = 0; col < col_nbr; col++ ) {
1326 
1327  sortptr[0] = (void*)(rowtab + block->coltab[col]);
1328  sortptr[1] = (void*)(valtab + block->coltab[col]);
1329 
1330  len_col_block = block->coltab[col+1] - block->coltab[col];
1331 #if !defined(NDEBUG)
1332  {
1333  pastix_int_t gN = bcsc->gN;
1334  pastix_int_t i;
1335 
1336  for ( i = 0; i < len_col_block; i++ ) {
1337  assert( rowtab[ block->coltab[col] + i ] >= 0 );
1338  assert( rowtab[ block->coltab[col] + i ] < gN );
1339  }
1340  }
1341 #endif
1342 
1343  d_qsortIntFloatAsc( sortptr, len_col_block );
1344  }
1345  }
1346 }
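
bcsc_dsort hands each column slice [coltab[col], coltab[col+1]) to d_qsortIntFloatAsc, which reorders the row indexes in ascending order and keeps the matching values aligned. A standalone sketch of such a paired sort using a simple insertion sort (illustrative only, not the PaStiX routine):

#include <stdio.h>

/* Sorts row[0..n-1] in ascending order and applies the same permutation to val. */
static void sort_rows_values( int *row, double *val, int n )
{
    int i, j;
    for ( i = 1; i < n; i++ ) {
        int    r = row[i];
        double v = val[i];
        for ( j = i - 1; ( j >= 0 ) && ( row[j] > r ); j-- ) {
            row[j+1] = row[j];
            val[j+1] = val[j];
        }
        row[j+1] = r;
        val[j+1] = v;
    }
}

int main( void )
{
    int    row[4] = { 3, 0, 2, 1 };
    double val[4] = { 30., 0., 20., 10. };
    int    i;

    sort_rows_values( row, val, 4 );
    for ( i = 0; i < 4; i++ ) {
        printf( "row %d -> value %.0f\n", row[i], val[i] );
    }
    return 0;
}
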
1347 
1348 /**
1349  *******************************************************************************
1350  *
1351  * @brief Initializes a centralized double block csc.
1352  *
1353  *******************************************************************************
1354  *
1355  * @param[in] spm
1356  * The initial sparse matrix in the spm format.
1357  *
1358  * @param[in] ord
1359  * The ordering that needs to be applied on the spm to generate the
1360  * block csc.
1361  *
1362  * @param[in] solvmtx
1363  * The solver matrix structure that describes the data distribution.
1364  *
1365  * @param[in] initAt
1366  * A flag to enable/disable the initialization of A'.
1367  *
1368  * @param[inout] bcsc
1369  * On entry, the pointer to an allocated bcsc.
1370  * On exit, the bcsc stores the input spm with the permutation applied
1371  * and grouped according to the distribution described in solvmtx.
1372  *
1373  * @param[in] valuesize
1374  * The number of non zero unknowns in the matrix.
1375  *
1376  *******************************************************************************/
1377 void
1378 bcsc_dinit( const spmatrix_t *spm,
1379  const pastix_order_t *ord,
1380  const SolverMatrix *solvmtx,
1381  int initAt,
1382  pastix_bcsc_t *bcsc,
1383  pastix_int_t valuesize )
1384 {
1385  pastix_int_t nbelt = 0;
1386 #if defined(PASTIX_WITH_MPI)
1387  pastix_int_t nbelt_recv = 0;
1388  bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
1389 #endif
1390 
1391  /* Exchanges the data if the spm is distributed and adds the received data of A. */
1392 #if defined(PASTIX_WITH_MPI)
1393  if ( bcsc_comm != NULL ) {
1394  nbelt_recv = bcsc_dexchange_values_A( spm, ord, solvmtx, bcsc );
1395  nbelt_recv += bcsc_dexchange_values_AAt( spm, ord, solvmtx, bcsc->rowtab, bcsc );
1396  nbelt += nbelt_recv;
1397  }
1398 #endif
1399 
1400  /* Initializes the blocked structure of the matrix A. */
1401  nbelt += bcsc_dinit_A( spm, ord, solvmtx, bcsc );
1402 
1403  /* Initializes the blocked structure of the matrix At if the matrix is not general. */
1404  if ( spm->mtxtype != SpmGeneral ) {
1405  /* Exchanges the data if the spm is distributed and adds the received data of At. */
1406 #if defined(PASTIX_WITH_MPI)
1407  if ( bcsc_comm != NULL ) {
1408  nbelt_recv = bcsc_dexchange_values_At( spm, ord, solvmtx, bcsc->rowtab, bcsc );
1409  nbelt += nbelt_recv;
1410  }
1411 #endif
1412  nbelt += bcsc_dinit_At( spm, ord, solvmtx, bcsc->rowtab, bcsc );
1413  }
1414 
1415  /* Restores the correct coltab arrays. */
1416  bcsc_restore_coltab( bcsc );
1417 
1418  /* Sorts the csc. */
1419  bcsc_dsort( bcsc, bcsc->rowtab, bcsc->Lvalues );
1420 
1421  /* Initializes the blocked structure of the matrix At if the matrix is general. */
1422  if ( spm->mtxtype == SpmGeneral ) {
1423  /* If only the refinement is performed A^t is not required. */
1424  if ( initAt ) {
1425  pastix_int_t *trowtab, i;
1426  MALLOC_INTERN( bcsc->Uvalues, valuesize, double );
1427  MALLOC_INTERN( trowtab, valuesize, pastix_int_t );
1428 
1429  for (i=0; i<valuesize; i++) {
1430  trowtab[i] = -1;
1431  }
1432  nbelt = bcsc_dinit_At( spm, ord, solvmtx, trowtab, bcsc );
1433 #if defined(PASTIX_WITH_MPI)
1434  if ( bcsc_comm != NULL ) {
1435  nbelt_recv = bcsc_dexchange_values_At( spm, ord, solvmtx, trowtab, bcsc );
1436  nbelt_recv += bcsc_dexchange_values_AAt( spm, ord, solvmtx, trowtab, bcsc );
1437  nbelt += nbelt_recv;
1438  }
1439 #endif
1440  /* Restores the correct coltab arrays */
1441  bcsc_restore_coltab( bcsc );
1442 
1443  /* Sorts the transposed csc */
1444  bcsc_dsort( bcsc, trowtab, bcsc->Uvalues );
1445  memFree( trowtab );
1446  }
1447  }
1448  /* In the symmetric case, Lvalues is reused as Uvalues to save memory space. */
1449  else {
1450  bcsc->Uvalues = bcsc->Lvalues;
1451  }
1452 
1453  (void) nbelt;
1454 #if defined(PASTIX_WITH_MPI)
1455  (void) nbelt_recv;
1456 #endif
1457 }