PaStiX Handbook  6.3.2
bcsc_cinit.c
1 /**
2  *
3  * @file bcsc_cinit.c
4  *
5  * @copyright 2004-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
6  * Univ. Bordeaux. All rights reserved.
7  *
8  * @version 6.3.2
9  * @author Mathieu Faverge
10  * @author Pierre Ramet
11  * @author Xavier Lacoste
12  * @author Theophile Terraz
13  * @author Tony Delarue
14  * @author Vincent Bridonneau
15  * @author Alycia Lisito
16  * @date 2023-07-21
17  *
18  * @generated from /builds/solverstack/pastix/bcsc/bcsc_zinit.c, normal z -> c, Wed Dec 13 12:09:45 2023
19  *
20  **/
21 #include "common.h"
22 #include "pastix/order.h"
23 #include <spm.h>
24 #include "blend/solver.h"
25 #include "bcsc/bcsc.h"
26 #include "bcsc_c.h"
27 
28 #ifndef DOXYGEN_SHOULD_SKIP_THIS
29 static inline pastix_complex32_t
30 __fct_id( pastix_complex32_t val ) {
31  return val;
32 }
33 
34 static inline pastix_complex32_t
35 __fct_conj( pastix_complex32_t val ) {
36 #if defined(PRECISION_c) || defined(PRECISION_z)
37  return conjf( val );
38 #else
39  /* This function should not be called in this case. */
40  assert(0);
41  return val;
42 #endif
43 }
44 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
45 
46 #if defined(PASTIX_WITH_MPI)
47 /**
48  *******************************************************************************
49  *
50  * @brief Stores the data the current processor has to send to the other processors
51  * in the bcsc_comm structure.
52  *
53  * There are two cases:
54  *
55  * - If the matrix is general: the full columns and rows of the blocks are stored
56  * in Lvalues and Uvalues.
57  * The local data of the current process which are in remote blocks after
58  * the permutation need to be sent to the owner process. The data is stored
59  * in sendA if it is sent for the column only, in sendAt if it is sent for
60  * the row only and in sendAAt if it is sent for the row and column.
61  *
62  * - If the matrix is Symmetric or Hermitian: only the full columns of the blocks
63  * are stored in Lvalues (and Uvalues = Lvalues). Only the lower (or upper)
64  * triangular half of the spm is stored, therefore we need to duplicate
65  * the lower (or upper) data to fill the upper (or lower) half of the matrix
66  * in the blocks.
67  * The local data of the current process which are in remote blocks after
68  * the permutation need to be sent to the owner process. The data is stored
69  * in sendA if it is sent for the lower (or upper) half of the column, in
70  * sendAt if it is sent for the upper (or lower) half of the column and in
71  * sendAAt if it is sent for both the lower and upper half of the column.
72  * The diagonal values are stored in sendA only.
73  *
74  *******************************************************************************
75  *
76  * @param[in] spm
77  * The initial sparse matrix in the spm format.
78  *
79  * @param[in] ord
80  * The ordering that needs to be applied on the spm to generate the
81  * block csc.
82  *
83  * @param[in] col2cblk
84  * The array that matches the columns with the cblk indexes.
85  *
86  * @param[inout] bcsc_comm
87  * The structure in which the data is stored.
88  *
89  *******************************************************************************/
90 void
91 bcsc_cstore_data( const spmatrix_t *spm,
92  const pastix_order_t *ord,
93  const pastix_int_t *col2cblk,
94  bcsc_handle_comm_t *bcsc_comm )
95 {
96  pastix_complex32_t *values_c;
97  pastix_complex32_t *values = (pastix_complex32_t*)(spm->values);
98  pastix_int_t *colptr = spm->colptr;
99  pastix_int_t *rowptr = spm->rowptr;
100  pastix_int_t *loc2glob = spm->loc2glob;
101  pastix_int_t *dofs = spm->dofs;
102  pastix_int_t dof = spm->dof;
103  bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
104  pastix_int_t clustnbr = bcsc_comm->clustnbr;
105  pastix_int_t il, jl, ig, jg, igp, jgp, igpe, jgpe;
106  pastix_int_t ownerj, owneri, baseval;
107  pastix_int_t dofi, dofj, frow, lrow;
108  bcsc_exch_comm_t *data_sendA, *data_sendAt, *data_sendAAt;
109  bcsc_data_amount_t *data_cntA, *data_cntAt, *data_cntAAt;
110  int sym = (spm->mtxtype == SpmSymmetric) || (spm->mtxtype == SpmHermitian);
111 
112  /* Allocates the sending indexes and values buffers. */
113  bcsc_allocate_buf( bcsc_comm, PastixTagMemSend );
114 
115  /*
116  * Allocates and initialises the counters used to fill the values
117  * and indexes buffers.
118  */
119  MALLOC_INTERN( data_cntA, 3 * clustnbr, bcsc_data_amount_t );
120  memset( data_cntA, 0, 3 * clustnbr * sizeof(bcsc_data_amount_t) );
121  data_cntAt = data_cntA + clustnbr;
122  data_cntAAt = data_cntA + clustnbr * 2;
123 
124  baseval = spm->baseval;
125 
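 /*
  * Note: the values pointer walks the spm values array in storage order.
  * Each couple (i, j) owns a block of dofi * dofj scalars, so values is
  * advanced by dofi * dofj after every couple, even when nothing is sent.
  */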
126  /* Goes through the columns of spm. */
127  for ( jl = 0; jl < spm->n; jl++, colptr++, loc2glob++ ) {
128  jg = *loc2glob - baseval;
129  jgp = ord->permtab[jg];
130  jgpe = ( dof > 0 ) ? jgp * dof : dofs[ jg ] - baseval;
131  dofj = ( dof > 0 ) ? dof : dofs[ jg+1 ] - dofs[ jg ];
132 
133  frow = colptr[0] - baseval;
134  lrow = colptr[1] - baseval;
135  assert( (lrow - frow) >= 0 );
136 
137  ownerj = col2cblk[ jgpe ];
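 /*
  * col2cblk[] encodes the ownership of each expanded column: a negative
  * value -(p+1) means the column is owned by the remote process p, a
  * non-negative value is the index of the local column block.
  */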
138 
139  /* The column jp belongs to another process. */
140  if ( ownerj < 0 ) {
141  ownerj = - ownerj - 1;
142  data_comm = bcsc_comm->data_comm + ownerj;
143  data_sendA = &( data_comm->sendA );
144 
145  /* Goes through the rows of jl. */
146  for ( il = frow; il < lrow; il++ ) {
147  ig = rowptr[il] - baseval;
148  igp = ord->permtab[ig];
149  igpe = ( dof > 0 ) ? igp * dof : dofs[ ig ] - baseval;
150  dofi = ( dof > 0 ) ? dof : dofs[ ig+1 ] - dofs[ ig ];
151  owneri = col2cblk[ igpe ];
152 
153  /*
154  * The diagonal values (ip, jp) belong to the same process.
155  * They are sent to ownerj in the sym case for A only.
156  */
157  if ( sym && ( ig == jg ) ) {
158  data_sendA->idxbuf[data_cntA[ownerj].idxcnt ] = igp;
159  data_sendA->idxbuf[data_cntA[ownerj].idxcnt+1] = jgp;
160  data_cntA[ownerj].idxcnt += 2;
161 
162  values_c = ((pastix_complex32_t*)(data_sendA->valbuf)) + data_cntA[ownerj].valcnt;
163  memcpy( values_c, values, dofi * dofj * sizeof(pastix_complex32_t) );
164  data_cntA[ownerj].valcnt += dofi * dofj;
165 
166  values += dofi * dofj;
167  continue;
168  }
169 
170  /* The row ip belongs to another process. */
171  if ( owneri < 0 ) {
172  owneri = - owneri - 1;
173  data_comm = bcsc_comm->data_comm + owneri;
174 
175  /*
176  * The values (ip, jp) belong to the same remote process
177  * (owneri == ownerj). They are sent to this process for AAt.
178  */
179  if ( owneri == ownerj ) {
180  data_sendAAt = &( data_comm->sendAAt );
181 
182  data_sendAAt->idxbuf[data_cntAAt[ownerj].idxcnt ] = igp;
183  data_sendAAt->idxbuf[data_cntAAt[ownerj].idxcnt+1] = jgp;
184  data_cntAAt[ownerj].idxcnt += 2;
185 
186  values_c = ((pastix_complex32_t*)(data_sendAAt->valbuf)) + data_cntAAt[ownerj].valcnt;
187  memcpy( values_c, values, dofi * dofj * sizeof(pastix_complex32_t) );
188  data_cntAAt[ownerj].valcnt += dofi * dofj;
189  }
190  /*
191  * The values (ip, jp) belong to different processes.
192  * They are sent to owneri for At and to ownerj for A.
193  */
194  else {
195  data_sendAt = &( data_comm->sendAt );
196 
197  data_sendAt->idxbuf[data_cntAt[owneri].idxcnt ] = igp;
198  data_sendAt->idxbuf[data_cntAt[owneri].idxcnt+1] = jgp;
199  data_cntAt[owneri].idxcnt += 2;
200 
201  values_c = ((pastix_complex32_t*)(data_sendAt->valbuf)) + data_cntAt[owneri].valcnt;
202  memcpy( values_c, values, dofi * dofj * sizeof(pastix_complex32_t) );
203  data_cntAt[owneri].valcnt += dofi * dofj;
204 
205  data_sendA->idxbuf[data_cntA[ownerj].idxcnt ] = igp;
206  data_sendA->idxbuf[data_cntA[ownerj].idxcnt+1] = jgp;
207  data_cntA[ownerj].idxcnt += 2;
208 
209  values_c = ((pastix_complex32_t*)(data_sendA->valbuf)) + data_cntA[ownerj].valcnt;
210  memcpy( values_c, values, dofi * dofj * sizeof(pastix_complex32_t) );
211  data_cntA[ownerj].valcnt += dofi * dofj;
212  }
213  }
214  /* The row ip is local. */
215  else {
216  /*
217  * The values (ip, jp) belong to ownerj.
218  * They are sent to ownerj for A.
219  */
220  data_sendA->idxbuf[data_cntA[ownerj].idxcnt ] = igp;
221  data_sendA->idxbuf[data_cntA[ownerj].idxcnt+1] = jgp;
222  data_cntA[ownerj].idxcnt += 2;
223 
224  values_c = ((pastix_complex32_t*)(data_sendA->valbuf)) + data_cntA[ownerj].valcnt;
225  memcpy( values_c, values, dofi * dofj * sizeof(pastix_complex32_t) );
226  data_cntA[ownerj].valcnt += dofi * dofj;
227  }
228  values += dofi * dofj;
229  }
230  }
231  /* The column jp is local. */
232  else {
233  /* Goes through the rows of j. */
234  for ( il = frow; il < lrow; il++ ) {
235  ig = rowptr[il] - baseval;
236  igp = ord->permtab[ig];
237  igpe = ( dof > 0 ) ? igp * dof : dofs[ ig ] - baseval;
238  dofi = ( dof > 0 ) ? dof : dofs[ ig+1 ] - dofs[ ig ];
239  owneri = col2cblk[ igpe ];
240 
241  /*
242  * The diagonal values (ip, jp) have already been
243  * added to globcol in the sym case.
244  */
245  if ( sym && ( ig == jg ) ) {
246  values += dofi * dofj;
247  continue;
248  }
249 
250  /* The row ip belongs to another process. */
251  if ( owneri < 0 ) {
252  owneri = - owneri - 1;
253  data_comm = bcsc_comm->data_comm + owneri;
254  data_sendAt = &( data_comm->sendAt );
255 
256  /*
257  * The values (ip, jp) belong to owneri.
258  * They are sent to owneri for At.
259  */
260  data_sendAt->idxbuf[data_cntAt[owneri].idxcnt ] = igp;
261  data_sendAt->idxbuf[data_cntAt[owneri].idxcnt+1] = jgp;
262  data_cntAt[owneri].idxcnt += 2;
263 
264  values_c = ((pastix_complex32_t*)(data_sendAt->valbuf)) + data_cntAt[owneri].valcnt;
265  memcpy( values_c, values, dofi * dofj * sizeof(pastix_complex32_t) );
266  data_cntAt[owneri].valcnt += dofi * dofj;
267  }
268  values += dofi * dofj;
269  }
270  }
271  }
272 
273  memFree_null( data_cntA );
274 }
275 
276 /**
277  *******************************************************************************
278  *
279  * @ingroup bcsc_internal
280  *
281  * @brief Adds the remote data of A to the bcsc structure.
282  *
283  *******************************************************************************
284  *
285  * @param[in] spm
286  * The initial sparse matrix in the spm format.
287  *
288  * @param[in] ord
289  * The ordering that needs to be applied on the spm to generate the
290  * block csc.
291  *
292  * @param[in] solvmtx
293  * The solver matrix structure which describes the data distribution.
294  *
295  * @param[inout] bcsc
296  * On entry, the pointer to an allocated bcsc.
297  * On exit, the bcsc fields are updated.
298  *
299  * @param[in] values_buf
300  * The array containing the remote values.
301  *
302  * @param[in] idx_cnt
303  * The index for the beginning of the indexes array buffer
304  * corresponding to the values_buf.
305  *
306  * @param[in] idx_size
307  * The number of indexes corresponding to the values_buf.
308  *
309  * @param[in] AAt
310  * TODO
311  *
312  *******************************************************************************/
313 static inline pastix_int_t
314 bcsc_chandle_recv_A( const spmatrix_t *spm,
315  const pastix_order_t *ord,
316  const SolverMatrix *solvmtx,
317  pastix_bcsc_t *bcsc,
318  pastix_complex32_t *values_buf,
319  pastix_int_t idx_cnt,
320  pastix_int_t idx_size,
321  pastix_int_t AAt )
322 {
323  bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
324  pastix_int_t *dofs = spm->dofs;
325  pastix_int_t dof = spm->dof;
326  SolverCblk *cblk;
327  pastix_int_t *coltab;
328  pastix_int_t k, igp, jgp, ig, jg, igpe, jgpe;
329  pastix_int_t itercblk;
330  pastix_int_t idofi, idofj, dofi, dofj;
331  pastix_int_t colidx, rowidx, pos;
332  pastix_int_t clustnum = bcsc_comm->clustnum;
333  pastix_complex32_t *Values = bcsc->Lvalues;
334  pastix_int_t *indexes = ( AAt == 0 ) ? bcsc_comm->data_comm[clustnum].sendA.idxbuf :
335  bcsc_comm->data_comm[clustnum].sendAAt.idxbuf;
336  pastix_int_t nbelt = 0;
337 
338  /*
339  * Goes through the received indexes in order to add the data
340  * received.
341  */
342  indexes += idx_cnt;
343  for ( k = 0; k < idx_size; k+=2, indexes+=2 ) {
344  /* Gets received indexes jgp and igp. */
345  igp = indexes[0];
346  jgp = indexes[1];
347 
348  ig = ord->peritab[igp];
349  jg = ord->peritab[jgp];
350  igpe = (dof > 0) ? igp * dof : dofs[ ig ] - spm->baseval;
351  jgpe = (dof > 0) ? jgp * dof : dofs[ jg ] - spm->baseval;
352  dofi = (dof > 0) ? dof : dofs[ig+1] - dofs[ig];
353  dofj = (dof > 0) ? dof : dofs[jg+1] - dofs[jg];
354 
355  itercblk = bcsc->col2cblk[ jgpe ];
356  assert( itercblk >= 0 );
357 
358  /* Gets the block to which the received data will be added. */
359  cblk = solvmtx->cblktab + itercblk;
360  coltab = bcsc->cscftab[cblk->bcscnum].coltab;
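 /*
  * coltab[ col ] holds the next free position in rowtab/Values for each
  * column of the block. It is advanced while the received values are
  * inserted and is restored later by bcsc_restore_coltab().
  */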
361 
362  /* Goes through the values at (igp, jgp). */
363  colidx = jgpe - cblk->fcolnum;
364  for ( idofj = 0; idofj < dofj; idofj++, colidx++ ) {
365  rowidx = igpe;
366  pos = coltab[ colidx ];
367  for ( idofi = 0; idofi < dofi; idofi++, rowidx++, pos++, values_buf++ ) {
368  /* Adds the row igp. */
369  assert( rowidx >= 0 );
370  assert( rowidx < spm->gNexp );
371  bcsc->rowtab[ pos ] = rowidx;
372  /* Adds the data at row igp and column jgp. */
373  Values[ pos ] = *values_buf;
374  nbelt++;
375  }
376  coltab[ colidx ] += dofi;
377  assert( coltab[colidx] <= coltab[colidx+1] );
378  }
379  }
380 
381  return nbelt;
382 }
383 
384 /**
385  *******************************************************************************
386  *
387  * @ingroup bcsc_internal
388  *
389  * @brief Adds the remote data of At to the bcsc structure.
390  *
391  *******************************************************************************
392  *
393  * @param[in] spm
394  * The initial sparse matrix in the spm format.
395  *
396  * @param[in] ord
397  * The ordering that needs to be applied on the spm to generate the
398  * block csc.
399  *
400  * @param[in] solvmtx
401  * The solver matrix structure which describes the data distribution.
402  *
403  * @param[inout] rowtab
404  * The row tab of the bcsc or the row tab associated to At.
405  *
406  * @param[inout] bcsc
407  * On entry, the pointer to an allocated bcsc.
408  * On exit, the bcsc fields are updated.
409  *
410  * @param[in] values_buf
411  * The array containing the remote values.
412  *
413  * @param[in] idx_cnt
414  * The index for the beginning of the indexes array buffer
415  * corresponding to the values_buf.
416  *
417  * @param[in] idx_size
418  * The number of indexes corresponding to the values_buf.
419  *
420  * @param[in] AAt
421  * TODO
422  *
423  *******************************************************************************/
424 static inline pastix_int_t
425 bcsc_chandle_recv_At( const spmatrix_t *spm,
426  const pastix_order_t *ord,
427  const SolverMatrix *solvmtx,
428  pastix_int_t *rowtab,
429  pastix_bcsc_t *bcsc,
430  pastix_complex32_t *values_buf,
431  pastix_int_t idx_cnt,
432  pastix_int_t idx_size,
433  pastix_int_t AAt )
434 {
435  bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
436  pastix_int_t *dofs = spm->dofs;
437  pastix_int_t dof = spm->dof;
438  SolverCblk *cblk;
439  pastix_int_t *coltab;
440  pastix_int_t k, igp, jgp, ig, jg, igpe, jgpe;
441  pastix_int_t itercblk;
442  pastix_int_t idofi, idofj, dofi, dofj;
443  pastix_int_t colidx, rowidx, pos;
444  pastix_int_t clustnum = bcsc_comm->clustnum;
445  pastix_int_t *indexes = ( AAt == 0 ) ? bcsc_comm->data_comm[clustnum].sendAt.idxbuf :
446  bcsc_comm->data_comm[clustnum].sendAAt.idxbuf;
447  pastix_complex32_t *Values;
448  pastix_complex32_t (*_bcsc_conj)(pastix_complex32_t) = __fct_id;
449  pastix_int_t nbelt = 0;
450 
451  /* Gets the Uvalues in the general case and the Lvalues in the sym case. */
452  if ( spm->mtxtype == SpmGeneral ) {
453  _bcsc_conj = __fct_id;
454  Values = (pastix_complex32_t*)(bcsc->Uvalues);
455  }
456  else {
457  if( spm->mtxtype == SpmHermitian ) {
458  _bcsc_conj = __fct_conj;
459  }
460  if( spm->mtxtype == SpmSymmetric ) {
461  _bcsc_conj = __fct_id;
462  }
463  Values = (pastix_complex32_t*)(bcsc->Lvalues);
464  }
465 
466  /*
467  * Goes through the received indexes in order to add the data
468  * received.
469  */
470  indexes += idx_cnt;
471  for ( k = 0; k < idx_size; k+=2, indexes+=2 ) {
472  /* Gets received indexes igp and jgp. */
473  igp = indexes[0];
474  jgp = indexes[1];
475 
476  ig = ord->peritab[igp];
477  jg = ord->peritab[jgp];
478  igpe = (dof > 0) ? igp * dof : dofs[ ig ] - spm->baseval;
479  jgpe = (dof > 0) ? jgp * dof : dofs[ jg ] - spm->baseval;
480  dofi = (dof > 0) ? dof : dofs[ig+1] - dofs[ig];
481  dofj = (dof > 0) ? dof : dofs[jg+1] - dofs[jg];
482 
483  itercblk = bcsc->col2cblk[ igpe ];
484  assert( itercblk >= 0 );
485 
486  /* Gets the block to which the received data will be added. */
487  cblk = solvmtx->cblktab + itercblk;
488  coltab = bcsc->cscftab[cblk->bcscnum].coltab;
489 
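 /*
  * The received values correspond to the block A(igp, jgp): they are
  * inserted transposed, in the columns of igp with jgp as row index,
  * and conjugated in the Hermitian case.
  */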
490  /* Goes through the values at (igp, jgp). */
491  for ( idofj = 0; idofj < dofj; idofj++ ) {
492  rowidx = jgpe + idofj;
493  colidx = igpe - cblk->fcolnum;
494  for ( idofi = 0; idofi < dofi; idofi++, colidx++, values_buf++ ) {
495  pos = coltab[ colidx ];
496  /* Adds the row jgp. */
497  assert( rowidx >= 0 );
498  assert( rowidx < spm->gNexp );
499  rowtab[ pos ] = rowidx;
500 
501  /* Adds the data at row jgp and column igp. */
502  Values[ pos ] = _bcsc_conj( *values_buf );
503  coltab[ colidx ] ++;
504  assert( coltab[colidx] <= coltab[colidx+1] );
505  nbelt++;
506  }
507  }
508  }
509  return nbelt;
510 }
511 
512 /**
513  *******************************************************************************
514  *
515  * @ingroup bcsc_internal
516  *
517  * @brief Adds the remote data of A and At to the bcsc structure.
518  *
519  *******************************************************************************
520  *
521  * @param[in] spm
522  * The initial sparse matrix in the spm format.
523  *
524  * @param[in] ord
525  * The ordering that needs to be applied on the spm to generate the
526  * block csc.
527  *
528  * @param[in] solvmtx
529  * The solver matrix structure which describes the data distribution.
530  *
531  * @param[inout] rowtab
532  * The row tab of the bcsc or the row tab associated to At.
533  *
534  * @param[inout] bcsc
535  * On entry, the pointer to an allocated bcsc.
536  * On exit, the bcsc fields are updated.
537  *
538  * @param[in] values_buf
539  * The array containing the remote values.
540  *
541  * @param[in] idx_cnt
542  * The index for the beginning of the indexes array buffer
543  * corresponding to the values_buf.
544  *
545  * @param[in] idx_size
546  * The number of indexes corresponding to the values_buf.
547  *
548  *******************************************************************************/
549 static inline pastix_int_t
550 bcsc_chandle_recv_AAt( const spmatrix_t *spm,
551  const pastix_order_t *ord,
552  const SolverMatrix *solvmtx,
553  pastix_int_t *rowtab,
554  pastix_bcsc_t *bcsc,
555  pastix_complex32_t *values_buf,
556  pastix_int_t idx_cnt,
557  pastix_int_t idx_size )
558 {
559  bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
560  pastix_int_t *dofs = spm->dofs;
561  pastix_int_t dof = spm->dof;
562  SolverCblk *cblki, *cblkj;
563  pastix_int_t *coltabi, *coltabj;
564  pastix_int_t k, igp, jgp, ig, jg, igpe, jgpe;
565  pastix_int_t itercblki, itercblkj;
566  pastix_int_t idofi, idofj, dofi, dofj;
567  pastix_int_t colj, rowj, posj;
568  pastix_int_t coli, rowi, posi;
569  pastix_int_t clustnum = bcsc_comm->clustnum;
570  pastix_int_t *indexes = bcsc_comm->data_comm[clustnum].sendAAt.idxbuf;
571  pastix_complex32_t *ValuesA, *ValuesAt;
572  pastix_complex32_t (*_bcsc_conj)(pastix_complex32_t) = __fct_id;
573  pastix_int_t nbelt = 0;
574 
575  /* Gets the Uvalues in the general case and the Lvalues in the sym case. */
576  if ( spm->mtxtype == SpmGeneral ) {
577  _bcsc_conj = __fct_id;
578  ValuesAt = (pastix_complex32_t*)(bcsc->Uvalues);
579  }
580  else {
581  if( spm->mtxtype == SpmHermitian ) {
582  _bcsc_conj = __fct_conj;
583  }
584  if( spm->mtxtype == SpmSymmetric ) {
585  _bcsc_conj = __fct_id;
586  }
587  ValuesAt = (pastix_complex32_t*)(bcsc->Lvalues);
588  }
589  ValuesA = (pastix_complex32_t*)(bcsc->Lvalues);
590 
591  /*
592  * Goes through the received indexes in order to add the data
593  * received.
594  */
595  indexes += idx_cnt;
596  for ( k = 0; k < idx_size; k+=2, indexes+=2 ) {
597  /* Gets received indexes igp and jgp. */
598  igp = indexes[0];
599  jgp = indexes[1];
600 
601  ig = ord->peritab[igp];
602  jg = ord->peritab[jgp];
603  igpe = (dof > 0) ? igp * dof : dofs[ ig ] - spm->baseval;
604  jgpe = (dof > 0) ? jgp * dof : dofs[ jg ] - spm->baseval;
605  dofi = (dof > 0) ? dof : dofs[ig+1] - dofs[ig];
606  dofj = (dof > 0) ? dof : dofs[jg+1] - dofs[jg];
607 
608  itercblki = bcsc->col2cblk[ igpe ];
609  itercblkj = bcsc->col2cblk[ jgpe ];
610  assert( itercblki >= 0 );
611  assert( itercblkj >= 0 );
612 
613  /* Gets the block to which the received data will be added. */
614  cblki = solvmtx->cblktab + itercblki;
615  cblkj = solvmtx->cblktab + itercblkj;
616  coltabi = bcsc->cscftab[cblki->bcscnum].coltab;
617  coltabj = bcsc->cscftab[cblkj->bcscnum].coltab;
618 
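 /*
  * Each received block is scattered twice: into the columns of jgp for A
  * (bcsc->rowtab/ValuesA) and, transposed, into the columns of igp for At
  * (rowtab/ValuesAt).
  */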
619  /* Goes through the values at (igp, jgp). */
620  colj = jgpe - cblkj->fcolnum;
621  for ( idofj = 0; idofj < dofj; idofj++ ) {
622  rowj = igpe;
623  posj = coltabj[ colj ];
624  rowi = jgpe + idofj;
625  coli = igpe - cblki->fcolnum;
626  for ( idofi = 0; idofi < dofi; idofi++, coli++, rowj++, posj++, values_buf++ ) {
627  posi = coltabi[ coli ];
628  /* Adds the row jgp for At and the row igp for A. */
629  assert( rowi >= 0 );
630  assert( rowj >= 0 );
631  assert( rowi < spm->gNexp );
632  assert( rowj < spm->gNexp );
633 
634  rowtab[ posi ] = rowi;
635  bcsc->rowtab[ posj ] = rowj;
636 
637  /* Adds the data to At at (jgp, igp) and to A at (igp, jgp). */
638  ValuesAt[ posi ] = _bcsc_conj( *values_buf );
639  ValuesA [ posj ] = *values_buf;
640  coltabi[ coli ] ++;
641  assert( coltabi[coli] <= coltabi[coli+1] );
642  nbelt++;
643  }
644  coltabj[ colj ] += dofi;
645  assert( coltabj[colj] <= coltabj[colj+1] );
646  }
647  }
648 
649  return nbelt;
650 }
651 
652 /**
653  *******************************************************************************
654  *
655  * @ingroup bcsc_internal
656  *
657  * @brief Exchanges and stores the remote values of A.
658  *
659  *******************************************************************************
660  *
661  * @param[in] spm
662  * The initial sparse matrix in the spm format.
663  *
664  * @param[in] ord
665  * The ordering which needs to be applied on the spm to generate the
666  * block csc.
667  *
668  * @param[in] solvmtx
669  * The solver matrix structure which describes the data distribution.
670  *
671  * @param[inout] bcsc
672  * On entry, the pointer to an allocated bcsc.
673  * On exit, the bcsc fields are updated with the remote values of A.
674  *
675  *******************************************************************************/
676 static inline pastix_int_t
677 bcsc_cexchange_values_A( const spmatrix_t *spm,
678  const pastix_order_t *ord,
679  const SolverMatrix *solvmtx,
680  pastix_bcsc_t *bcsc )
681 {
682  bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
683  pastix_int_t clustnbr = bcsc_comm->clustnbr;
684  pastix_int_t clustnum = bcsc_comm->clustnum;
685  bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
686  bcsc_proc_comm_t *data_send = NULL;
687  bcsc_proc_comm_t *data_recv = NULL;
688  pastix_complex32_t *val_buf = NULL;
689  pastix_int_t idx_size = 0;
690  pastix_int_t counter_req = 0;
691  pastix_int_t nbelt_recv = 0;
692  pastix_int_t cnt = 0;
693  pastix_int_t idx_cnt[clustnbr];
694  MPI_Status statuses[clustnbr-1];
695  MPI_Request requests[clustnbr-1];
696  bcsc_data_amount_t *sends, *recvs;
697  pastix_int_t c_send, c_recv, k;
698 
699  /* Allocates the receiving indexes and values buffers. */
700  if ( bcsc_comm->max_idx != 0 ) {
701  MALLOC_INTERN( val_buf, bcsc_comm->max_val, pastix_complex32_t );
702  }
703 
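 /*
  * idx_cnt[k] is the offset of the indexes coming from process k inside
  * the index buffer read by bcsc_chandle_recv_A(); the indexes themselves
  * were exchanged beforehand and stored in data_comm[clustnum].
  */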
704  for ( k = 0; k < clustnbr; k++ ) {
705  if ( k == clustnum ) {
706  idx_cnt[k] = 0;
707  continue;
708  }
709  idx_cnt[ k ] = cnt;
710  cnt += data_comm[k].recvA.idxcnt;
711  }
712 
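 /*
  * The exchange follows a ring pattern: values are sent towards clustnum+1,
  * clustnum+2, ... while receptions are processed from clustnum-1,
  * clustnum-2, ... which spreads the traffic over the processes.
  */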
713  /* Sends the values. */
714  c_send = (clustnum+1) % clustnbr;
715  for ( k = 0; k < clustnbr-1; k++ ) {
716  data_send = data_comm + c_send;
717  sends = &( data_send->sendA.size );
718  if ( c_send == clustnum ) {
719  continue;
720  }
721 
722  /* Posts the emissions of the values. */
723  if ( sends->valcnt != 0 ) {
724  MPI_Isend( data_send->sendA.valbuf, sends->valcnt, PASTIX_MPI_COMPLEX32,
725  c_send, PastixTagValuesA, bcsc_comm->comm, &requests[counter_req++] );
726  }
727  c_send = (c_send+1) % clustnbr;
728  }
729 
730  /* Receives the values. */
731  c_recv = (clustnum-1+clustnbr) % clustnbr;
732  for ( k = 0; k < clustnbr-1; k++ ) {
733  data_recv = data_comm + c_recv;
734  recvs = &( data_recv->recvA );
735  if ( c_recv == clustnum ) {
736  continue;
737  }
738 
739  /* Posts the receptions of the values. */
740  if ( recvs->valcnt != 0 ) {
741  MPI_Recv( val_buf, recvs->valcnt, PASTIX_MPI_COMPLEX32,
742  c_recv, PastixTagValuesA, bcsc_comm->comm, MPI_STATUS_IGNORE );
743  idx_size = recvs->idxcnt;
744  nbelt_recv += bcsc_chandle_recv_A( spm, ord, solvmtx, bcsc,
745  val_buf, idx_cnt[c_recv], idx_size, 0 );
746  }
747  c_recv = (c_recv-1+clustnbr) % clustnbr;
748  }
749 
750  MPI_Waitall( counter_req, requests, statuses );
751  free( val_buf );
752  assert( nbelt_recv == bcsc_comm->data_comm[clustnum].recvA.valcnt );
753  bcsc_free_buf( bcsc_comm, PastixTagMemSendValA );
754  bcsc_free_buf( bcsc_comm, PastixTagMemRecvIdxA );
755  return nbelt_recv;
756 }
757 
758 /**
759  *******************************************************************************
760  *
761  * @ingroup bcsc_internal
762  *
763  * @brief Exchanges and stores the remote values of At.
764  *
765  *******************************************************************************
766  *
767  * @param[in] spm
768  * The initial sparse matrix in the spm format.
769  *
770  * @param[in] ord
771  * The ordering which needs to be applied on the spm to generate the
772  * block csc.
773  *
774  * @param[in] solvmtx
775  * The solver matrix structure which describes the data distribution.
776  *
777  * @param[inout] rowtab
778  * The row tab of the bcsc or the row tab associated to At.
779  *
780  * @param[inout] bcsc
781  * On entry, the pointer to an allocated bcsc.
782  * On exit, the bcsc fields are updated with the remote values of At.
783  *
784  *******************************************************************************/
785 static inline pastix_int_t
786 bcsc_cexchange_values_At( const spmatrix_t *spm,
787  const pastix_order_t *ord,
788  const SolverMatrix *solvmtx,
789  pastix_int_t *rowtab,
790  pastix_bcsc_t *bcsc )
791 {
792  bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
793  pastix_int_t clustnbr = bcsc_comm->clustnbr;
794  pastix_int_t clustnum = bcsc_comm->clustnum;
795  bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
796  bcsc_proc_comm_t *data_send = NULL;
797  bcsc_proc_comm_t *data_recv = NULL;
798  pastix_complex32_t *val_buf = NULL;
799  pastix_int_t idx_size = 0;
800  pastix_int_t counter_req = 0;
801  pastix_int_t nbelt_recv = 0;
802  pastix_int_t cnt = 0;
803  pastix_int_t idx_cnt[clustnbr];
804  MPI_Status statuses[clustnbr-1];
805  MPI_Request requests[clustnbr-1];
806  bcsc_data_amount_t *sends, *recvs;
807  pastix_int_t c_send, c_recv, k;
808 
809  /* Allocates the receiving indexes and values buffers. */
810  if ( bcsc_comm->max_idx != 0 ) {
811  MALLOC_INTERN( val_buf, bcsc_comm->max_val, pastix_complex32_t );
812  }
813 
814  for ( k = 0; k < clustnbr; k++ ) {
815  if ( k == clustnum ) {
816  idx_cnt[k] = 0;
817  continue;
818  }
819  idx_cnt[ k ] = cnt;
820  cnt += data_comm[k].recvAt.idxcnt;
821  }
822 
823  /* Sends the values. */
824  c_send = (clustnum+1) % clustnbr;
825  for ( k = 0; k < clustnbr-1; k++ ) {
826  data_send = data_comm + c_send;
827  sends = &( data_send->sendAt.size );
828  if ( c_send == clustnum ) {
829  continue;
830  }
831 
832  /* Posts the emissions of the values. */
833  if ( sends->valcnt != 0 ) {
834  MPI_Isend( data_send->sendAt.valbuf, sends->valcnt, PASTIX_MPI_COMPLEX32,
835  c_send, PastixTagValuesAt, bcsc_comm->comm, &requests[counter_req++] );
836  }
837  c_send = (c_send+1) % clustnbr;
838  }
839 
840  /* Receives the values. */
841  c_recv = (clustnum-1+clustnbr) % clustnbr;
842  for ( k = 0; k < clustnbr-1; k++ ) {
843  data_recv = data_comm + c_recv;
844  recvs = &( data_recv->recvAt );
845  if ( c_recv == clustnum ) {
846  continue;
847  }
848 
849  /* Posts the receptions of the values. */
850  if ( recvs->valcnt != 0 ) {
851  MPI_Recv( val_buf, recvs->valcnt, PASTIX_MPI_COMPLEX32,
852  c_recv, PastixTagValuesAt, bcsc_comm->comm, MPI_STATUS_IGNORE );
853  idx_size = recvs->idxcnt;
854  nbelt_recv += bcsc_chandle_recv_At( spm, ord, solvmtx, rowtab, bcsc,
855  val_buf, idx_cnt[c_recv], idx_size, 0 );
856  }
857  c_recv = (c_recv-1+clustnbr) % clustnbr;
858  }
859 
860  MPI_Waitall( counter_req, requests, statuses );
861  free( val_buf );
862  assert( nbelt_recv == bcsc_comm->data_comm[clustnum].recvAt.valcnt );
863  bcsc_free_buf( bcsc_comm, PastixTagMemSendValAt );
864  bcsc_free_buf( bcsc_comm, PastixTagMemRecvIdxAt );
865  return nbelt_recv;
866 }
867 
868 /**
869  *******************************************************************************
870  *
871  * @ingroup bcsc_internal
872  *
873  * @brief Exchanges and stores the remote values of A and At.
874  *
875  *******************************************************************************
876  *
877  * @param[in] spm
878  * The initial sparse matrix in the spm format.
879  *
880  * @param[in] ord
881  * The ordering which needs to be applied on the spm to generate the
882  * block csc.
883  *
884  * @param[in] solvmtx
885  * The solver matrix structure which describes the data distribution.
886  *
887  * @param[inout] rowtabAt
888  * The row tab of the bcsc or the row tab associated to At.
889  *
890  * @param[inout] bcsc
891  * On entry, the pointer to an allocated bcsc.
892  * On exit, the bcsc fields are updated with the remote values of A and At.
893  *
894  *******************************************************************************/
895 static inline pastix_int_t
896 bcsc_cexchange_values_AAt( const spmatrix_t *spm,
897  const pastix_order_t *ord,
898  const SolverMatrix *solvmtx,
899  pastix_int_t *rowtabAt,
900  pastix_bcsc_t *bcsc )
901 {
902  bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
903  pastix_int_t clustnbr = bcsc_comm->clustnbr;
904  pastix_int_t clustnum = bcsc_comm->clustnum;
905  bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
906  bcsc_proc_comm_t *data_send = NULL;
907  bcsc_proc_comm_t *data_recv = NULL;
908  pastix_complex32_t *val_buf = NULL;
909  pastix_int_t idx_size = 0;
910  pastix_int_t counter_req = 0;
911  pastix_int_t nbelt_recv = 0;
912  pastix_int_t cnt = 0;
913  pastix_int_t idx_cnt[clustnbr];
914  MPI_Status statuses[clustnbr-1];
915  MPI_Request requests[clustnbr-1];
916  bcsc_data_amount_t *sends;
917  bcsc_exch_comm_t *recvs;
918  pastix_int_t c_send, c_recv, k;
919 
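 /*
  * For general matrices this routine is called twice: first with
  * rowtabAt == bcsc->rowtab to post the sends, receive the values into
  * recvAAt.valbuf and fill A, then with the rowtab of At to reuse the
  * received values and fill At without any new communication.
  * For symmetric/hermitian matrices, a single call fills both A and At
  * through bcsc_chandle_recv_AAt().
  */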
920  /* Allocates the receiving indexes and values buffers. */
921  if ( bcsc_comm->max_idx != 0 ) {
922  if ( spm->mtxtype != SpmGeneral ) {
923  MALLOC_INTERN( val_buf, bcsc_comm->max_val, pastix_complex32_t );
924  }
925  else {
926  if ( rowtabAt == bcsc->rowtab ) {
927  bcsc_allocate_buf( bcsc_comm, PastixTagMemRecvValAAt);
928  }
929  }
930  }
931 
932  for ( k = 0; k < clustnbr; k++ ) {
933  if ( k == clustnum ) {
934  idx_cnt[k] = 0;
935  continue;
936  }
937  idx_cnt[ k ] = cnt;
938  cnt += data_comm[k].recvAAt.size.idxcnt;
939  }
940 
941  /* Sends the values. */
942  c_send = (clustnum+1) % clustnbr;
943  for ( k = 0; k < clustnbr-1; k++ ) {
944  if ( rowtabAt != bcsc->rowtab ) {
945  break;
946  }
947  data_send = data_comm + c_send;
948  sends = &( data_send->sendAAt.size );
949  if ( c_send == clustnum ) {
950  continue;
951  }
952 
953  /* Posts the emissions of the values. */
954  if ( sends->valcnt != 0 ) {
955  MPI_Isend( data_send->sendAAt.valbuf, sends->valcnt, PASTIX_MPI_COMPLEX32,
956  c_send, PastixTagValuesAAt, bcsc_comm->comm, &requests[counter_req++] );
957  }
958  c_send = (c_send+1) % clustnbr;
959  }
960 
961  /* Receives the values. */
962  c_recv = (clustnum-1+clustnbr) % clustnbr;
963  for ( k = 0; k < clustnbr-1; k++ ) {
964  data_recv = data_comm + c_recv;
965  recvs = &( data_recv->recvAAt );
966  if ( c_recv == clustnum ) {
967  continue;
968  }
969 
970  /* Posts the receptions of the values. */
971  if ( recvs->size.valcnt != 0 ) {
972  if ( spm->mtxtype == SpmGeneral ) {
973  val_buf = recvs->valbuf;
974  }
975  if ( rowtabAt == bcsc->rowtab ) {
976  MPI_Recv( val_buf, recvs->size.valcnt, PASTIX_MPI_COMPLEX32,
977  c_recv, PastixTagValuesAAt, bcsc_comm->comm, MPI_STATUS_IGNORE );
978  }
979  idx_size = recvs->size.idxcnt;
980 
981  if ( spm->mtxtype != SpmGeneral ) {
982  nbelt_recv += bcsc_chandle_recv_AAt( spm, ord, solvmtx, rowtabAt, bcsc,
983  val_buf, idx_cnt[c_recv], idx_size );
984  }
985  else {
986  if ( rowtabAt == bcsc->rowtab ) {
987  nbelt_recv += bcsc_chandle_recv_A( spm, ord, solvmtx, bcsc, val_buf,
988  idx_cnt[c_recv], idx_size, 1 );
989  }
990  else {
991  nbelt_recv += bcsc_chandle_recv_At( spm, ord, solvmtx, rowtabAt, bcsc,
992  val_buf, idx_cnt[c_recv], idx_size, 1 );
993  }
994  }
995  }
996  c_recv = (c_recv-1+clustnbr) % clustnbr;
997  }
998 
999  if ( rowtabAt == bcsc->rowtab ) {
1000  MPI_Waitall( counter_req, requests, statuses );
1001  }
1002  if ( spm->mtxtype != SpmGeneral ) {
1003  free( val_buf );
1004  }
1005  assert( nbelt_recv == bcsc_comm->data_comm[clustnum].recvAAt.size.valcnt );
1006  if ( ( spm->mtxtype != SpmGeneral ) || ( rowtabAt != bcsc->rowtab ) ) {
1007  bcsc_free_buf( bcsc_comm, PastixTagMemRecvIdxAAt );
1008  bcsc_free_buf( bcsc_comm, PastixTagMemSendValAAt );
1009  }
1010  if ( ( spm->mtxtype == SpmGeneral ) && ( rowtabAt != bcsc->rowtab ) ) {
1011  bcsc_free_buf( bcsc_comm, PastixTagMemRecvAAt );
1012  }
1013  return nbelt_recv;
1014 }
1015 #endif
1016 
1017 /**
1018  *******************************************************************************
1019  *
1020  * @ingroup bcsc_internal
1021  *
1022  * @brief Initializes the A values of the block csc stored in the given spm.
1023  *
1024  *******************************************************************************
1025  *
1026  * @param[in] spm
1027  * The initial sparse matrix in the spm format.
1028  *
1029  * @param[in] ord
1030  * The ordering which needs to be applied on the spm to generate the
1031  * block csc.
1032  *
1033  * @param[in] solvmtx
1034  * The solver matrix structure which describes the data distribution.
1035  *
1036  * @param[inout] bcsc
1037  * On entry, the pointer to an allocated bcsc.
1038  * On exit, the bcsc fields are updated.
1039  *
1040  *******************************************************************************
1041  *
1042  * @retval TODO
1043  *
1044  *******************************************************************************/
1045 static inline pastix_int_t
1046 bcsc_cinit_A( const spmatrix_t *spm,
1047  const pastix_order_t *ord,
1048  const SolverMatrix *solvmtx,
1049  pastix_bcsc_t *bcsc )
1050 {
1051  pastix_complex32_t *values = (pastix_complex32_t*)(spm->values);
1052  pastix_complex32_t *Lvalues = (pastix_complex32_t*)(bcsc->Lvalues);
1053  pastix_int_t *loc2glob = spm->loc2glob;
1054  pastix_int_t *colptr = spm->colptr;
1055  pastix_int_t *rowptr = spm->rowptr;
1056  pastix_int_t *dofs = spm->dofs;
1057  pastix_int_t dof = spm->dof;
1058  pastix_int_t nbelt = 0;
1059  SolverCblk *cblk;
1060  pastix_int_t *coltab;
1061  pastix_int_t k, j, ig, jg, igp, jgp, igpe, jgpe;
1062  pastix_int_t itercblk, baseval;
1063  pastix_int_t idofi, idofj, dofi, dofj;
1064  pastix_int_t colidx, rowidx, pos;
1065 
1066  baseval = spm->baseval;
1067  /*
1068  * Initializes the values of the matrix A in the blocked csc format. This
1069  * applies the permutation to the values array.
1070  *
1071  * Goes through the columns of the spm.
1072  * j is the initial local column index.
1073  * jg is the initial global column index.
1074  * jgp is the "new" jg index -> the one resulting from the permutation.
1075  */
1076  for ( j = 0; j < spm->n; j++, colptr++, loc2glob++ ) {
1077  jg = ( spm->loc2glob == NULL ) ? j : *loc2glob - baseval;
1078  jgp = ord->permtab[jg];
1079  jgpe = ( dof > 0 ) ? jgp * dof : dofs[ jg ] - baseval;
1080  dofj = ( dof > 0 ) ? dof : dofs[ jg+1 ] - dofs[ jg ];
1081 
1082  itercblk = bcsc->col2cblk[ jgpe ];
1083 
1084  /*
1085  * If MPI is used in shared memory, the block can belong to another
1086  * processor, in this case the column is skipped.
1087  */
1088  /* The block of the column jgp belongs to the current processor. */
1089  if ( itercblk >= 0 ) {
1090  /* Gets the block in which the data will be added. */
1091  cblk = solvmtx->cblktab + itercblk;
1092  coltab = bcsc->cscftab[cblk->bcscnum].coltab;
1093  /*
1094  * Goes through the row of the column j.
1095  * ig is the initial global row index (for the row index local = global).
1096  * igp is the "new" ig index -> the one resulting from the permutation.
1097  */
1098  for ( k = colptr[0]; k < colptr[1]; k++, rowptr++ ) {
1099  ig = *rowptr - baseval;
1100  igp = ord->permtab[ig];
1101  igpe = ( dof > 0 ) ? igp * dof : dofs[ ig ] - baseval;
1102  dofi = ( dof > 0 ) ? dof : dofs[ ig+1 ] - dofs[ ig ];
1103 
1104  /* Copies the values from element (ig, jg) to position (igpe, jgpe) into expanded bcsc. */
1105  colidx = jgpe - cblk->fcolnum;
1106  for ( idofj = 0; idofj < dofj; idofj++, colidx++ ) {
1107  rowidx = igpe;
1108  pos = coltab[ colidx ];
1109  for ( idofi = 0; idofi < dofi; idofi++, rowidx++, pos++, values++ ) {
1110  assert( rowidx >= 0 );
1111  assert( rowidx < spm->gNexp );
1112  bcsc->rowtab[ pos ] = rowidx;
1113  Lvalues[ pos ] = *values;
1114  nbelt++;
1115  }
1116  coltab[ colidx ] += dofi;
1117  assert( coltab[ colidx ] <= coltab[ colidx+1 ] );
1118  }
1119  }
1120  }
1121  /* The block of the column jgp belongs to another processor. */
1122  else {
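 /*
  * Nothing is copied, but the values pointer must still be advanced to
  * stay aligned with the spm storage.
  */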
1123  for ( k = colptr[0]; k < colptr[1]; k++, rowptr++ ) {
1124  ig = *rowptr - baseval;
1125  dofi = ( dof > 0 ) ? dof : dofs[ ig+1 ] - dofs[ ig ];
1126 
1127  values += dofi * dofj;
1128  }
1129  }
1130  }
1131 
1132  return nbelt;
1133 }
1134 
1135 /**
1136  *******************************************************************************
1137  *
1138  * @ingroup bcsc_internal
1139  *
1140  * @brief Initializes the At values in the block csc stored in the given spm.
1141  *
1142  * This routine initializes either :
1143  * The symmetric upper part (L^t)
1144  * The hermitian upper part (L^h)
1145  * The transpose part of A (A^t -> U)
1146  *
1147  *******************************************************************************
1148  *
1149  * @param[in] spm
1150  * The initial sparse matrix in the spm format.
1151  *
1152  * @param[in] ord
1153  * The ordering which needs to be applied on the spm to generate the
1154  * block csc.
1155  *
1156  * @param[in] solvmtx
1157  * The solver matrix structure which describes the data distribution.
1158  *
1159  * @param[inout] rowtab
1160  * The row tab of the bcsc or the row tab associated to At.
1161  *
1162  * @param[inout] bcsc
1163  * On entry, the pointer to an allocated bcsc.
1164  * On exit, the bcsc fields are updated.
1165  *
1166  *******************************************************************************
1167  *
1168  * @retval TODO
1169  *
1170  *******************************************************************************/
1171 static inline pastix_int_t
1172 bcsc_cinit_At( const spmatrix_t *spm,
1173  const pastix_order_t *ord,
1174  const SolverMatrix *solvmtx,
1175  pastix_int_t *rowtab,
1176  pastix_bcsc_t *bcsc )
1177 {
1178  pastix_complex32_t *values = (pastix_complex32_t*)(spm->values);
1179  pastix_complex32_t *Uvalues;
1180  pastix_int_t *loc2glob = spm->loc2glob;
1181  pastix_int_t *colptr = spm->colptr;
1182  pastix_int_t *rowptr = spm->rowptr;
1183  pastix_int_t *dofs = spm->dofs;
1184  pastix_int_t dof = spm->dof;
1185  pastix_int_t nbelt = 0;
1186  SolverCblk *cblk;
1187  pastix_int_t *coltab;
1188  pastix_int_t k, j, ig, jg, igp, jgp, igpe, jgpe;
1189  pastix_int_t itercblk, baseval;
1190  pastix_int_t idofi, idofj, dofi, dofj;
1191  pastix_int_t colidx, rowidx, pos;
1192  pastix_complex32_t (*_bcsc_conj)(pastix_complex32_t) = NULL;
1193 
1194  /* We're working on U. */
1195  if ( spm->mtxtype == SpmGeneral ) {
1196  _bcsc_conj = __fct_id;
1197  Uvalues = (pastix_complex32_t*)(bcsc->Uvalues);
1198  }
1199  /* L^[t|h] part of the matrix. */
1200  else {
1201  /*
1202  * precision_generator/sub.py will change SpmHermitian to SpmSymmetric
1203  * Don't use else or else if.
1204  */
1205  if( spm->mtxtype == SpmHermitian ) {
1206  _bcsc_conj = __fct_conj;
1207  }
1208  if( spm->mtxtype == SpmSymmetric ) {
1209  _bcsc_conj = __fct_id;
1210  }
1211  Uvalues = (pastix_complex32_t*)(bcsc->Lvalues);
1212  }
1213  assert( _bcsc_conj != NULL );
1214 
1215  baseval = spm->baseval;
1216  /*
1217  * Initializes the values of the matrix A^t in the blocked csc format. This
1218  * applies the permutation to the values array.
1219  *
1220  * Goes through the columns of the spm.
1221  * j is the initial local column index.
1222  * jg is the initial global column index.
1223  * jgp is the "new" jg index -> the one resulting from the permutation.
1224  */
1225  for ( j = 0; j < spm->n; j++, colptr++, loc2glob++ ) {
1226  jg = ( spm->loc2glob == NULL ) ? j : *loc2glob - baseval;
1227  jgp = ord->permtab[jg];
1228  jgpe = ( dof > 0 ) ? jgp * dof : dofs[ jg ] - baseval;
1229  dofj = ( dof > 0 ) ? dof : dofs[ jg+1 ] - dofs[ jg ];
1230 
1231  /*
1232  * Goes through the row of the column j.
1233  * ig is the initial global row index (for the row index local = global).
1234  * igp is the "new" ig index -> the one resulting from the permutation.
1235  */
1236  for ( k = colptr[0]; k < colptr[1]; k++, rowptr++ ) {
1237  ig = *rowptr - baseval;
1238  igp = ord->permtab[ig];
1239  igpe = ( dof > 0 ) ? igp * dof : dofs[ ig ] - baseval;
1240  dofi = ( dof > 0 ) ? dof : dofs[ ig+1 ] - dofs[ ig ];
1241 
1242  /* Diagonal block of a symmetric matrix. */
1243  if ( ( ig == jg ) && ( spm->mtxtype != SpmGeneral ) ) {
1244  values += dofi * dofj;
1245  continue;
1246  }
1247  itercblk = bcsc->col2cblk[ igpe ];
1248 
1249  /*
1250  * If MPI is used in shared memory, the block can belong to another
1251  * processor, in this case the row is skipped.
1252  */
1253  /* The block of the row igp belongs to the current processor. */
1254  if ( itercblk >= 0 ) {
1255  /* Gets the block in which the data will be added. */
1256  cblk = solvmtx->cblktab + itercblk;
1257  coltab = bcsc->cscftab[cblk->bcscnum].coltab;
1258 
1259  /* Copies the values from element (ig, jg) to position (igpe, jgpe) into expanded bcsc. */
1260  for ( idofj = 0; idofj < dofj; idofj++ ) {
1261  rowidx = jgpe + idofj;
1262  colidx = igpe - cblk->fcolnum;
1263  for ( idofi = 0; idofi < dofi; idofi++, colidx++, values++ ) {
1264  pos = coltab[ colidx ];
1265 
1266  /* Copies the index/value */
1267  assert( rowidx >= 0 );
1268  assert( rowidx < spm->gNexp );
1269  rowtab[ pos ] = rowidx;
1270  Uvalues[ pos ] = _bcsc_conj( *values );
1271 
1272  coltab[ colidx ]++;
1273  nbelt++;
1274  }
1275  }
1276  }
1277  /* The block of the row igp belongs to another processor. */
1278  else {
1279  values += dofi * dofj;
1280  continue;
1281  }
1282  }
1283  }
1284  return nbelt;
1285 }
1286 
1287 /**
1288  *******************************************************************************
1289  *
1290  * @ingroup bcsc_internal
1291  *
1292  * @brief Sorts the block csc subarray associated to each column block.
1293  *
1294  *******************************************************************************
1295  *
1296  * @param[in] bcsc
1297  * On entry, the pointer to an allocated bcsc.
1298  *
1299  * @param[inout] rowtab
1300  * The row array of the bcsc, sorted in place.
1301  *
1302  * @param[inout] valtab
1303  * The value array of the bcsc, sorted in place together with
1304  * the row array.
1305  *
1306  *******************************************************************************/
1307 static inline void
1308 bcsc_csort( const pastix_bcsc_t *bcsc,
1309  pastix_int_t *rowtab,
1310  pastix_complex32_t *valtab )
1311 {
1312  bcsc_cblk_t *block;
1313  pastix_int_t block_num, col, len_col_block, block_nbr, col_nbr;
1314  void *sortptr[2];
1315 
1316  block_nbr = bcsc->cscfnbr;
1317  block = bcsc->cscftab;
1318  for ( block_num = 0; block_num < block_nbr; block_num++, block++ ) {
1319 
1320  col_nbr = block->colnbr;
1321  for ( col = 0; col < col_nbr; col++ ) {
1322 
1323  sortptr[0] = (void*)(rowtab + block->coltab[col]);
1324  sortptr[1] = (void*)(valtab + block->coltab[col]);
1325 
1326  len_col_block = block->coltab[col+1] - block->coltab[col];
1327 #if !defined(NDEBUG)
1328  {
1329  pastix_int_t gN = bcsc->gN;
1330  pastix_int_t i;
1331 
1332  for ( i = 0; i < len_col_block; i++ ) {
1333  assert( rowtab[ block->coltab[col] + i ] >= 0 );
1334  assert( rowtab[ block->coltab[col] + i ] < gN );
1335  }
1336  }
1337 #endif
1338 
1339  c_qsortIntFloatAsc( sortptr, len_col_block );
1340  }
1341  }
1342 }
1343 
1344 /**
1345  *******************************************************************************
1346  *
1347  * @brief Initializes a centralized pastix_complex32_t block csc.
1348  *
1349  *******************************************************************************
1350  *
1351  * @param[in] spm
1352  * The initial sparse matrix in the spm format.
1353  *
1354  * @param[in] ord
1355  * The ordering that needs to be applied on the spm to generate the
1356  * block csc.
1357  *
1358  * @param[in] solvmtx
1359  * The solver matrix structure that describe the data distribution.
1360  *
1361  * @param[in] initAt
1362  * A flag to enable/disable the initialization of A^t.
1363  *
1364  * @param[inout] bcsc
1365  * On entry, the pointer to an allocated bcsc.
1366  * On exit, the bcsc stores the input spm with the permutation applied
1367  * and grouped according to the distribution described in solvmtx.
1368  *
1369  * @param[in] valuesize
1370  * The number of nonzero values stored in the matrix.
1371  *
1372  *******************************************************************************/
1373 void
1374 bcsc_cinit( const spmatrix_t *spm,
1375  const pastix_order_t *ord,
1376  const SolverMatrix *solvmtx,
1377  int initAt,
1378  pastix_bcsc_t *bcsc,
1379  pastix_int_t valuesize )
1380 {
1381  pastix_int_t nbelt = 0;
1382 #if defined(PASTIX_WITH_MPI)
1383  pastix_int_t nbelt_recv = 0;
1384  bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
1385 #endif
1386 
1387  /* Exchanges the data if the spm is distributed and adds the received data of A. */
1388 #if defined(PASTIX_WITH_MPI)
1389  if ( bcsc_comm != NULL ) {
1390  nbelt_recv = bcsc_cexchange_values_A( spm, ord, solvmtx, bcsc );
1391  nbelt_recv += bcsc_cexchange_values_AAt( spm, ord, solvmtx, bcsc->rowtab, bcsc );
1392  nbelt += nbelt_recv;
1393  }
1394 #endif
1395 
1396  /* Initializes the blocked structure of the matrix A. */
1397  nbelt += bcsc_cinit_A( spm, ord, solvmtx, bcsc );
1398 
1399  /* Initializes the blocked structure of the matrix At if the matrix is not general. */
1400  if ( spm->mtxtype != SpmGeneral ) {
1401  /* Exchanges the data if the spm is distributed and adds the received data of A. */
1402 #if defined(PASTIX_WITH_MPI)
1403  if ( bcsc_comm != NULL ) {
1404  nbelt_recv = bcsc_cexchange_values_At( spm, ord, solvmtx, bcsc->rowtab, bcsc );
1405  nbelt += nbelt_recv;
1406  }
1407 #endif
1408  nbelt += bcsc_cinit_At( spm, ord, solvmtx, bcsc->rowtab, bcsc );
1409  }
1410 
1411  /* Restores the correct coltab arrays. */
1412  bcsc_restore_coltab( bcsc );
1413 
1414  /* Sorts the csc. */
1415  bcsc_csort( bcsc, bcsc->rowtab, bcsc->Lvalues );
1416 
1417  /* Initializes the blocked structure of the matrix At if the matrix is general. */
1418  if ( spm->mtxtype == SpmGeneral ) {
1419  /* If only the refinement is performed, A^t is not required. */
1420  if ( initAt ) {
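 /*
  * U gets its own value array, and trowtab temporarily holds the row
  * indexes of A^t: it is only needed to sort Uvalues and is freed
  * afterwards.
  */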
1421  pastix_int_t *trowtab, i;
1422  MALLOC_INTERN( bcsc->Uvalues, valuesize, pastix_complex32_t );
1423  MALLOC_INTERN( trowtab, valuesize, pastix_int_t );
1424 
1425  for (i=0; i<valuesize; i++) {
1426  trowtab[i] = -1;
1427  }
1428  nbelt = bcsc_cinit_At( spm, ord, solvmtx, trowtab, bcsc );
1429 #if defined(PASTIX_WITH_MPI)
1430  if ( bcsc_comm != NULL ) {
1431  nbelt_recv = bcsc_cexchange_values_At( spm, ord, solvmtx, trowtab, bcsc );
1432  nbelt_recv += bcsc_cexchange_values_AAt( spm, ord, solvmtx, trowtab, bcsc );
1433  nbelt += nbelt_recv;
1434  }
1435 #endif
1436  /* Restores the correct coltab arrays */
1437  bcsc_restore_coltab( bcsc );
1438 
1439  /* Sorts the transposed csc */
1440  bcsc_csort( bcsc, trowtab, bcsc->Uvalues );
1441  memFree( trowtab );
1442  }
1443  }
1444  /* Uvalues aliases Lvalues to save memory; in the SpmHermitian case, conjf() is applied on the fly when these values are used. */
1445  else {
1446  bcsc->Uvalues = bcsc->Lvalues;
1447  }
1448 
1449  (void) nbelt;
1450 #if defined(PASTIX_WITH_MPI)
1451  (void) nbelt_recv;
1452 #endif
1453 }