PaStiX Handbook  6.3.2
bcsc.c
1 /**
2  *
3  * @file bcsc.c
4  *
5  * @copyright 2004-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
6  * Univ. Bordeaux. All rights reserved.
7  *
8  * @version 6.3.2
9  * @author Mathieu Faverge
10  * @author Pierre Ramet
11  * @author Xavier Lacoste
12  * @author Theophile Terraz
13  * @author Tony Delarue
14  * @author Alycia Lisito
15  * @date 2023-07-21
16  *
17  **/
18 #include "common.h"
19 #include "pastix/order.h"
20 #include <spm.h>
21 #include "blend/solver.h"
22 #include "bcsc/bcsc.h"
23 
24 #include "bcsc/bcsc_z.h"
25 #include "bcsc/bcsc_c.h"
26 #include "bcsc/bcsc_d.h"
27 #include "bcsc/bcsc_s.h"
28 
29 #define BCSC_COMM_NBR 6
30 
31 /**
32  *******************************************************************************
33  *
34  * @ingroup bcsc_internal
35  *
36  * @brief Initializes the bcsc_handle_comm_t structure.
37  *
38  *******************************************************************************
39  *
40  * @param[in] solvmtx
41  * The solver matrix structure which describes the data distribution.
42  *
43  * @param[out] bcsc
44  * The bcsc.
45  *
46  *******************************************************************************/
47 void
48 bcsc_handle_comm_init( const SolverMatrix *solvmtx,
49  pastix_bcsc_t *bcsc )
50 {
51  pastix_int_t size = sizeof(bcsc_handle_comm_t) + (solvmtx->clustnbr-1)*sizeof(bcsc_proc_comm_t);
52  bcsc_handle_comm_t *bcsc_comm;
53 
54  bcsc->bcsc_comm = (bcsc_handle_comm_t *)malloc( size );
55  bcsc_comm = bcsc->bcsc_comm;
56 
57  bcsc_comm->flttype = bcsc->flttype;
58  bcsc_comm->clustnbr = solvmtx->clustnbr;
59  bcsc_comm->clustnum = solvmtx->clustnum;
60  bcsc_comm->comm = solvmtx->solv_comm;
61 
62  memset( bcsc_comm->data_comm, 0, bcsc_comm->clustnbr * sizeof(bcsc_proc_comm_t) );
63 }
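
The allocation above follows the classic C idiom of a structure ending with a one-element array (here data_comm[1]) that is over-allocated so the trailing array holds one slot per cluster, hence the (clustnbr-1) term. A minimal standalone sketch of the same idiom; handle_t and slot_t are illustrative stand-ins, not PaStiX types:

#include <stdlib.h>
#include <string.h>

typedef struct { int rank; size_t idxcnt; } slot_t;

typedef struct {
    int    clustnbr;      /* Number of processes.                           */
    slot_t data_comm[1];  /* Over-allocated so that it holds clustnbr slots. */
} handle_t;

static handle_t *
handle_new( int clustnbr )
{
    /* One slot_t is already part of handle_t, hence the (clustnbr - 1). */
    size_t    size = sizeof(handle_t) + (clustnbr - 1) * sizeof(slot_t);
    handle_t *h    = malloc( size );

    h->clustnbr = clustnbr;
    memset( h->data_comm, 0, clustnbr * sizeof(slot_t) );
    return h;
}

int main( void )
{
    handle_t *h = handle_new( 4 );
    h->data_comm[3].rank = 3; /* Last slot is valid thanks to the over-allocation. */
    free( h );
    return 0;
}
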
64 
65 /**
66  *******************************************************************************
67  *
68  * @ingroup bcsc_internal
69  *
70  * @brief Frees the bcsc_handle_comm pointers.
71  *
72  *******************************************************************************
73  *
74  * @param[inout] bcsc_comm
75  * The bcsc_handle_comm_t structure.
76  *
77  *******************************************************************************/
78 void
79 bcsc_handle_comm_exit( bcsc_handle_comm_t *bcsc_comm )
80 {
81  int c;
82  int clustnbr = bcsc_comm->clustnbr;
83  bcsc_proc_comm_t *data;
84 
85  for ( c = 0; c < clustnbr; c++ ) {
86  data = bcsc_comm->data_comm + c;
87 
88  if( data->sendA.idxbuf != NULL ) {
89  memFree_null( data->sendA.idxbuf );
90  }
91  if( data->sendA.valbuf != NULL ) {
92  memFree_null( data->sendA.valbuf );
93  }
94  if( data->sendAt.idxbuf != NULL ) {
95  memFree_null( data->sendAt.idxbuf );
96  }
97  if( data->sendAt.valbuf != NULL ) {
98  memFree_null( data->sendAt.valbuf );
99  }
100  if( data->sendAAt.idxbuf != NULL ) {
101  memFree_null( data->sendAAt.idxbuf );
102  }
103  if( data->sendAAt.valbuf != NULL ) {
104  memFree_null( data->sendAAt.valbuf );
105  }
106  if( data->recvAAt.idxbuf != NULL ) {
107  memFree_null( data->recvAAt.idxbuf );
108  }
109  if( data->recvAAt.valbuf != NULL ) {
110  memFree_null( data->recvAAt.valbuf );
111  }
112  }
113 }
114 
115 #if defined(PASTIX_WITH_MPI)
116 /**
117  *******************************************************************************
118  *
119  * @ingroup bcsc
120  *
121  * @brief Computes the maximum size of the sending indexes and values buffers.
122  *
123  *******************************************************************************
124  *
125  * @param[inout] bcsc_comm
126  * On entry, the initialized bcsc_comm structure.
127  * On exit, the max_idx and max_val fields of bcsc_comm are updated.
128  *
129  *******************************************************************************
130  *
131  * @retval PASTIX_SUCCESS
132  *
133  *******************************************************************************/
134 static inline int
135 bcsc_compute_max( bcsc_handle_comm_t *bcsc_comm )
136 {
137  bcsc_proc_comm_t *data = NULL;
138  bcsc_proc_comm_t *data_local = NULL;
139  pastix_int_t clustnbr = bcsc_comm->clustnbr;
140  pastix_int_t clustnum = bcsc_comm->clustnum;
141  pastix_int_t max_idx = 0;
142  pastix_int_t max_val = 0;
143  pastix_int_t idxsum_A = 0;
144  pastix_int_t valsum_A = 0;
145  pastix_int_t idxsum_At = 0;
146  pastix_int_t valsum_At = 0;
147  pastix_int_t idxsum_AAt = 0;
148  pastix_int_t valsum_AAt = 0;
149  pastix_int_t idxcnt_A, idxcnt_At, idxcnt_AAt, valcnt_A, valcnt_At, valcnt_AAt, c;
150 
151  /* Receives the amount of indexes and values. */
152  for ( c = 0; c < clustnbr; c++ ) {
153  data = bcsc_comm->data_comm + c;
154  if ( c == clustnum ) {
155  continue;
156  }
157 
158  idxcnt_A = data->recvA.idxcnt;
159  idxcnt_At = data->recvAt.idxcnt;
160  idxcnt_AAt = data->recvAAt.size.idxcnt;
161  valcnt_A = data->recvA.valcnt;
162  valcnt_At = data->recvAt.valcnt;
163  valcnt_AAt = data->recvAAt.size.valcnt;
164 
165  max_idx = pastix_imax( max_idx, idxcnt_A);
166  max_idx = pastix_imax( max_idx, idxcnt_At);
167  max_idx = pastix_imax( max_idx, idxcnt_AAt);
168  max_val = pastix_imax( max_val, valcnt_A);
169  max_val = pastix_imax( max_val, valcnt_At);
170  max_val = pastix_imax( max_val, valcnt_AAt);
171 
172  idxsum_A += idxcnt_A;
173  valsum_A += valcnt_A;
174  idxsum_At += idxcnt_At;
175  valsum_At += valcnt_At;
176  idxsum_AAt += idxcnt_AAt;
177  valsum_AAt += valcnt_AAt;
178  }
179 
180  data_local = bcsc_comm->data_comm + clustnum;
181  data_local->recvA.idxcnt = idxsum_A;
182  data_local->recvA.valcnt = valsum_A;
183  data_local->recvAt.idxcnt = idxsum_At;
184  data_local->recvAt.valcnt = valsum_At;
185  data_local->recvAAt.size.idxcnt = idxsum_AAt;
186  data_local->recvAAt.size.valcnt = valsum_AAt;
187 
188  assert( max_idx <= 2 * max_val );
189 
190  bcsc_comm->max_idx = max_idx;
191  bcsc_comm->max_val = max_val;
192 
193  return PASTIX_SUCCESS;
194 }
195 
196 /**
197  *******************************************************************************
198  *
199  * @ingroup bcsc
200  *
201  * @brief Allocates the sending buffers in bcsc_comm->data_comm. These buffers
202  * are filled with the sending values.
203  *
204  *******************************************************************************
205  *
206  * @param[inout] bcsc_comm
207  * On entry, the initialized bcsc_comm structure.
208  * On exit, the arrays of bcsc_comm->data_comm are allocated.
209  *
210  * @param[in] mode
211  * If PastixTagMemRecvIdx: allocates the receiving index buffers for A, At and AAt.
212  * If PastixTagMemSend: allocates the sending index and value buffers for
213  * A, At and AAt.
214  * If PastixTagMemRecvValAAt: allocates the receiving value buffers for AAt;
215  * these are used only if the spm is general.
216  *
217  *******************************************************************************
218  *
219  * @retval PASTIX_SUCCESS
220  *
221  *******************************************************************************/
222 int
223 bcsc_allocate_buf( bcsc_handle_comm_t *bcsc_comm,
224  bcsc_tag_e mode )
225 {
226  bcsc_proc_comm_t *data = NULL;
227  pastix_int_t clustnbr = bcsc_comm->clustnbr;
228  pastix_int_t clustnum = bcsc_comm->clustnum;
229  pastix_int_t c;
230  size_t size;
231 
232  if ( mode == PastixTagMemRecvIdx ) {
233  data = bcsc_comm->data_comm + clustnum;
234 
235  if ( ( data->recvA.idxcnt > 0 ) && ( data->sendA.idxbuf == NULL ) ) {
236  MALLOC_INTERN( data->sendA.idxbuf, data->recvA.idxcnt, pastix_int_t );
237  }
238 
239  if ( ( data->recvAt.idxcnt > 0 ) && ( data->sendAt.idxbuf == NULL ) ) {
240  MALLOC_INTERN( data->sendAt.idxbuf, data->recvAt.idxcnt, pastix_int_t );
241  }
242 
243  if ( ( data->recvAAt.size.idxcnt > 0 ) && ( data->sendAAt.idxbuf == NULL ) ) {
244  MALLOC_INTERN( data->sendAAt.idxbuf, data->recvAAt.size.idxcnt, pastix_int_t );
245  }
246  }
247 
248  if ( mode == PastixTagMemRecvValAAt ) {
249  for ( c = 0; c < clustnbr; c ++ ) {
250  data = bcsc_comm->data_comm + c;
251  if ( c == clustnum ) {
252  continue;
253  }
254  if ( ( data->recvAAt.size.valcnt > 0 ) && ( data->recvAAt.valbuf == NULL ) ) {
255  size = data->recvAAt.size.valcnt * pastix_size_of( bcsc_comm->flttype );
256  MALLOC_INTERN( data->recvAAt.valbuf, size, char );
257  }
258  }
259  }
260 
261  if ( mode == PastixTagMemSend ) {
262  for ( c = 0; c < clustnbr; c ++ ) {
263  data = bcsc_comm->data_comm + c;
264 
265  if ( c == clustnum ) {
266  continue;
267  }
268 
269  if ( ( data->sendA.size.idxcnt > 0 ) && ( data->sendA.idxbuf == NULL ) ) {
270  MALLOC_INTERN( data->sendA.idxbuf, data->sendA.size.idxcnt, pastix_int_t );
271  }
272  if ( ( data->sendA.size.valcnt > 0 ) && ( data->sendA.valbuf == NULL ) ) {
273  size = data->sendA.size.valcnt * pastix_size_of( bcsc_comm->flttype );
274  MALLOC_INTERN( data->sendA.valbuf, size, char );
275  }
276 
277  if ( ( data->sendAt.size.idxcnt > 0 ) && ( data->sendAt.idxbuf == NULL ) ) {
278  MALLOC_INTERN( data->sendAt.idxbuf, data->sendAt.size.idxcnt, pastix_int_t );
279  }
280  if ( ( data->sendAt.size.valcnt > 0 ) && ( data->sendAt.valbuf == NULL ) ) {
281  size = data->sendAt.size.valcnt * pastix_size_of( bcsc_comm->flttype );
282  MALLOC_INTERN( data->sendAt.valbuf, size, char );
283  }
284 
285  if ( ( data->sendAAt.size.idxcnt > 0 ) && ( data->sendAAt.idxbuf == NULL ) ) {
286  MALLOC_INTERN( data->sendAAt.idxbuf, data->sendAAt.size.idxcnt, pastix_int_t );
287  }
288  if ( ( data->sendAAt.size.valcnt > 0 ) && ( data->sendAAt.valbuf == NULL ) ) {
289  size = data->sendAAt.size.valcnt * pastix_size_of( bcsc_comm->flttype );
290  MALLOC_INTERN( data->sendAAt.valbuf, size, char );
291  }
292  }
293  }
294 
295  return PASTIX_SUCCESS;
296 }
297 
298 /**
299  *******************************************************************************
300  *
301  * @ingroup bcsc
302  *
303  * @brief Frees the sending and receiving buffers in bcsc_comm->data_comm.
304  * These buffers hold the sent and received values.
305  *
306  *******************************************************************************
307  *
308  * @param[inout] bcsc_comm
309  * On entry, the initialized bcsc_comm structure.
310  * On exit, the arrays of bcsc_comm->data_comm are freed.
311  *
312  * @param[in] mode
313  * If PastixTagMemSendIdx: frees the sending index buffers for A, At and AAt.
314  * If PastixTagMemSendValA: frees the sending value buffers for A.
315  * If PastixTagMemSendValAt: frees the sending value buffers for At.
316  * If PastixTagMemSendValAAt: frees the sending value buffers for AAt.
317  * If PastixTagMemRecvIdxA: frees the receiving index buffers for A.
318  * If PastixTagMemRecvIdxAt: frees the receiving index buffers for At.
319  * If PastixTagMemRecvAAt: frees the receiving index buffers for AAt, and
320  * the value buffers if the spm is general.
321  *
322  *******************************************************************************
323  *
324  * @retval PASTIX_SUCCESS
325  *
326  *******************************************************************************/
327 int
328 bcsc_free_buf( bcsc_handle_comm_t *bcsc_comm,
329  bcsc_tag_e mode )
330 {
331  bcsc_proc_comm_t *data = NULL;
332  pastix_int_t clustnbr = bcsc_comm->clustnbr;
333  pastix_int_t clustnum = bcsc_comm->clustnum;
334  pastix_int_t c;
335 
336  if ( mode == PastixTagMemSendIdx ) {
337  for ( c = 0; c < clustnbr; c ++ ) {
338  data = bcsc_comm->data_comm + c;
339  if ( c == clustnum ) {
340  continue;
341  }
342  if ( data->sendA.idxbuf != NULL ) {
343  memFree_null( data->sendA.idxbuf );
344  }
345  if ( data->sendAt.idxbuf != NULL ) {
346  memFree_null( data->sendAt.idxbuf );
347  }
348  if ( data->sendAAt.idxbuf != NULL ) {
349  memFree_null( data->sendAAt.idxbuf );
350  }
351  }
352  }
353 
354  if ( mode == PastixTagMemSendValA ) {
355  for ( c = 0; c < clustnbr; c ++ ) {
356  data = bcsc_comm->data_comm + c;
357  if ( c == clustnum ) {
358  continue;
359  }
360  if ( data->sendA.valbuf != NULL ) {
361  memFree_null( data->sendA.valbuf );
362  }
363  }
364  }
365 
366  if ( mode == PastixTagMemSendValAt ) {
367  for ( c = 0; c < clustnbr; c ++ ) {
368  data = bcsc_comm->data_comm + c;
369  if ( c == clustnum ) {
370  continue;
371  }
372  if ( data->sendAt.valbuf != NULL ) {
373  memFree_null( data->sendAt.valbuf );
374  }
375  }
376  }
377 
378  if ( mode == PastixTagMemSendValAAt ) {
379  for ( c = 0; c < clustnbr; c ++ ) {
380  data = bcsc_comm->data_comm + c;
381  if ( c == clustnum ) {
382  continue;
383  }
384  if ( data->sendAAt.valbuf != NULL ) {
385  memFree_null( data->sendAAt.valbuf );
386  }
387  }
388  }
389 
390  if ( mode == PastixTagMemRecvIdxA ) {
391  data = bcsc_comm->data_comm + clustnum;
392  if ( data->sendA.idxbuf != NULL ) {
393  memFree_null( data->sendA.idxbuf );
394  }
395  }
396 
397  if ( mode == PastixTagMemRecvIdxAt ) {
398  data = bcsc_comm->data_comm + clustnum;
399  if ( data->sendAt.idxbuf != NULL ) {
400  memFree_null( data->sendAt.idxbuf );
401  }
402  }
403 
404  if ( mode == PastixTagMemRecvAAt ) {
405  data = bcsc_comm->data_comm + clustnum;
406  if ( data->sendAAt.idxbuf != NULL ) {
407  memFree_null( data->sendAAt.idxbuf );
408  }
409  if ( data->recvAAt.valbuf != NULL ) {
410  memFree_null( data->recvAAt.valbuf );
411  }
412  }
413 
414  return PASTIX_SUCCESS;
415 }
416 
417 /**
418  *******************************************************************************
419  *
420  * @ingroup bcsc_internal
421  *
422  * @brief Exchanges the amount of data the current processor will send to and
423  * receive from each processor.
424  *
425  *******************************************************************************
426  *
427  * @param[in] bcsc_comm
428  * The bcsc_handle_comm_t structure.
429  *
430  *******************************************************************************/
431 void
432 bcsc_exchange_amount_of_data( bcsc_handle_comm_t *bcsc_comm )
433 {
434  bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
435  pastix_int_t clustnbr = bcsc_comm->clustnbr;
436  pastix_int_t clustnum = bcsc_comm->clustnum;
437  bcsc_proc_comm_t *data_send = NULL;
438  bcsc_proc_comm_t *data_recv = NULL;
439  pastix_int_t counter_req = 0;
440  MPI_Status statuses[(clustnbr-1)*BCSC_COMM_NBR];
441  MPI_Request requests[(clustnbr-1)*BCSC_COMM_NBR];
442  bcsc_data_amount_t *sends, *recvs;
443  pastix_int_t c_send, c_recv, k;
444 
445  /* Exchanges the amount of indexes and values. */
446  c_send = (clustnum+1) % clustnbr;
447  c_recv = (clustnum-1+clustnbr) % clustnbr;
448  for ( k = 0; k < clustnbr-1; k++ ) {
449  data_send = data_comm + c_send;
450  data_recv = data_comm + c_recv;
451 
452  if ( c_send == clustnum ) {
453  continue;
454  }
455 
456  /* Exchanges the amount of indexes and values for A. */
457  sends = &( data_send->sendA.size );
458  recvs = &( data_recv->recvA );
459  MPI_Irecv( recvs, 2, PASTIX_MPI_INT, c_recv,
460  PastixTagCountA, bcsc_comm->comm, &requests[counter_req++] );
461 
462  MPI_Isend( sends, 2, PASTIX_MPI_INT, c_send,
463  PastixTagCountA, bcsc_comm->comm, &requests[counter_req++] );
464 
465  /* Exchanges the amount of indexes and values for At. */
466  sends = &( data_send->sendAt.size );
467  recvs = &( data_recv->recvAt );
468  MPI_Irecv( recvs, 2, PASTIX_MPI_INT, c_recv,
469  PastixTagCountAt, bcsc_comm->comm, &requests[counter_req++] );
470 
471  MPI_Isend( sends, 2, PASTIX_MPI_INT, c_send,
472  PastixTagCountAt, bcsc_comm->comm, &requests[counter_req++] );
473 
474  /* Exchanges the amount of indexes and values for AAt. */
475  sends = &( data_send->sendAAt.size );
476  recvs = &( data_recv->recvAAt.size );
477  MPI_Irecv( recvs, 2, PASTIX_MPI_INT, c_recv,
478  PastixTagCountAAt, bcsc_comm->comm, &requests[counter_req++] );
479 
480  MPI_Isend( sends, 2, PASTIX_MPI_INT, c_send,
481  PastixTagCountAAt, bcsc_comm->comm, &requests[counter_req++] );
482 
483  c_send = (c_send+1) % clustnbr;
484  c_recv = (c_recv-1+clustnbr) % clustnbr;
485  }
486 
487  MPI_Waitall( counter_req, requests, statuses );
488 
489  bcsc_compute_max( bcsc_comm );
490 
491  return;
492 }
493 #endif
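
The loop in bcsc_exchange_amount_of_data() above implements a shifted all-to-all: at step k the process posts a non-blocking receive from its k-th left neighbour and a non-blocking send to its k-th right neighbour, and a single MPI_Waitall completes all requests. A self-contained sketch of the same communication pattern, exchanging one integer per peer; it is independent of the PaStiX structures and only assumes a working MPI installation:

#include <mpi.h>
#include <stdlib.h>

/* Every rank sends one integer to every other rank with the same
 * shifted Irecv/Isend pattern, then waits on all requests at once. */
int main( int argc, char **argv )
{
    int rank, size, k, counter = 0;

    MPI_Init( &argc, &argv );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    MPI_Comm_size( MPI_COMM_WORLD, &size );

    int         *sendcnt  = malloc( size * sizeof(int) );
    int         *recvcnt  = calloc( size, sizeof(int) );
    MPI_Request *requests = malloc( 2 * (size - 1) * sizeof(MPI_Request) );

    for ( k = 0; k < size; k++ ) {
        sendcnt[k] = 100 * rank + k; /* Arbitrary payload. */
    }

    int c_send = (rank + 1) % size;
    int c_recv = (rank - 1 + size) % size;
    for ( k = 0; k < size - 1; k++ ) {
        MPI_Irecv( recvcnt + c_recv, 1, MPI_INT, c_recv, 0,
                   MPI_COMM_WORLD, &requests[counter++] );
        MPI_Isend( sendcnt + c_send, 1, MPI_INT, c_send, 0,
                   MPI_COMM_WORLD, &requests[counter++] );
        c_send = (c_send + 1) % size;
        c_recv = (c_recv - 1 + size) % size;
    }
    MPI_Waitall( counter, requests, MPI_STATUSES_IGNORE );

    free( sendcnt ); free( recvcnt ); free( requests );
    MPI_Finalize();
    return 0;
}
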
494 
495 /**
496  *******************************************************************************
497  *
498  * @ingroup bcsc_internal
499  *
500  * @brief Creates the array which represents the repartition of each column
501  * in the block structure. The array size is spm->gNexp where:
502  * - col2cblk[k] = cblknum, with cblknum the index of the block column
503  * where the column k is stored.
504  * This routine is called when the matrix is in shared memory.
505  *
506  *******************************************************************************
507  *
508  * @param[in] solvmtx
509  * The solvmtx structure associated to the problem.
510  *
511  * @param[in,out] bcsc
512  * The internal block CSC structure.
513  * The number of local columns is updated.
514  *
515  *******************************************************************************
516  *
517  * @return The col2cblk array which gives the repartition of the solvmtx columns
518  * into the block structure.
519  *
520  *******************************************************************************/
521 pastix_int_t *
522 bcsc_init_col2cblk_shm( const SolverMatrix *solvmtx,
523  const pastix_bcsc_t *bcsc )
524 {
525  pastix_int_t j;
526  pastix_int_t cblknum;
527  pastix_int_t *col2cblk;
528 
529  /* Allocates the col2cblk. */
530  MALLOC_INTERN( col2cblk, bcsc->gN, pastix_int_t );
531  memset( col2cblk, 0xff, bcsc->gN * sizeof(pastix_int_t) );
532 
533  const SolverCblk *cblk = solvmtx->cblktab;
534  pastix_int_t cblknbr = solvmtx->cblknbr;
535  /* Goes through the blocks. */
536  for ( cblknum = 0; cblknum < cblknbr; cblknum++, cblk++ ) {
537  if ( cblk->cblktype & (CBLK_FANIN|CBLK_RECV) ) {
538  continue;
539  }
540  /*
541  * Goes through the columns of the block and adds the number of
542  * the block in col2cblk at the corresponding index.
543  */
544  for ( j = cblk->fcolnum; j <= cblk->lcolnum; j++ ) {
545  col2cblk[j] = cblknum;
546  }
547  }
548 
549  return col2cblk;
550 }
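
To illustrate the result of bcsc_init_col2cblk_shm(), a small standalone sketch builds the same kind of mapping for a toy partition of six columns into two block columns (columns 0-2 in cblk 0, columns 3-5 in cblk 1); cblk_t below is a simplified stand-in for SolverCblk:

#include <assert.h>
#include <string.h>

typedef struct { int fcolnum, lcolnum; } cblk_t;

int main( void )
{
    cblk_t cblktab[2] = { { 0, 2 }, { 3, 5 } };
    int    col2cblk[6];
    int    cblknum, j;

    memset( col2cblk, 0xff, sizeof(col2cblk) ); /* -1 everywhere. */

    for ( cblknum = 0; cblknum < 2; cblknum++ ) {
        for ( j = cblktab[cblknum].fcolnum; j <= cblktab[cblknum].lcolnum; j++ ) {
            col2cblk[j] = cblknum;
        }
    }

    /* col2cblk = { 0, 0, 0, 1, 1, 1 } */
    assert( col2cblk[2] == 0 && col2cblk[3] == 1 );
    return 0;
}
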
551 
552 #if defined(PASTIX_WITH_MPI)
553 /**
554  *******************************************************************************
555  *
556  * @ingroup bcsc_internal
557  *
558  * @brief Creates the array which represents the repartition of each column
559  * in the block structure. The array size is spm->gNexp where:
560  * - col2cblk[k] = - (owner + 1) if the column is not stored in a local block
561  * - col2cblk[k] = cblknum, if the column k is stored in a local block, with
562  * cblknum the index of this block column.
563  * This routine is called when the matrix is in distributed memory.
564  *
565  *******************************************************************************
566  *
567  * @param[in] solvmtx
568  * The solvmtx structure associated to the problem.
569  *
570  * @param[in,out] bcsc
571  * The internal block CSC structure.
572  * The number of local columns is updated.
573  *
574  *******************************************************************************
575  *
576  * @return The col2cblk array which gives the repartition of the solvmtx columns
577  * into the block structure.
578  *
579  *******************************************************************************/
580 pastix_int_t *
581 bcsc_init_col2cblk_dst( const SolverMatrix *solvmtx,
582  const pastix_bcsc_t *bcsc )
583 {
584  pastix_int_t n, nr = 0;
585  pastix_int_t k, j, c;
586  pastix_int_t clustnum = solvmtx->clustnum;
587  pastix_int_t clustnbr = solvmtx->clustnbr;
588  pastix_int_t fcolnum, lcolnum, cblknum;
589  pastix_int_t *col2cblk;
590  pastix_int_t *col2cblk_bcast = NULL;
591 
592  /* Allocates the col2cblk. */
593  MALLOC_INTERN( col2cblk, bcsc->gN, pastix_int_t );
594  memset( col2cblk, 0xff, bcsc->gN * sizeof(pastix_int_t) );
595 
596  for( c = 0; c < clustnbr; c++ ) {
597  if ( c == clustnum ) {
598  const SolverCblk *cblk = solvmtx->cblktab;
599  pastix_int_t cblknbr = solvmtx->cblknbr;
600  pastix_int_t colcount;
601 
602  n = (solvmtx->cblknbr - solvmtx->faninnbr - solvmtx->recvnbr) * 2;
603 
604  /* Sends the size of data. */
605  MPI_Bcast( &n, 1, PASTIX_MPI_INT, c, solvmtx->solv_comm );
606 
607  if ( n > nr ) {
608  pastix_int_t *tmp;
609  nr = n;
610  tmp = (pastix_int_t *)realloc( col2cblk_bcast, nr * sizeof(pastix_int_t) );
611  if ( tmp != NULL ) {
612  col2cblk_bcast = tmp;
613  }
614  }
615 
616  colcount = 0;
617  k = 0;
618  /* Goes through the blocks. */
619  for ( cblknum = 0; cblknum < cblknbr; cblknum++, cblk++ ) {
620  if ( cblk->cblktype & (CBLK_FANIN|CBLK_RECV) ) {
621  continue;
622  }
623  /* Adds the first and last columns of the block in col2cblk_bcast. */
624  col2cblk_bcast[k] = cblk->fcolnum;
625  col2cblk_bcast[k+1] = cblk->lcolnum;
626  k += 2;
627  /*
628  * Goes through the columns of the block and adds the
629  * block number in col2cblk.
630  */
631  for ( j = cblk->fcolnum; j <= cblk->lcolnum; j++ ) {
632  colcount++;
633  col2cblk[j] = cblknum;
634  }
635  }
636  assert( colcount == bcsc->n );
637 
638  /* Sends the col2cblk_bcast. */
639  MPI_Bcast( col2cblk_bcast, n, PASTIX_MPI_INT, c, solvmtx->solv_comm );
640  }
641  else {
642  /* Receives the size of data from c. */
643  MPI_Bcast( &n, 1, PASTIX_MPI_INT, c, solvmtx->solv_comm );
644 
645  if ( n > nr ) {
646  pastix_int_t *tmp;
647  nr = n;
648  tmp = (pastix_int_t *)realloc( col2cblk_bcast, nr * sizeof(pastix_int_t) );
649  if ( tmp != NULL ) {
650  col2cblk_bcast = tmp;
651  }
652  }
653 
654  if ( n == 0 ) {
655  continue;
656  }
657 
658  /* Receives the col2cblk_bcast from c. */
659  MPI_Bcast( col2cblk_bcast, n, PASTIX_MPI_INT, c, solvmtx->solv_comm );
660  /*
661  * Goes through the columns in col2cblk_bcast and adds the processor
662  * number in col2cblk.
663  */
664  for ( k = 0; k < n; k += 2 ) {
665  fcolnum = col2cblk_bcast[k];
666  lcolnum = col2cblk_bcast[k+1];
667  for ( j = fcolnum; j <= lcolnum; j++ ) {
668  col2cblk[j] = - c - 1;
669  }
670  }
671  }
672  }
673 
674  free( col2cblk_bcast );
675 
676  return col2cblk;
677 }
678 #endif
679 
680 /**
681  *******************************************************************************
682  *
683  * @ingroup bcsc_internal
684  *
685  * @brief Creates the array which represents the repartition of each column
686  * in the block structure. This routine calls bcsc_init_col2cblk_shm or
687  * bcsc_init_col2cblk_dst according to the way the matrix is stored in the
688  * memory.
689  *
690  *******************************************************************************
691  *
692  * @param[in] solvmtx
693  * The solvmtx structure associated to the problem.
694  *
695  * @param[in] bcsc
696  * The internal block CSC structure.
697  * The number of local columns is updated.
698  *
699  * @param[in] spm
700  * The initial sparse matrix in the spm format.
701  *
702  *******************************************************************************
703  *
704  * @return The col2cblk array which gives the repartition of the solvmtx columns
705  * into the block structure.
706  *
707  *******************************************************************************/
708 pastix_int_t *
709 bcsc_init_col2cblk( const SolverMatrix *solvmtx,
710  const pastix_bcsc_t *bcsc,
711  const spmatrix_t *spm )
712 {
713  pastix_int_t *col2cblk;
714 
715  /* Tests if the spm is in shared or distributed memory. */
716 #if defined(PASTIX_WITH_MPI)
717  if ( spm->loc2glob != NULL ) {
718  col2cblk = bcsc_init_col2cblk_dst( solvmtx, bcsc );
719  }
720  else
721 #endif
722  {
723  col2cblk = bcsc_init_col2cblk_shm( solvmtx, bcsc );
724  }
725 
726  (void)spm;
727  return col2cblk;
728 }
729 
730 /**
731  *******************************************************************************
732  *
733  * @brief Initializes the dofshift array of size gNexp which gives
734  * dofshift[index_permuted] = index. This corresponds to the inverse of
735  * the permutation given in ord->permtab.
736  *
737  *******************************************************************************
738  *
739  * @param[in] spm
740  * The initial sparse matrix in the spm format.
741  *
742  * @param[in] ord
743  * The ordering that needs to be applied on the spm to generate the
744  * block csc.
745  *
746  *******************************************************************************
747  *
748  * @return The dofshift array.
749  *
750  *******************************************************************************/
751 static inline pastix_int_t*
752 bcsc_init_dofshift( const spmatrix_t *spm,
753  const pastix_order_t *ord )
754 {
755  pastix_int_t *dofshift, *ptr;
756  pastix_int_t *dofs;
757  pastix_int_t dof;
758  pastix_int_t idof, dofj, dofidx;
759  pastix_int_t jg, jgp;
760 
761  /* Allocates the dofshift array. */
762  MALLOC_INTERN( dofshift, spm->gNexp, pastix_int_t );
763 
764  dofs = spm->dofs;
765  dof = spm->dof;
766  ptr = dofshift;
767  for ( jg = 0; jg < spm->gN; jg++ ) {
768  jgp = ord->permtab[jg];
769  dofidx = (dof > 0) ? jgp * dof : dofs[jg];
770  ptr = dofshift + dofidx;
771  dofj = (dof > 0) ? dof : dofs[jg+1] - dofs[jg];
772  for ( idof = 0; idof < dofj; idof++, ptr++ ) {
773  *ptr = jgp;
774  }
775  }
776  return dofshift;
777 }
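
As a concrete example of the array built by bcsc_init_dofshift(), the sketch below uses a constant degree of freedom dof = 2, three columns and the toy permutation permtab = {2, 0, 1}; it reproduces the loop above with plain int arrays, outside of the spm/order structures:

#include <assert.h>

int main( void )
{
    const int gN  = 3;
    const int dof = 2;
    int permtab[3] = { 2, 0, 1 };
    int dofshift[6];
    int jg, idof;

    for ( jg = 0; jg < gN; jg++ ) {
        int jgp = permtab[jg];
        for ( idof = 0; idof < dof; idof++ ) {
            /* Expanded permuted index -> non-expanded permuted index. */
            dofshift[jgp * dof + idof] = jgp;
        }
    }

    /* dofshift = { 0, 0, 1, 1, 2, 2 } */
    assert( dofshift[4] == 2 && dofshift[5] == 2 );
    return 0;
}
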
778 
779 /**
780  *******************************************************************************
781  *
782  * @brief Initializes the coltab of a block csc matrix. The coltab corresponds to
783  * the number of rows (expanded) per column (non-expanded). This routine
784  * is called when the matrix is stored in shared memory or the matrix is
785  * replicated on the processors and the matrix's degree of freedom is
786  * constant.
787  *
788  *******************************************************************************
789  *
790  * @param[in] spm
791  * The initial sparse matrix in the spm format.
792  *
793  * @param[in] ord
794  * The ordering which needs to be applied on the spm to generate the
795  * block csc.
796  *
797  * @param[out] globcol
798  * The array which contains, for each column, its beginning in the
799  * spm->colptr.
800  *
801  *******************************************************************************/
802 static inline void
803 bcsc_init_global_coltab_shm_cdof( const spmatrix_t *spm,
804  const pastix_order_t *ord,
805  pastix_int_t *globcol )
806 {
807  pastix_int_t *colptr = spm->colptr;
808  pastix_int_t *rowptr = spm->rowptr;
809  pastix_int_t dof = spm->dof;
810  pastix_int_t baseval = spm->baseval;
811  pastix_int_t frow, lrow;
812  pastix_int_t k, j, ig, jg, igp, jgp;
813  int sym = (spm->mtxtype == SpmSymmetric) || (spm->mtxtype == SpmHermitian);
814 
815  assert( dof > 0 );
816  assert( spm->loc2glob == NULL );
817 
818  /* Goes through the column of the spm. */
819  for ( j = 0; j < spm->n; j++, colptr++ ) {
820  jg = j;
821  jgp = ord->permtab[jg];
822  frow = colptr[0] - baseval;
823  lrow = colptr[1] - baseval;
824  assert( (lrow - frow) >= 0 );
825  /* Adds the number of values in the column jg. */
826  globcol[jgp] += (lrow - frow) * dof;
827 
828  /*
829  * Adds for At the number of values in the row ig and column jg. This
830  * is not required for the general case as the spm has a symmetric
831  * pattern.
832  */
833  if ( !sym ) {
834  continue;
835  }
836 
837  for ( k = frow; k < lrow; k++ ) {
838  ig = rowptr[k] - baseval;
839  if ( ig != jg ) {
840  igp = ord->permtab[ig];
841  globcol[igp] += dof;
842  }
843  }
844  }
845 
846  return;
847 }
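
A worked example of the counting performed above: a 3x3 symmetric pattern stored as its lower triangle in 0-based CSC form, with dof = 1 and the identity permutation. Each stored entry (i, j) adds one row to column j, plus one row to column i when i != j (the At contribution), so every column of the full matrix ends up with three rows. The arrays below are illustrative, not taken from an spm:

#include <assert.h>
#include <string.h>

int main( void )
{
    /* Lower triangle of a dense 3x3 symmetric pattern, 0-based CSC. */
    int colptr[4] = { 0, 3, 5, 6 };
    int rowptr[6] = { 0, 1, 2, 1, 2, 2 };
    int globcol[3];
    int j, k;

    memset( globcol, 0, sizeof(globcol) );

    for ( j = 0; j < 3; j++ ) {
        int frow = colptr[j];
        int lrow = colptr[j+1];

        /* Rows stored in column j (the A part). */
        globcol[j] += lrow - frow;

        /* Transposed contribution of the off-diagonal entries (the At part). */
        for ( k = frow; k < lrow; k++ ) {
            int i = rowptr[k];
            if ( i != j ) {
                globcol[i] += 1;
            }
        }
    }

    /* Every column of the full symmetric matrix ends up with 3 rows. */
    assert( globcol[0] == 3 && globcol[1] == 3 && globcol[2] == 3 );
    return 0;
}
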
848 
849 /**
850  *******************************************************************************
851  *
852  * @brief Initializes the coltab of a block csc matrix. The coltab corresponds to
853  * the number of rows (expanded) per column (non-expanded). This routine
854  * is called when the matrix is stored in shared memory or the matrix is
855  * replicated on the processors and the matrix's degree of freedom is
856  * variable.
857  *
858  *******************************************************************************
859  *
860  * @param[in] spm
861  * The initial sparse matrix in the spm format.
862  *
863  * @param[in] ord
864  * The ordering which needs to be applied on the spm to generate the
865  * block csc.
866  *
867  * @param[out] globcol
868  * The array which contains, for each column, its beginning in the
869  * spm->colptr.
870  *
871  *******************************************************************************/
872 static inline void
873 bcsc_init_global_coltab_shm_vdof( const spmatrix_t *spm,
874  const pastix_order_t *ord,
875  pastix_int_t *globcol )
876 {
877  pastix_int_t *colptr = spm->colptr;
878  pastix_int_t *rowptr = spm->rowptr;
879  pastix_int_t *dofs = spm->dofs;
880  pastix_int_t baseval = spm->baseval;
881  pastix_int_t frow, lrow;
882  pastix_int_t k, j, ig, jg, igp, jgp;
883  pastix_int_t dofj, dofi;
884  int sym = (spm->mtxtype == SpmSymmetric) || (spm->mtxtype == SpmHermitian);
885 
886  assert( spm->dof <= 0 );
887  assert( spm->loc2glob == NULL );
888 
889  /* Goes through the column of the spm. */
890  for ( j=0; j<spm->n; j++, colptr++ ) {
891  jg = j;
892  dofj = dofs[jg+1] - dofs[jg];
893  jgp = ord->permtab[jg];
894  frow = colptr[0] - baseval;
895  lrow = colptr[1] - baseval;
896  assert( (lrow - frow) >= 0 );
897 
898  for ( k=frow; k<lrow; k++ ) {
899  ig = rowptr[k] - baseval;
900  dofi = dofs[ig+1] - dofs[ig];
901  /* Adds the number of values in the row ig and column jg. */
902  globcol[jgp] += dofi;
903 
904  /* Adds for At the number of values in the row ig and column jg. */
905  if ( sym && (ig != jg) ) {
906  igp= ord->permtab[ig];
907  globcol[igp] += dofj;
908  }
909  }
910  }
911 
912  return;
913 }
914 
915 #if defined(PASTIX_WITH_MPI)
916 /**
917  *******************************************************************************
918  *
919  * @brief Initializes the coltab of a block csc matrix. The coltab corresponds to
920  * the number of rows (expanded) per column (non-expanded). This routine
921  * is called when the matrix is distributed in the memory and the matrix's
922  * degree of freedom is constant.
923  *
924  * There are two cases:
925  *
926  * If the matrix is general: the full columns and rows of the blocks are stored
927  * in Lvalues and Uvalues.
928  * - The local data of the current process which are in remote blocks after
929  * the permutation need be to sent to the owner process. The data is stored
930  * in sendA if it is sent for the column only, in sendAt if it is sent for
931  * the row only and in sendAAt if it is sent for the row and column.
932  * - The local data of the current process which are in local column blocks
933  * after the permutation need to be added in globcol.
934  *
935  * If the matrix is Symmetric or Hermitian: only the full columns of the blocks
935  * are stored in Lvalues (and Uvalues = Lvalues). Only one half of the spm
936  * is stored (the lower or upper triangular half), therefore we need to
937  * duplicate the lower (or upper) data to fill the upper (or lower) half of
938  * the matrix in the blocks.
940  * - The local data of the current process which are in remote blocks after
940  * the permutation need to be sent to the owner process. The data is stored
941  * in sendA if it is sent for the lower (or upper) half of the column, in
943  * sendAt if it is sent for the upper (or lower) half of the column and in
944  * sendAAt if it is sent for both the lower and upper half of the column.
945  * The diagonal values are stored in sendA only.
946  * - The local data of the current process which are in column blocks after
947  * the permutation need to be added in globcol twice: once for the lower
948  * half and once for the upper half. The diagonal values need to be added
949  * only once.
950  *
951  *******************************************************************************
952  *
953  * @param[in] spm
954  * The initial sparse matrix in the spm format.
955  *
956  * @param[in] ord
957  * The ordering which needs to be applied on the spm to generate the
958  * block csc.
959  *
960  * @param[in] col2cblk
961  * The array which contains the repartition of the matrix columns
962  * into the block structure.
963  *
964  * @param[out] globcol
965  * The array which contains, for each column, its beginning in the
966  * spm->colptr.
967  *
968  * @param[in,out] bcsc_comm
969  * On entry, the initialised bcsc_comm structure. On exit, the
970  * bcsc_handle_comm structure which contains the amount of data to
971  * send to the other processors.
972  *
973  *******************************************************************************/
974 static inline void
975 bcsc_init_global_coltab_dst_cdof( const spmatrix_t *spm,
976  const pastix_order_t *ord,
977  const pastix_int_t *col2cblk,
978  pastix_int_t *globcol,
979  bcsc_handle_comm_t *bcsc_comm )
980 {
981  pastix_int_t *colptr = spm->colptr;
982  pastix_int_t *rowptr = spm->rowptr;
983  pastix_int_t *loc2glob = spm->loc2glob;
984  pastix_int_t dof = spm->dof;
985  pastix_int_t baseval = spm->baseval;
986  bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
987  bcsc_exch_comm_t *data_sendA, *data_sendAt, *data_sendAAt;
988  pastix_int_t frow, lrow;
989  pastix_int_t il, jl, ig, jg, igp, jgp;
990  int sym = (spm->mtxtype == SpmSymmetric) || (spm->mtxtype == SpmHermitian);
991  pastix_int_t ownerj, owneri;
992 
993  assert( dof > 0 );
994 
995  /* Goes through the columns of spm. */
996  for ( jl = 0; jl < spm->n; jl++, colptr++, loc2glob++ ) {
997  jg = *loc2glob - baseval;
998  jgp = ord->permtab[jg];
999 
1000  frow = colptr[0] - baseval;
1001  lrow = colptr[1] - baseval;
1002  assert( (lrow - frow) >= 0 );
1003 
1004  ownerj = col2cblk[jgp * dof];
1005 
1006  /* The column jp belongs to another process. */
1007  if ( ownerj < 0 ) {
1008  ownerj = - ownerj - 1;
1009  data_comm = bcsc_comm->data_comm + ownerj;
1010  data_sendA = &( data_comm->sendA );
1011 
1012  /* Goes through the rows of jl. */
1013  for ( il = frow; il < lrow; il++ ) {
1014  ig = rowptr[il] - baseval;
1015 
1016  /*
1017  * The diagonal values (ip, jp) belong to the same process.
1018  * They are sent to ownerj in the sym case, for A only.
1019  */
1020  if ( sym && ( ig == jg ) ) {
1021  data_sendA->size.idxcnt += 2;
1022  data_sendA->size.valcnt += dof * dof;
1023  continue;
1024  }
1025 
1026  igp = ord->permtab[ig];
1027  owneri = col2cblk[igp* dof];
1028 
1029  /* The row ip belongs to another process. */
1030  if ( owneri < 0 ) {
1031  owneri = - owneri - 1;
1032  data_comm = bcsc_comm->data_comm + owneri;
1033 
1034  /*
1035  * The row ip and the column jp belong to the same remote process.
1036  * They are sent to owneri for AAt in the general case.
1037  */
1038  if ( owneri == ownerj ) {
1039  data_sendAAt = &( data_comm->sendAAt );
1040 
1041  data_sendAAt->size.idxcnt += 2;
1042  data_sendAAt->size.valcnt += dof * dof;
1043  }
1044  /*
1045  * The values (ip, jp) belong to different processes.
1046  * They are sent to owneri for At and to ownerj for A.
1047  */
1048  else {
1049  data_sendAt = &( data_comm->sendAt );
1050 
1051  data_sendAt->size.idxcnt += 2;
1052  data_sendAt->size.valcnt += dof * dof;
1053 
1054  data_sendA->size.idxcnt += 2;
1055  data_sendA->size.valcnt += dof * dof;
1056  }
1057  }
1058  /* The row ip is local. */
1059  else {
1060  /*
1061  * The values (ip, jp) belong to ownerj.
1062  * They are sent to ownerj for A.
1063  */
1064  data_sendA->size.idxcnt += 2;
1065  data_sendA->size.valcnt += dof * dof;
1066  /*
1067  * The values (ip, jp) are local.
1068  * In the sym case they are added to globcol.
1069  */
1070  if ( sym ) {
1071  globcol[igp] += dof;
1072  }
1073  }
1074  }
1075  }
1076  /* The column jp is local. */
1077  else {
1078  /* The column is added to globcol. */
1079  globcol[jgp] += dof * ( lrow - frow );
1080 
1081  /* Goes through the rows of j. */
1082  for ( il = frow; il < lrow; il++ ) {
1083  ig = rowptr[il] - baseval;
1084 
1085  /*
1086  * The diagonal values (ip, jp) have already been
1087  * added to globcol in the sym case.
1088  */
1089  if ( sym && ( ig == jg ) ) {
1090  continue;
1091  }
1092 
1093  igp = ord->permtab[ig];
1094  owneri = col2cblk[igp* dof];
1095 
1096  /* The row ip belongs to another process. */
1097  if ( owneri < 0 ) {
1098  owneri = - owneri - 1;
1099  data_comm = bcsc_comm->data_comm + owneri;
1100 
1101  /*
1102  * The values (ip, jp) belong to owneri.
1103  * They are sent to owneri for At.
1104  */
1105  data_sendAt = &( data_comm->sendAt );
1106 
1107  data_sendAt->size.idxcnt += 2;
1108  data_sendAt->size.valcnt += dof * dof;
1109  }
1110  else {
1111  /*
1112  * The values (ip, jp) are local.
1113  * In the sym case they are added to globcol.
1114  */
1115  if ( sym ) {
1116  globcol[igp] += dof;
1117  }
1118  }
1119  }
1120  }
1121  }
1122 
1123  return;
1124 }
1125 
1126 /**
1127  *******************************************************************************
1128  *
1129  * @brief Initializes the coltab of a block csc matrix. The coltab corresponds to
1130  * the number of rows (expanded) per column (non-expanded). This routine
1131  * is called when the matrix is distributed in the memory and the matrix's
1132  * degree of freedom is variable.
1133  *
1134  * DOES NOT CURRENTLY WORK
1135  *
1136  *******************************************************************************
1137  *
1138  * @param[in] spm
1139  * The initial sparse matrix in the spm format.
1140  *
1141  * @param[in] ord
1142  * The ordering which needs to be applied on the spm to generate the
1143  * block csc.
1144  *
1145  * @param[in] col2cblk
1146  * The array which contains the repartition of the matrix columns
1147  * into the block structure.
1148  *
1149  * @param[out] globcol
1150  * The array which contains, for each column, its beginning in the
1151  * spm->colptr.
1152  *
1153  * @param[out] bcsc_comm
1154  * The bcsc_handle_comm structure which contains the amount of
1155  * data to send to the other processors.
1156  *
1157  *******************************************************************************/
1158 static inline void
1159 bcsc_init_global_coltab_dst_vdof( __attribute__((unused)) const spmatrix_t *spm,
1160  __attribute__((unused)) const pastix_order_t *ord,
1161  __attribute__((unused)) const pastix_int_t *col2cblk,
1162  __attribute__((unused)) pastix_int_t *globcol,
1163  __attribute__((unused)) bcsc_handle_comm_t *bcsc_comm )
1164 {
1165  // pastix_int_t *colptr = spm->colptr;
1166  // pastix_int_t *rowptr = spm->rowptr;
1167  // pastix_int_t *loc2glob = spm->loc2glob;
1168  // pastix_int_t *dofs = spm->dofs;
1169  // pastix_int_t dof = spm->dof;
1170  // pastix_int_t baseval = spm->baseval;
1171  // pastix_int_t frow, lrow;
1172  // pastix_int_t k, j, ig, jg, igp, jgp;
1173  // pastix_int_t dofj, dofi;
1174  // int sym = (spm->mtxtype == SpmSymmetric) || (spm->mtxtype == SpmHermitian);
1175 
1176  // assert( dof <= 0 );
1177 
1178  // for ( j=0; j<spm->n; j++, colptr++, loc2glob++ )
1179  // {
1180  // jg = *loc2glob - baseval;
1181  // jgp = ord->permtab[jg];
1182  // dofj = dofs[jg+1] - dofs[jg];
1183 
1184  // frow = colptr[0] - baseval;
1185  // lrow = colptr[1] - baseval;
1186  // assert( (lrow - frow) >= 0 );
1187 
1188  // jgpe = ...;
1189  // ownerj = col2cblk[jgpe]; // FAUX
1190  // localj = ( ownerj >= 0 );
1191  // ownerj = - ownerj - 1;
1192 
1193  // for ( k=frow; k<lrow; k++ )
1194  // {
1195  // ig = rowptr[k] - baseval;
1196  // dofi = dofs[ig+1] - dofs[ig];
1197 
1198  // if ( localj ) {
1199  // /* The column is local */
1200  // globcol[jgp] += dofi;
1201  // }
1202  // else {
1203  // /* The column is remote */
1204  // //update_counter_tosend( ownerj, 1 /* Nbr Elt */, dofi /* Nbr values */ );
1205  // }
1206 
1207  // if ( sym && (ig != jg) ) {
1208  // igp = ord->permtab[ig];
1209  // igpe = ...;
1210  // owneri = col2cblk[igpe]; // FAUX
1211 
1212  // if ( owneri >= 0 ) {
1213  // globcol[igp] += dofj;
1214  // }
1215  // else {
1216  // owneri = - owneri - 1;
1217  // //update_counter_tosend( owneri, 1 /* Nbr Elt */, dofj /* Nbr values */ );
1218  // }
1219  // }
1220  // }
1221  // }
1222 
1223  return;
1224 }
1225 
1226 /**
1227  *******************************************************************************
1228  *
1229  * @brief Exchanges the indexes with the other processors.
1230  *
1231  *******************************************************************************
1232  *
1233  * @param[in,out] bcsc_comm
1234  * The bcsc_handle_comm structure which contains the data the current
1235  * processor has to send to the other processors on entry. On exit,
1236  * the structure is updated with the received data from the other
1237  * processors.
1238  *
1239  *******************************************************************************/
1240 void
1241 bcsc_exchange_indexes( bcsc_handle_comm_t *bcsc_comm )
1242 {
1243  pastix_int_t clustnbr = bcsc_comm->clustnbr;
1244  pastix_int_t clustnum = bcsc_comm->clustnum;
1245  bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
1246  bcsc_proc_comm_t *data_local = bcsc_comm->data_comm + clustnum;
1247  bcsc_exch_comm_t *sendA_local = &( data_local->sendA );
1248  bcsc_exch_comm_t *sendAt_local = &( data_local->sendAt );
1249  bcsc_exch_comm_t *sendAAt_local = &( data_local->sendAAt );
1250  pastix_int_t counter_req = 0;
1251  pastix_int_t cntA = 0;
1252  pastix_int_t cntAt = 0;
1253  pastix_int_t cntAAt = 0;
1254  pastix_int_t idx_cnt_A[clustnbr];
1255  pastix_int_t idx_cnt_At[clustnbr];
1256  pastix_int_t idx_cnt_AAt[clustnbr];
1257  MPI_Status statuses[(clustnbr-1)*BCSC_COMM_NBR];
1258  MPI_Request requests[(clustnbr-1)*BCSC_COMM_NBR];
1259  bcsc_proc_comm_t *data_send, *data_recv;
1260  bcsc_exch_comm_t *send;
1261  bcsc_data_amount_t *recv;
1262  pastix_int_t c_send, c_recv, k;
1263 
1264  bcsc_allocate_buf( bcsc_comm, PastixTagMemRecvIdx );
1265 
1266  for ( k = 0; k < clustnbr; k++ ) {
1267  if ( k == clustnum ) {
1268  idx_cnt_A[k] = 0;
1269  idx_cnt_At[k] = 0;
1270  idx_cnt_AAt[k] = 0;
1271  continue;
1272  }
1273  idx_cnt_A[ k ] = cntA;
1274  cntA += data_comm[k].recvA.idxcnt;
1275  idx_cnt_At[ k ] = cntAt;
1276  cntAt += data_comm[k].recvAt.idxcnt;
1277  idx_cnt_AAt[ k ] = cntAAt;
1278  cntAAt += data_comm[k].recvAAt.size.idxcnt;
1279  }
1280 
1281  c_send = (clustnum+1) % clustnbr;
1282  c_recv = (clustnum-1+clustnbr) % clustnbr;
1283  for ( k = 0; k < clustnbr-1; k++ ) {
1284  data_send = data_comm + c_send;
1285  data_recv = data_comm + c_recv;
1286  if ( c_send == clustnum ) {
1287  continue;
1288  }
1289 
1290  /* Posts the receptions of the indexes. */
1291  recv = &( data_recv->recvA );
1292  if ( recv->idxcnt != 0 ) {
1293  MPI_Irecv( sendA_local->idxbuf + idx_cnt_A[c_recv], recv->idxcnt,
1294  PASTIX_MPI_INT, c_recv, PastixTagIndexesA, bcsc_comm->comm,
1295  &requests[counter_req++] );
1296  }
1297  recv = &( data_recv->recvAt );
1298  if ( recv->idxcnt != 0 ) {
1299  MPI_Irecv( sendAt_local->idxbuf + idx_cnt_At[c_recv], recv->idxcnt,
1300  PASTIX_MPI_INT, c_recv, PastixTagIndexesAt, bcsc_comm->comm,
1301  &requests[counter_req++] );
1302  }
1303  recv = &( data_recv->recvAAt.size );
1304  if ( recv->idxcnt != 0 ) {
1305  MPI_Irecv( sendAAt_local->idxbuf + idx_cnt_AAt[c_recv], recv->idxcnt,
1306  PASTIX_MPI_INT, c_recv, PastixTagIndexesAAt, bcsc_comm->comm,
1307  &requests[counter_req++] );
1308  }
1309 
1310  /* Posts the emissions of the indexes. */
1311  send = &( data_send->sendA );
1312  if ( send->size.idxcnt != 0 ) {
1313  MPI_Isend( send->idxbuf, send->size.idxcnt, PASTIX_MPI_INT, c_send,
1314  PastixTagIndexesA, bcsc_comm->comm, &requests[counter_req++] );
1315  }
1316  send = &( data_send->sendAt );
1317  if ( send->size.idxcnt != 0 ) {
1318  MPI_Isend( send->idxbuf, send->size.idxcnt, PASTIX_MPI_INT, c_send,
1319  PastixTagIndexesAt, bcsc_comm->comm, &requests[counter_req++] );
1320  }
1321  send = &( data_send->sendAAt );
1322  if ( send->size.idxcnt != 0 ) {
1323  MPI_Isend( send->idxbuf, send->size.idxcnt, PASTIX_MPI_INT, c_send,
1324  PastixTagIndexesAAt, bcsc_comm->comm, &requests[counter_req++] );
1325  }
1326  c_send = (c_send+1) % clustnbr;
1327  c_recv = (c_recv-1+clustnbr) % clustnbr;
1328  }
1329 
1330  MPI_Waitall( counter_req, requests, statuses );
1331 }
1332 
1333 /**
1334  *******************************************************************************
1335  *
1336  * @brief Updates globcol with the received indexes.
1337  *
1338  *******************************************************************************
1339  *
1340  * @param[in] spm
1341  * The initial sparse matrix in the spm format.
1342  *
1343  * @param[in] ord
1344  * The ordering which needs to be applied on the spm to generate the
1345  * block csc.
1346  *
1347  * @param[out] globcol
1348  * The array which contains, for each column, its beginning in the
1349  * spm->colptr. This array is updated with the data received from the
1350  * other processors.
1351  *
1352  * @param[in] bcsc_comm
1353  * The bcsc_handle_comm structure which contains the received data
1354  * from the other processors.
1355  *
1356  *******************************************************************************/
1357 static inline void
1358 bcsc_update_globcol( const spmatrix_t *spm,
1359  const pastix_order_t *ord,
1360  pastix_int_t *globcol,
1361  bcsc_handle_comm_t *bcsc_comm )
1362 {
1363  pastix_int_t *dofs = spm->dofs;
1364  pastix_int_t dof = spm->dof;
1365  pastix_int_t clustnum = bcsc_comm->clustnum;
1366  bcsc_proc_comm_t *data_local = bcsc_comm->data_comm + clustnum;
1367  bcsc_exch_comm_t *sendA_local = &( data_local->sendA );
1368  bcsc_exch_comm_t *sendAt_local = &( data_local->sendAt );
1369  bcsc_exch_comm_t *sendAAt_local = &( data_local->sendAAt );
1370  pastix_int_t k, igp, jgp, jg, ig, baseval;
1371  pastix_int_t *indexes_A;
1372  pastix_int_t *indexes_At;
1373  pastix_int_t *indexes_AAt;
1374 
1375  assert( ord->baseval == 0 );
1376  baseval = ord->baseval;
1377 
1378  /* Updates globcol. */
1379  indexes_A = sendA_local->idxbuf;
1380  indexes_At = sendAt_local->idxbuf;
1381  indexes_AAt = sendAAt_local->idxbuf;
1382 
1383  /* Goes through indexes_A. */
1384  for ( k = 0; k < data_local->recvA.idxcnt; k += 2, indexes_A += 2 ) {
1385  igp = indexes_A[0];
1386  jgp = indexes_A[1];
1387  ig = ord->peritab[igp] - baseval;
1388 
1389  /* Adds the values (igp, jgp) to globcol. */
1390  globcol[jgp] += ( dof < 0 ) ? dofs[ ig+1 ] - dofs[ig] : dof;
1391  }
1392 
1393  /* Goes through indexes_At. */
1394  if ( spm->mtxtype != SpmGeneral ) {
1395  for ( k = 0; k < data_local->recvAt.idxcnt; k += 2, indexes_At += 2 ) {
1396  igp = indexes_At[0];
1397  jgp = indexes_At[1];
1398  jg = ord->peritab[jgp] - baseval;
1399 
1400  /* Adds the values (igp, jgp) to globcol. */
1401  globcol[igp] += ( dof < 0 ) ? dofs[ jg+1 ] - dofs[jg] : dof;
1402  }
1403  }
1404 
1405  /* Goes through indexes_AAt. */
1406  for ( k = 0; k < data_local->recvAAt.size.idxcnt; k += 2, indexes_AAt += 2 ) {
1407  igp = indexes_AAt[0];
1408  jgp = indexes_AAt[1];
1409  ig = ord->peritab[igp] - baseval;
1410  jg = ord->peritab[jgp] - baseval;
1411 
1412  /* Adds the values (igp, jgp) to globcol. */
1413  globcol[jgp] += ( dof < 0 ) ? dofs[ ig+1 ] - dofs[ig] : dof;
1414 
1415  if ( spm->mtxtype != SpmGeneral ) {
1416  /* Adds the values (igp, jgp) twice to globcol if sym. */
1417  globcol[igp] += ( dof < 0 ) ? dofs[ jg+1 ] - dofs[jg] : dof;
1418  }
1419  }
1420 }
1421 #endif
1422 
1423 /**
1424  *******************************************************************************
1425  *
1426  * @brief Initializes the coltab of a block csc matrix. The coltab corresponds to
1427  * the number of rows (expanded) per column (non-expanded). This routine
1428  * calls bcsc_init_global_coltab_[shm,dst]_[c,v]dof according to the way
1429  * the matrix is stored and whether the degree of freedom of the matrix is
1430  * constant or variable. If the matrix is distributed in the memory, this
1431  * function also calls the routines which exchange the amount of data for
1432  * the communication, store the indexes and values to send and exchange
1433  * the indexes.
1434  *
1435  *******************************************************************************
1436  *
1437  * @param[in] spm
1438  * The initial sparse matrix in the spm format.
1439  *
1440  * @param[in] ord
1441  * The ordering which needs to be applied on the spm to generate the
1442  * block csc.
1443  *
1444  * @param[in] solvmtx
1445  * The solver matrix structure which describes the data distribution.
1446  *
1447  * @param[in] col2cblk
1448  * The array which contains the repartition of the matrix columns
1449  * into the block structure.
1450  *
1451  * @param[in,out] bcsc_comm
1452  * The handle_comm_structure updated with the amount of data the current
1453  * processor has to send to the other processors if PASTIX_WITH_MPI = ON
1454  * and the matrix is distributed in memory. If it is not the case,
1455  * bcsc_comm = NULL.
1456  *
1457  *******************************************************************************
1458  *
1459  * @returns The array which contains, for each column, its beginning in the
1460  * spm->colptr.
1461  *
1462  *******************************************************************************/
1463 static inline pastix_int_t*
1464 bcsc_init_global_coltab( const spmatrix_t *spm,
1465  const pastix_order_t *ord,
1466  const SolverMatrix *solvmtx,
1467  const pastix_int_t *col2cblk,
1468  bcsc_handle_comm_t *bcsc_comm )
1469 {
1470  spm_int_t *globcol;
1471 
1472  /*
1473  * Allocates and initializes globcol which contains the number of elements in
1474  * each column of the input matrix.
1475  * Globcol is equivalent to the classic colptr for the internal blocked
1476  * csc. The blocked csc integrates the permutation computed within order
1477  * structure.
1478  */
1479  MALLOC_INTERN( globcol, spm->gN+1, pastix_int_t );
1480  memset( globcol, 0, (spm->gN+1) * sizeof(pastix_int_t) );
1481 
1482  if ( bcsc_comm == NULL ) {
1483  if ( spm->dof > 0 ) {
1484  bcsc_init_global_coltab_shm_cdof( spm, ord, globcol );
1485  }
1486  else {
1487  bcsc_init_global_coltab_shm_vdof( spm, ord, globcol );
1488  }
1489  }
1490 #if defined(PASTIX_WITH_MPI)
1491  else {
1492  if ( spm->dof > 0 ) {
1493  bcsc_init_global_coltab_dst_cdof( spm, ord, col2cblk, globcol, bcsc_comm );
1494  }
1495  else {
1496  bcsc_init_global_coltab_dst_vdof( spm, ord, col2cblk, globcol, bcsc_comm );
1497  }
1498 
1499  /* Exchanges the amount of data which will be sent and received. */
1500  bcsc_exchange_amount_of_data( bcsc_comm );
1501 
1502  /* Stores the indexes and values the current processor has to send to the others. */
1503  switch( spm->flttype ) {
1504  case SpmFloat:
1505  bcsc_sstore_data( spm, ord, col2cblk, bcsc_comm );
1506  break;
1507  case SpmDouble:
1508  bcsc_dstore_data( spm, ord, col2cblk, bcsc_comm );
1509  break;
1510  case SpmComplex32:
1511  bcsc_cstore_data( spm, ord, col2cblk, bcsc_comm );
1512  break;
1513  case SpmComplex64:
1514  bcsc_zstore_data( spm, ord, col2cblk, bcsc_comm );
1515  break;
1516  case SpmPattern:
1517  default:
1518  fprintf(stderr, "bcsc_init: Error unknown floating type for input spm\n");
1519  }
1520 
1521  /* Exchanges the indexes and updates globcol with the received indexes. */
1522  bcsc_exchange_indexes( bcsc_comm );
1523  bcsc_update_globcol( spm, ord, globcol, bcsc_comm );
1524 
1525 #if !defined(NDEBUG)
1526  /* Check that globcol contains the right information. */
1527  if ( spm->dof > 0 ) {
1528  pastix_int_t ig, ip, ipe, dofi;
1529  pastix_int_t nnzl = 0;
1530  pastix_int_t nnzg = 0;
1531  pastix_int_t nnz;
1532  for( ig=0; ig<spm->gN; ig++ ) {
1533  ip = ord->permtab[ig];
1534  ipe = ( spm->dof > 0 ) ? ip * spm->dof : spm->dofs[ ig ] - spm->baseval;
1535  if ( col2cblk[ipe] < 0 ) {
1536  continue;
1537  }
1538 
1539  dofi = ( spm->dof > 0 ) ? spm->dof: spm->dofs[ig+1] - spm->dofs[ig];
1540  nnzl += globcol[ip] * dofi;
1541  }
1542  MPI_Allreduce( &nnzl, &nnzg, 1, PASTIX_MPI_INT, MPI_SUM, spm->comm );
1543 
1544  if ( spm->mtxtype != SpmGeneral ) {
1545  /*
1546  * We can't check the exact number of elements if some diagonal
1547  * values are missing (=0).
1548  */
1549  nnz = spm->gnnzexp * 2;
1550  assert( nnzg <= nnz );
1551  nnz = nnz - (spm->gN * spm->dof * spm->dof);
1552  assert( nnzg >= nnz );
1553  }
1554  else {
1555  nnz = spm->gnnzexp;
1556  assert( nnzg == nnz );
1557  }
1558  }
1559 #endif
1560  }
1561 
1562 #endif
1563 
1564  (void)solvmtx;
1565  (void)col2cblk;
1566  return globcol;
1567 }
1568 
1569 /**
1570  *******************************************************************************
1571  *
1572  * @ingroup bcsc_internal
1573  *
1574  * @brief Initializes the coltab of a block csc matrix. The coltab corresponds
1575  * to the number of rows (expanded) per column (non-expanded). If the
1576  * matrix is distributed in the memory, this function also calls the
1577  * routines which exchange the amount of data for the communication,
1578  * store the indexes and values to send and exchange the indexes.
1579  *
1580  *******************************************************************************
1581  *
1582  * @param[in] spm
1583  * The spm structure that stores the dofs.
1584  *
1585  * @param[in] ord
1586  * The ordering which needs to be applied on the spm to generate the
1587  * block csc.
1588  *
1589  * @param[in] solvmtx
1590  * The solver matrix structure which describes the data distribution.
1591  *
1592  * @param[inout] bcsc
1593  * On entry, the pointer to an allocated bcsc.
1594  * On exit, the bcsc stores the initialized coltab split per block
1595  * corresponding to the input spm with the permutation applied
1596  * and grouped accordingly to the distribution described in solvmtx.
1597  *
1598  *******************************************************************************
1599  *
1600  * @return The number of non zero unknowns in the matrix.
1601  *
1602  *******************************************************************************/
1603 pastix_int_t
1604 bcsc_init_coltab( const spmatrix_t *spm,
1605  const pastix_order_t *ord,
1606  const SolverMatrix *solvmtx,
1607  pastix_bcsc_t *bcsc )
1608 {
1609  SolverCblk *cblk;
1610  bcsc_cblk_t *blockcol;
1611  pastix_int_t *dofshift = NULL;
1612  pastix_int_t *globcol = NULL;
1613  pastix_int_t cblknum, bcscnum, iter, idxcol, nodeidx, colsize;
1614 
1615  bcsc->cscfnbr = solvmtx->cblknbr - solvmtx->faninnbr - solvmtx->recvnbr;
1616  MALLOC_INTERN( bcsc->cscftab, bcsc->cscfnbr, bcsc_cblk_t );
1617 
1618  /* Creates an array to convert expanded indexes to not expanded indexes. */
1619  dofshift = bcsc_init_dofshift( spm, ord );
1620 
1621  /* Computes the number of rows (expanded) per column (not expanded). */
1622  globcol = bcsc_init_global_coltab( spm, ord, solvmtx, bcsc->col2cblk, bcsc->bcsc_comm );
1623 
1624  idxcol = 0;
1625  bcscnum = 0;
1626  cblk = solvmtx->cblktab;
1627  blockcol = bcsc->cscftab;
1628  for ( cblknum = 0; cblknum < solvmtx->cblknbr; cblknum++, cblk++ ) {
1629  if ( cblk->cblktype & (CBLK_FANIN|CBLK_RECV) ) {
1630  continue;
1631  }
1632 
1633  blockcol->cblknum = cblknum;
1634  blockcol->colnbr = cblk_colnbr( cblk );
1635  assert( cblk->bcscnum == bcscnum );
1636  MALLOC_INTERN( blockcol->coltab, blockcol->colnbr + 1, pastix_int_t );
1637 
1638  blockcol->coltab[0] = idxcol;
1639  for ( iter = 0; iter < blockcol->colnbr; iter++ ) {
1640  nodeidx = dofshift[ cblk->fcolnum + iter ];
1641  colsize = globcol[nodeidx];
1642  //jgpe = cblk->fcolnum + iter;
1643  //jgp = dofshift[ jgpe ];
1644  //colsize = globcol[jgp];
1645  blockcol->coltab[iter+1] = blockcol->coltab[iter] + colsize;
1646  }
1647  idxcol = blockcol->coltab[blockcol->colnbr];
1648 
1649  blockcol++;
1650  bcscnum++;
1651  }
1652  assert( (blockcol - bcsc->cscftab) == bcsc->cscfnbr );
1653  assert( bcscnum == bcsc->cscfnbr );
1654 
1655  memFree_null( globcol );
1656  memFree_null( dofshift );
1657 
1658  if ( idxcol > 0 ) {
1659  MALLOC_INTERN( bcsc->rowtab, idxcol, pastix_int_t);
1660  MALLOC_INTERN( bcsc->Lvalues, idxcol * pastix_size_of( bcsc->flttype ), char );
1661  }
1662  else {
1663  bcsc->rowtab = NULL;
1664  bcsc->Lvalues = NULL;
1665  }
1666  bcsc->Uvalues = NULL;
1667 
1668  return idxcol;
1669 }
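
The coltab filling loop above turns the per-column counts of globcol into exclusive prefix sums, one coltab per block column. A minimal sketch of that transformation for a single block of four columns with the counts {2, 3, 1, 4}, using plain int arrays:

#include <assert.h>

int main( void )
{
    int counts[4] = { 2, 3, 1, 4 };  /* Rows per column, as in globcol. */
    int coltab[5];
    int iter;

    coltab[0] = 0;
    for ( iter = 0; iter < 4; iter++ ) {
        coltab[iter+1] = coltab[iter] + counts[iter];
    }

    /* coltab = { 0, 2, 5, 6, 10 }: column k owns rowtab[coltab[k] .. coltab[k+1]-1]. */
    assert( coltab[4] == 10 );
    return 0;
}
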
1670 
1671 /**
1672  *******************************************************************************
1673  *
1674  * @ingroup bcsc_internal
1675  *
1676  * @brief Restores the coltab array when it has been modified to initialize
1677  * the row and values arrays.
1678  *
1679  *******************************************************************************
1680  *
1681  * @param[inout] bcsc
1682  * On entry, the bcsc to restore.
1683  * On exit, the coltab array of the bcsc is restored to the correct
1684  * indexes.
1685  *
1686  *******************************************************************************/
1687 void
1688 bcsc_restore_coltab( pastix_bcsc_t *bcsc )
1689 {
1690  bcsc_cblk_t *blockcol;
1691  pastix_int_t index, iter, idxcol, idxcoltmp;
1692 
1693  idxcol = 0;
1694  blockcol = bcsc->cscftab;
1695  for ( index=0; index<bcsc->cscfnbr; index++, blockcol++ )
1696  {
1697  for ( iter=0; iter <= blockcol->colnbr; iter++ )
1698  {
1699  idxcoltmp = blockcol->coltab[iter];
1700  blockcol->coltab[iter] = idxcol;
1701  idxcol = idxcoltmp;
1702  }
1703  }
1704  return;
1705 }
1706 
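For illustration: take a block column with colnbr = 3 whose coltab was built by bcsc_init_coltab() as { 0, 2, 5, 9 }. The precision-specific initializers (bcsc_zinit() and friends) use coltab[j] as a write cursor while inserting the rows and values of column j, so once the arrays are filled the coltab reads { 2, 5, 9, 9 }, every entry now holding the end of its column. The loop above rewrites these values with a one-slot delay, producing { 0, 2, 5, 9 } again; and since idxcol is carried from one block column to the next, the restored offsets remain the global ones.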
1707 /**
1708  *******************************************************************************
1709  *
1710  * @brief Initializes a block csc.
1711  *
1712  *******************************************************************************
1713  *
1714  * @param[in] spm
1715  * The initial sparse matrix in the spm format.
1716  *
1717  * @param[in] solvmtx
1718  * The solver matrix structure which describes the data distribution.
1719  *
1720  * @param[inout] bcsc
1721  * On entry, the pointer to an allocated bcsc.
1722  * On exit, the bcsc stores the input spm with the permutation applied
1723  * and grouped according to the distribution described in solvmtx.
1724  *
1725  *******************************************************************************/
1726 void
1727 bcsc_init_struct( const spmatrix_t *spm,
1728  const SolverMatrix *solvmtx,
1729  pastix_bcsc_t *bcsc )
1730 {
1731  pastix_int_t *col2cblk = NULL;
1732 
1733  bcsc->mtxtype = spm->mtxtype;
1734  bcsc->flttype = spm->flttype;
1735  bcsc->gN = spm->gNexp;
1736  bcsc->n = solvmtx->nodenbr;
1737 
1738  /*
1739  * Creates the col2cblk array which associates each column to a cblk
1740  * (expanded).
1741  */
1742  col2cblk = bcsc_init_col2cblk( solvmtx, bcsc, spm );
1743  bcsc->col2cblk = col2cblk;
1744 
1745  /*
1746  * Initializes the communication handler of the bcsc, which is needed
1747  * only when the input spm is distributed.
1748  */
1749  bcsc->bcsc_comm = NULL;
1750  if ( spm->loc2glob != NULL ) {
1751  bcsc_handle_comm_init( solvmtx, bcsc );
1752  }
1753 }
1754 
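As a quick illustration of what bcsc_init_struct() leaves in place, the sketch below dumps the scalar fields it fills; example_bcsc_describe is a hypothetical helper, not part of the PaStiX API.

#include <stdio.h>
#include "bcsc/bcsc.h"

/* Hypothetical helper: prints the fields filled by bcsc_init_struct().
 * bcsc_comm is allocated only when the input spm is distributed
 * (spm->loc2glob != NULL). */
static void
example_bcsc_describe( const pastix_bcsc_t *bcsc )
{
    printf( "bcsc: gN = %ld (global expanded size), n = %ld (solvmtx->nodenbr),"
            " flttype = %d, mtxtype = %d, %s\n",
            (long)bcsc->gN, (long)bcsc->n,
            (int)bcsc->flttype, (int)bcsc->mtxtype,
            ( bcsc->bcsc_comm != NULL ) ? "distributed input spm"
                                        : "centralized or replicated input spm" );
}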
1755 /**
1756  *******************************************************************************
1757  *
1758  * @brief Cleans up the bcsc structure (symmetric of bcsc_init_struct()).
1759  *
1760  *******************************************************************************
1761  *
1762  * @param[inout] bcsc
1763  * On entry, the pointer to the initialized bcsc.
1764  * On exit, the bcsc is freed from the information initialized by
1765  * bcsc_init_struct().
1766  *
1767  *******************************************************************************/
1768 void
1769 bcsc_exit_struct( pastix_bcsc_t *bcsc )
1770 {
1771  if ( bcsc->col2cblk != NULL ) {
1772  memFree_null( bcsc->col2cblk );
1773  }
1774 
1775  if ( bcsc->bcsc_comm != NULL ) {
1776  bcsc_handle_comm_exit( bcsc->bcsc_comm );
1777  memFree_null( bcsc->bcsc_comm );
1778  }
1779 }
1780 
1781 /**
1782  *******************************************************************************
1783  *
1784  * @brief Initializes a block csc.
1785  *
1786  *******************************************************************************
1787  *
1788  * @param[in] spm
1789  * The initial sparse matrix in the spm format.
1790  *
1791  * @param[in] ord
1792  * The ordering which needs to be applied on the spm to generate the
1793  * block csc.
1794  *
1795  * @param[in] solvmtx
1796  * The solver matrix structure which describes the data distribution.
1797  *
1798  * @param[in] initAt
1799  * The flag indicating whether At has to be initialized:
1800  * - if initAt = 0, the matrix is symmetric or hermitian, which
1801  * means that Lvalues = Uvalues, so At does not need to be
1802  * initialized.
1803  * - if initAt = 1, the matrix is general, which means that
1804  * At needs to be initialized and computed.
1805  *
1806  * @param[inout] bcsc
1807  * On entry, the pointer to an allocated bcsc.
1808  * On exit, the bcsc stores the input spm with the permutation applied
1809  * and grouped according to the distribution described in solvmtx.
1810  *
1811  *******************************************************************************/
1812 static inline void
1813 bcsc_init( const spmatrix_t *spm,
1814  const pastix_order_t *ord,
1815  const SolverMatrix *solvmtx,
1816  pastix_int_t initAt,
1817  pastix_bcsc_t *bcsc )
1818 {
1819  pastix_int_t valuesize;
1820 
1821  bcsc_init_struct( spm, solvmtx, bcsc );
1822  valuesize = bcsc_init_coltab( spm, ord, solvmtx, bcsc );
1823 
1824  /*
1825  * Fills in the lower triangular part of the blocked csc with values and
1826  * rows. The upper triangular part is done later if required through LU
1827  * factorization.
1828  */
1829  switch( spm->flttype ) {
1830  case SpmFloat:
1831  bcsc_sinit( spm, ord, solvmtx, initAt, bcsc, valuesize );
1832  break;
1833  case SpmDouble:
1834  bcsc_dinit( spm, ord, solvmtx, initAt, bcsc, valuesize );
1835  break;
1836  case SpmComplex32:
1837  bcsc_cinit( spm, ord, solvmtx, initAt, bcsc, valuesize );
1838  break;
1839  case SpmComplex64:
1840  bcsc_zinit( spm, ord, solvmtx, initAt, bcsc, valuesize );
1841  break;
1842  case SpmPattern:
1843  default:
1844  fprintf(stderr, "bcsc_init: Error unknown floating type for input spm\n");
1845  }
1846 }
1847 
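The initAt convention documented above can be derived from the matrix type stored in the spm. A minimal sketch follows, assuming the standard SPM type values (SpmGeneral, SpmSymmetric, SpmHermitian); example_compute_initAt is a hypothetical helper, not a PaStiX routine.

#include <spm.h>

/* Hypothetical helper: At only has to be built for general matrices;
 * for symmetric or hermitian matrices Lvalues and Uvalues coincide. */
static int
example_compute_initAt( const spmatrix_t *spm )
{
    return ( spm->mtxtype == SpmGeneral ) ? 1 : 0;
}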
1848 /**
1849  *******************************************************************************
1850  *
1851  * @brief Initializes the block csc matrix.
1852  *
1853  * The block csc matrix is used to initialize the factorized matrix, and to
1854  * perform the matvec operations in refinement.
1855  *
1856  *******************************************************************************
1857  *
1858  * @param[in] spm
1859  * The initial sparse matrix in the spm format.
1860  *
1861  * @param[in] ord
1862  * The ordering which needs to be applied on the spm to generate the
1863  * block csc.
1864  *
1865  * @param[in] solvmtx
1866  * The solver matrix structure which describes the data distribution.
1867  *
1868  * @param[in] initAt
1869  * The flag indicating whether At has to be initialized:
1870  * - if initAt = 0, the matrix is symmetric or hermitian, which
1871  * means that Lvalues = Uvalues, so At does not need to be
1872  * initialized.
1873  * - if initAt = 1, the matrix is general, which means that
1874  * At needs to be initialized and computed.
1875  *
1876  * @param[inout] bcsc
1877  * On entry, the pointer to an allocated bcsc.
1878  * On exit, the bcsc stores the input spm with the permutation applied
1879  * and grouped according to the distribution described in solvmtx.
1880  *
1881  *******************************************************************************
1882  *
1883  * @return The time spent to initialize the bcsc structure.
1884  *
1885  *******************************************************************************/
1886 double
1887 bcscInit( const spmatrix_t *spm,
1888  const pastix_order_t *ord,
1889  const SolverMatrix *solvmtx,
1890  pastix_int_t initAt,
1891  pastix_bcsc_t *bcsc )
1892 {
1893  double time = 0.;
1894 
1895  assert( ord->baseval == 0 );
1896  assert( ord->vertnbr == spm->gN );
1897 
1898  clockStart(time);
1899  bcsc_init( spm, ord, solvmtx, initAt, bcsc );
1900  clockStop(time);
1901 
1902  return time;
1903 }
1904 
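A typical call sequence around bcscInit()/bcscExit() is sketched below. This is a minimal usage example, not code from the PaStiX sources: the construction of spm, ord and solvmtx as well as error handling are omitted, and initAt = 1 assumes a general matrix.

#include <spm.h>
#include "pastix/order.h"
#include "blend/solver.h"
#include "bcsc/bcsc.h"

/* Hypothetical driver showing the lifetime of a block csc. */
static void
example_bcsc_lifetime( const spmatrix_t     *spm,
                       const pastix_order_t *ord,
                       const SolverMatrix   *solvmtx )
{
    pastix_bcsc_t bcsc;
    double        time;

    /* Builds the permuted, block-distributed csc and returns the elapsed time. */
    time = bcscInit( spm, ord, solvmtx, 1, &bcsc );
    (void)time;

    /* ... coefficient initialization, factorization, refinement ... */

    /* Frees rowtab, Lvalues/Uvalues and the coltab arrays,
     * but not the bcsc variable itself. */
    bcscExit( &bcsc );
}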
1905 /**
1906  *******************************************************************************
1907  *
1908  * @brief Frees the block csc structure but does not free the bcsc pointer.
1909  *
1910  *******************************************************************************
1911  *
1912  * @param[inout] bcsc
1913  * The block csc matrix to free.
1914  *
1915  *******************************************************************************/
1916 void
1917 bcscExit( pastix_bcsc_t *bcsc )
1918 {
1919  bcsc_cblk_t *cblk;
1920  pastix_int_t i;
1921 
1922  if ( bcsc->cscftab == NULL ) {
1923  return;
1924  }
1925 
1926  for ( i=0, cblk=bcsc->cscftab; i < bcsc->cscfnbr; i++, cblk++ ) {
1927  memFree_null( cblk->coltab );
1928  }
1929 
1930  memFree_null( bcsc->cscftab );
1931  memFree_null( bcsc->rowtab );
1932 
1933  if ( (bcsc->Uvalues != NULL) &&
1934  (bcsc->Uvalues != bcsc->Lvalues) ) {
1935  memFree_null( bcsc->Uvalues );
1936  }
1937 
1938  memFree_null( bcsc->Lvalues );
1939 
1940  bcsc_exit_struct( bcsc );
1941 }