PaStiX Handbook  6.4.0
bcsc.c
1 /**
2  *
3  * @file bcsc.c
4  *
5  * @copyright 2004-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
6  * Univ. Bordeaux. All rights reserved.
7  *
8  * @version 6.4.0
9  * @author Mathieu Faverge
10  * @author Pierre Ramet
11  * @author Xavier Lacoste
12  * @author Theophile Terraz
13  * @author Tony Delarue
14  * @author Alycia Lisito
15  * @date 2024-07-05
16  *
17  **/
18 #include "common.h"
19 #include "pastix/order.h"
20 #include <spm.h>
21 #include "blend/solver.h"
22 #include "bcsc/bcsc.h"
23 
24 #include "bcsc/bcsc_z.h"
25 #include "bcsc/bcsc_c.h"
26 #include "bcsc/bcsc_d.h"
27 #include "bcsc/bcsc_s.h"
28 
29 #define BCSC_COMM_NBR 6
30 
31 /**
32  *******************************************************************************
33  *
34  * @ingroup bcsc_internal
35  *
36  * @brief Initializes the bcsc_handle_comm_t structure.
37  *
38  *******************************************************************************
39  *
40  * @param[in] solvmtx
41  * The solver matrix structure which describes the data distribution.
42  *
43  * @param[out] bcsc
44  * The internal block CSC structure whose bcsc_comm field is allocated and initialized.
45  *
46  *******************************************************************************/
47 void
48 bcsc_handle_comm_init( const SolverMatrix *solvmtx,
49  pastix_bcsc_t *bcsc )
50 {
51  pastix_int_t size = sizeof(bcsc_handle_comm_t) + (solvmtx->clustnbr-1)*sizeof(bcsc_proc_comm_t);
52  bcsc_handle_comm_t *bcsc_comm;
53 
54  bcsc->bcsc_comm = (bcsc_handle_comm_t *)malloc( size );
55  bcsc_comm = bcsc->bcsc_comm;
56 
57  bcsc_comm->flttype = bcsc->flttype;
58  bcsc_comm->clustnbr = solvmtx->clustnbr;
59  bcsc_comm->clustnum = solvmtx->clustnum;
60  bcsc_comm->comm = solvmtx->solv_comm;
61 
62  memset( bcsc_comm->data_comm, 0, bcsc_comm->clustnbr * sizeof(bcsc_proc_comm_t) );
63 }
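/*
 * Editor's note (illustrative sketch, not part of the original bcsc.c): the
 * allocation above appends (clustnbr-1) extra bcsc_proc_comm_t slots to the
 * handle, assuming bcsc_handle_comm_t ends with a data_comm[] array holding
 * at least one entry. The guarded snippet below only shows the intended
 * init/exit lifecycle; the BCSC_EXAMPLE_COMM_LIFECYCLE guard and the
 * bcsc_example_* name are hypothetical and never defined in PaStiX.
 */
#if defined(BCSC_EXAMPLE_COMM_LIFECYCLE)
static void
bcsc_example_comm_lifecycle( const SolverMatrix *solvmtx,
                             pastix_bcsc_t      *bcsc )
{
    /* Allocate and initialize the communication handle. */
    bcsc_handle_comm_init( solvmtx, bcsc );

    /* ... fill the send/recv counters and exchange the data ... */

    /* Release the per-process buffers, then the handle itself. */
    bcsc_handle_comm_exit( bcsc->bcsc_comm );
    memFree_null( bcsc->bcsc_comm );
}
#endif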
64 
65 /**
66  *******************************************************************************
67  *
68  * @ingroup bcsc_internal
69  *
70  * @brief Frees the bcsc_handle_comm pointers.
71  *
72  *******************************************************************************
73  *
74  * @param[inout] bcsc_comm
75  * The bcsc_handle_comm_t structure.
76  *
77  *******************************************************************************/
78 void
79 bcsc_handle_comm_exit( bcsc_handle_comm_t *bcsc_comm )
80 {
81  int c;
82  int clustnbr = bcsc_comm->clustnbr;
83  bcsc_proc_comm_t *data;
84 
85  for ( c = 0; c < clustnbr; c++ ) {
86  data = bcsc_comm->data_comm + c;
87 
88  if( data->sendA.idxbuf != NULL ) {
89  memFree_null( data->sendA.idxbuf );
90  }
91  if( data->sendA.valbuf != NULL ) {
92  memFree_null( data->sendA.valbuf );
93  }
94  if( data->sendAt.idxbuf != NULL ) {
95  memFree_null( data->sendAt.idxbuf );
96  }
97  if( data->sendAt.valbuf != NULL ) {
98  memFree_null( data->sendAt.valbuf );
99  }
100  if( data->sendAAt.idxbuf != NULL ) {
101  memFree_null( data->sendAAt.idxbuf );
102  }
103  if( data->sendAAt.valbuf != NULL ) {
104  memFree_null( data->sendAAt.valbuf );
105  }
106  if( data->recvAAt.idxbuf != NULL ) {
107  memFree_null( data->recvAAt.idxbuf );
108  }
109  if( data->recvAAt.valbuf != NULL ) {
110  memFree_null( data->recvAAt.valbuf );
111  }
112  }
113 }
114 
115 #if defined(PASTIX_WITH_MPI)
116 /**
117  *******************************************************************************
118  *
119  * @ingroup bcsc
120  *
121  * @brief Computes the maximum size of the sending index and value buffers.
122  *
123  *******************************************************************************
124  *
125  * @param[inout] bcsc_comm
126  * On entry, the initialized bcsc_comm structure.
127  * On exit, the fields max_idx and max_val of bcsc_comm are updated.
128  *
129  *******************************************************************************
130  *
131  * @retval PASTIX_SUCCESS
132  *
133  *******************************************************************************/
134 static inline int
135 bcsc_compute_max( bcsc_handle_comm_t *bcsc_comm )
136 {
137  bcsc_proc_comm_t *data = NULL;
138  bcsc_proc_comm_t *data_local = NULL;
139  pastix_int_t clustnbr = bcsc_comm->clustnbr;
140  pastix_int_t clustnum = bcsc_comm->clustnum;
141  pastix_int_t max_idx = 0;
142  pastix_int_t max_val = 0;
143  pastix_int_t idxsum_A = 0;
144  pastix_int_t valsum_A = 0;
145  pastix_int_t idxsum_At = 0;
146  pastix_int_t valsum_At = 0;
147  pastix_int_t idxsum_AAt = 0;
148  pastix_int_t valsum_AAt = 0;
149  pastix_int_t idxcnt_A, idxcnt_At, idxcnt_AAt, valcnt_A, valcnt_At, valcnt_AAt, c;
150 
151  /* Receives the amount of indexes and values. */
152  for ( c = 0; c < clustnbr; c++ ) {
153  data = bcsc_comm->data_comm + c;
154  if ( c == clustnum ) {
155  continue;
156  }
157 
158  idxcnt_A = data->recvA.idxcnt;
159  idxcnt_At = data->recvAt.idxcnt;
160  idxcnt_AAt = data->recvAAt.size.idxcnt;
161  valcnt_A = data->recvA.valcnt;
162  valcnt_At = data->recvAt.valcnt;
163  valcnt_AAt = data->recvAAt.size.valcnt;
164 
165  max_idx = pastix_imax( max_idx, idxcnt_A);
166  max_idx = pastix_imax( max_idx, idxcnt_At);
167  max_idx = pastix_imax( max_idx, idxcnt_AAt);
168  max_val = pastix_imax( max_val, valcnt_A);
169  max_val = pastix_imax( max_val, valcnt_At);
170  max_val = pastix_imax( max_val, valcnt_AAt);
171 
172  idxsum_A += idxcnt_A;
173  valsum_A += valcnt_A;
174  idxsum_At += idxcnt_At;
175  valsum_At += valcnt_At;
176  idxsum_AAt += idxcnt_AAt;
177  valsum_AAt += valcnt_AAt;
178  }
179 
180  data_local = bcsc_comm->data_comm + clustnum;
181  data_local->recvA.idxcnt = idxsum_A;
182  data_local->recvA.valcnt = valsum_A;
183  data_local->recvAt.idxcnt = idxsum_At;
184  data_local->recvAt.valcnt = valsum_At;
185  data_local->recvAAt.size.idxcnt = idxsum_AAt;
186  data_local->recvAAt.size.valcnt = valsum_AAt;
187 
188  assert( max_idx <= 2 * max_val );
189 
190  bcsc_comm->max_idx = max_idx;
191  bcsc_comm->max_val = max_val;
192 
193  return PASTIX_SUCCESS;
194 }
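/*
 * Editor's note (illustrative sketch, not part of the original bcsc.c):
 * max_idx and max_val bound the largest single reception, so a caller may
 * reuse one temporary buffer for all peers instead of allocating one per
 * peer. This is only one plausible use; the guard and the tmp_* names are
 * hypothetical.
 */
#if defined(BCSC_EXAMPLE_MAX_BUFFERS)
static void
bcsc_example_alloc_tmp_recv( const bcsc_handle_comm_t *bcsc_comm,
                             pastix_int_t            **tmp_idx,
                             void                    **tmp_val )
{
    /* One index buffer large enough for the biggest incoming index block. */
    MALLOC_INTERN( *tmp_idx, bcsc_comm->max_idx, pastix_int_t );

    /* One value buffer large enough for the biggest incoming value block. */
    MALLOC_INTERN( *tmp_val,
                   bcsc_comm->max_val * pastix_size_of( bcsc_comm->flttype ),
                   char );
}
#endif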
195 
196 /**
197  *******************************************************************************
198  *
199  * @ingroup bcsc
200  *
201  * @brief Allocates the sending buffers in bcsc_comm->data_comm. These buffers
202  * are filled with the sending values.
203  *
204  *******************************************************************************
205  *
206  * @param[inout] bcsc_comm
207  * On entry, the initialized bcsc_comm structure.
208  * On exit, the arrays of bcsc_comm->data_comm are allocated.
209  *
210  * @param[in] mode
211  * If PastixTagMemRecvIdx: allocates the receiving index buffers for A, At and AAt.
212  * If PastixTagMemSend: allocates the sending index and value buffers for A, At
213  * and AAt.
214  * If PastixTagMemRecvValAAt: allocates the receiving value buffers for AAt; it
215  * is used only if the spm is general.
216  *
217  *******************************************************************************
218  *
219  * @retval PASTIX_SUCCESS
220  *
221  *******************************************************************************/
222 int
223 bcsc_allocate_buf( bcsc_handle_comm_t *bcsc_comm,
224  bcsc_tag_e mode )
225 {
226  bcsc_proc_comm_t *data = NULL;
227  pastix_int_t clustnbr = bcsc_comm->clustnbr;
228  pastix_int_t clustnum = bcsc_comm->clustnum;
229  pastix_int_t c;
230  size_t size;
231 
232  if ( mode == PastixTagMemRecvIdx ) {
233  data = bcsc_comm->data_comm + clustnum;
234 
235  if ( ( data->recvA.idxcnt > 0 ) && ( data->sendA.idxbuf == NULL ) ) {
236  MALLOC_INTERN( data->sendA.idxbuf, data->recvA.idxcnt, pastix_int_t );
237  }
238 
239  if ( ( data->recvAt.idxcnt > 0 ) && ( data->sendAt.idxbuf == NULL ) ) {
240  MALLOC_INTERN( data->sendAt.idxbuf, data->recvAt.idxcnt, pastix_int_t );
241  }
242 
243  if ( ( data->recvAAt.size.idxcnt > 0 ) && ( data->sendAAt.idxbuf == NULL ) ) {
244  MALLOC_INTERN( data->sendAAt.idxbuf, data->recvAAt.size.idxcnt, pastix_int_t );
245  }
246  }
247 
248  if ( mode == PastixTagMemRecvValAAt ) {
249  for ( c = 0; c < clustnbr; c ++ ) {
250  data = bcsc_comm->data_comm + c;
251  if ( c == clustnum ) {
252  continue;
253  }
254  if ( ( data->recvAAt.size.valcnt > 0 ) && ( data->recvAAt.valbuf == NULL ) ) {
255  size = data->recvAAt.size.valcnt * pastix_size_of( bcsc_comm->flttype );
256  MALLOC_INTERN( data->recvAAt.valbuf, size, char );
257  }
258  }
259  }
260 
261  if ( mode == PastixTagMemSend ) {
262  for ( c = 0; c < clustnbr; c ++ ) {
263  data = bcsc_comm->data_comm + c;
264 
265  if ( c == clustnum ) {
266  continue;
267  }
268 
269  if ( ( data->sendA.size.idxcnt > 0 ) && ( data->sendA.idxbuf == NULL ) ) {
270  MALLOC_INTERN( data->sendA.idxbuf, data->sendA.size.idxcnt, pastix_int_t );
271  }
272  if ( ( data->sendA.size.valcnt > 0 ) && ( data->sendA.valbuf == NULL ) ) {
273  size = data->sendA.size.valcnt * pastix_size_of( bcsc_comm->flttype );
274  MALLOC_INTERN( data->sendA.valbuf, size, char );
275  }
276 
277  if ( ( data->sendAt.size.idxcnt > 0 ) && ( data->sendAt.idxbuf == NULL ) ) {
278  MALLOC_INTERN( data->sendAt.idxbuf, data->sendAt.size.idxcnt, pastix_int_t );
279  }
280  if ( ( data->sendAt.size.valcnt > 0 ) && ( data->sendAt.valbuf == NULL ) ) {
281  size = data->sendAt.size.valcnt * pastix_size_of( bcsc_comm->flttype );
282  MALLOC_INTERN( data->sendAt.valbuf, size, char );
283  }
284 
285  if ( ( data->sendAAt.size.idxcnt > 0 ) && ( data->sendAAt.idxbuf == NULL ) ) {
286  MALLOC_INTERN( data->sendAAt.idxbuf, data->sendAAt.size.idxcnt, pastix_int_t );
287  }
288  if ( ( data->sendAAt.size.valcnt > 0 ) && ( data->sendAAt.valbuf == NULL ) ) {
289  size = data->sendAAt.size.valcnt * pastix_size_of( bcsc_comm->flttype );
290  MALLOC_INTERN( data->sendAAt.valbuf, size, char );
291  }
292  }
293  }
294 
295  return PASTIX_SUCCESS;
296 }
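/*
 * Editor's note (illustrative sketch, not part of the original bcsc.c): a
 * hypothetical driver showing one possible ordering of the allocation modes
 * documented above. The guard, the function name and the general_spm flag
 * are assumptions introduced for the example only.
 */
#if defined(BCSC_EXAMPLE_ALLOC_ORDER)
static void
bcsc_example_allocate_all( bcsc_handle_comm_t *bcsc_comm,
                           int                 general_spm )
{
    /* Index buffers used to receive the remote (igp, jgp) couples. */
    bcsc_allocate_buf( bcsc_comm, PastixTagMemRecvIdx );

    /* Index and value buffers for everything this process has to send. */
    bcsc_allocate_buf( bcsc_comm, PastixTagMemSend );

    /* AAt value reception buffers are only needed for general matrices. */
    if ( general_spm ) {
        bcsc_allocate_buf( bcsc_comm, PastixTagMemRecvValAAt );
    }
}
#endif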
297 
298 /**
299  *******************************************************************************
300  *
301  * @ingroup bcsc
302  *
303  * @brief Frees the sending and receiving buffers in bcsc_comm->data_comm.
304  * These buffers are filled with the sending and receiving values.
305  *
306  *******************************************************************************
307  *
308  * @param[inout] bcsc_comm
309  * On entry, the initialized bcsc_comm structure.
310  * On exit, the arrays of bcsc_comm->data_comm are freed.
311  *
312  * @param[in] mode
313  * If PastixTagMemSendIdx: frees sending indexes A, At and AAt buffers.
314  * If PastixTagMemSendValA: frees sending values A buffers.
315  * If PastixTagMemSendValAt: frees sending values At buffers.
316  * If PastixTagMemSendValAAt: frees sending values AAt buffers.
317  * If PastixTagMemRecvIdxA: frees receiving indexes A buffers.
318  * If PastixTagMemRecvIdxAt: frees receiving indexes At buffers.
319  * If PastixTagMemRecvAAt: frees the receiving index and value AAt buffers
320  * (the values are used only if the spm is general).
321  *
322  *******************************************************************************
323  *
324  * @retval PASTIX_SUCCESS
325  *
326  *******************************************************************************/
327 int
328 bcsc_free_buf( bcsc_handle_comm_t *bcsc_comm,
329  bcsc_tag_e mode )
330 {
331  bcsc_proc_comm_t *data = NULL;
332  pastix_int_t clustnbr = bcsc_comm->clustnbr;
333  pastix_int_t clustnum = bcsc_comm->clustnum;
334  pastix_int_t c;
335 
336  if ( mode == PastixTagMemSendIdx ) {
337  for ( c = 0; c < clustnbr; c ++ ) {
338  data = bcsc_comm->data_comm + c;
339  if ( c == clustnum ) {
340  continue;
341  }
342  if ( data->sendA.idxbuf != NULL ) {
343  memFree_null( data->sendA.idxbuf );
344  }
345  if ( data->sendAt.idxbuf != NULL ) {
346  memFree_null( data->sendAt.idxbuf );
347  }
348  if ( data->sendAAt.idxbuf != NULL ) {
349  memFree_null( data->sendAAt.idxbuf );
350  }
351  }
352  }
353 
354  if ( mode == PastixTagMemSendValA ) {
355  for ( c = 0; c < clustnbr; c ++ ) {
356  data = bcsc_comm->data_comm + c;
357  if ( c == clustnum ) {
358  continue;
359  }
360  if ( data->sendA.valbuf != NULL ) {
361  memFree_null( data->sendA.valbuf );
362  }
363  }
364  }
365 
366  if ( mode == PastixTagMemSendValAt ) {
367  for ( c = 0; c < clustnbr; c ++ ) {
368  data = bcsc_comm->data_comm + c;
369  if ( c == clustnum ) {
370  continue;
371  }
372  if ( data->sendAt.valbuf != NULL ) {
373  memFree_null( data->sendAt.valbuf );
374  }
375  }
376  }
377 
378  if ( mode == PastixTagMemSendValAAt ) {
379  for ( c = 0; c < clustnbr; c ++ ) {
380  data = bcsc_comm->data_comm + c;
381  if ( c == clustnum ) {
382  continue;
383  }
384  if ( data->sendAAt.valbuf != NULL ) {
385  memFree_null( data->sendAAt.valbuf );
386  }
387  }
388  }
389 
390  if ( mode == PastixTagMemRecvIdxA ) {
391  data = bcsc_comm->data_comm + clustnum;
392  if ( data->sendA.idxbuf != NULL ) {
393  memFree_null( data->sendA.idxbuf );
394  }
395  }
396 
397  if ( mode == PastixTagMemRecvIdxAt ) {
398  data = bcsc_comm->data_comm + clustnum;
399  if ( data->sendAt.idxbuf != NULL ) {
400  memFree_null( data->sendAt.idxbuf );
401  }
402  }
403 
404  if ( mode == PastixTagMemRecvAAt ) {
405  data = bcsc_comm->data_comm + clustnum;
406  if ( data->sendAAt.idxbuf != NULL ) {
407  memFree_null( data->sendAAt.idxbuf );
408  }
409  if ( data->recvAAt.valbuf != NULL ) {
410  memFree_null( data->recvAAt.valbuf );
411  }
412  }
413 
414  return PASTIX_SUCCESS;
415 }
416 
417 /**
418  *******************************************************************************
419  *
420  * @ingroup bcsc_internal
421  *
422  * @brief Exchanges the amount of data the current processor will send to and
423  * receive from each processor.
424  *
425  *******************************************************************************
426  *
427  * @param[in] bcsc_comm
428  * The bcsc_handle_comm_t structure.
429  *
430  *******************************************************************************/
431 void
432 bcsc_exchange_amount_of_data( bcsc_handle_comm_t *bcsc_comm )
433 {
434  bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
435  pastix_int_t clustnbr = bcsc_comm->clustnbr;
436  pastix_int_t clustnum = bcsc_comm->clustnum;
437  bcsc_proc_comm_t *data_send = NULL;
438  bcsc_proc_comm_t *data_recv = NULL;
439  pastix_int_t counter_req = 0;
440  MPI_Status statuses[(clustnbr-1)*BCSC_COMM_NBR];
441  MPI_Request requests[(clustnbr-1)*BCSC_COMM_NBR];
442  bcsc_data_amount_t *sends, *recvs;
443  pastix_int_t c_send, c_recv, k;
444 
445  /* Exchanges the amount of indexes and values. */
446  c_send = (clustnum+1) % clustnbr;
447  c_recv = (clustnum-1+clustnbr) % clustnbr;
448  for ( k = 0; k < clustnbr-1; k++ ) {
449  data_send = data_comm + c_send;
450  data_recv = data_comm + c_recv;
451 
452  if ( c_send == clustnum ) {
453  continue;
454  }
455 
456  /* Exchanges the amount of indexes and values for A. */
457  sends = &( data_send->sendA.size );
458  recvs = &( data_recv->recvA );
459  MPI_Irecv( recvs, 2, PASTIX_MPI_INT, c_recv,
460  PastixTagCountA, bcsc_comm->comm, &requests[counter_req++] );
461 
462  MPI_Isend( sends, 2, PASTIX_MPI_INT, c_send,
463  PastixTagCountA, bcsc_comm->comm, &requests[counter_req++] );
464 
465  /* Exchanges the amount of indexes and values for At. */
466  sends = &( data_send->sendAt.size );
467  recvs = &( data_recv->recvAt );
468  MPI_Irecv( recvs, 2, PASTIX_MPI_INT, c_recv,
469  PastixTagCountAt, bcsc_comm->comm, &requests[counter_req++] );
470 
471  MPI_Isend( sends, 2, PASTIX_MPI_INT, c_send,
472  PastixTagCountAt, bcsc_comm->comm, &requests[counter_req++] );
473 
474  /* Exchanges the amount of indexes and values for AAt. */
475  sends = &( data_send->sendAAt.size );
476  recvs = &( data_recv->recvAAt.size );
477  MPI_Irecv( recvs, 2, PASTIX_MPI_INT, c_recv,
478  PastixTagCountAAt, bcsc_comm->comm, &requests[counter_req++] );
479 
480  MPI_Isend( sends, 2, PASTIX_MPI_INT, c_send,
481  PastixTagCountAAt, bcsc_comm->comm, &requests[counter_req++] );
482 
483  c_send = (c_send+1) % clustnbr;
484  c_recv = (c_recv-1+clustnbr) % clustnbr;
485  }
486 
487  MPI_Waitall( counter_req, requests, statuses );
488 
489  bcsc_compute_max( bcsc_comm );
490 
491  return;
492 }
493 #endif
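/*
 * Editor's note (illustrative sketch, not part of the original bcsc.c): the
 * exchange above walks the other processes in a ring, sending to increasing
 * ranks and receiving from decreasing ranks. The guarded helper below only
 * prints that schedule; its name and guard are hypothetical.
 */
#if defined(BCSC_EXAMPLE_RING_SCHEDULE)
#include <stdio.h>
/* For clustnbr = 4 and clustnum = 1 this prints (2,0), (3,3), (0,2). */
static void
bcsc_example_ring_schedule( pastix_int_t clustnbr,
                            pastix_int_t clustnum )
{
    pastix_int_t k;
    pastix_int_t c_send = (clustnum + 1) % clustnbr;
    pastix_int_t c_recv = (clustnum - 1 + clustnbr) % clustnbr;

    for ( k = 0; k < clustnbr - 1; k++ ) {
        printf( "step %ld: send to %ld, recv from %ld\n",
                (long)k, (long)c_send, (long)c_recv );
        c_send = (c_send + 1) % clustnbr;
        c_recv = (c_recv - 1 + clustnbr) % clustnbr;
    }
}
#endif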
494 
495 /**
496  *******************************************************************************
497  *
498  * @ingroup bcsc_internal
499  *
500  * @brief Creates the array which represents the repartition of each column
501  * in the block structure. The array size is spm->gNexp where:
502  * - col2cblk[k] = cblknum, with cblknum the index of the block column
503  * where the column k is stored.
504  * This routine is called when the matrix is in shared memory.
505  *
506  *******************************************************************************
507  *
508  * @param[in] solvmtx
509  * The solvmtx structure associated to the problem.
510  *
511  * @param[in,out] bcsc
512  * The internal block CSC structure.
513  * The number of local columns is updated.
514  *
515  *******************************************************************************
516  *
517  * @return The col2cblk array which gives the repartition of the solvmtx columns
518  * into the block structure.
519  *
520  *******************************************************************************/
521 pastix_int_t *
522 bcsc_init_col2cblk_shm( const SolverMatrix *solvmtx,
523  const pastix_bcsc_t *bcsc )
524 {
525  pastix_int_t j;
526  pastix_int_t cblknum;
527  pastix_int_t *col2cblk;
528 
529  /* Allocates the col2cblk. */
530  MALLOC_INTERN( col2cblk, bcsc->gN, pastix_int_t );
531  memset( col2cblk, 0xff, bcsc->gN * sizeof(pastix_int_t) );
532 
533  const SolverCblk *cblk = solvmtx->cblktab;
534  pastix_int_t cblknbr = solvmtx->cblknbr;
535  /* Goes through the blocks. */
536  for ( cblknum = 0; cblknum < cblknbr; cblknum++, cblk++ ) {
537  if ( cblk->cblktype & (CBLK_FANIN|CBLK_RECV) ) {
538  continue;
539  }
540  /*
541  * Goes through the columns of the block and adds the number of
542  * the block in col2cblk at the corresponding index.
543  */
544  for ( j = cblk->fcolnum; j <= cblk->lcolnum; j++ ) {
545  col2cblk[j] = cblknum;
546  }
547  }
548 
549  return col2cblk;
550 }
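/*
 * Editor's note (illustrative sketch, not part of the original bcsc.c): in
 * shared memory, col2cblk directly stores the local column block index of
 * every expanded, permuted column. The guarded helper below shows how such
 * an entry could be turned back into a SolverCblk pointer; its name and
 * guard are hypothetical.
 */
#if defined(BCSC_EXAMPLE_COL2CBLK_SHM)
static const SolverCblk *
bcsc_example_cblk_of_column( const SolverMatrix *solvmtx,
                             const pastix_int_t *col2cblk,
                             pastix_int_t        jgpe )
{
    pastix_int_t cblknum = col2cblk[jgpe];

    /* Columns skipped above (fanin/recv blocks) keep the 0xff initialization,
     * which reads back as a negative value. */
    return ( cblknum >= 0 ) ? solvmtx->cblktab + cblknum : NULL;
}
#endif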
551 
552 #if defined(PASTIX_WITH_MPI)
553 /**
554  *******************************************************************************
555  *
556  * @ingroup bcsc_internal
557  *
558  * @brief Creates the array which represents the repartition of each column
559  * in the block structure. The array size is spm->gNexp where:
560  * - col2cblk[k] = - (owner + 1) if the column is not stored in a local block
561  * - col2cblk[k] = cblknum, if the column k is stored in a local block, with
562  * cblknum the index of this block column.
563  * This routine is called when the matrix is in distributed memory.
564  *
565  *******************************************************************************
566  *
567  * @param[in] solvmtx
568  * The solvmtx structure associated to the problem.
569  *
570  * @param[in,out] bcsc
571  * The internal block CSC structure.
572  * The number of local columns is updated.
573  *
574  *******************************************************************************
575  *
576  * @return The col2cblk array which gives the repartition of the solvmtx columns
577  * into the block structure.
578  *
579  *******************************************************************************/
580 pastix_int_t *
581 bcsc_init_col2cblk_dst( const SolverMatrix *solvmtx,
582  const pastix_bcsc_t *bcsc )
583 {
584  pastix_int_t n, nr = 0;
585  pastix_int_t k, j, c;
586  pastix_int_t clustnum = solvmtx->clustnum;
587  pastix_int_t clustnbr = solvmtx->clustnbr;
588  pastix_int_t fcolnum, lcolnum, cblknum;
589  pastix_int_t *col2cblk;
590  pastix_int_t *col2cblk_bcast = NULL;
591 
592  /* Allocates the col2cblk. */
593  MALLOC_INTERN( col2cblk, bcsc->gN, pastix_int_t );
594  memset( col2cblk, 0xff, bcsc->gN * sizeof(pastix_int_t) );
595 
596  for( c = 0; c < clustnbr; c++ ) {
597  if ( c == clustnum ) {
598  const SolverCblk *cblk = solvmtx->cblktab;
599  pastix_int_t cblknbr = solvmtx->cblknbr;
600  pastix_int_t colcount;
601 
602  n = (solvmtx->cblknbr - solvmtx->faninnbr - solvmtx->recvnbr) * 2;
603 
604  /* Sends the size of data. */
605  MPI_Bcast( &n, 1, PASTIX_MPI_INT, c, solvmtx->solv_comm );
606 
607  if ( n == 0 ) {
608  continue;
609  }
610 
611  if ( n > nr ) {
612  pastix_int_t *tmp;
613  nr = n;
614  tmp = (pastix_int_t *)realloc( col2cblk_bcast, nr * sizeof(pastix_int_t) );
615  /* Protection for static analysis */
616  if ( pastix_unlikely( tmp == NULL ) ) {
617  pastix_print_error( "Error reallocating col2cblk_bcast\n" );
618  }
619  col2cblk_bcast = tmp;
620  }
621 
622  colcount = 0;
623  k = 0;
624  /* Goes through the blocks. */
625  for ( cblknum = 0; cblknum < cblknbr; cblknum++, cblk++ ) {
626  if ( cblk->cblktype & (CBLK_FANIN|CBLK_RECV) ) {
627  continue;
628  }
629  assert( col2cblk_bcast != NULL );
630 
631  /* Adds the first and last columns of the block in col2cblk_bcast. */
632  col2cblk_bcast[k] = cblk->fcolnum;
633  col2cblk_bcast[k+1] = cblk->lcolnum;
634  k += 2;
635  /*
636  * Goes through the columns of the block and adds the
637  * block number in col2cblk.
638  */
639  for ( j = cblk->fcolnum; j <= cblk->lcolnum; j++ ) {
640  colcount++;
641  col2cblk[j] = cblknum;
642  }
643  }
644  assert( colcount == bcsc->n );
645 
646  /* Sends the col2cblk_bcast. */
647  if ( n > 0 ) {
648  MPI_Bcast( col2cblk_bcast, n, PASTIX_MPI_INT, c, solvmtx->solv_comm );
649  }
650  }
651  else {
652  /* Receives the size of data from c. */
653  MPI_Bcast( &n, 1, PASTIX_MPI_INT, c, solvmtx->solv_comm );
654 
655  if ( n == 0 ) {
656  continue;
657  }
658 
659  if ( n > nr ) {
660  pastix_int_t *tmp;
661  nr = n;
662  tmp = (pastix_int_t *)realloc( col2cblk_bcast, nr * sizeof(pastix_int_t) );
663  /* Protection for static analysis */
664  if ( pastix_unlikely( tmp == NULL ) ) {
665  pastix_print_error( "Error reallocating col2cblk_bcast\n" );
666  }
667  col2cblk_bcast = tmp;
668  }
669 
670  /* Receives the col2cblk_bcast from c. */
671  MPI_Bcast( col2cblk_bcast, n, PASTIX_MPI_INT, c, solvmtx->solv_comm );
672  /*
673  * Goes through the columns in col2cblk_bcast and adds the processor
674  * number in col2cblk.
675  */
676  for ( k = 0; k < n; k += 2 ) {
677  fcolnum = col2cblk_bcast[k];
678  lcolnum = col2cblk_bcast[k+1];
679  for ( j = fcolnum; j <= lcolnum; j++ ) {
680  col2cblk[j] = - c - 1;
681  }
682  }
683  }
684  }
685 
686  free( col2cblk_bcast );
687 
688  return col2cblk;
689 }
690 #endif
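/*
 * Editor's note (illustrative sketch, not part of the original bcsc.c): in
 * the distributed case, a col2cblk entry stores either the local column
 * block index or -(owner+1) for a remote column. The guarded helper below
 * decodes that convention; its name and guard are hypothetical.
 */
#if defined(BCSC_EXAMPLE_COL2CBLK_DST)
static pastix_int_t
bcsc_example_decode_owner( const pastix_int_t *col2cblk,
                           pastix_int_t        jgpe,
                           pastix_int_t        clustnum,
                           pastix_int_t       *owner )
{
    pastix_int_t v = col2cblk[jgpe];

    if ( v < 0 ) {
        *owner = -v - 1;   /* Remote column: the entry stores -(owner+1). */
        return -1;
    }
    *owner = clustnum;     /* Local column: the entry stores the cblk index. */
    return v;
}
#endif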
691 
692 /**
693  *******************************************************************************
694  *
695  * @ingroup bcsc_internal
696  *
697  * @brief Creates the array which represents the repartition of each column
698  * in the block structure. This routine calls bcsc_init_col2cblk_shm or
699  * bcsc_init_col2cblk_dst according to the way the matrix is stored in the
700  * memory.
701  *
702  *******************************************************************************
703  *
704  * @param[in] solvmtx
705  * The solvmtx structure associated to the problem.
706  *
707  * @param[in] bcsc
708  * The internal block CSC structure.
709  * The number of local columns is updated.
710  *
711  * @param[in] spm
712  * The initial sparse matrix in the spm format.
713  *
714  *******************************************************************************
715  *
716  * @return The col2cblk array which gives the repartition of the solvmtx columns
717  * into the block structure.
718  *
719  *******************************************************************************/
720 pastix_int_t *
721 bcsc_init_col2cblk( const SolverMatrix *solvmtx,
722  const pastix_bcsc_t *bcsc,
723  const spmatrix_t *spm )
724 {
725  pastix_int_t *col2cblk;
726 
727  /* Tests if the spm is in shared or distributed memory. */
728 #if defined(PASTIX_WITH_MPI)
729  if ( spm->loc2glob != NULL ) {
730  col2cblk = bcsc_init_col2cblk_dst( solvmtx, bcsc );
731  }
732  else
733 #endif
734  {
735  col2cblk = bcsc_init_col2cblk_shm( solvmtx, bcsc );
736  }
737 
738  (void)spm;
739  return col2cblk;
740 }
741 
742 /**
743  *******************************************************************************
744  *
745  * @brief Initializes the dofshift array of size gNexp which gives, for each
746  * expanded column index, the associated non-expanded column index in the
747  * permuted ordering given by ord->permtab.
748  *
749  *******************************************************************************
750  *
751  * @param[in] spm
752  * The initial sparse matrix in the spm format.
753  *
754  * @param[in] ord
755  * The ordering that needs to be applied on the spm to generate the
756  * block csc.
757  *
758  *******************************************************************************
759  *
760  * @return The dofshift array.
761  *
762  *******************************************************************************/
763 static inline pastix_int_t*
764 bcsc_init_dofshift( const spmatrix_t *spm,
765  const pastix_order_t *ord )
766 {
767  pastix_int_t *dofshift, *ptr;
768  pastix_int_t *dofs;
769  pastix_int_t dof;
770  pastix_int_t idof, dofj, dofidx;
771  pastix_int_t jg, jgp;
772 
773  /* Allocates the dofshift array. */
774  MALLOC_INTERN( dofshift, spm->gNexp, pastix_int_t );
775 
776  dofs = spm->dofs;
777  dof = spm->dof;
778  ptr = dofshift;
779  for ( jg = 0; jg < spm->gN; jg++ ) {
780  jgp = ord->permtab[jg];
781  dofidx = (dof > 0) ? jgp * dof : dofs[jg];
782  ptr = dofshift + dofidx;
783  dofj = (dof > 0) ? dof : dofs[jg+1] - dofs[jg];
784  for ( idof = 0; idof < dofj; idof++, ptr++ ) {
785  *ptr = jgp;
786  }
787  }
788  return dofshift;
789 }
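/*
 * Editor's note (illustrative sketch, not part of the original bcsc.c):
 * worked example for a constant dof = 2 and permtab = {1, 0} on a 2-column
 * spm. Column 0 is permuted to 1 and column 1 to 0, so expanded indexes
 * {0,1} map to column 0 and {2,3} to column 1, i.e. dofshift = {0, 0, 1, 1}.
 * The guarded checker below encodes that expectation; its name and guard
 * are hypothetical.
 */
#if defined(BCSC_EXAMPLE_DOFSHIFT)
static int
bcsc_example_check_dofshift( const pastix_int_t *dofshift )
{
    pastix_int_t expected[4] = { 0, 0, 1, 1 };
    pastix_int_t k;

    for ( k = 0; k < 4; k++ ) {
        if ( dofshift[k] != expected[k] ) {
            return 0;
        }
    }
    return 1;
}
#endif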
790 
791 /**
792  *******************************************************************************
793  *
794  * @brief Initializes the coltab of a block csc matrix. The coltab corresponds to
795  * the number of rows (expanded) per column (non-expanded). This routine
796  * is called when the matrix is stored in shared memory or the matrix is
797  * replicated on the processors and the matrix's degrees of freedom are
798  * constant.
799  *
800  *******************************************************************************
801  *
802  * @param[in] spm
803  * The initial sparse matrix in the spm format.
804  *
805  * @param[in] ord
806  * The ordering which needs to be applied on the spm to generate the
807  * block csc.
808  *
809  * @param[out] globcol
810  * The array which contains, for each column, its beginning in the
811  * spm->colptr.
812  *
813  *******************************************************************************/
814 static inline void
815 bcsc_init_global_coltab_shm_cdof( const spmatrix_t *spm,
816  const pastix_order_t *ord,
817  pastix_int_t *globcol )
818 {
819  pastix_int_t *colptr = spm->colptr;
820  pastix_int_t *rowptr = spm->rowptr;
821  pastix_int_t dof = spm->dof;
822  pastix_int_t baseval = spm->baseval;
823  pastix_int_t frow, lrow;
824  pastix_int_t k, j, ig, jg, igp, jgp;
825  int sym = (spm->mtxtype == SpmSymmetric) || (spm->mtxtype == SpmHermitian);
826 
827  assert( dof > 0 );
828  assert( spm->loc2glob == NULL );
829 
830  /* Goes through the column of the spm. */
831  for ( j = 0; j < spm->n; j++, colptr++ ) {
832  jg = j;
833  jgp = ord->permtab[jg];
834  frow = colptr[0] - baseval;
835  lrow = colptr[1] - baseval;
836  assert( (lrow - frow) >= 0 );
837  /* Adds the number of values in the column jg. */
838  globcol[jgp] += (lrow - frow) * dof;
839 
840  /*
841  * Adds for At the number of values in the row ig and column jg. This
842  * is not required for the general case as the spm has a symmetric
843  * pattern.
844  */
845  if ( !sym ) {
846  continue;
847  }
848 
849  for ( k = frow; k < lrow; k++ ) {
850  ig = rowptr[k] - baseval;
851  if ( ig != jg ) {
852  igp = ord->permtab[ig];
853  globcol[igp] += dof;
854  }
855  }
856  }
857 
858  return;
859 }
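/*
 * Editor's note (illustrative sketch, not part of the original bcsc.c):
 * reduced worked example of the counting loop above. A 3x3 symmetric spm
 * with dof = 1, identity permutation, colptr = {0,2,3,4} and rowptr =
 * {0,1, 1, 2} yields globcol = {2, 2, 1}: two entries in column 0, one in
 * column 1 plus one transposed contribution from (1,0), and one in
 * column 2. The guard and the function name are hypothetical.
 */
#if defined(BCSC_EXAMPLE_GLOBCOL_CDOF)
static void
bcsc_example_globcol_sym( pastix_int_t *globcol )
{
    pastix_int_t colptr[4] = { 0, 2, 3, 4 };
    pastix_int_t rowptr[4] = { 0, 1, 1, 2 };
    pastix_int_t j, k;

    globcol[0] = globcol[1] = globcol[2] = 0;
    for ( j = 0; j < 3; j++ ) {
        /* Contribution of column j itself (the A part). */
        globcol[j] += colptr[j+1] - colptr[j];
        for ( k = colptr[j]; k < colptr[j+1]; k++ ) {
            if ( rowptr[k] != j ) {
                globcol[rowptr[k]] += 1; /* Transposed contribution (At). */
            }
        }
    }
}
#endif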
860 
861 /**
862  *******************************************************************************
863  *
864  * @brief Initializes the coltab of a block csc matrix. The coltab corresponds to
865  * the number of rows (expanded) per column (non-expanded). This routine
866  * is called when the matrix is stored in shared memory or the matrix is
867  * replicated on the processors and the matrix's degrees of freedom are
868  * variadic.
869  *
870  *******************************************************************************
871  *
872  * @param[in] spm
873  * The initial sparse matrix in the spm format.
874  *
875  * @param[in] ord
876  * The ordering which needs to be applied on the spm to generate the
877  * block csc.
878  *
879  * @param[out] globcol
880  * The array which contains, for each column, its beginning in the
881  * spm->colptr.
882  *
883  *******************************************************************************/
884 static inline void
885 bcsc_init_global_coltab_shm_vdof( const spmatrix_t *spm,
886  const pastix_order_t *ord,
887  pastix_int_t *globcol )
888 {
889  pastix_int_t *colptr = spm->colptr;
890  pastix_int_t *rowptr = spm->rowptr;
891  pastix_int_t *dofs = spm->dofs;
892  pastix_int_t baseval = spm->baseval;
893  pastix_int_t frow, lrow;
894  pastix_int_t k, j, ig, jg, igp, jgp;
895  pastix_int_t dofj, dofi;
896  int sym = (spm->mtxtype == SpmSymmetric) || (spm->mtxtype == SpmHermitian);
897 
898  assert( spm->dof <= 0 );
899  assert( spm->loc2glob == NULL );
900 
901  /* Goes through the column of the spm. */
902  for ( j=0; j<spm->n; j++, colptr++ ) {
903  jg = j;
904  dofj = dofs[jg+1] - dofs[jg];
905  jgp = ord->permtab[jg];
906  frow = colptr[0] - baseval;
907  lrow = colptr[1] - baseval;
908  assert( (lrow - frow) >= 0 );
909 
910  for ( k=frow; k<lrow; k++ ) {
911  ig = rowptr[k] - baseval;
912  dofi = dofs[ig+1] - dofs[ig];
913  /* Adds the number of values in the row ig and column jg. */
914  globcol[jgp] += dofi;
915 
916  /* Adds for At the number of values in the row ig and column jg. */
917  if ( sym && (ig != jg) ) {
918  igp= ord->permtab[ig];
919  globcol[igp] += dofj;
920  }
921  }
922  }
923 
924  return;
925 }
926 
927 #if defined(PASTIX_WITH_MPI)
928 /**
929  *******************************************************************************
930  *
931  * @brief Initializes the coltab of a block csc matrix. The coltab corresponds to
932  * the number of rows (expanded) per column (non-expanded). This routine
933  * is called when the matrix is distributed in the memory and the matrix's
934  * degrees of freedom are constant.
935  *
936  * There are two cases:
937  *
938  * If the matrix is general: the full columns and rows of the blocks are stored
939  * in Lvalues and Uvalues.
940  * - The local data of the current process which are in remote blocks after
941  * the permutation need to be sent to the owner process. The data is stored
942  * in sendA if it is sent for the column only, in sendAt if it is sent for
943  * the row only and in sendAAt if it is sent for the row and column.
944  * - The local data of the current process which are in local column blocks
945  * after the permutation need to be added in globcol.
946  *
947  * If the matrix is Symmetric or Hermitian: only the full columns of the blocks
948  * are stored in Lvalues (and Uvalues = Lvalues). Only the lower (or upper)
949  * triangular half of the spm is stored, therefore the lower (or upper) data
950  * need to be duplicated to fill the upper (or lower) half of the matrix
951  * in the blocks.
952  * - The local data of the current process which are in remote blocks after
953  * the permutation need to be sent to the owner process. The data is stored
954  * in sendA if it is sent for the lower (or upper) half of the column, in
955  * sendAt if it is sent for the upper (or lower) half of the column and in
956  * sendAAt if it is sent for both the lower and upper half of the column.
957  * The diagonal values are stored in sendA only.
958  * - The local data of the current process which are in column blocks after
959  * the permutation need to be added in globcol twice: once for the lower
960  * half and once for the upper half. The diagonal values need to be added
961  * only once.
962  *
963  *******************************************************************************
964  *
965  * @param[in] spm
966  * The initial sparse matrix in the spm format.
967  *
968  * @param[in] ord
969  * The ordering which needs to be applied on the spm to generate the
970  * block csc.
971  *
972  * @param[in] col2cblk
973  * The array which contains the repartition of the matrix columns
974  * into the block structure.
975  *
976  * @param[out] globcol
977  * The array which contains, for each column, its beginning in the
978  * spm->colptr.
979  *
980  * @param[in,out] bcsc_comm
981  * On entry, the initialized bcsc_comm structure. On exit, the
982  * bcsc_handle_comm structure which contains the amount of data to
983  * send to the other processors.
984  *
985  *******************************************************************************/
986 static inline void
987 bcsc_init_global_coltab_dst_cdof( const spmatrix_t *spm,
988  const pastix_order_t *ord,
989  const pastix_int_t *col2cblk,
990  pastix_int_t *globcol,
991  bcsc_handle_comm_t *bcsc_comm )
992 {
993  pastix_int_t *colptr = spm->colptr;
994  pastix_int_t *rowptr = spm->rowptr;
995  pastix_int_t *loc2glob = spm->loc2glob;
996  pastix_int_t dof = spm->dof;
997  pastix_int_t baseval = spm->baseval;
998  bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
999  bcsc_exch_comm_t *data_sendA, *data_sendAt, *data_sendAAt;
1000  pastix_int_t frow, lrow;
1001  pastix_int_t il, jl, ig, jg, igp, jgp;
1002  int sym = (spm->mtxtype == SpmSymmetric) || (spm->mtxtype == SpmHermitian);
1003  pastix_int_t ownerj, owneri;
1004 
1005  assert( dof > 0 );
1006 
1007  /* Goes through the columns of spm. */
1008  for ( jl = 0; jl < spm->n; jl++, colptr++, loc2glob++ ) {
1009  jg = *loc2glob - baseval;
1010  jgp = ord->permtab[jg];
1011 
1012  frow = colptr[0] - baseval;
1013  lrow = colptr[1] - baseval;
1014  assert( (lrow - frow) >= 0 );
1015 
1016  ownerj = col2cblk[jgp * dof];
1017 
1018  /* The column jp belongs to another process. */
1019  if ( ownerj < 0 ) {
1020  ownerj = - ownerj - 1;
1021  data_comm = bcsc_comm->data_comm + ownerj;
1022  data_sendA = &( data_comm->sendA );
1023 
1024  /* Goes through the rows of jl. */
1025  for ( il = frow; il < lrow; il++ ) {
1026  ig = rowptr[il] - baseval;
1027 
1028  /*
1029  * The diagonal values (ip, jp) belong to the same process.
1030  * They are sent to ownerj in the sym case for A only.
1031  */
1032  if ( sym && ( ig == jg ) ) {
1033  data_sendA->size.idxcnt += 2;
1034  data_sendA->size.valcnt += dof * dof;
1035  continue;
1036  }
1037 
1038  igp = ord->permtab[ig];
1039  owneri = col2cblk[igp* dof];
1040 
1041  /* The row ip belongs to another process. */
1042  if ( owneri < 0 ) {
1043  owneri = - owneri - 1;
1044  data_comm = bcsc_comm->data_comm + owneri;
1045 
1046  /*
1047  * The diagonal values (ip, jp) belong to the same process.
1048  * They are sent to owneri for AAt in the general cae.
1049  */
1050  if ( owneri == ownerj ) {
1051  data_sendAAt = &( data_comm->sendAAt );
1052 
1053  data_sendAAt->size.idxcnt += 2;
1054  data_sendAAt->size.valcnt += dof * dof;
1055  }
1056  /*
1057  * The values (ip, jp) belong to different processes.
1058  * They are sent to owneri for At and to ownerj for A.
1059  */
1060  else {
1061  data_sendAt = &( data_comm->sendAt );
1062 
1063  data_sendAt->size.idxcnt += 2;
1064  data_sendAt->size.valcnt += dof * dof;
1065 
1066  data_sendA->size.idxcnt += 2;
1067  data_sendA->size.valcnt += dof * dof;
1068  }
1069  }
1070  /* The row ip is local. */
1071  else {
1072  /*
1073  * The values (ip, jp) belong to ownerj.
1074  * They are sent to ownerj for A.
1075  */
1076  data_sendA->size.idxcnt += 2;
1077  data_sendA->size.valcnt += dof * dof;
1078  /*
1079  * The values (ip, jp) are local.
1080  * In the sym case ther are added to globcol.
1081  */
1082  if ( sym ) {
1083  globcol[igp] += dof;
1084  }
1085  }
1086  }
1087  }
1088  /* The column jp is local. */
1089  else {
1090  /* The column is added to globcol. */
1091  globcol[jgp] += dof * ( lrow - frow );
1092 
1093  /* Goes through the rows of j. */
1094  for ( il = frow; il < lrow; il++ ) {
1095  ig = rowptr[il] - baseval;
1096 
1097  /*
1098  * The diagonal values (ip, jp) have already been
1099  * added to globcol in the sym case.
1100  */
1101  if ( sym && ( ig == jg ) ) {
1102  continue;
1103  }
1104 
1105  igp = ord->permtab[ig];
1106  owneri = col2cblk[igp* dof];
1107 
1108  /* The row ip belongs to another process. */
1109  if ( owneri < 0 ) {
1110  owneri = - owneri - 1;
1111  data_comm = bcsc_comm->data_comm + owneri;
1112 
1113  /*
1114  * The values (ip, jp) belong to owneri.
1115  * They are sent to ownerj for At.
1116  */
1117  data_sendAt = &( data_comm->sendAt );
1118 
1119  data_sendAt->size.idxcnt += 2;
1120  data_sendAt->size.valcnt += dof * dof;
1121  }
1122  else {
1123  /*
1124  * The values (ip, jp) are local.
1125  * In the sym case they are added to globcol.
1126  */
1127  if ( sym ) {
1128  globcol[igp] += dof;
1129  }
1130  }
1131  }
1132  }
1133  }
1134 
1135  return;
1136 }
1137 
1138 /**
1139  *******************************************************************************
1140  *
1141  * @brief Initializes the coltab of a block csc matrix. The coltab corresponds to
1142  * the number of rows (expanded) per column (non-expanded). This routine
1143  * is called when the matrix is distributed in the memory and the matrix's
1144  * degrees of freedom are variadic.
1145  *
1146  * DOES NOT CURRENTLY WORK
1147  *
1148  *******************************************************************************
1149  *
1150  * @param[in] spm
1151  * The initial sparse matrix in the spm format.
1152  *
1153  * @param[in] ord
1154  * The ordering which needs to be applied on the spm to generate the
1155  * block csc.
1156  *
1157  * @param[in] col2cblk
1158  * The array which contains the repartition of the matrix columns
1159  * into the block structure.
1160  *
1161  * @param[out] globcol
1162  * The array which contains, for each column, its beginning in the
1163  * spm->colptr.
1164  *
1165  * @param[out] bcsc_comm
1166  * The bcsc_handle_comm structure which contains the amount of
1167  * data to send to the other processors.
1168  *
1169  *******************************************************************************/
1170 static inline void
1171 bcsc_init_global_coltab_dst_vdof( __attribute__((unused)) const spmatrix_t *spm,
1172  __attribute__((unused)) const pastix_order_t *ord,
1173  __attribute__((unused)) const pastix_int_t *col2cblk,
1174  __attribute__((unused)) pastix_int_t *globcol,
1175  __attribute__((unused)) bcsc_handle_comm_t *bcsc_comm )
1176 {
1177  // pastix_int_t *colptr = spm->colptr;
1178  // pastix_int_t *rowptr = spm->rowptr;
1179  // pastix_int_t *loc2glob = spm->loc2glob;
1180  // pastix_int_t *dofs = spm->dofs;
1181  // pastix_int_t dof = spm->dof;
1182  // pastix_int_t baseval = spm->baseval;
1183  // pastix_int_t frow, lrow;
1184  // pastix_int_t k, j, ig, jg, igp, jgp;
1185  // pastix_int_t dofj, dofi;
1186  // int sym = (spm->mtxtype == SpmSymmetric) || (spm->mtxtype == SpmHermitian);
1187 
1188  // assert( dof <= 0 );
1189 
1190  // for ( j=0; j<spm->n; j++, colptr++, loc2glob++ )
1191  // {
1192  // jg = *loc2glob - baseval;
1193  // jgp = ord->permtab[jg];
1194  // dofj = dofs[jg+1] - dofs[jg];
1195 
1196  // frow = colptr[0] - baseval;
1197  // lrow = colptr[1] - baseval;
1198  // assert( (lrow - frow) >= 0 );
1199 
1200  // jgpe = ...;
1201  // ownerj = col2cblk[jgpe]; // FAUX
1202  // localj = ( ownerj >= 0 );
1203  // ownerj = - ownerj - 1;
1204 
1205  // for ( k=frow; k<lrow; k++ )
1206  // {
1207  // ig = rowptr[k] - baseval;
1208  // dofi = dofs[ig+1] - dofs[ig];
1209 
1210  // if ( localj ) {
1211  // /* The column is local */
1212  // globcol[jgp] += dofi;
1213  // }
1214  // else {
1215  // /* The column is remote */
1216  // //update_counter_tosend( ownerj, 1 /* Nbr Elt */, dofi /* Nbr values */ );
1217  // }
1218 
1219  // if ( sym && (ig != jg) ) {
1220  // igp = ord->permtab[ig];
1221  // igpe = ...;
1222  // owneri = col2cblk[igpe]; // FAUX
1223 
1224  // if ( owneri >= 0 ) {
1225  // globcol[igp] += dofj;
1226  // }
1227  // else {
1228  // owneri = - owneri - 1;
1229  // //update_counter_tosend( owneri, 1 /* Nbr Elt */, dofj /* Nbr values */ );
1230  // }
1231  // }
1232  // }
1233  // }
1234 
1235  return;
1236 }
1237 
1238 /**
1239  *******************************************************************************
1240  *
1241  * @brief Exchanges the indexes with the other processors.
1242  *
1243  *******************************************************************************
1244  *
1245  * @param[in,out] bcsc_comm
1246  * On entry, the bcsc_handle_comm structure contains the data the current
1247  * processor has to send to the other processors. On exit,
1248  * the structure is updated with the received data from the other
1249  * processors.
1250  *
1251  *******************************************************************************/
1252 void
1253 bcsc_exchange_indexes( bcsc_handle_comm_t *bcsc_comm )
1254 {
1255  pastix_int_t clustnbr = bcsc_comm->clustnbr;
1256  pastix_int_t clustnum = bcsc_comm->clustnum;
1257  bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
1258  bcsc_proc_comm_t *data_local = bcsc_comm->data_comm + clustnum;
1259  bcsc_exch_comm_t *sendA_local = &( data_local->sendA );
1260  bcsc_exch_comm_t *sendAt_local = &( data_local->sendAt );
1261  bcsc_exch_comm_t *sendAAt_local = &( data_local->sendAAt );
1262  pastix_int_t counter_req = 0;
1263  pastix_int_t cntA = 0;
1264  pastix_int_t cntAt = 0;
1265  pastix_int_t cntAAt = 0;
1266  pastix_int_t idx_cnt_A[clustnbr];
1267  pastix_int_t idx_cnt_At[clustnbr];
1268  pastix_int_t idx_cnt_AAt[clustnbr];
1269  MPI_Status statuses[(clustnbr-1)*BCSC_COMM_NBR];
1270  MPI_Request requests[(clustnbr-1)*BCSC_COMM_NBR];
1271  bcsc_proc_comm_t *data_send, *data_recv;
1272  bcsc_exch_comm_t *send;
1273  bcsc_data_amount_t *recv;
1274  pastix_int_t c_send, c_recv, k;
1275 
1276  bcsc_allocate_buf( bcsc_comm, PastixTagMemRecvIdx );
1277 
1278  for ( k = 0; k < clustnbr; k++ ) {
1279  if ( k == clustnum ) {
1280  idx_cnt_A[k] = 0;
1281  idx_cnt_At[k] = 0;
1282  idx_cnt_AAt[k] = 0;
1283  continue;
1284  }
1285  idx_cnt_A[ k ] = cntA;
1286  cntA += data_comm[k].recvA.idxcnt;
1287  idx_cnt_At[ k ] = cntAt;
1288  cntAt += data_comm[k].recvAt.idxcnt;
1289  idx_cnt_AAt[ k ] = cntAAt;
1290  cntAAt += data_comm[k].recvAAt.size.idxcnt;
1291  }
1292 
1293  c_send = (clustnum+1) % clustnbr;
1294  c_recv = (clustnum-1+clustnbr) % clustnbr;
1295  for ( k = 0; k < clustnbr-1; k++ ) {
1296  data_send = data_comm + c_send;
1297  data_recv = data_comm + c_recv;
1298  if ( c_send == clustnum ) {
1299  continue;
1300  }
1301 
1302  /* Posts the receptions of the indexes. */
1303  recv = &( data_recv->recvA );
1304  if ( recv->idxcnt != 0 ) {
1305  MPI_Irecv( sendA_local->idxbuf + idx_cnt_A[c_recv], recv->idxcnt,
1306  PASTIX_MPI_INT, c_recv, PastixTagIndexesA, bcsc_comm->comm,
1307  &requests[counter_req++] );
1308  }
1309  recv = &( data_recv->recvAt );
1310  if ( recv->idxcnt != 0 ) {
1311  MPI_Irecv( sendAt_local->idxbuf + idx_cnt_At[c_recv], recv->idxcnt,
1312  PASTIX_MPI_INT, c_recv, PastixTagIndexesAt, bcsc_comm->comm,
1313  &requests[counter_req++] );
1314  }
1315  recv = &( data_recv->recvAAt.size );
1316  if ( recv->idxcnt != 0 ) {
1317  MPI_Irecv( sendAAt_local->idxbuf + idx_cnt_AAt[c_recv], recv->idxcnt,
1318  PASTIX_MPI_INT, c_recv, PastixTagIndexesAAt, bcsc_comm->comm,
1319  &requests[counter_req++] );
1320  }
1321 
1322  /* Posts the emissions of the indexes. */
1323  send = &( data_send->sendA );
1324  if ( send->size.idxcnt != 0 ) {
1325  MPI_Isend( send->idxbuf, send->size.idxcnt, PASTIX_MPI_INT, c_send,
1326  PastixTagIndexesA, bcsc_comm->comm, &requests[counter_req++] );
1327  }
1328  send = &( data_send->sendAt );
1329  if ( send->size.idxcnt != 0 ) {
1330  MPI_Isend( send->idxbuf, send->size.idxcnt, PASTIX_MPI_INT, c_send,
1331  PastixTagIndexesAt, bcsc_comm->comm, &requests[counter_req++] );
1332  }
1333  send = &( data_send->sendAAt );
1334  if ( send->size.idxcnt != 0 ) {
1335  MPI_Isend( send->idxbuf, send->size.idxcnt, PASTIX_MPI_INT, c_send,
1336  PastixTagIndexesAAt, bcsc_comm->comm, &requests[counter_req++] );
1337  }
1338  c_send = (c_send+1) % clustnbr;
1339  c_recv = (c_recv-1+clustnbr) % clustnbr;
1340  }
1341 
1342  MPI_Waitall( counter_req, requests, statuses );
1343 }
1344 
1345 /**
1346  *******************************************************************************
1347  *
1348  * @brief Updates globcol with the received indexes.
1349  *
1350  *******************************************************************************
1351  *
1352  * @param[in] spm
1353  * The initial sparse matrix in the spm format.
1354  *
1355  * @param[in] ord
1356  * The ordering which needs to be applied on the spm to generate the
1357  * block csc.
1358  *
1359  * @param[out] globcol
1360  * The array which contains, for each column, its beginning in the
1361  * spm->colptr. This array is updated with the data received from the
1362  * other processors.
1363  *
1364  * @param[in] bcsc_comm
1365  * The bcsc_handle_comm structure which contains the received data
1366  * from the other processors.
1367  *
1368  *******************************************************************************/
1369 static inline void
1370 bcsc_update_globcol( const spmatrix_t *spm,
1371  const pastix_order_t *ord,
1372  pastix_int_t *globcol,
1373  bcsc_handle_comm_t *bcsc_comm )
1374 {
1375  pastix_int_t *dofs = spm->dofs;
1376  pastix_int_t dof = spm->dof;
1377  pastix_int_t clustnum = bcsc_comm->clustnum;
1378  bcsc_proc_comm_t *data_local = bcsc_comm->data_comm + clustnum;
1379  bcsc_exch_comm_t *sendA_local = &( data_local->sendA );
1380  bcsc_exch_comm_t *sendAt_local = &( data_local->sendAt );
1381  bcsc_exch_comm_t *sendAAt_local = &( data_local->sendAAt );
1382  pastix_int_t k, igp, jgp, jg, ig, baseval;
1383  pastix_int_t *indexes_A;
1384  pastix_int_t *indexes_At;
1385  pastix_int_t *indexes_AAt;
1386 
1387  assert( ord->baseval == 0 );
1388  baseval = ord->baseval;
1389 
1390  /* Updates globcol. */
1391  indexes_A = sendA_local->idxbuf;
1392  indexes_At = sendAt_local->idxbuf;
1393  indexes_AAt = sendAAt_local->idxbuf;
1394 
1395  /* Goes through indexes_A. */
1396  for ( k = 0; k < data_local->recvA.idxcnt; k += 2, indexes_A += 2 ) {
1397  igp = indexes_A[0];
1398  jgp = indexes_A[1];
1399  ig = ord->peritab[igp] - baseval;
1400 
1401  /* Adds the values (igp, jgp) to globcol. */
1402  globcol[jgp] += ( dof < 0 ) ? dofs[ ig+1 ] - dofs[ig] : dof;
1403  }
1404 
1405  /* Goes through indexes_At. */
1406  if ( spm->mtxtype != SpmGeneral ) {
1407  for ( k = 0; k < data_local->recvAt.idxcnt; k += 2, indexes_At += 2 ) {
1408  igp = indexes_At[0];
1409  jgp = indexes_At[1];
1410  jg = ord->peritab[jgp] - baseval;
1411 
1412  /* Adds the values (igp, jgp) to globcol. */
1413  globcol[igp] += ( dof < 0 ) ? dofs[ jg+1 ] - dofs[jg] : dof;
1414  }
1415  }
1416 
1417  /* Goes through indexes_AAt. */
1418  for ( k = 0; k < data_local->recvAAt.size.idxcnt; k += 2, indexes_AAt += 2 ) {
1419  igp = indexes_AAt[0];
1420  jgp = indexes_AAt[1];
1421  ig = ord->peritab[igp] - baseval;
1422  jg = ord->peritab[jgp] - baseval;
1423 
1424  /* Adds the values (igp, jgp) to globcol. */
1425  globcol[jgp] += ( dof < 0 ) ? dofs[ ig+1 ] - dofs[ig] : dof;
1426 
1427  if ( spm->mtxtype != SpmGeneral ) {
1428  /* Adds the values (igp, jgp) twice to globcol if sym. */
1429  globcol[igp] += ( dof < 0 ) ? dofs[ jg+1 ] - dofs[jg] : dof;
1430  }
1431  }
1432 }
1433 #endif
1434 
1435 /**
1436  *******************************************************************************
1437  *
1438  * @brief Initializes the coltab of a block csc matrix. The coltab corresponds to
1439  * the number of rows (expanded) per column (non-expanded). This routine
1440  * calls bcsc_init_global_coltab_[shm,dst]_[c,v]dof according to the way
1441  * the matrix is stored and whether the degrees of freedom of the matrix are
1442  * constant or variadic. If the matrix is distributed in the memory, this
1443  * function also calls the routines which exchange the amount of data for
1444  * the communication, store the indexes and values to send and exchange
1445  * the indexes.
1446  *
1447  *******************************************************************************
1448  *
1449  * @param[in] spm
1450  * The initial sparse matrix in the spm format.
1451  *
1452  * @param[in] ord
1453  * The ordering which needs to be applied on the spm to generate the
1454  * block csc.
1455  *
1456  * @param[in] solvmtx
1457  * The solver matrix structure which describes the data distribution.
1458  *
1459  * @param[in] col2cblk
1460  * The array which contains the repartition of the matrix columns
1461  * into the block structure.
1462  *
1463  * @param[in,out] bcsc_comm
1464  * The bcsc_handle_comm structure updated with the amount of data the current
1465  * processor has to send to the other processors if PASTIX_WITH_MPI = ON
1466  * and the matrix is distributed in memory. If this is not the case,
1467  * bcsc_comm = NULL.
1468  *
1469  *******************************************************************************
1470  *
1471  * @returns The array which contains, for each column, its beginning in the
1472  * spm->colptr.
1473  *
1474  *******************************************************************************/
1475 static inline pastix_int_t*
1476 bcsc_init_global_coltab( const spmatrix_t *spm,
1477  const pastix_order_t *ord,
1478  const SolverMatrix *solvmtx,
1479  const pastix_int_t *col2cblk,
1480  bcsc_handle_comm_t *bcsc_comm )
1481 {
1482  spm_int_t *globcol;
1483 
1484  /*
1485  * Allocates and initializes globcol which contains the number of elements in
1486  * each column of the input matrix.
1487  * Globcol is equivalent to the classic colptr for the internal blocked
1488  * csc. The blocked csc integrates the permutation computed within order
1489  * structure.
1490  */
1491  MALLOC_INTERN( globcol, spm->gN+1, pastix_int_t );
1492  memset( globcol, 0, (spm->gN+1) * sizeof(pastix_int_t) );
1493 
1494  if ( bcsc_comm == NULL ) {
1495  if ( spm->dof > 0 ) {
1496  bcsc_init_global_coltab_shm_cdof( spm, ord, globcol );
1497  }
1498  else {
1499  bcsc_init_global_coltab_shm_vdof( spm, ord, globcol );
1500  }
1501  }
1502 #if defined(PASTIX_WITH_MPI)
1503  else {
1504  if ( spm->dof > 0 ) {
1505  bcsc_init_global_coltab_dst_cdof( spm, ord, col2cblk, globcol, bcsc_comm );
1506  }
1507  else {
1508  bcsc_init_global_coltab_dst_vdof( spm, ord, col2cblk, globcol, bcsc_comm );
1509  }
1510 
1511  /* Exchanges the amount of data which will be sent and received. */
1512  bcsc_exchange_amount_of_data( bcsc_comm );
1513 
1514  /* Stores the indexes and values the current processor has to send to the others. */
1515  switch( spm->flttype ) {
1516  case SpmFloat:
1517  bcsc_sstore_data( spm, ord, col2cblk, bcsc_comm );
1518  break;
1519  case SpmDouble:
1520  bcsc_dstore_data( spm, ord, col2cblk, bcsc_comm );
1521  break;
1522  case SpmComplex32:
1523  bcsc_cstore_data( spm, ord, col2cblk, bcsc_comm );
1524  break;
1525  case SpmComplex64:
1526  bcsc_zstore_data( spm, ord, col2cblk, bcsc_comm );
1527  break;
1528  case SpmPattern:
1529  default:
1530  fprintf(stderr, "bcsc_init: Error unknown floating type for input spm\n");
1531  }
1532 
1533  /* Exchanges the indexes and updates globcol with the received indexes. */
1534  bcsc_exchange_indexes( bcsc_comm );
1535  bcsc_update_globcol( spm, ord, globcol, bcsc_comm );
1536 
1537 #if !defined(NDEBUG)
1538  /* Check that globcol contains the right information. */
1539  if ( spm->dof > 0 ) {
1540  pastix_int_t ig, ip, ipe, dofi;
1541  pastix_int_t nnzl = 0;
1542  pastix_int_t nnzg = 0;
1543  pastix_int_t nnz;
1544  for( ig=0; ig<spm->gN; ig++ ) {
1545  ip = ord->permtab[ig];
1546  ipe = ( spm->dof > 0 ) ? ip * spm->dof : spm->dofs[ ig ] - spm->baseval;
1547  if ( col2cblk[ipe] < 0 ) {
1548  continue;
1549  }
1550 
1551  dofi = ( spm->dof > 0 ) ? spm->dof: spm->dofs[ig+1] - spm->dofs[ig];
1552  nnzl += globcol[ip] * dofi;
1553  }
1554  MPI_Allreduce( &nnzl, &nnzg, 1, PASTIX_MPI_INT, MPI_SUM, spm->comm );
1555 
1556  if ( spm->mtxtype != SpmGeneral ) {
1557  /*
1558  * We can't check the exact number of elements if some diagonal
1559  * values are missing (=0).
1560  */
1561  nnz = spm->gnnzexp * 2;
1562  assert( nnzg <= nnz );
1563  nnz = nnz - (spm->gN * spm->dof * spm->dof);
1564  assert( nnzg >= nnz );
1565  }
1566  else {
1567  nnz = spm->gnnzexp;
1568  assert( nnzg == nnz );
1569  }
1570  }
1571 #endif
1572  }
1573 
1574 #endif
1575 
1576  (void)solvmtx;
1577  (void)col2cblk;
1578  return globcol;
1579 }
1580 
1581 /**
1582  *******************************************************************************
1583  *
1584  * @ingroup bcsc_internal
1585  *
1586  * @brief Initializes the coltab of a block csc matrix. The coltab corresponds
1587  * to the number of rows (expanded) per column (non-expanded). If the
1588  * matrix is distributed in the memory, this function also calls the
1589  * routines which exchange the amount of data for the communication,
1590  * store the indexes and values to send and exchange the indexes.
1591  *
1592  *******************************************************************************
1593  *
1594  * @param[in] spm
1595  * The spm structure that stores the dofs.
1596  *
1597  * @param[in] ord
1598  * The ordering which needs to be applied on the spm to generate the
1599  * block csc.
1600  *
1601  * @param[in] solvmtx
1602  * The solver matrix structure which describes the data distribution.
1603  *
1604  * @param[inout] bcsc
1605  * On entry, the pointer to an allocated bcsc.
1606  * On exit, the bcsc stores the initialized coltab split per block
1607  * corresponding to the input spm with the permutation applied
1608  * and grouped according to the distribution described in solvmtx.
1609  *
1610  *******************************************************************************
1611  *
1612  * @return The number of nonzero entries stored in the bcsc (the size of the rowtab and Lvalues arrays).
1613  *
1614  *******************************************************************************/
1615 pastix_int_t
1616 bcsc_init_coltab( const spmatrix_t *spm,
1617  const pastix_order_t *ord,
1618  const SolverMatrix *solvmtx,
1619  pastix_bcsc_t *bcsc )
1620 {
1621  SolverCblk *cblk;
1622  bcsc_cblk_t *blockcol;
1623  pastix_int_t *dofshift = NULL;
1624  pastix_int_t *globcol = NULL;
1625  pastix_int_t cblknum, bcscnum, iter, idxcol, nodeidx, colsize;
1626 
1627  bcsc->cscfnbr = solvmtx->cblknbr - solvmtx->faninnbr - solvmtx->recvnbr;
1628  MALLOC_INTERN( bcsc->cscftab, bcsc->cscfnbr, bcsc_cblk_t );
1629 
1630  /* Creates an array to convert expanded indexes to not expanded indexes. */
1631  dofshift = bcsc_init_dofshift( spm, ord );
1632 
1633  /* Computes the number of rows (expanded) per column (not expanded). */
1634  globcol = bcsc_init_global_coltab( spm, ord, solvmtx, bcsc->col2cblk, bcsc->bcsc_comm );
1635 
1636  idxcol = 0;
1637  bcscnum = 0;
1638  cblk = solvmtx->cblktab;
1639  blockcol = bcsc->cscftab;
1640  for ( cblknum = 0; cblknum < solvmtx->cblknbr; cblknum++, cblk++ ) {
1641  if ( cblk->cblktype & (CBLK_FANIN|CBLK_RECV) ) {
1642  continue;
1643  }
1644 
1645  blockcol->cblknum = cblknum;
1646  blockcol->colnbr = cblk_colnbr( cblk );
1647  assert( cblk->bcscnum == bcscnum );
1648  MALLOC_INTERN( blockcol->coltab, blockcol->colnbr + 1, pastix_int_t );
1649 
1650  blockcol->coltab[0] = idxcol;
1651  for ( iter = 0; iter < blockcol->colnbr; iter++ ) {
1652  nodeidx = dofshift[ cblk->fcolnum + iter ];
1653  colsize = globcol[nodeidx];
1654  //jgpe = cblk->fcolnum + iter;
1655  //jgp = dofshift[ jgpe ];
1656  //colsize = globcol[jgp];
1657  blockcol->coltab[iter+1] = blockcol->coltab[iter] + colsize;
1658  }
1659  idxcol = blockcol->coltab[blockcol->colnbr];
1660 
1661  blockcol++;
1662  bcscnum++;
1663  }
1664  assert( (blockcol - bcsc->cscftab) == bcsc->cscfnbr );
1665  assert( bcscnum == bcsc->cscfnbr );
1666 
1667  memFree_null( globcol );
1668  memFree_null( dofshift );
1669 
1670  if ( idxcol > 0 ) {
1671  MALLOC_INTERN( bcsc->rowtab, idxcol, pastix_int_t);
1672  MALLOC_INTERN( bcsc->Lvalues, idxcol * pastix_size_of( bcsc->flttype ), char );
1673  }
1674  else {
1675  bcsc->rowtab = NULL;
1676  bcsc->Lvalues = NULL;
1677  }
1678  bcsc->Uvalues = NULL;
1679 
1680  return idxcol;
1681 }
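/*
 * Editor's note (illustrative sketch, not part of the original bcsc.c):
 * reduced worked example of the prefix sum built above. For a single column
 * block whose three columns have row counts (globcol) {2, 3, 2}, the
 * resulting coltab is {0, 2, 5, 7} and the total, 7, is the number of
 * entries to allocate in rowtab/Lvalues. The guard and the function name
 * are hypothetical.
 */
#if defined(BCSC_EXAMPLE_COLTAB)
static pastix_int_t
bcsc_example_coltab_prefix_sum( pastix_int_t *coltab )
{
    pastix_int_t colsize[3] = { 2, 3, 2 };
    pastix_int_t iter;

    coltab[0] = 0;
    for ( iter = 0; iter < 3; iter++ ) {
        coltab[iter+1] = coltab[iter] + colsize[iter];
    }
    return coltab[3];
}
#endif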
1682 
1683 /**
1684  *******************************************************************************
1685  *
1686  * @ingroup bcsc_internal
1687  *
1688  * @brief Restores the coltab array when it has been modified to initialize
1689  * the row and values arrays.
1690  *
1691  *******************************************************************************
1692  *
1693  * @param[inout] bcsc
1694  * On entry, the bcsc to restore.
1695  * On exit, the coltab array of the bcsc is restored to the correct
1696  * indexes.
1697  *
1698  *******************************************************************************/
1699 void
1700 bcsc_restore_coltab( pastix_bcsc_t *bcsc )
1701 {
1702  bcsc_cblk_t *blockcol;
1703  pastix_int_t index, iter, idxcol, idxcoltmp;
1704 
1705  idxcol = 0;
1706  blockcol = bcsc->cscftab;
1707  for ( index=0; index<bcsc->cscfnbr; index++, blockcol++ )
1708  {
1709  for ( iter=0; iter <= blockcol->colnbr; iter++ )
1710  {
1711  idxcoltmp = blockcol->coltab[iter];
1712  blockcol->coltab[iter] = idxcol;
1713  idxcol = idxcoltmp;
1714  }
1715  }
1716  return;
1717 }
1718 
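The restoration works because the fill phase uses coltab as a write cursor: every entry inserted in column j goes to position coltab[j], which is then incremented, so once the fill is done coltab[j] holds the value that originally belonged to coltab[j+1]. Shifting everything back by one position, with idxcol carrying the offset across column blocks, recovers the original pointers. A toy sketch of that shift (assumed values, single column block, not the PaStiX API):

    #include <assert.h>

    int main(void)
    {
        /* One column block of 4 columns.  After the fill phase each
         * coltab[j], j < 4, has been advanced to the start of column j+1;
         * the last entry (the total) is untouched.                        */
        int coltab[5]   = { 2, 5, 6, 9, 9 };
        int expected[5] = { 0, 2, 5, 6, 9 }; /* original column pointers */
        int idxcol = 0, idxcoltmp, j;

        for ( j = 0; j <= 4; j++ ) {
            idxcoltmp = coltab[j];
            coltab[j] = idxcol;
            idxcol    = idxcoltmp;
        }

        for ( j = 0; j <= 4; j++ ) {
            assert( coltab[j] == expected[j] );
        }
        return 0;
    }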
1719 /**
1720  *******************************************************************************
1721  *
1722  * @brief Initializes the fields and the col2cblk array of a block csc.
1723  *
1724  *******************************************************************************
1725  *
1726  * @param[in] spm
1727  * The initial sparse matrix in the spm format.
1728  *
1729  * @param[in] solvmtx
1730  * The solver matrix structure which describes the data distribution.
1731  *
1732  * @param[inout] bcsc
1733  * On entry, the pointer to an allocated bcsc.
1734  * On exit, the fields, the col2cblk array and, for a distributed spm,
1735  * the communication handler of the bcsc are initialized.
1736  *
1737  *******************************************************************************/
1738 void
1739 bcsc_init_struct( const spmatrix_t *spm,
1740  const SolverMatrix *solvmtx,
1741  pastix_bcsc_t *bcsc )
1742 {
1743  pastix_int_t *col2cblk = NULL;
1744 
1745  bcsc->mtxtype = spm->mtxtype;
1746  bcsc->flttype = spm->flttype;
1747  bcsc->gN = spm->gNexp;
1748  bcsc->n = solvmtx->nodenbr;
1749 
1750  /*
1751  * Creates the col2cblk array which associates each column to a cblk
1752  * (expanded).
1753  */
1754  col2cblk = bcsc_init_col2cblk( solvmtx, bcsc, spm );
1755  bcsc->col2cblk = col2cblk;
1756 
1757  /*
1758  * Initializes the communication handler of the bcsc if the spm is
1759  * distributed (loc2glob != NULL).
1760  */
1761  bcsc->bcsc_comm = NULL;
1762  if ( spm->loc2glob != NULL ) {
1763  bcsc_handle_comm_init( solvmtx, bcsc );
1764  }
1765 }
1766 
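For illustration, col2cblk associates each permuted and expanded column with the block column that handles it. A deliberately simplified, shared-memory style sketch with made-up sizes (not the actual bcsc_init_col2cblk() code, whose distributed handling is more involved):

    #include <assert.h>

    /* Toy mapping: 6 expanded columns covered by two local column blocks
     * over [0,3] and [4,5].  Columns handled by no local block column
     * would keep the negative marker in this simplified version.        */
    int main(void)
    {
        int col2cblk[6];
        int j;

        for ( j = 0; j < 6; j++ )  { col2cblk[j] = -1; }
        for ( j = 0; j <= 3; j++ ) { col2cblk[j] = 0; }  /* first block column  */
        for ( j = 4; j <= 5; j++ ) { col2cblk[j] = 1; }  /* second block column */

        assert( col2cblk[2] == 0 );
        assert( col2cblk[5] == 1 );
        return 0;
    }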
1767 /**
1768  *******************************************************************************
1769  *
1770  * @brief Cleans up the bcsc structure (symmetric of bcsc_init_struct()).
1771  *
1772  *******************************************************************************
1773  *
1774  * @param[inout] bcsc
1775  * On entry, the pointer to the initialized bcsc.
1776  * On exit, the data allocated by bcsc_init_struct() (col2cblk and
1777  * communication handler) has been freed.
1778  *
1779  *******************************************************************************/
1780 void
1781 bcsc_exit_struct( pastix_bcsc_t *bcsc )
1782 {
1783  if ( bcsc->col2cblk != NULL ) {
1784  memFree_null( bcsc->col2cblk );
1785  }
1786 
1787  if ( bcsc->bcsc_comm != NULL ) {
1788  bcsc_handle_comm_exit( bcsc->bcsc_comm );
1789  memFree_null( bcsc->bcsc_comm );
1790  }
1791 }
1792 
1793 /**
1794  *******************************************************************************
1795  *
1796  * @brief Initializes a block csc.
1797  *
1798  *******************************************************************************
1799  *
1800  * @param[in] spm
1801  * The initial sparse matrix in the spm format.
1802  *
1803  * @param[in] ord
1804  * The ordering which needs to be applied to the spm to generate the
1805  * block csc.
1806  *
1807  * @param[in] solvmtx
1808  * The solver matrix structure which describes the data distribution.
1809  *
1810  * @param[in] initAt
1811  * The flag to know if At has to be initialized:
1812  * - if initAt = 0, the matrix is symmetric or Hermitian, which
1813  * means that Lvalues = Uvalues, so At does not need to be
1814  * initialized;
1815  * - if initAt = 1, the matrix is general, which means that
1816  * At needs to be initialized and computed.
1817  *
1818  * @param[inout] bcsc
1819  * On entry, the pointer to an allocated bcsc.
1820  * On exit, the bcsc stores the input spm with the permutation applied
1821  * and grouped according to the distribution described in solvmtx.
1822  *
1823  *******************************************************************************/
1824 static inline void
1825 bcsc_init( const spmatrix_t *spm,
1826  const pastix_order_t *ord,
1827  const SolverMatrix *solvmtx,
1828  pastix_int_t initAt,
1829  pastix_bcsc_t *bcsc )
1830 {
1831  pastix_int_t valuesize;
1832 
1833  bcsc_init_struct( spm, solvmtx, bcsc );
1834  valuesize = bcsc_init_coltab( spm, ord, solvmtx, bcsc );
1835 
1836  /*
1837  * Fills in the lower triangular part of the blocked csc with values and
1838  * rows. The upper triangular part is done later if required through LU
1839  * factorization.
1840  */
1841  switch( spm->flttype ) {
1842  case SpmFloat:
1843  bcsc_sinit( spm, ord, solvmtx, initAt, bcsc, valuesize );
1844  break;
1845  case SpmDouble:
1846  bcsc_dinit( spm, ord, solvmtx, initAt, bcsc, valuesize );
1847  break;
1848  case SpmComplex32:
1849  bcsc_cinit( spm, ord, solvmtx, initAt, bcsc, valuesize );
1850  break;
1851  case SpmComplex64:
1852  bcsc_zinit( spm, ord, solvmtx, initAt, bcsc, valuesize );
1853  break;
1854  case SpmPattern:
1855  default:
1856  fprintf(stderr, "bcsc_init: Error: unknown floating point type for the input spm\n");
1857  }
1858 }
1859 
1860 /**
1861  *******************************************************************************
1862  *
1863  * @brief Initializes the block csc matrix.
1864  *
1865  * The block csc matrix is used to initialize the factorized matrix, and to
1866  * perform the matvec operations in refinement.
1867  *
1868  *******************************************************************************
1869  *
1870  * @param[in] spm
1871  * The initial sparse matrix in the spm format.
1872  *
1873  * @param[in] ord
1874  * The ordering which needs to be applied to the spm to generate the
1875  * block csc.
1876  *
1877  * @param[in] solvmtx
1878  * The solver matrix structure which describes the data distribution.
1879  *
1880  * @param[in] initAt
1881  * The flag to know if At has to be initialized:
1882  * - if initAt = 0, the matrix is symmetric or Hermitian, which
1883  * means that Lvalues = Uvalues, so At does not need to be
1884  * initialized;
1885  * - if initAt = 1, the matrix is general, which means that
1886  * At needs to be initialized and computed.
1887  *
1888  * @param[inout] bcsc
1889  * On entry, the pointer to an allocated bcsc.
1890  * On exit, the bcsc stores the input spm with the permutation applied
1891  * and grouped according to the distribution described in solvmtx.
1892  *
1893  *******************************************************************************
1894  *
1895  * @return The time spent to initialize the bcsc structure.
1896  *
1897  *******************************************************************************/
1898 double
1899 bcscInit( const spmatrix_t *spm,
1900  const pastix_order_t *ord,
1901  const SolverMatrix *solvmtx,
1902  pastix_int_t initAt,
1903  pastix_bcsc_t *bcsc )
1904 {
1905  double time = 0.;
1906 
1907  assert( ord->baseval == 0 );
1908  assert( ord->vertnbr == spm->gN );
1909 
1910  clockStart(time);
1911  bcsc_init( spm, ord, solvmtx, initAt, bcsc );
1912  clockStop(time);
1913 
1914  return time;
1915 }
1916 
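A minimal usage sketch of the bcscInit()/bcscExit() pair. The call site is hypothetical: in PaStiX the spm, the ordering and the solver matrix come out of the analysis steps, so the surrounding setup is assumed here, and the include paths mirror the ones used in this file and may differ in an installed tree:

    #include <stdio.h>
    #include <spm.h>
    #include "pastix/order.h"
    #include "blend/solver.h"
    #include "bcsc/bcsc.h"

    /* Hypothetical helper: builds the block csc for a general (LU) problem
     * and releases it once it is no longer needed.                        */
    void
    example_bcsc_lifetime( const spmatrix_t     *spm,
                           const pastix_order_t *ord,
                           const SolverMatrix   *solvmtx )
    {
        pastix_bcsc_t bcsc;
        double        time;

        /* initAt = 1: the matrix is general, so At (Uvalues) is also built. */
        time = bcscInit( spm, ord, solvmtx, 1, &bcsc );
        fprintf( stdout, "bcsc initialized in %e s\n", time );

        /* ... the factorization and the refinement use the bcsc here ... */

        bcscExit( &bcsc );
    }

Since bcscExit() frees the content of the structure but not the pointer itself, passing the address of a stack-allocated pastix_bcsc_t, as above, is valid.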
1917 /**
1918  *******************************************************************************
1919  *
1920  * @brief Frees the block csc structure but does not free the bcsc pointer.
1921  *
1922  *******************************************************************************
1923  *
1924  * @param[inout] bcsc
1925  * The block csc matrix to free.
1926  *
1927  *******************************************************************************/
1928 void
1929 bcscExit( pastix_bcsc_t *bcsc )
1930 {
1931  bcsc_cblk_t *cblk;
1932  pastix_int_t i;
1933 
1934  if ( bcsc->cscftab == NULL ) {
1935  return;
1936  }
1937 
1938  for ( i=0, cblk=bcsc->cscftab; i < bcsc->cscfnbr; i++, cblk++ ) {
1939  memFree_null( cblk->coltab );
1940  }
1941 
1942  memFree_null( bcsc->cscftab );
1943  memFree_null( bcsc->rowtab );
1944 
1945  if ( (bcsc->Uvalues != NULL) &&
1946  (bcsc->Uvalues != bcsc->Lvalues) ) {
1947  memFree_null( bcsc->Uvalues );
1948  }
1949 
1950  memFree_null( bcsc->Lvalues );
1951 
1952  bcsc_exit_struct( bcsc );
1953 }