PaStiX Handbook 6.4.0
Loading...
Searching...
No Matches
bcsc.c
Go to the documentation of this file.
1/**
2 *
3 * @file bcsc.c
4 *
5 * @copyright 2004-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
6 * Univ. Bordeaux. All rights reserved.
7 *
8 * @version 6.4.0
9 * @author Mathieu Faverge
10 * @author Pierre Ramet
11 * @author Xavier Lacoste
12 * @author Theophile Terraz
13 * @author Tony Delarue
14 * @author Alycia Lisito
15 * @date 2024-07-05
16 *
17 **/
18#include "common.h"
19#include "pastix/order.h"
20#include <spm.h>
21#include "blend/solver.h"
22#include "bcsc/bcsc.h"
23
24#include "bcsc/bcsc_z.h"
25#include "bcsc/bcsc_c.h"
26#include "bcsc/bcsc_d.h"
27#include "bcsc/bcsc_s.h"
28
29#define BCSC_COMM_NBR 6
30
31/**
32 *******************************************************************************
33 *
34 * @ingroup bcsc_internal
35 *
36 * @brief Initializes the bcsc_handle_comm_t structure.
37 *
38 *******************************************************************************
39 *
40 * @param[in] solvmtx
41 * The solver matrix structure which describes the data distribution.
42 *
43 * @param[out] bcsc
44 * The bcsc.
45 *
46 *******************************************************************************/
47void
49 pastix_bcsc_t *bcsc )
50{
51 pastix_int_t size = sizeof(bcsc_handle_comm_t) + (solvmtx->clustnbr-1)*sizeof(bcsc_proc_comm_t);
52 bcsc_handle_comm_t *bcsc_comm;
53
54 bcsc->bcsc_comm = (bcsc_handle_comm_t *)malloc( size );
55 bcsc_comm = bcsc->bcsc_comm;
56
57 bcsc_comm->flttype = bcsc->flttype;
58 bcsc_comm->clustnbr = solvmtx->clustnbr;
59 bcsc_comm->clustnum = solvmtx->clustnum;
60 bcsc_comm->comm = solvmtx->solv_comm;
61
62 memset( bcsc_comm->data_comm, 0, bcsc_comm->clustnbr * sizeof(bcsc_proc_comm_t) );
63}
64
65/**
66 *******************************************************************************
67 *
68 * @ingroup bcsc_internal
69 *
70 * @brief Frees the bcsc_handle_comm pointers.
71 *
72 *******************************************************************************
73 *
74 * @param[inout] bcsc_comm
75 * The bcsc_handle_comm_t structure.
76 *
77 *******************************************************************************/
78void
80{
81 int c;
82 int clustnbr = bcsc_comm->clustnbr;
83 bcsc_proc_comm_t *data;
84
85 for ( c = 0; c < clustnbr; c++ ) {
86 data = bcsc_comm->data_comm + c;
87
88 if( data->sendA.idxbuf != NULL ) {
89 memFree_null( data->sendA.idxbuf );
90 }
91 if( data->sendA.valbuf != NULL ) {
92 memFree_null( data->sendA.valbuf );
93 }
94 if( data->sendAt.idxbuf != NULL ) {
95 memFree_null( data->sendAt.idxbuf );
96 }
97 if( data->sendAt.valbuf != NULL ) {
98 memFree_null( data->sendAt.valbuf );
99 }
100 if( data->sendAAt.idxbuf != NULL ) {
101 memFree_null( data->sendAAt.idxbuf );
102 }
103 if( data->sendAAt.valbuf != NULL ) {
104 memFree_null( data->sendAAt.valbuf );
105 }
106 if( data->recvAAt.idxbuf != NULL ) {
107 memFree_null( data->recvAAt.idxbuf );
108 }
109 if( data->recvAAt.valbuf != NULL ) {
110 memFree_null( data->recvAAt.valbuf );
111 }
112 }
113}
114
115#if defined(PASTIX_WITH_MPI)
116/**
117 *******************************************************************************
118 *
119 * @ingroup bcsc
120 *
121 * @brief Computes the maximum size of the sending indexes and values buffers.
122 *
123 *******************************************************************************
124 *
125 * @param[inout] bcsc_comm
126 * On entry the bcsc_comm initialized.
127 * At exit the fields max_idx and max_val of bcsc_comm are updated.
128 *
129 *******************************************************************************
130 *
131 * @retval PASTIX_SUCCESS
132 *
133 *******************************************************************************/
134static inline int
135bcsc_compute_max( bcsc_handle_comm_t *bcsc_comm )
136{
137 bcsc_proc_comm_t *data = NULL;
138 bcsc_proc_comm_t *data_local = NULL;
139 pastix_int_t clustnbr = bcsc_comm->clustnbr;
140 pastix_int_t clustnum = bcsc_comm->clustnum;
141 pastix_int_t max_idx = 0;
142 pastix_int_t max_val = 0;
143 pastix_int_t idxsum_A = 0;
144 pastix_int_t valsum_A = 0;
145 pastix_int_t idxsum_At = 0;
146 pastix_int_t valsum_At = 0;
147 pastix_int_t idxsum_AAt = 0;
148 pastix_int_t valsum_AAt = 0;
149 pastix_int_t idxcnt_A, idxcnt_At, idxcnt_AAt, valcnt_A, valcnt_At, valcnt_AAt, c;
150
151 /* Receives the amount of indexes and values. */
152 for ( c = 0; c < clustnbr; c++ ) {
153 data = bcsc_comm->data_comm + c;
154 if ( c == clustnum ) {
155 continue;
156 }
157
158 idxcnt_A = data->recvA.idxcnt;
159 idxcnt_At = data->recvAt.idxcnt;
160 idxcnt_AAt = data->recvAAt.size.idxcnt;
161 valcnt_A = data->recvA.valcnt;
162 valcnt_At = data->recvAt.valcnt;
163 valcnt_AAt = data->recvAAt.size.valcnt;
164
165 max_idx = pastix_imax( max_idx, idxcnt_A);
166 max_idx = pastix_imax( max_idx, idxcnt_At);
167 max_idx = pastix_imax( max_idx, idxcnt_AAt);
168 max_val = pastix_imax( max_val, valcnt_A);
169 max_val = pastix_imax( max_val, valcnt_At);
170 max_val = pastix_imax( max_val, valcnt_AAt);
171
172 idxsum_A += idxcnt_A;
173 valsum_A += valcnt_A;
174 idxsum_At += idxcnt_At;
175 valsum_At += valcnt_At;
176 idxsum_AAt += idxcnt_AAt;
177 valsum_AAt += valcnt_AAt;
178 }
179
180 data_local = bcsc_comm->data_comm + clustnum;
181 data_local->recvA.idxcnt = idxsum_A;
182 data_local->recvA.valcnt = valsum_A;
183 data_local->recvAt.idxcnt = idxsum_At;
184 data_local->recvAt.valcnt = valsum_At;
185 data_local->recvAAt.size.idxcnt = idxsum_AAt;
186 data_local->recvAAt.size.valcnt = valsum_AAt;
187
188 assert( max_idx <= 2 * max_val );
189
190 bcsc_comm->max_idx = max_idx;
191 bcsc_comm->max_val = max_val;
192
193 return PASTIX_SUCCESS;
194}
195
196/**
197 *******************************************************************************
198 *
199 * @ingroup bcsc
200 *
201 * @brief Allocates the sending buffers in bcsc_comm->data_comm. These buffers
202 * are filled with the sending values.
203 *
204 *******************************************************************************
205 *
206 * @param[inout] bcsc_comm
207 * On entry the bcsc_comm initialized.
208 * At exit the arrays of bcsc_comm->data_comm are allocated.
209 *
210 * @param[in] mode
211 * If PastixTagMemRecvIdx: allocates receiving indexes A and At buffers.
212 * If PastixTagMemSend: allocates sending indexes and values A and At
213 * buffers.
214 * If PastixTagMemRecvValAAt: allocates receiving values AAt buffers, it
215 * used only if the spm is general.
216 *
217 *******************************************************************************
218 *
219 * @retval PASTIX_SUCCESS
220 *
221 *******************************************************************************/
222int
223bcsc_allocate_buf( bcsc_handle_comm_t *bcsc_comm,
224 bcsc_tag_e mode )
225{
226 bcsc_proc_comm_t *data = NULL;
227 pastix_int_t clustnbr = bcsc_comm->clustnbr;
228 pastix_int_t clustnum = bcsc_comm->clustnum;
229 pastix_int_t c;
230 size_t size;
231
232 if ( mode == PastixTagMemRecvIdx ) {
233 data = bcsc_comm->data_comm + clustnum;
234
235 if ( ( data->recvA.idxcnt > 0 ) && ( data->sendA.idxbuf == NULL ) ) {
236 MALLOC_INTERN( data->sendA.idxbuf, data->recvA.idxcnt, pastix_int_t );
237 }
238
239 if ( ( data->recvAt.idxcnt > 0 ) && ( data->sendAt.idxbuf == NULL ) ) {
240 MALLOC_INTERN( data->sendAt.idxbuf, data->recvAt.idxcnt, pastix_int_t );
241 }
242
243 if ( ( data->recvAAt.size.idxcnt > 0 ) && ( data->sendAAt.idxbuf == NULL ) ) {
244 MALLOC_INTERN( data->sendAAt.idxbuf, data->recvAAt.size.idxcnt, pastix_int_t );
245 }
246 }
247
248 if ( mode == PastixTagMemRecvValAAt ) {
249 for ( c = 0; c < clustnbr; c ++ ) {
250 data = bcsc_comm->data_comm + c;
251 if ( c == clustnum ) {
252 continue;
253 }
254 if ( ( data->recvAAt.size.valcnt > 0 ) && ( data->recvAAt.valbuf == NULL ) ) {
255 size = data->recvAAt.size.valcnt * pastix_size_of( bcsc_comm->flttype );
256 MALLOC_INTERN( data->recvAAt.valbuf, size, char );
257 }
258 }
259 }
260
261 if ( mode == PastixTagMemSend ) {
262 for ( c = 0; c < clustnbr; c ++ ) {
263 data = bcsc_comm->data_comm + c;
264
265 if ( c == clustnum ) {
266 continue;
267 }
268
269 if ( ( data->sendA.size.idxcnt > 0 ) && ( data->sendA.idxbuf == NULL ) ) {
270 MALLOC_INTERN( data->sendA.idxbuf, data->sendA.size.idxcnt, pastix_int_t );
271 }
272 if ( ( data->sendA.size.valcnt > 0 ) && ( data->sendA.valbuf == NULL ) ) {
273 size = data->sendA.size.valcnt * pastix_size_of( bcsc_comm->flttype );
274 MALLOC_INTERN( data->sendA.valbuf, size, char );
275 }
276
277 if ( ( data->sendAt.size.idxcnt > 0 ) && ( data->sendAt.idxbuf == NULL ) ) {
278 MALLOC_INTERN( data->sendAt.idxbuf, data->sendAt.size.idxcnt, pastix_int_t );
279 }
280 if ( ( data->sendAt.size.valcnt > 0 ) && ( data->sendAt.valbuf == NULL ) ) {
281 size = data->sendAt.size.valcnt * pastix_size_of( bcsc_comm->flttype );
282 MALLOC_INTERN( data->sendAt.valbuf, size, char );
283 }
284
285 if ( ( data->sendAAt.size.idxcnt > 0 ) && ( data->sendAAt.idxbuf == NULL ) ) {
286 MALLOC_INTERN( data->sendAAt.idxbuf, data->sendAAt.size.idxcnt, pastix_int_t );
287 }
288 if ( ( data->sendAAt.size.valcnt > 0 ) && ( data->sendAAt.valbuf == NULL ) ) {
289 size = data->sendAAt.size.valcnt * pastix_size_of( bcsc_comm->flttype );
290 MALLOC_INTERN( data->sendAAt.valbuf, size, char );
291 }
292 }
293 }
294
295 return PASTIX_SUCCESS;
296}
297
298/**
299 *******************************************************************************
300 *
301 * @ingroup bcsc
302 *
303 * @brief Frees the sending and receiving buffers in bcsc_comm->data_comm.
304 * These buffers are filled with the sending adn receiving values.
305 *
306 *******************************************************************************
307 *
308 * @param[inout] bcsc_comm
309 * On entry the bcsc_comm initialized.
310 * At exit the arrays of bcsc_comm->data_comm are freed.
311 *
312 * @param[in] mode
313 * If PastixTagMemSendIdx: frees sending indexes A, At and AAt buffers.
314 * If PastixTagMemSendValA: frees sending values A buffers.
315 * If PastixTagMemSendValAt: frees sending values At buffers.
316 * If PastixTagMemSendValAAt: frees sending values AAt buffers.
317 * If PastixTagMemRecvIdxA: frees receiving indexes A buffers.
318 * If PastixTagMemRecvIdxAt: frees receiving indexes At buffers.
319 * If PastixTagMemRecvAAt:frees receiving indexes and values if the
320 * spm is general AAt buffers.
321 *
322 *******************************************************************************
323 *
324 * @retval PASTIX_SUCCESS
325 *
326 *******************************************************************************/
327int
328bcsc_free_buf( bcsc_handle_comm_t *bcsc_comm,
329 bcsc_tag_e mode )
330{
331 bcsc_proc_comm_t *data = NULL;
332 pastix_int_t clustnbr = bcsc_comm->clustnbr;
333 pastix_int_t clustnum = bcsc_comm->clustnum;
334 pastix_int_t c;
335
336 if ( mode == PastixTagMemSendIdx ) {
337 for ( c = 0; c < clustnbr; c ++ ) {
338 data = bcsc_comm->data_comm + c;
339 if ( c == clustnum ) {
340 continue;
341 }
342 if ( data->sendA.idxbuf != NULL ) {
343 memFree_null( data->sendA.idxbuf );
344 }
345 if ( data->sendAt.idxbuf != NULL ) {
346 memFree_null( data->sendAt.idxbuf );
347 }
348 if ( data->sendAAt.idxbuf != NULL ) {
349 memFree_null( data->sendAAt.idxbuf );
350 }
351 }
352 }
353
354 if ( mode == PastixTagMemSendValA ) {
355 for ( c = 0; c < clustnbr; c ++ ) {
356 data = bcsc_comm->data_comm + c;
357 if ( c == clustnum ) {
358 continue;
359 }
360 if ( data->sendA.valbuf != NULL ) {
361 memFree_null( data->sendA.valbuf );
362 }
363 }
364 }
365
366 if ( mode == PastixTagMemSendValAt ) {
367 for ( c = 0; c < clustnbr; c ++ ) {
368 data = bcsc_comm->data_comm + c;
369 if ( c == clustnum ) {
370 continue;
371 }
372 if ( data->sendAt.valbuf != NULL ) {
373 memFree_null( data->sendAt.valbuf );
374 }
375 }
376 }
377
378 if ( mode == PastixTagMemSendValAAt ) {
379 for ( c = 0; c < clustnbr; c ++ ) {
380 data = bcsc_comm->data_comm + c;
381 if ( c == clustnum ) {
382 continue;
383 }
384 if ( data->sendAAt.valbuf != NULL ) {
385 memFree_null( data->sendAAt.valbuf );
386 }
387 }
388 }
389
390 if ( mode == PastixTagMemRecvIdxA ) {
391 data = bcsc_comm->data_comm + clustnum;
392 if ( data->sendA.idxbuf != NULL ) {
393 memFree_null( data->sendA.idxbuf );
394 }
395 }
396
397 if ( mode == PastixTagMemRecvIdxAt ) {
398 data = bcsc_comm->data_comm + clustnum;
399 if ( data->sendAt.idxbuf != NULL ) {
400 memFree_null( data->sendAt.idxbuf );
401 }
402 }
403
404 if ( mode == PastixTagMemRecvAAt ) {
405 data = bcsc_comm->data_comm + clustnum;
406 if ( data->sendAAt.idxbuf != NULL ) {
407 memFree_null( data->sendAAt.idxbuf );
408 }
409 if ( data->recvAAt.valbuf != NULL ) {
410 memFree_null( data->recvAAt.valbuf );
411 }
412 }
413
414 return PASTIX_SUCCESS;
415}
416
417/**
418 *******************************************************************************
419 *
420 * @ingroup bcsc_internal
421 *
422 * @brief Exchanges the amount of data the current processor will send to and
423 * receive from each processor.
424 *
425 *******************************************************************************
426 *
427 * @param[in] bcsc_comm
428 * The bcsc_handle_comm_t structure.
429 *
430 *******************************************************************************/
431void
432bcsc_exchange_amount_of_data( bcsc_handle_comm_t *bcsc_comm )
433{
434 bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
435 pastix_int_t clustnbr = bcsc_comm->clustnbr;
436 pastix_int_t clustnum = bcsc_comm->clustnum;
437 bcsc_proc_comm_t *data_send = NULL;
438 bcsc_proc_comm_t *data_recv = NULL;
439 pastix_int_t counter_req = 0;
440 MPI_Status statuses[(clustnbr-1)*BCSC_COMM_NBR];
441 MPI_Request requests[(clustnbr-1)*BCSC_COMM_NBR];
442 bcsc_data_amount_t *sends, *recvs;
443 pastix_int_t c_send, c_recv, k;
444
445 /* Exchanges the amount of indexes and values. */
446 c_send = (clustnum+1) % clustnbr;
447 c_recv = (clustnum-1+clustnbr) % clustnbr;
448 for ( k = 0; k < clustnbr-1; k++ ) {
449 data_send = data_comm + c_send;
450 data_recv = data_comm + c_recv;
451
452 if ( c_send == clustnum ) {
453 continue;
454 }
455
456 /* Exchanges the amount of indexes and values for A. */
457 sends = &( data_send->sendA.size );
458 recvs = &( data_recv->recvA );
459 MPI_Irecv( recvs, 2, PASTIX_MPI_INT, c_recv,
460 PastixTagCountA, bcsc_comm->comm, &requests[counter_req++] );
461
462 MPI_Isend( sends, 2, PASTIX_MPI_INT, c_send,
463 PastixTagCountA, bcsc_comm->comm, &requests[counter_req++] );
464
465 /* Exchanges the amount of indexes and values for At. */
466 sends = &( data_send->sendAt.size );
467 recvs = &( data_recv->recvAt );
468 MPI_Irecv( recvs, 2, PASTIX_MPI_INT, c_recv,
469 PastixTagCountAt, bcsc_comm->comm, &requests[counter_req++] );
470
471 MPI_Isend( sends, 2, PASTIX_MPI_INT, c_send,
472 PastixTagCountAt, bcsc_comm->comm, &requests[counter_req++] );
473
474 /* Exchanges the amount of indexes and values for AAt. */
475 sends = &( data_send->sendAAt.size );
476 recvs = &( data_recv->recvAAt.size );
477 MPI_Irecv( recvs, 2, PASTIX_MPI_INT, c_recv,
478 PastixTagCountAAt, bcsc_comm->comm, &requests[counter_req++] );
479
480 MPI_Isend( sends, 2, PASTIX_MPI_INT, c_send,
481 PastixTagCountAAt, bcsc_comm->comm, &requests[counter_req++] );
482
483 c_send = (c_send+1) % clustnbr;
484 c_recv = (c_recv-1+clustnbr) % clustnbr;
485 }
486
487 MPI_Waitall( counter_req, requests, statuses );
488
489 bcsc_compute_max( bcsc_comm );
490
491 return;
492}
493#endif
494
495/**
496 *******************************************************************************
497 *
498 * @ingroup bcsc_internal
499 *
500 * @brief Creates the array which represents the repartition of each column
501 * in the block structure. The array size is spm->gNexp where:
502 * - col2cblk[k] = cblknum, with cblknum the index of the block column
503 * where the column k is stored.
504 * This routine is called when the matrix is in shared memory.
505 *
506 *******************************************************************************
507 *
508 * @param[in] solvmtx
509 * The solvmtx structure associated to the problem.
510 *
511 * @param[in,out] bcsc
512 * The internal block CSC structure.
513 * The number of local columns is updated.
514 *
515 *******************************************************************************
516 *
517 * @return The col2cblk array which gives the repartition of the solvmtx columns
518 * into the block structure.
519 *
520 *******************************************************************************/
523 const pastix_bcsc_t *bcsc )
524{
525 pastix_int_t j;
526 pastix_int_t cblknum;
527 pastix_int_t *col2cblk;
528
529 /* Allocates the col2cblk. */
530 MALLOC_INTERN( col2cblk, bcsc->gN, pastix_int_t );
531 memset( col2cblk, 0xff, bcsc->gN * sizeof(pastix_int_t) );
532
533 const SolverCblk *cblk = solvmtx->cblktab;
534 pastix_int_t cblknbr = solvmtx->cblknbr;
535 /* Goes through the blocks. */
536 for ( cblknum = 0; cblknum < cblknbr; cblknum++, cblk++ ) {
537 if ( cblk->cblktype & (CBLK_FANIN|CBLK_RECV) ) {
538 continue;
539 }
540 /*
541 * Goes through the columns of the block and adds the number of
542 * the block in col2cblk at the corresponding index.
543 */
544 for ( j = cblk->fcolnum; j <= cblk->lcolnum; j++ ) {
545 col2cblk[j] = cblknum;
546 }
547 }
548
549 return col2cblk;
550}
551
552#if defined(PASTIX_WITH_MPI)
553/**
554 *******************************************************************************
555 *
556 * @ingroup bcsc_internal
557 *
558 * @brief Creates the array which represents the repartition of each column
559 * in the block structure. The array size is spm->gNexp where:
560 * - col2cblk[k] = - (owner + 1) if the column is not stored in a local block
561 * - col2cblk[k] = cblknum, if the column k is stored in a local block, with
562 * cblknum the index of this block column.
563 * This routine is called when the matrix is in distributed memory.
564 *
565 *******************************************************************************
566 *
567 * @param[in] solvmtx
568 * The solvmtx structure associated to the problem.
569 *
570 * @param[in,out] bcsc
571 * The internal block CSC structure.
572 * The number of local columns is updated.
573 *
574 *******************************************************************************
575 *
576 * @return The col2cblk array which gives the repartition of the solvmtx columns
577 * into the block structure.
578 *
579 *******************************************************************************/
581bcsc_init_col2cblk_dst( const SolverMatrix *solvmtx,
582 const pastix_bcsc_t *bcsc )
583{
584 pastix_int_t n, nr = 0;
585 pastix_int_t k, j, c;
586 pastix_int_t clustnum = solvmtx->clustnum;
587 pastix_int_t clustnbr = solvmtx->clustnbr;
588 pastix_int_t fcolnum, lcolnum, cblknum;
589 pastix_int_t *col2cblk;
590 pastix_int_t *col2cblk_bcast = NULL;
591
592 /* Allocates the col2cblk. */
593 MALLOC_INTERN( col2cblk, bcsc->gN, pastix_int_t );
594 memset( col2cblk, 0xff, bcsc->gN * sizeof(pastix_int_t) );
595
596 for( c = 0; c < clustnbr; c++ ) {
597 if ( c == clustnum ) {
598 const SolverCblk *cblk = solvmtx->cblktab;
599 pastix_int_t cblknbr = solvmtx->cblknbr;
600 pastix_int_t colcount;
601
602 n = (solvmtx->cblknbr - solvmtx->faninnbr - solvmtx->recvnbr) * 2;
603
604 /* Sends the size of data. */
605 MPI_Bcast( &n, 1, PASTIX_MPI_INT, c, solvmtx->solv_comm );
606
607 if ( n == 0 ) {
608 continue;
609 }
610
611 if ( n > nr ) {
612 pastix_int_t *tmp;
613 nr = n;
614 tmp = (pastix_int_t *)realloc( col2cblk_bcast, nr * sizeof(pastix_int_t) );
615 /* Protection for static analysis */
616 if ( pastix_unlikely( tmp == NULL ) ) {
617 pastix_print_error( "Error reallocating col2cblk_bcast\n" );
618 }
619 col2cblk_bcast = tmp;
620 }
621
622 colcount = 0;
623 k = 0;
624 /* Goes through the blocks. */
625 for ( cblknum = 0; cblknum < cblknbr; cblknum++, cblk++ ) {
626 if ( cblk->cblktype & (CBLK_FANIN|CBLK_RECV) ) {
627 continue;
628 }
629 assert( col2cblk_bcast != NULL );
630
631 /* Adds the first and last columns of the block in col2cblk_bcast. */
632 col2cblk_bcast[k] = cblk->fcolnum;
633 col2cblk_bcast[k+1] = cblk->lcolnum;
634 k += 2;
635 /*
636 * Goes through the columns of the block and adds the
637 * block number in col2cblk.
638 */
639 for ( j = cblk->fcolnum; j <= cblk->lcolnum; j++ ) {
640 colcount++;
641 col2cblk[j] = cblknum;
642 }
643 }
644 assert( colcount == bcsc->n );
645
646 /* Sends the col2cblk_bcast. */
647 if ( n > 0 ) {
648 MPI_Bcast( col2cblk_bcast, n, PASTIX_MPI_INT, c, solvmtx->solv_comm );
649 }
650 }
651 else {
652 /* Receives the size of data from c. */
653 MPI_Bcast( &n, 1, PASTIX_MPI_INT, c, solvmtx->solv_comm );
654
655 if ( n == 0 ) {
656 continue;
657 }
658
659 if ( n > nr ) {
660 pastix_int_t *tmp;
661 nr = n;
662 tmp = (pastix_int_t *)realloc( col2cblk_bcast, nr * sizeof(pastix_int_t) );
663 /* Protection for static analysis */
664 if ( pastix_unlikely( tmp == NULL ) ) {
665 pastix_print_error( "Error reallocating col2cblk_bcast\n" );
666 }
667 col2cblk_bcast = tmp;
668 }
669
670 /* Receives the col2cblk_bcast from c. */
671 MPI_Bcast( col2cblk_bcast, n, PASTIX_MPI_INT, c, solvmtx->solv_comm );
672 /*
673 * Goes through the columns in col2cblk_bcast and adds the processor
674 * number in col2cblk.
675 */
676 for ( k = 0; k < n; k += 2 ) {
677 fcolnum = col2cblk_bcast[k];
678 lcolnum = col2cblk_bcast[k+1];
679 for ( j = fcolnum; j <= lcolnum; j++ ) {
680 col2cblk[j] = - c - 1;
681 }
682 }
683 }
684 }
685
686 free( col2cblk_bcast );
687
688 return col2cblk;
689}
690#endif
691
692/**
693 *******************************************************************************
694 *
695 * @ingroup bcsc_internal
696 *
697 * @brief Creates the array which represents the repartition of each column
698 * in the block structure. This routine calls bcsc_init_col2cblk_shm or
699 * bcsc_init_col2cblk_dst according to the way the matrix is stored in the
700 * memory.
701 *
702 *******************************************************************************
703 *
704 * @param[in] solvmtx
705 * The solvmtx structure associated to the problem.
706 *
707 * @param[in] bcsc
708 * The internal block CSC structure.
709 * The number of local columns is updated.
710 *
711 * @param[in] spm
712 * The initial sparse matrix in the spm format.
713 *
714 *******************************************************************************
715 *
716 * @return The col2cblk array which gives the repartition of the solvmtx columns
717 * into the block structure.
718 *
719 *******************************************************************************/
722 const pastix_bcsc_t *bcsc,
723 const spmatrix_t *spm )
724{
725 pastix_int_t *col2cblk;
726
727 /* Tests if the spm is in shared or distributed memory. */
728#if defined(PASTIX_WITH_MPI)
729 if ( spm->loc2glob != NULL ) {
730 col2cblk = bcsc_init_col2cblk_dst( solvmtx, bcsc );
731 }
732 else
733#endif
734 {
735 col2cblk = bcsc_init_col2cblk_shm( solvmtx, bcsc );
736 }
737
738 (void)spm;
739 return col2cblk;
740}
741
742/**
743 *******************************************************************************
744 *
745 * @brief Initializes the dofshit array of size gNexp which gives
746 * dofshift[index_permuted] = index. This corresponds to the inverse of
747 * the permutation given in ord->permtab.
748 *
749 *******************************************************************************
750 *
751 * @param[in] spm
752 * The initial sparse matrix in the spm format.
753 *
754 * @param[in] ord
755 * The ordering that needs to be applied on the spm to generate the
756 * block csc.
757 *
758 *******************************************************************************
759 *
760 * @return The dofshift array.
761 *
762 *******************************************************************************/
763static inline pastix_int_t*
764bcsc_init_dofshift( const spmatrix_t *spm,
765 const pastix_order_t *ord )
766{
767 pastix_int_t *dofshift, *ptr;
768 pastix_int_t *dofs;
769 pastix_int_t dof;
770 pastix_int_t idof, dofj, dofidx;
771 pastix_int_t jg, jgp;
772
773 /* Allocates the dofshift array. */
774 MALLOC_INTERN( dofshift, spm->gNexp, pastix_int_t );
775
776 dofs = spm->dofs;
777 dof = spm->dof;
778 ptr = dofshift;
779 for ( jg = 0; jg < spm->gN; jg++ ) {
780 jgp = ord->permtab[jg];
781 dofidx = (dof > 0) ? jgp * dof : dofs[jg];
782 ptr = dofshift + dofidx;
783 dofj = (dof > 0) ? dof : dofs[jg+1] - dofs[jg];
784 for ( idof = 0; idof < dofj; idof++, ptr++ ) {
785 *ptr = jgp;
786 }
787 }
788 return dofshift;
789}
790
791/**
792 *******************************************************************************
793 *
794 * @brief Initializes the coltab of a block csc matrix. The coltab corresponds to
795 * the number of rows (expended) per column (non expended). This rountine
796 * is called when the matrix is stored in shared memory or the matrix is
797 * replicated on the processors and the matrix's degree of liberty is
798 * constant.
799 *
800 *******************************************************************************
801 *
802 * @param[in] spm
803 * The initial sparse matrix in the spm format.
804 *
805 * @param[in] ord
806 * The ordering which needs to be applied on the spm to generate the
807 * block csc.
808 *
809 * @param[out] globcol
810 * The array which contains, for each column, its beginning in the
811 * smp->colptr.
812 *
813 *******************************************************************************/
814static inline void
815bcsc_init_global_coltab_shm_cdof( const spmatrix_t *spm,
816 const pastix_order_t *ord,
817 pastix_int_t *globcol )
818{
819 pastix_int_t *colptr = spm->colptr;
820 pastix_int_t *rowptr = spm->rowptr;
821 pastix_int_t dof = spm->dof;
822 pastix_int_t baseval = spm->baseval;
823 pastix_int_t frow, lrow;
824 pastix_int_t k, j, ig, jg, igp, jgp;
825 int sym = (spm->mtxtype == SpmSymmetric) || (spm->mtxtype == SpmHermitian);
826
827 assert( dof > 0 );
828 assert( spm->loc2glob == NULL );
829
830 /* Goes through the column of the spm. */
831 for ( j = 0; j < spm->n; j++, colptr++ ) {
832 jg = j;
833 jgp = ord->permtab[jg];
834 frow = colptr[0] - baseval;
835 lrow = colptr[1] - baseval;
836 assert( (lrow - frow) >= 0 );
837 /* Adds the number of values in the column jg. */
838 globcol[jgp] += (lrow - frow) * dof;
839
840 /*
841 * Adds for At the number of values in the row ig and column jg. This
842 * is not required for the general case as the spm has a symmetric
843 * pattern.
844 */
845 if ( !sym ) {
846 continue;
847 }
848
849 for ( k = frow; k < lrow; k++ ) {
850 ig = rowptr[k] - baseval;
851 if ( ig != jg ) {
852 igp = ord->permtab[ig];
853 globcol[igp] += dof;
854 }
855 }
856 }
857
858 return;
859}
860
861/**
862 *******************************************************************************
863 *
864 * @brief Initializes the coltab of a block csc matrix. The coltab corresponds to
865 * the number of rows (expended) per column (non expended). This rountine
866 * is called when the matrix is stored in shared memory or the matrix is
867 * replicated on the processors and the matrix's degree of liberty is
868 * variadic.
869 *
870 *******************************************************************************
871 *
872 * @param[in] spm
873 * The initial sparse matrix in the spm format.
874 *
875 * @param[in] ord
876 * The ordering which needs to be applied on the spm to generate the
877 * block csc.
878 *
879 * @param[out] globcol
880 * The array which contains, for each column, its begining in the
881 * smp->colptr.
882 *
883 *******************************************************************************/
884static inline void
885bcsc_init_global_coltab_shm_vdof( const spmatrix_t *spm,
886 const pastix_order_t *ord,
887 pastix_int_t *globcol )
888{
889 pastix_int_t *colptr = spm->colptr;
890 pastix_int_t *rowptr = spm->rowptr;
891 pastix_int_t *dofs = spm->dofs;
892 pastix_int_t baseval = spm->baseval;
893 pastix_int_t frow, lrow;
894 pastix_int_t k, j, ig, jg, igp, jgp;
895 pastix_int_t dofj, dofi;
896 int sym = (spm->mtxtype == SpmSymmetric) || (spm->mtxtype == SpmHermitian);
897
898 assert( spm->dof <= 0 );
899 assert( spm->loc2glob == NULL );
900
901 /* Goes through the column of the spm. */
902 for ( j=0; j<spm->n; j++, colptr++ ) {
903 jg = j;
904 dofj = dofs[jg+1] - dofs[jg];
905 jgp = ord->permtab[jg];
906 frow = colptr[0] - baseval;
907 lrow = colptr[1] - baseval;
908 assert( (lrow - frow) >= 0 );
909
910 for ( k=frow; k<lrow; k++ ) {
911 ig = rowptr[k] - baseval;
912 dofi = dofs[ig+1] - dofs[ig];
913 /* Adds the number of values in the row ig and column jg. */
914 globcol[jgp] += dofi;
915
916 /* Adds for At the number of values in the row ig and column jg. */
917 if ( sym && (ig != jg) ) {
918 igp= ord->permtab[ig];
919 globcol[igp] += dofj;
920 }
921 }
922 }
923
924 return;
925}
926
927#if defined(PASTIX_WITH_MPI)
928/**
929 *******************************************************************************
930 *
931 * @brief Initializes the coltab of a block csc matrix. The coltab corresponds to
932 * the number of rows (expended) per column (non expended). This rountine
933 * is called when the matrix is distributed in the memory and the matrix's
934 * degree of liberty is constant.
935 *
936 * There are two cases:
937 *
938 * If the matrix is general: the full columns and rows of the blocks are stored
939 * in Lvalues and Uvalues.
940 * - The local data of the current process which are in remote blocks after
941 * the permutation need be to sent to the owner process. The data is stored
942 * in sendA if it is sent for the column only, in sendAt if it is sent for
943 * the row only and in sendAAt if it is sent for the row and column.
944 * - The local data of the current process which are in local column blocks
945 * after the permutation need to be added in globcol.
946 *
947 * If the matrix is Symmetric or Hermitian: only the full columns of the blocks
948 * are stored in Lvalues (and Uvalues = Lvalues). Only half of the spm is
949 * stored lower (or upper) triangular half, therefore we need to duplicate
950 * the lower (or upper) data to fill the upper (or lower half of the matrix
951 * in the blocks.
952 * - The local data of the current process which are in remote blocks after
953 * the permutation need be to sent to the owner process. The data is stored
954 * in sendA if it is sent for the lower (or upper) half or the column, in
955 * sendAt if it is sent for the upper (or lower) half of the column and in
956 * sendAAt if it is sent for both the lower and upper half of the column.
957 * The diagonal values are stored in sendA only.
958 * - The local data of the current process which are in column blocks after
959 * the permutation need to be added in globcol twice: once for the lower
960 * half and once for the upper half. The diagonal values need to be added
961 * only once.
962 *
963 *******************************************************************************
964 *
965 * @param[in] spm
966 * The initial sparse matrix in the spm format.
967 *
968 * @param[in] ord
969 * The ordering which needs to be applied on the spm to generate the
970 * block csc.
971 *
972 * @param[in] col2cblk
973 * The array which contains the repartition of the matrix columns
974 * into the block structure.
975 *
976 * @param[out] globcol
977 * The array which contains, for each column, its begining in the
978 * smp->colptr.
979 *
980 * @param[in,out] bcsc_comm
981 * On entry, the initialised bcsc_comm structure. On exit, the
982 * bcsc_handle_comm structure which contains the amount of data to
983 * send to the other processors.
984 *
985 *******************************************************************************/
986static inline void
987bcsc_init_global_coltab_dst_cdof( const spmatrix_t *spm,
988 const pastix_order_t *ord,
989 const pastix_int_t *col2cblk,
990 pastix_int_t *globcol,
991 bcsc_handle_comm_t *bcsc_comm )
992{
993 pastix_int_t *colptr = spm->colptr;
994 pastix_int_t *rowptr = spm->rowptr;
995 pastix_int_t *loc2glob = spm->loc2glob;
996 pastix_int_t dof = spm->dof;
997 pastix_int_t baseval = spm->baseval;
998 bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
999 bcsc_exch_comm_t *data_sendA, *data_sendAt, *data_sendAAt;
1000 pastix_int_t frow, lrow;
1001 pastix_int_t il, jl, ig, jg, igp, jgp;
1002 int sym = (spm->mtxtype == SpmSymmetric) || (spm->mtxtype == SpmHermitian);
1003 pastix_int_t ownerj, owneri;
1004
1005 assert( dof > 0 );
1006
1007 /* Goes through the columns of spm. */
1008 for ( jl = 0; jl < spm->n; jl++, colptr++, loc2glob++ ) {
1009 jg = *loc2glob - baseval;
1010 jgp = ord->permtab[jg];
1011
1012 frow = colptr[0] - baseval;
1013 lrow = colptr[1] - baseval;
1014 assert( (lrow - frow) >= 0 );
1015
1016 ownerj = col2cblk[jgp * dof];
1017
1018 /* The column jp belongs to another process. */
1019 if ( ownerj < 0 ) {
1020 ownerj = - ownerj - 1;
1021 data_comm = bcsc_comm->data_comm + ownerj;
1022 data_sendA = &( data_comm->sendA );
1023
1024 /* Goes through the rows of jl. */
1025 for ( il = frow; il < lrow; il++ ) {
1026 ig = rowptr[il] - baseval;
1027
1028 /*
1029 * The diagonal values (ip, jp) belong to the same process.
1030 * They are sent to owneri in the sym case for A only.
1031 */
1032 if ( sym && ( ig == jg ) ) {
1033 data_sendA->size.idxcnt += 2;
1034 data_sendA->size.valcnt += dof * dof;
1035 continue;
1036 }
1037
1038 igp = ord->permtab[ig];
1039 owneri = col2cblk[igp* dof];
1040
1041 /* The row ip belongs to another process. */
1042 if ( owneri < 0 ) {
1043 owneri = - owneri - 1;
1044 data_comm = bcsc_comm->data_comm + owneri;
1045
1046 /*
1047 * The diagonal values (ip, jp) belong to the same process.
1048 * They are sent to owneri for AAt in the general cae.
1049 */
1050 if ( owneri == ownerj ) {
1051 data_sendAAt = &( data_comm->sendAAt );
1052
1053 data_sendAAt->size.idxcnt += 2;
1054 data_sendAAt->size.valcnt += dof * dof;
1055 }
1056 /*
1057 * The values (ip, jp) belong to different processes.
1058 * They are sent to owneri for At and to ownerj for A.
1059 */
1060 else {
1061 data_sendAt = &( data_comm->sendAt );
1062
1063 data_sendAt->size.idxcnt += 2;
1064 data_sendAt->size.valcnt += dof * dof;
1065
1066 data_sendA->size.idxcnt += 2;
1067 data_sendA->size.valcnt += dof * dof;
1068 }
1069 }
1070 /* The row ip is local. */
1071 else {
1072 /*
1073 * The values (ip, jp) belong to ownerj.
1074 * They are sent to ownerj for A.
1075 */
1076 data_sendA->size.idxcnt += 2;
1077 data_sendA->size.valcnt += dof * dof;
1078 /*
1079 * The values (ip, jp) are local.
1080 * In the sym case ther are added to globcol.
1081 */
1082 if ( sym ) {
1083 globcol[igp] += dof;
1084 }
1085 }
1086 }
1087 }
1088 /* The column jp is local. */
1089 else {
1090 /* The column is added to globcol. */
1091 globcol[jgp] += dof * ( lrow - frow );
1092
1093 /* Goes through the rows of j. */
1094 for ( il = frow; il < lrow; il++ ) {
1095 ig = rowptr[il] - baseval;
1096
1097 /*
1098 * The diagonal values (ip, jp) have already been
1099 * added to globcol in the sym case.
1100 */
1101 if ( sym && ( ig == jg ) ) {
1102 continue;
1103 }
1104
1105 igp = ord->permtab[ig];
1106 owneri = col2cblk[igp* dof];
1107
1108 /* The row ip belongs to another process. */
1109 if ( owneri < 0 ) {
1110 owneri = - owneri - 1;
1111 data_comm = bcsc_comm->data_comm + owneri;
1112
1113 /*
1114 * The values (ip, jp) belong to owneri.
1115 * They are sent to ownerj for At.
1116 */
1117 data_sendAt = &( data_comm->sendAt );
1118
1119 data_sendAt->size.idxcnt += 2;
1120 data_sendAt->size.valcnt += dof * dof;
1121 }
1122 else {
1123 /*
1124 * The values (ip, jp) are local.
1125 * In the sym case they are added to globcol.
1126 */
1127 if ( sym ) {
1128 globcol[igp] += dof;
1129 }
1130 }
1131 }
1132 }
1133 }
1134
1135 return;
1136}
1137
1138/**
1139 *******************************************************************************
1140 *
1141 * @brief Initializes the coltab of a block csc matrix. The coltab corresponds to
1142 * the number of rows (expended) per column (non expended). This rountine
1143 * is called when the matrix is distributed in the memory and the matrix's
1144 * degree of liberty is variadic.
1145 *
1146 * DO NOT CURRENTLY WORKS
1147 *
1148 *******************************************************************************
1149 *
1150 * @param[in] spm
1151 * The initial sparse matrix in the spm format.
1152 *
1153 * @param[in] ord
1154 * The ordering which needs to be applied on the spm to generate the
1155 * block csc.
1156 *
1157 * @param[in] col2cblk
1158 * The array which contains the repartition of the matrix columns
1159 * into the block structure.
1160 *
1161 * @param[out] globcol
1162 * The array which contains, for each column, its begining in the
1163 * smp->colptr.
1164 *
1165 * @param[out] bcsc_comm
1166 * The bcsc_handle_comm structure which contains the amount of
1167 * data to send to the other processors.
1168 *
1169 *******************************************************************************/
1170static inline void
1171bcsc_init_global_coltab_dst_vdof( __attribute__((unused)) const spmatrix_t *spm,
1172 __attribute__((unused)) const pastix_order_t *ord,
1173 __attribute__((unused)) const pastix_int_t *col2cblk,
1174 __attribute__((unused)) pastix_int_t *globcol,
1175 __attribute__((unused)) bcsc_handle_comm_t *bcsc_comm )
1176{
1177 // pastix_int_t *colptr = spm->colptr;
1178 // pastix_int_t *rowptr = spm->rowptr;
1179 // pastix_int_t *loc2glob = spm->loc2glob;
1180 // pastix_int_t *dofs = spm->dofs;
1181 // pastix_int_t dof = spm->dof;
1182 // pastix_int_t baseval = spm->baseval;
1183 // pastix_int_t frow, lrow;
1184 // pastix_int_t k, j, ig, jg, igp, jgp;
1185 // pastix_int_t dofj, dofi;
1186 // int sym = (spm->mtxtype == SpmSymmetric) || (spm->mtxtype == SpmHermitian);
1187
1188 // assert( dof <= 0 );
1189
1190 // for ( j=0; j<spm->n; j++, colptr++, loc2glob++ )
1191 // {
1192 // jg = *loc2glob - baseval;
1193 // jgp = ord->permtab[jg];
1194 // dofj = dofs[jg+1] - dofs[jg];
1195
1196 // frow = colptr[0] - baseval;
1197 // lrow = colptr[1] - baseval;
1198 // assert( (lrow - frow) >= 0 );
1199
1200 // jgpe = ...;
1201 // ownerj = col2cblk[jgpe]; // FAUX
1202 // localj = ( ownerj >= 0 );
1203 // ownerj = - ownerj - 1;
1204
1205 // for ( k=frow; k<lrow; k++ )
1206 // {
1207 // ig = rowptr[k] - baseval;
1208 // dofi = dofs[ig+1] - dofs[ig];
1209
1210 // if ( localj ) {
1211 // /* The column is local */
1212 // globcol[jgp] += dofi;
1213 // }
1214 // else {
1215 // /* The column is remote */
1216 // //update_counter_tosend( ownerj, 1 /* Nbr Elt */, dofi /* Nbr values */ );
1217 // }
1218
1219 // if ( sym && (ig != jg) ) {
1220 // igp = ord->permtab[ig];
1221 // igpe = ...;
1222 // owneri = col2cblk[igpe]; // FAUX
1223
1224 // if ( owneri >= 0 ) {
1225 // globcol[igp] += dofj;
1226 // }
1227 // else {
1228 // owneri = - owneri - 1;
1229 // //update_counter_tosend( owneri, 1 /* Nbr Elt */, dofj /* Nbr values */ );
1230 // }
1231 // }
1232 // }
1233 // }
1234
1235 return;
1236}
1237
1238/**
1239 *******************************************************************************
1240 *
1241 * @brief Exchanges the indexes with the other processors.
1242 *
1243 *******************************************************************************
1244 *
1245 * @param[in,out] bcsc_comm
1246 * The bcsc_handle_comm structure which contains the data the current
1247 * processor has to send to the other processors on entry. On exit,
1248 * the structure is updated with the received data from the other
1249 * processors.
1250 *
1251 *******************************************************************************/
1252void
1253bcsc_exchange_indexes( bcsc_handle_comm_t *bcsc_comm )
1254{
1255 pastix_int_t clustnbr = bcsc_comm->clustnbr;
1256 pastix_int_t clustnum = bcsc_comm->clustnum;
1257 bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
1258 bcsc_proc_comm_t *data_local = bcsc_comm->data_comm + clustnum;
1259 bcsc_exch_comm_t *sendA_local = &( data_local->sendA );
1260 bcsc_exch_comm_t *sendAt_local = &( data_local->sendAt );
1261 bcsc_exch_comm_t *sendAAt_local = &( data_local->sendAAt );
1262 pastix_int_t counter_req = 0;
1263 pastix_int_t cntA = 0;
1264 pastix_int_t cntAt = 0;
1265 pastix_int_t cntAAt = 0;
1266 pastix_int_t idx_cnt_A[clustnbr];
1267 pastix_int_t idx_cnt_At[clustnbr];
1268 pastix_int_t idx_cnt_AAt[clustnbr];
1269 MPI_Status statuses[(clustnbr-1)*BCSC_COMM_NBR];
1270 MPI_Request requests[(clustnbr-1)*BCSC_COMM_NBR];
1271 bcsc_proc_comm_t *data_send, *data_recv;
1272 bcsc_exch_comm_t *send;
1273 bcsc_data_amount_t *recv;
1274 pastix_int_t c_send, c_recv, k;
1275
1276 bcsc_allocate_buf( bcsc_comm, PastixTagMemRecvIdx );
1277
1278 for ( k = 0; k < clustnbr; k++ ) {
1279 if ( k == clustnum ) {
1280 idx_cnt_A[k] = 0;
1281 idx_cnt_At[k] = 0;
1282 idx_cnt_AAt[k] = 0;
1283 continue;
1284 }
1285 idx_cnt_A[ k ] = cntA;
1286 cntA += data_comm[k].recvA.idxcnt;
1287 idx_cnt_At[ k ] = cntAt;
1288 cntAt += data_comm[k].recvAt.idxcnt;
1289 idx_cnt_AAt[ k ] = cntAAt;
1290 cntAAt += data_comm[k].recvAAt.size.idxcnt;
1291 }
1292
1293 c_send = (clustnum+1) % clustnbr;
1294 c_recv = (clustnum-1+clustnbr) % clustnbr;
1295 for ( k = 0; k < clustnbr-1; k++ ) {
1296 data_send = data_comm + c_send;
1297 data_recv = data_comm + c_recv;
1298 if ( c_send == clustnum ) {
1299 continue;
1300 }
1301
1302 /* Posts the receptions of the indexes. */
1303 recv = &( data_recv->recvA );
1304 if ( recv->idxcnt != 0 ) {
1305 MPI_Irecv( sendA_local->idxbuf + idx_cnt_A[c_recv], recv->idxcnt,
1306 PASTIX_MPI_INT, c_recv, PastixTagIndexesA, bcsc_comm->comm,
1307 &requests[counter_req++] );
1308 }
1309 recv = &( data_recv->recvAt );
1310 if ( recv->idxcnt != 0 ) {
1311 MPI_Irecv( sendAt_local->idxbuf + idx_cnt_At[c_recv], recv->idxcnt,
1312 PASTIX_MPI_INT, c_recv, PastixTagIndexesAt, bcsc_comm->comm,
1313 &requests[counter_req++] );
1314 }
1315 recv = &( data_recv->recvAAt.size );
1316 if ( recv->idxcnt != 0 ) {
1317 MPI_Irecv( sendAAt_local->idxbuf + idx_cnt_AAt[c_recv], recv->idxcnt,
1318 PASTIX_MPI_INT, c_recv, PastixTagIndexesAAt, bcsc_comm->comm,
1319 &requests[counter_req++] );
1320 }
1321
1322 /* Posts the emissions of the indexes. */
1323 send = &( data_send->sendA );
1324 if ( send->size.idxcnt != 0 ) {
1325 MPI_Isend( send->idxbuf, send->size.idxcnt, PASTIX_MPI_INT, c_send,
1326 PastixTagIndexesA, bcsc_comm->comm, &requests[counter_req++] );
1327 }
1328 send = &( data_send->sendAt );
1329 if ( send->size.idxcnt != 0 ) {
1330 MPI_Isend( send->idxbuf, send->size.idxcnt, PASTIX_MPI_INT, c_send,
1331 PastixTagIndexesAt, bcsc_comm->comm, &requests[counter_req++] );
1332 }
1333 send = &( data_send->sendAAt );
1334 if ( send->size.idxcnt != 0 ) {
1335 MPI_Isend( send->idxbuf, send->size.idxcnt, PASTIX_MPI_INT, c_send,
1336 PastixTagIndexesAAt, bcsc_comm->comm, &requests[counter_req++] );
1337 }
1338 c_send = (c_send+1) % clustnbr;
1339 c_recv = (c_recv-1+clustnbr) % clustnbr;
1340 }
1341
1342 MPI_Waitall( counter_req, requests, statuses );
1343}
1344
1345/**
1346 *******************************************************************************
1347 *
1348 * @brief Updates globcol with the received indexes.
1349 *
1350 *******************************************************************************
1351 *
1352 * @param[in] spm
1353 * The initial sparse matrix in the spm format.
1354 *
1355 * @param[in] ord
1356 * The ordering which needs to be applied on the spm to generate the
1357 * block csc.
1358 *
1359 * @param[out] globcol
1360 * The array which contains, for each column, its begining in the
1361 * smp->colptr. This array is updated with the data received from the
1362 * other processors.
1363 *
1364 * @param[in] bcsc_comm
1365 * The bcsc_handle_comm structure which contains the received data
1366 * from the other processors.
1367 *
1368 *******************************************************************************/
1369static inline void
1370bcsc_update_globcol( const spmatrix_t *spm,
1371 const pastix_order_t *ord,
1372 pastix_int_t *globcol,
1373 bcsc_handle_comm_t *bcsc_comm )
1374{
1375 pastix_int_t *dofs = spm->dofs;
1376 pastix_int_t dof = spm->dof;
1377 pastix_int_t clustnum = bcsc_comm->clustnum;
1378 bcsc_proc_comm_t *data_local = bcsc_comm->data_comm + clustnum;
1379 bcsc_exch_comm_t *sendA_local = &( data_local->sendA );
1380 bcsc_exch_comm_t *sendAt_local = &( data_local->sendAt );
1381 bcsc_exch_comm_t *sendAAt_local = &( data_local->sendAAt );
1382 pastix_int_t k, igp, jgp, jg, ig, baseval;
1383 pastix_int_t *indexes_A;
1384 pastix_int_t *indexes_At;
1385 pastix_int_t *indexes_AAt;
1386
1387 assert( ord->baseval == 0 );
1388 baseval = ord->baseval;
1389
1390 /* Updates globcol. */
1391 indexes_A = sendA_local->idxbuf;
1392 indexes_At = sendAt_local->idxbuf;
1393 indexes_AAt = sendAAt_local->idxbuf;
1394
1395 /* Goes through indexes_A. */
1396 for ( k = 0; k < data_local->recvA.idxcnt; k += 2, indexes_A += 2 ) {
1397 igp = indexes_A[0];
1398 jgp = indexes_A[1];
1399 ig = ord->peritab[igp] - baseval;
1400
1401 /* Adds the values (igp, jgp) to globcol. */
1402 globcol[jgp] += ( dof < 0 ) ? dofs[ ig+1 ] - dofs[ig] : dof;
1403 }
1404
1405 /* Goes through indexes_At. */
1406 if ( spm->mtxtype != SpmGeneral ) {
1407 for ( k = 0; k < data_local->recvAt.idxcnt; k += 2, indexes_At += 2 ) {
1408 igp = indexes_At[0];
1409 jgp = indexes_At[1];
1410 jg = ord->peritab[jgp] - baseval;
1411
1412 /* Adds the values (igp, jgp) to globcol. */
1413 globcol[igp] += ( dof < 0 ) ? dofs[ jg+1 ] - dofs[jg] : dof;
1414 }
1415 }
1416
1417 /* Goes through indexes_AAt. */
1418 for ( k = 0; k < data_local->recvAAt.size.idxcnt; k += 2, indexes_AAt += 2 ) {
1419 igp = indexes_AAt[0];
1420 jgp = indexes_AAt[1];
1421 ig = ord->peritab[igp] - baseval;
1422 jg = ord->peritab[jgp] - baseval;
1423
1424 /* Adds the values (igp, jgp) to globcol. */
1425 globcol[jgp] += ( dof < 0 ) ? dofs[ ig+1 ] - dofs[ig] : dof;
1426
1427 if ( spm->mtxtype != SpmGeneral ) {
1428 /* Adds the values (igp, jgp) twice to globcol if sym. */
1429 globcol[igp] += ( dof < 0 ) ? dofs[ jg+1 ] - dofs[jg] : dof;
1430 }
1431 }
1432}
1433#endif
1434
1435/**
1436 *******************************************************************************
1437 *
1438 * @brief Initializes the coltab of a block csc matrix. The coltab corresponds to
1439 * the number of rows (expended) per column (non expended). This routine
1440 * is calls bcsc_init_global_coltab_[shm,dst]_[c,v]dof according to the way
1441 * the matrix is stored and if the degree of liberty of the matrix is
1442 * constant or variadic. If the matrix is distributed in the memory, this
1443 * function also calls the routines which exchange the amount of data for
1444 * the communication, store the indexes and values to send and exchange
1445 * the indexes.
1446 *
1447 *******************************************************************************
1448 *
1449 * @param[in] spm
1450 * The initial sparse matrix in the spm format.
1451 *
1452 * @param[in] ord
1453 * The ordering which needs to be applied on the spm to generate the
1454 * block csc.
1455 *
1456 * @param[in] solvmtx
1457 * The solver matrix structure which describes the data distribution.
1458 *
1459 * @param[in] col2cblk
1460 * The array which contains the repartition of the matrix columns
1461 * into the block structure.
1462 *
1463 * @param[in,out] bcsc_comm
1464 * The handle_comm_structure updated with the amount of data the current
1465 * processor has to send to the other processor if PASTIX_WITH_MPI = ON
1466 * and the matrix is distributed in memory. If it is not the case,
1467 * bcsc_comm = NULL.
1468 *
1469 *******************************************************************************
1470 *
1471 * @returns The array which contains, for each column, its begining in the
1472 * smp->colptr.
1473 *
1474 *******************************************************************************/
1475static inline pastix_int_t*
1476bcsc_init_global_coltab( const spmatrix_t *spm,
1477 const pastix_order_t *ord,
1478 const SolverMatrix *solvmtx,
1479 const pastix_int_t *col2cblk,
1480 bcsc_handle_comm_t *bcsc_comm )
1481{
1482 spm_int_t *globcol;
1483
1484 /*
1485 * Allocates and initializes globcol which contains the number of elements in
1486 * each column of the input matrix.
1487 * Globcol is equivalent to the classic colptr for the internal blocked
1488 * csc. The blocked csc integrates the permutation computed within order
1489 * structure.
1490 */
1491 MALLOC_INTERN( globcol, spm->gN+1, pastix_int_t );
1492 memset( globcol, 0, (spm->gN+1) * sizeof(pastix_int_t) );
1493
1494 if ( bcsc_comm == NULL ) {
1495 if ( spm->dof > 0 ) {
1496 bcsc_init_global_coltab_shm_cdof( spm, ord, globcol );
1497 }
1498 else {
1499 bcsc_init_global_coltab_shm_vdof( spm, ord, globcol );
1500 }
1501 }
1502#if defined(PASTIX_WITH_MPI)
1503 else {
1504 if ( spm->dof > 0 ) {
1505 bcsc_init_global_coltab_dst_cdof( spm, ord, col2cblk, globcol, bcsc_comm );
1506 }
1507 else {
1508 bcsc_init_global_coltab_dst_vdof( spm, ord, col2cblk, globcol, bcsc_comm );
1509 }
1510
1511 /* Exchanges the amount of data which will be sent and received. */
1512 bcsc_exchange_amount_of_data( bcsc_comm );
1513
1514 /* Stores the indexes and values the current processor has to send to the others. */
1515 switch( spm->flttype ) {
1516 case SpmFloat:
1517 bcsc_sstore_data( spm, ord, col2cblk, bcsc_comm );
1518 break;
1519 case SpmDouble:
1520 bcsc_dstore_data( spm, ord, col2cblk, bcsc_comm );
1521 break;
1522 case SpmComplex32:
1523 bcsc_cstore_data( spm, ord, col2cblk, bcsc_comm );
1524 break;
1525 case SpmComplex64:
1526 bcsc_zstore_data( spm, ord, col2cblk, bcsc_comm );
1527 break;
1528 case SpmPattern:
1529 default:
1530 fprintf(stderr, "bcsc_init: Error unknown floating type for input spm\n");
1531 }
1532
1533 /* Exchanges the indexes and updates globcol with the received indexes. */
1534 bcsc_exchange_indexes( bcsc_comm );
1535 bcsc_update_globcol( spm, ord, globcol, bcsc_comm );
1536
1537#if !defined(NDEBUG)
1538 /* Check that globcol contains the right information. */
1539 if ( spm->dof > 0 ) {
1540 pastix_int_t ig, ip, ipe, dofi;
1541 pastix_int_t nnzl = 0;
1542 pastix_int_t nnzg = 0;
1543 pastix_int_t nnz;
1544 for( ig=0; ig<spm->gN; ig++ ) {
1545 ip = ord->permtab[ig];
1546 ipe = ( spm->dof > 0 ) ? ip * spm->dof : spm->dofs[ ig ] - spm->baseval;
1547 if ( col2cblk[ipe] < 0 ) {
1548 continue;
1549 }
1550
1551 dofi = ( spm->dof > 0 ) ? spm->dof: spm->dofs[ig+1] - spm->dofs[ig];
1552 nnzl += globcol[ip] * dofi;
1553 }
1554 MPI_Allreduce( &nnzl, &nnzg, 1, PASTIX_MPI_INT, MPI_SUM, spm->comm );
1555
1556 if ( spm->mtxtype != SpmGeneral ) {
1557 /*
1558 * We can't check the exact number of elements if some diagonal
1559 * values are missing (=0).
1560 */
1561 nnz = spm->gnnzexp * 2;
1562 assert( nnzg <= nnz );
1563 nnz = nnz - (spm->gN * spm->dof * spm->dof);
1564 assert( nnzg >= nnz );
1565 }
1566 else {
1567 nnz = spm->gnnzexp;
1568 assert( nnzg == nnz );
1569 }
1570 }
1571#endif
1572 }
1573
1574#endif
1575
1576 (void)solvmtx;
1577 (void)col2cblk;
1578 return globcol;
1579}
1580
1581/**
1582 *******************************************************************************
1583 *
1584 * @ingroup bcsc_internal
1585 *
1586 * @brief Initializes the coltab of a block csc matrix. The coltab corresponds
1587 * to the number of rows (expended) per column (non expended). If the
1588 * matrix is distributed in the memory, this function also calls the
1589 * routines which exchange the amount of data for the communication,
1590 * store the indexes and values to send and exchange the indexes.
1591 *
1592 *******************************************************************************
1593 *
1594 * @param[in] spm
1595 * The spm structure that stores the dofs.
1596 *
1597 * @param[in] ord
1598 * The ordering which needs to be applied on the spm to generate the
1599 * block csc.
1600 *
1601 * @param[in] solvmtx
1602 * The solver matrix structure which describes the data distribution.
1603
1604 * @param[inout] bcsc
1605 * On entry, the pointer to an allocated bcsc.
1606 * On exit, the bcsc stores the initialized coltab split per block
1607 * corresponding to the input spm with the permutation applied
1608 * and grouped accordingly to the distribution described in solvmtx.
1609 *
1610 *******************************************************************************
1611 *
1612 * @return The number of non zero unknowns in the matrix.
1613 *
1614 *******************************************************************************/
1616bcsc_init_coltab( const spmatrix_t *spm,
1617 const pastix_order_t *ord,
1618 const SolverMatrix *solvmtx,
1619 pastix_bcsc_t *bcsc )
1620{
1621 SolverCblk *cblk;
1622 bcsc_cblk_t *blockcol;
1623 pastix_int_t *dofshift = NULL;
1624 pastix_int_t *globcol = NULL;
1625 pastix_int_t cblknum, bcscnum, iter, idxcol, nodeidx, colsize;
1626
1627 bcsc->cscfnbr = solvmtx->cblknbr - solvmtx->faninnbr - solvmtx->recvnbr;
1628 MALLOC_INTERN( bcsc->cscftab, bcsc->cscfnbr, bcsc_cblk_t );
1629
1630 /* Creates an array to convert expanded indexes to not expanded indexes. */
1631 dofshift = bcsc_init_dofshift( spm, ord );
1632
1633 /* Computes the number of rows (expanded) per column (not expanded). */
1634 globcol = bcsc_init_global_coltab( spm, ord, solvmtx, bcsc->col2cblk, bcsc->bcsc_comm );
1635
1636 idxcol = 0;
1637 bcscnum = 0;
1638 cblk = solvmtx->cblktab;
1639 blockcol = bcsc->cscftab;
1640 for ( cblknum = 0; cblknum < solvmtx->cblknbr; cblknum++, cblk++ ) {
1641 if ( cblk->cblktype & (CBLK_FANIN|CBLK_RECV) ) {
1642 continue;
1643 }
1644
1645 blockcol->cblknum = cblknum;
1646 blockcol->colnbr = cblk_colnbr( cblk );
1647 assert( cblk->bcscnum == bcscnum );
1648 MALLOC_INTERN( blockcol->coltab, blockcol->colnbr + 1, pastix_int_t );
1649
1650 blockcol->coltab[0] = idxcol;
1651 for ( iter = 0; iter < blockcol->colnbr; iter++ ) {
1652 nodeidx = dofshift[ cblk->fcolnum + iter ];
1653 colsize = globcol[nodeidx];
1654 //jgpe = cblk->fcolnum + iter;
1655 //jgp = dofshift[ jgpe ];
1656 //colsize = globcol[jgp];
1657 blockcol->coltab[iter+1] = blockcol->coltab[iter] + colsize;
1658 }
1659 idxcol = blockcol->coltab[blockcol->colnbr];
1660
1661 blockcol++;
1662 bcscnum++;
1663 }
1664 assert( (blockcol - bcsc->cscftab) == bcsc->cscfnbr );
1665 assert( bcscnum == bcsc->cscfnbr );
1666
1667 memFree_null( globcol );
1668 memFree_null( dofshift );
1669
1670 if ( idxcol > 0 ) {
1671 MALLOC_INTERN( bcsc->rowtab, idxcol, pastix_int_t);
1672 MALLOC_INTERN( bcsc->Lvalues, idxcol * pastix_size_of( bcsc->flttype ), char );
1673 }
1674 else {
1675 bcsc->rowtab = NULL;
1676 bcsc->Lvalues = NULL;
1677 }
1678 bcsc->Uvalues = NULL;
1679
1680 return idxcol;
1681}
1682
1683/**
1684 *******************************************************************************
1685 *
1686 * @ingroup bcsc_internal
1687 *
1688 * @brief Restores the coltab array when it has been modified to initialize
1689 * the row and values arrays.
1690 *
1691 *******************************************************************************
1692 *
1693 * @param[inout] bcsc
1694 * On entry, the bcsc to restore.
1695 * On exit, the coltab array of the bcsc is restored to the correct
1696 * indexes.
1697 *
1698 *******************************************************************************/
1699void
1700bcsc_restore_coltab( pastix_bcsc_t *bcsc )
1701{
1702 bcsc_cblk_t *blockcol;
1703 pastix_int_t index, iter, idxcol, idxcoltmp;
1704
1705 idxcol = 0;
1706 blockcol = bcsc->cscftab;
1707 for ( index=0; index<bcsc->cscfnbr; index++, blockcol++ )
1708 {
1709 for ( iter=0; iter <= blockcol->colnbr; iter++ )
1710 {
1711 idxcoltmp = blockcol->coltab[iter];
1712 blockcol->coltab[iter] = idxcol;
1713 idxcol = idxcoltmp;
1714 }
1715 }
1716 return;
1717}
1718
1719/**
1720 *******************************************************************************
1721 *
1722 * @brief Initializes a block csc.
1723 *
1724 *******************************************************************************
1725 *
1726 * @param[in] spm
1727 * The initial sparse matrix in the spm format.
1728 *
1729 * @param[in] solvmtx
1730 * The solver matrix structure which describes the data distribution.
1731 *
1732 * @param[inout] bcsc
1733 * On entry, the pointer to an allocated bcsc.
1734 * On exit, the bcsc stores the input spm with the permutation applied
1735 * and grouped accordingly to the distribution described in solvmtx.
1736 *
1737 *******************************************************************************/
1738void
1739bcsc_init_struct( const spmatrix_t *spm,
1740 const SolverMatrix *solvmtx,
1741 pastix_bcsc_t *bcsc )
1742{
1743 pastix_int_t *col2cblk = NULL;
1744
1745 bcsc->mtxtype = spm->mtxtype;
1746 bcsc->flttype = spm->flttype;
1747 bcsc->gN = spm->gNexp;
1748 bcsc->n = solvmtx->nodenbr;
1749
1750 /*
1751 * Creates the col2cblk array which associates each column to a cblk
1752 * (expanded).
1753 */
1754 col2cblk = bcsc_init_col2cblk( solvmtx, bcsc, spm );
1755 bcsc->col2cblk = col2cblk;
1756
1757 /*
1758 * Initializes the coltab array of the bcsc and allocates the rowtab and
1759 * Lvalues arrays.
1760 */
1761 bcsc->bcsc_comm = NULL;
1762 if ( spm->loc2glob != NULL ) {
1763 bcsc_handle_comm_init( solvmtx, bcsc );
1764 }
1765}
1766
1767/**
1768 *******************************************************************************
1769 *
1770 * @brief Cleanup the bcsc struct. (symmetric of bcsc_init_struct)
1771 *
1772 *******************************************************************************
1773 *
1774 * @param[inout] bcsc
1775 * On entry, the pointer to the initialized bcsc.
1776 * On exit, the bcsc freed from the informations initialized by
1777 * bcsc_init_struct().
1778 *
1779 *******************************************************************************/
1780void
1781bcsc_exit_struct( pastix_bcsc_t *bcsc )
1782{
1783 if ( bcsc->col2cblk != NULL ) {
1784 memFree_null( bcsc->col2cblk );
1785 }
1786
1787 if ( bcsc->bcsc_comm != NULL ) {
1788 bcsc_handle_comm_exit( bcsc->bcsc_comm );
1789 memFree_null( bcsc->bcsc_comm );
1790 }
1791}
1792
1793/**
1794 *******************************************************************************
1795 *
1796 * @brief Initializes a block csc.
1797 *
1798 *******************************************************************************
1799 *
1800 * @param[in] spm
1801 * The initial sparse matrix in the spm format.
1802 *
1803 * @param[in] ord
1804 * The ordering which needs to be applied on the spm to generate the
1805 * block csc.
1806 *
1807 * @param[in] solvmtx
1808 * The solver matrix structure which describes the data distribution.
1809 *
1810 * @param[in] initAt
1811 * The test to know if At has to be initialized:
1812 * - if initAt = 0 then the matrix is symmetric of hermitian which
1813 * means that Lvalues = Uvalues so At does not need to be
1814 * initialised.
1815 * - if initAt = 1 then the matrix is general and which means that
1816 * At needs to be initialised and computed.
1817 *
1818 * @param[inout] bcsc
1819 * On entry, the pointer to an allocated bcsc.
1820 * On exit, the bcsc stores the input spm with the permutation applied
1821 * and grouped accordingly to the distribution described in solvmtx.
1822 *
1823 *******************************************************************************/
1824static inline void
1825bcsc_init( const spmatrix_t *spm,
1826 const pastix_order_t *ord,
1827 const SolverMatrix *solvmtx,
1828 pastix_int_t initAt,
1829 pastix_bcsc_t *bcsc )
1830{
1831 pastix_int_t valuesize;
1832
1833 bcsc_init_struct( spm, solvmtx, bcsc );
1834 valuesize = bcsc_init_coltab( spm, ord, solvmtx, bcsc );
1835
1836 /*
1837 * Fills in the lower triangular part of the blocked csc with values and
1838 * rows. The upper triangular part is done later if required through LU
1839 * factorization.
1840 */
1841 switch( spm->flttype ) {
1842 case SpmFloat:
1843 bcsc_sinit( spm, ord, solvmtx, initAt, bcsc, valuesize );
1844 break;
1845 case SpmDouble:
1846 bcsc_dinit( spm, ord, solvmtx, initAt, bcsc, valuesize );
1847 break;
1848 case SpmComplex32:
1849 bcsc_cinit( spm, ord, solvmtx, initAt, bcsc, valuesize );
1850 break;
1851 case SpmComplex64:
1852 bcsc_zinit( spm, ord, solvmtx, initAt, bcsc, valuesize );
1853 break;
1854 case SpmPattern:
1855 default:
1856 fprintf(stderr, "bcsc_init: Error unknown floating type for input spm\n");
1857 }
1858}
1859
1860/**
1861 *******************************************************************************
1862 *
1863 * @brief Initializes the block csc matrix.
1864 *
1865 * The block csc matrix is used to initialize the factorized matrix, and to
1866 * perform the matvec operations in refinement.
1867 *
1868 *******************************************************************************
1869 *
1870 * @param[in] spm
1871 * The initial sparse matrix in the spm format.
1872 *
1873 * @param[in] ord
1874 * The ordering which needs to be applied on the spm to generate the
1875 * block csc.
1876 *
1877 * @param[in] solvmtx
1878 * The solver matrix structure which describes the data distribution.
1879 *
1880 * @param[in] initAt
1881 * The test to know if At has to be initialized:
1882 * - if initAt = 0 then the matrix is symmetric of hermitian which
1883 * means that Lvalues = Uvalues so At does not need to be
1884 * initialised.
1885 * - if initAt = 1 then the matrix is general which means that
1886 * At needs to be initialised and computed.
1887 *
1888 * @param[inout] bcsc
1889 * On entry, the pointer to an allocated bcsc.
1890 * On exit, the bcsc stores the input spm with the permutation applied
1891 * and grouped accordingly to the distribution described in solvmtx.
1892 *
1893 *******************************************************************************
1894 *
1895 * @return The time spent to initialize the bcsc structure.
1896 *
1897 *******************************************************************************/
1898double
1899bcscInit( const spmatrix_t *spm,
1900 const pastix_order_t *ord,
1901 const SolverMatrix *solvmtx,
1902 pastix_int_t initAt,
1903 pastix_bcsc_t *bcsc )
1904{
1905 double time = 0.;
1906
1907 assert( ord->baseval == 0 );
1908 assert( ord->vertnbr == spm->gN );
1909
1910 clockStart(time);
1911 bcsc_init( spm, ord, solvmtx, initAt, bcsc );
1912 clockStop(time);
1913
1914 return time;
1915}
1916
1917/**
1918 *******************************************************************************
1919 *
1920 * @brief Frees the block csc structure but do not free the bcsc pointer.
1921 *
1922 *******************************************************************************
1923 *
1924 * @param[inout] bcsc
1925 * The block csc matrix to free.
1926 *
1927 *******************************************************************************/
1928void
1929bcscExit( pastix_bcsc_t *bcsc )
1930{
1931 bcsc_cblk_t *cblk;
1932 pastix_int_t i;
1933
1934 if ( bcsc->cscftab == NULL ) {
1935 return;
1936 }
1937
1938 for ( i=0, cblk=bcsc->cscftab; i < bcsc->cscfnbr; i++, cblk++ ) {
1939 memFree_null( cblk->coltab );
1940 }
1941
1942 memFree_null( bcsc->cscftab );
1943 memFree_null( bcsc->rowtab );
1944
1945 if ( (bcsc->Uvalues != NULL) &&
1946 (bcsc->Uvalues != bcsc->Lvalues) ) {
1947 memFree_null( bcsc->Uvalues );
1948 }
1949
1950 memFree_null( bcsc->Lvalues );
1951
1952 bcsc_exit_struct( bcsc );
1953}
static void bcsc_init_global_coltab_shm_vdof(const spmatrix_t *spm, const pastix_order_t *ord, pastix_int_t *globcol)
Initializes the coltab of a block csc matrix. The coltab corresponds to the number of rows (expended)...
Definition bcsc.c:885
static void bcsc_init(const spmatrix_t *spm, const pastix_order_t *ord, const SolverMatrix *solvmtx, pastix_int_t initAt, pastix_bcsc_t *bcsc)
Initializes a block csc.
Definition bcsc.c:1825
static void bcsc_init_global_coltab_shm_cdof(const spmatrix_t *spm, const pastix_order_t *ord, pastix_int_t *globcol)
Initializes the coltab of a block csc matrix. The coltab corresponds to the number of rows (expended)...
Definition bcsc.c:815
static pastix_int_t * bcsc_init_global_coltab(const spmatrix_t *spm, const pastix_order_t *ord, const SolverMatrix *solvmtx, const pastix_int_t *col2cblk, bcsc_handle_comm_t *bcsc_comm)
Initializes the coltab of a block csc matrix. The coltab corresponds to the number of rows (expended)...
Definition bcsc.c:1476
static pastix_int_t * bcsc_init_dofshift(const spmatrix_t *spm, const pastix_order_t *ord)
Initializes the dofshit array of size gNexp which gives dofshift[index_permuted] = index....
Definition bcsc.c:764
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
void bcsc_restore_coltab(pastix_bcsc_t *bcsc)
Restores the coltab array when it has been modified to initialize the row and values arrays.
Definition bcsc.c:1700
void bcsc_init_struct(const spmatrix_t *spm, const SolverMatrix *solvmtx, pastix_bcsc_t *bcsc)
Initializes a block csc.
Definition bcsc.c:1739
pastix_int_t bcsc_init_coltab(const spmatrix_t *spm, const pastix_order_t *ord, const SolverMatrix *solvmtx, pastix_bcsc_t *bcsc)
Initializes the coltab of a block csc matrix. The coltab corresponds to the number of rows (expended)...
Definition bcsc.c:1616
void bcsc_sinit(const spmatrix_t *spm, const pastix_order_t *ord, const SolverMatrix *solvmtx, int initAt, pastix_bcsc_t *bcsc, pastix_int_t valuesize)
Initializes a centralize float block csc.
pastix_int_t * bcsc_init_col2cblk(const SolverMatrix *solvmtx, const pastix_bcsc_t *bcsc, const spmatrix_t *spm)
Creates the array which represents the repartition of each column in the block structure....
Definition bcsc.c:721
void bcsc_handle_comm_init(const SolverMatrix *solvmtx, pastix_bcsc_t *bcsc)
Initializes the bcsc_handle_comm_t structure.
Definition bcsc.c:48
pastix_int_t * bcsc_init_col2cblk_shm(const SolverMatrix *solvmtx, const pastix_bcsc_t *bcsc)
Creates the array which represents the repartition of each column in the block structure....
Definition bcsc.c:522
void bcsc_zinit(const spmatrix_t *spm, const pastix_order_t *ord, const SolverMatrix *solvmtx, int initAt, pastix_bcsc_t *bcsc, pastix_int_t valuesize)
Initializes a centralize pastix_complex64_t block csc.
void bcsc_cinit(const spmatrix_t *spm, const pastix_order_t *ord, const SolverMatrix *solvmtx, int initAt, pastix_bcsc_t *bcsc, pastix_int_t valuesize)
Initializes a centralize pastix_complex32_t block csc.
void bcsc_exit_struct(pastix_bcsc_t *bcsc)
Cleanup the bcsc struct. (symmetric of bcsc_init_struct)
Definition bcsc.c:1781
void bcsc_dinit(const spmatrix_t *spm, const pastix_order_t *ord, const SolverMatrix *solvmtx, int initAt, pastix_bcsc_t *bcsc, pastix_int_t valuesize)
Initializes a centralize double block csc.
void bcsc_handle_comm_exit(bcsc_handle_comm_t *bcsc_comm)
Frees the bcsc_handle_comm pointers.
Definition bcsc.c:79
bcsc_data_amount_t recvA
Definition bcsc.h:83
bcsc_exch_comm_t recvAAt
Definition bcsc.h:85
pastix_int_t max_idx
Definition bcsc.h:97
bcsc_exch_comm_t sendAt
Definition bcsc.h:81
PASTIX_Comm comm
Definition bcsc.h:95
pastix_int_t idxcnt
Definition bcsc.h:57
pastix_int_t colnbr
Definition bcsc.h:111
bcsc_exch_comm_t sendA
Definition bcsc.h:80
bcsc_data_amount_t size
Definition bcsc.h:66
pastix_int_t clustnum
Definition bcsc.h:94
bcsc_proc_comm_t data_comm[1]
Definition bcsc.h:99
pastix_int_t * idxbuf
Definition bcsc.h:67
pastix_int_t * coltab
Definition bcsc.h:113
bcsc_data_amount_t recvAt
Definition bcsc.h:84
bcsc_exch_comm_t sendAAt
Definition bcsc.h:82
pastix_int_t valcnt
Definition bcsc.h:58
pastix_coeftype_t flttype
Definition bcsc.h:96
pastix_int_t cblknum
Definition bcsc.h:112
void * valbuf
Definition bcsc.h:70
pastix_int_t clustnbr
Definition bcsc.h:93
pastix_int_t max_val
Definition bcsc.h:98
enum bcsc_tag_ bcsc_tag_e
Tags used in MPI communications.
double bcscInit(const spmatrix_t *spm, const pastix_order_t *ord, const SolverMatrix *solvmtx, pastix_int_t initAt, pastix_bcsc_t *bcsc)
Initializes the block csc matrix.
Definition bcsc.c:1899
struct bcsc_handle_comm_s bcsc_handle_comm_t
Structure to manage communications with distributed spm.
struct bcsc_proc_comm_s bcsc_proc_comm_t
Informations of the data exchanged with other processors.
void bcscExit(pastix_bcsc_t *bcsc)
Frees the block csc structure but do not free the bcsc pointer.
Definition bcsc.c:1929
Compressed colptr format for the bcsc.
Definition bcsc.h:110
Information about the amount of data.
Definition bcsc.h:56
Information about the sending data.
Definition bcsc.h:65
Structure to manage communications with distributed spm.
Definition bcsc.h:92
Informations of the data exchanged with other processors.
Definition bcsc.h:79
@ PASTIX_SUCCESS
Definition api.h:367
pastix_int_t baseval
Definition order.h:48
pastix_int_t * permtab
Definition order.h:51
pastix_int_t * peritab
Definition order.h:52
pastix_int_t vertnbr
Definition order.h:49
Order structure.
Definition order.h:47
pastix_int_t nodenbr
Definition solver.h:208
static pastix_int_t cblk_colnbr(const SolverCblk *cblk)
Compute the number of columns in a column block.
Definition solver.h:329
pastix_int_t cblknbr
Definition solver.h:211
pastix_int_t faninnbr
Definition solver.h:213
pastix_int_t recvnbr
Definition solver.h:216
pastix_int_t bcscnum
Definition solver.h:175
SolverCblk *restrict cblktab
Definition solver.h:228
int8_t cblktype
Definition solver.h:164
pastix_int_t lcolnum
Definition solver.h:167
pastix_int_t fcolnum
Definition solver.h:166
Solver column block structure.
Definition solver.h:161
Solver column block structure.
Definition solver.h:203