PaStiX Handbook 6.4.0
bcsc_dinit.c
Go to the documentation of this file.
1/**
2 *
3 * @file bcsc_dinit.c
4 *
5 * @copyright 2004-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
6 * Univ. Bordeaux. All rights reserved.
7 *
8 * @version 6.4.0
9 * @author Mathieu Faverge
10 * @author Pierre Ramet
11 * @author Xavier Lacoste
12 * @author Theophile Terraz
13 * @author Tony Delarue
14 * @author Vincent Bridonneau
15 * @author Alycia Lisito
16 * @date 2024-07-09
17 *
18 * @generated from /builds/2mk6rsew/0/solverstack/pastix/bcsc/bcsc_zinit.c, normal z -> d, Tue Feb 25 14:36:01 2025
19 *
20 **/
21#include "common.h"
22#include "pastix/order.h"
23#include <spm.h>
24#include "blend/solver.h"
25#include "bcsc/bcsc.h"
26#include "bcsc_d.h"
27
28#ifndef DOXYGEN_SHOULD_SKIP_THIS
29static inline double
30__fct_id( double val ) {
31 return val;
32}
33
34static inline double
35__fct_conj( double val ) {
36#if defined(PRECISION_c) || defined(PRECISION_z)
37 return ( val );
38#else
39 /* This function should not be called in this case. */
40 assert(0);
41 return val;
42#endif
43}
44#endif /* DOXYGEN_SHOULD_SKIP_THIS */
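These two helpers are used below through a function pointer (_bcsc_conj) selected from the matrix type, so the same fill loops can serve the general, symmetric and, in the complex precisions, hermitian cases. The standalone sketch below is an editorial illustration of that selection pattern, not code from this file; the names ex_mtxtype_t, ex_id and ex_conj are hypothetical stand-ins.

#include <stdio.h>

/* Hypothetical stand-ins for the spm matrix types. */
typedef enum { EX_GENERAL, EX_SYMMETRIC, EX_HERMITIAN } ex_mtxtype_t;

static double ex_id( double v )   { return v; }
static double ex_conj( double v ) { return v; }   /* real case: conjugation is the identity */

int main( void )
{
    ex_mtxtype_t  mtxtype = EX_SYMMETRIC;
    double      (*op)(double) = ex_id;

    /* Same selection pattern as in bcsc_dhandle_recv_At() / bcsc_dinit_At(). */
    if ( mtxtype == EX_HERMITIAN ) { op = ex_conj; }
    if ( mtxtype == EX_SYMMETRIC ) { op = ex_id;   }

    printf( "op(3.5) = %g\n", op( 3.5 ) );
    return 0;
}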
45
46#if defined(PASTIX_WITH_MPI)
47/**
48 *******************************************************************************
49 *
50 * @brief Stores the data the current processor has to send to the other processors
51 * in the bcsc_comm structure.
52 *
53 * There are two cases:
54 *
55 * - If the matrix is general: the full columns and rows of the blocks are stored
56 *   in Lvalues and Uvalues.
57 *   The local data of the current process which lie in remote blocks after
58 *   the permutation need to be sent to the owner process. The data is stored
59 *   in sendA if it is sent for the column only, in sendAt if it is sent for
60 *   the row only and in sendAAt if it is sent for both the row and the column.
61 *
62 * - If the matrix is symmetric: only the full columns of the blocks are stored
63 *   in Lvalues (and Uvalues = Lvalues). Only the lower (or upper) triangular
64 *   half of the spm is stored, therefore the lower (or upper) data needs to be
65 *   duplicated to fill the upper (or lower) half of the matrix in the blocks.
66 *   The local data of the current process which lie in remote blocks after
67 *   the permutation need to be sent to the owner process. The data is stored
68 *   in sendA if it is sent for the lower (or upper) half of the column, in
69 *   sendAt if it is sent for the upper (or lower) half of the column and in
70 *   sendAAt if it is sent for both the lower and upper halves of the
71 *   column.
72 *   The diagonal values are stored in sendA only.
73 *
74 *******************************************************************************
75 *
76 * @param[in] spm
77 * The initial sparse matrix in the spm format.
78 *
79 * @param[in] ord
80 * The ordering that needs to be applied on the spm to generate the
81 * block csc.
82 *
83 * @param[in] col2cblk
84 * The array that matches the expanded column indexes with the owning cblk indexes; negative entries encode remote columns as -(owner+1).
85 *
86 * @param[inout] bcsc_comm
87 * The structure in which the data is stored.
88 *
89 *******************************************************************************/
90void
91bcsc_dstore_data( const spmatrix_t *spm,
92 const pastix_order_t *ord,
93 const pastix_int_t *col2cblk,
94 bcsc_handle_comm_t *bcsc_comm )
95{
96 double *values_c;
97 double *values = (double*)(spm->values);
98 pastix_int_t *colptr = spm->colptr;
99 pastix_int_t *rowptr = spm->rowptr;
100 pastix_int_t *loc2glob = spm->loc2glob;
101 pastix_int_t *dofs = spm->dofs;
102 pastix_int_t dof = spm->dof;
103 bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
104 pastix_int_t clustnbr = bcsc_comm->clustnbr;
105 pastix_int_t il, jl, ig, jg, igp, jgp, igpe, jgpe;
106 pastix_int_t ownerj, owneri, baseval;
107 pastix_int_t dofi, dofj, frow, lrow;
108 bcsc_exch_comm_t *data_sendA, *data_sendAt, *data_sendAAt;
109 bcsc_data_amount_t *data_cntA, *data_cntAt, *data_cntAAt;
110#if defined(PRECISION_z) || defined(PRECISION_c)
111 int sym = (spm->mtxtype == SpmSymmetric) || (spm->mtxtype == SpmSymmetric);
112#else
113 int sym = (spm->mtxtype == SpmSymmetric);
114#endif
115
116 /* Allocates the sending indexes and values buffers. */
117 bcsc_allocate_buf( bcsc_comm, PastixTagMemSend );
118
119 /*
120 * Allocates and initialises the counters used to fill the values
121 * and indexes buffers.
122 */
123 MALLOC_INTERN( data_cntA, 3 * clustnbr, bcsc_data_amount_t );
124 memset( data_cntA, 0, 3 * clustnbr * sizeof(bcsc_data_amount_t) );
125 data_cntAt = data_cntA + clustnbr;
126 data_cntAAt = data_cntA + clustnbr * 2;
127
128 baseval = spm->baseval;
129
130 /* Goes through the columns of spm. */
131 for ( jl = 0; jl < spm->n; jl++, colptr++, loc2glob++ ) {
132 jg = *loc2glob - baseval;
133 jgp = ord->permtab[jg];
134 jgpe = ( dof > 0 ) ? jgp * dof : dofs[ jg ] - baseval;
135 dofj = ( dof > 0 ) ? dof : dofs[ jg+1 ] - dofs[ jg ];
136
137 frow = colptr[0] - baseval;
138 lrow = colptr[1] - baseval;
139 assert( (lrow - frow) >= 0 );
140
141 ownerj = col2cblk[ jgpe ];
142
143 /* The column jp belongs to another process. */
144 if ( ownerj < 0 ) {
145 ownerj = - ownerj - 1;
146 data_comm = bcsc_comm->data_comm + ownerj;
147 data_sendA = &( data_comm->sendA );
148
149 /* Goes through the rows of jl. */
150 for ( il = frow; il < lrow; il++ ) {
151 ig = rowptr[il] - baseval;
152 igp = ord->permtab[ig];
153 igpe = ( dof > 0 ) ? igp * dof : dofs[ ig ] - baseval;
154 dofi = ( dof > 0 ) ? dof : dofs[ ig+1 ] - dofs[ ig ];
155 owneri = col2cblk[ igpe ];
156
157 /*
158 * The diagonal values (ip, jp) belong to the same process.
159 * They are sent to owneri in the sym case for A only.
160 */
161 if ( sym && ( ig == jg ) ) {
162 data_sendA->idxbuf[data_cntA[ownerj].idxcnt ] = igp;
163 data_sendA->idxbuf[data_cntA[ownerj].idxcnt+1] = jgp;
164 data_cntA[ownerj].idxcnt += 2;
165
166 values_c = ((double*)(data_sendA->valbuf)) + data_cntA[ownerj].valcnt;
167 memcpy( values_c, values, dofi * dofj * sizeof(double) );
168 data_cntA[ownerj].valcnt += dofi * dofj;
169
170 values += dofi * dofj;
171 continue;
172 }
173
174 /* The row ip belongs to another process. */
175 if ( owneri < 0 ) {
176 owneri = - owneri - 1;
177 data_comm = bcsc_comm->data_comm + owneri;
178
179 /*
180 * The values (ip, jp) belong to the same remote process.
181 * They are sent to owneri for AAt in the general case.
182 */
183 if ( owneri == ownerj ) {
184 data_sendAAt = &( data_comm->sendAAt );
185
186 data_sendAAt->idxbuf[data_cntAAt[ownerj].idxcnt ] = igp;
187 data_sendAAt->idxbuf[data_cntAAt[ownerj].idxcnt+1] = jgp;
188 data_cntAAt[ownerj].idxcnt += 2;
189
190 values_c = ((double*)(data_sendAAt->valbuf)) + data_cntAAt[ownerj].valcnt;
191 memcpy( values_c, values, dofi * dofj * sizeof(double) );
192 data_cntAAt[ownerj].valcnt += dofi * dofj;
193 }
194 /*
195 * The values (ip, jp) belong to different processes.
196 * They are sent to owneri for At and to ownerj for A.
197 */
198 else {
199 data_sendAt = &( data_comm->sendAt );
200
201 data_sendAt->idxbuf[data_cntAt[owneri].idxcnt ] = igp;
202 data_sendAt->idxbuf[data_cntAt[owneri].idxcnt+1] = jgp;
203 data_cntAt[owneri].idxcnt += 2;
204
205 values_c = ((double*)(data_sendAt->valbuf)) + data_cntAt[owneri].valcnt;
206 memcpy( values_c, values, dofi * dofj * sizeof(double) );
207 data_cntAt[owneri].valcnt += dofi * dofj;
208
209 data_sendA->idxbuf[data_cntA[ownerj].idxcnt ] = igp;
210 data_sendA->idxbuf[data_cntA[ownerj].idxcnt+1] = jgp;
211 data_cntA[ownerj].idxcnt += 2;
212
213 values_c = ((double*)(data_sendA->valbuf)) + data_cntA[ownerj].valcnt;
214 memcpy( values_c, values, dofi * dofj * sizeof(double) );
215 data_cntA[ownerj].valcnt += dofi * dofj;
216 }
217 }
218 /* The row ip is local. */
219 else {
220 /*
221 * The values (ip, jp) belong to ownerj.
222 * They are sent to ownerj for A.
223 */
224 data_sendA->idxbuf[data_cntA[ownerj].idxcnt ] = igp;
225 data_sendA->idxbuf[data_cntA[ownerj].idxcnt+1] = jgp;
226 data_cntA[ownerj].idxcnt += 2;
227
228 values_c = ((double*)(data_sendA->valbuf)) + data_cntA[ownerj].valcnt;
229 memcpy( values_c, values, dofi * dofj * sizeof(double) );
230 data_cntA[ownerj].valcnt += dofi * dofj;
231 }
232 values += dofi * dofj;
233 }
234 }
235 /* The column jp is local. */
236 else {
237 /* Goes through the rows of j. */
238 for ( il = frow; il < lrow; il++ ) {
239 ig = rowptr[il] - baseval;
240 igp = ord->permtab[ig];
241 igpe = ( dof > 0 ) ? igp * dof : dofs[ ig ] - baseval;
242 dofi = ( dof > 0 ) ? dof : dofs[ ig+1 ] - dofs[ ig ];
243 owneri = col2cblk[ igpe ];
244
245 /*
246 * The diagonal values (ip, jp) have already been
247 * added to globcol in the sym case.
248 */
249 if ( sym && ( ig == jg ) ) {
250 values += dofi * dofj;
251 continue;
252 }
253
254 /* The row ip belongs to another process. */
255 if ( owneri < 0 ) {
256 owneri = - owneri - 1;
257 data_comm = bcsc_comm->data_comm + owneri;
258 data_sendAt = &( data_comm->sendAt );
259
260 /*
261 * The values (ip, jp) belong to owneri.
262 * They are sent to owneri for At.
263 */
264 data_sendAt->idxbuf[data_cntAt[owneri].idxcnt ] = igp;
265 data_sendAt->idxbuf[data_cntAt[owneri].idxcnt+1] = jgp;
266 data_cntAt[owneri].idxcnt += 2;
267
268 values_c = ((double*)(data_sendAt->valbuf)) + data_cntAt[owneri].valcnt;
269 memcpy( values_c, values, dofi * dofj * sizeof(double) );
270 data_cntAt[owneri].valcnt += dofi * dofj;
271 }
272 values += dofi * dofj;
273 }
274 }
275 }
276
277 memFree_null( data_cntA );
278}
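The ownership tests in bcsc_dstore_data() rely on the col2cblk encoding: a non-negative entry is the index of the local cblk owning the column, while a negative entry encodes the owning remote process as -(owner+1). The standalone sketch below (not part of the generated file; decode_owner is a hypothetical name) only illustrates that decoding.

#include <assert.h>
#include <stdio.h>

/* Hypothetical helper: decodes the col2cblk convention used above. */
static int
decode_owner( int col2cblk_value, int *is_local )
{
    if ( col2cblk_value >= 0 ) {
        *is_local = 1;              /* local column: the value is the cblk index   */
        return -1;
    }
    *is_local = 0;                  /* remote column: the value encodes -(owner+1) */
    return -col2cblk_value - 1;
}

int main( void )
{
    int is_local;
    int owner = decode_owner( -3, &is_local );   /* -3 encodes remote owner 2 */
    assert( (is_local == 0) && (owner == 2) );
    printf( "remote owner = %d\n", owner );
    return 0;
}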
279
280/**
281 *******************************************************************************
282 *
283 * @ingroup bcsc_internal
284 *
285 * @brief Adds the remote data of A to the bcsc structure.
286 *
287 *******************************************************************************
288 *
289 * @param[in] spm
290 * The initial sparse matrix in the spm format.
291 *
292 * @param[in] ord
293 * The ordering that needs to be applied on the spm to generate the
294 * block csc.
295 *
296 * @param[in] solvmtx
297 * The solver matrix structure which describes the data distribution.
298 *
299 * @param[inout] bcsc
300 * On entry, the pointer to an allocated bcsc.
301 * On exit, the bcsc fields are updated.
302 *
303 * @param[in] values_buf
304 * The array containing the remote values.
305 *
306 * @param[in] idx_cnt
307 * The index for the beginning of the indexes array buffer
308 * corresponding to the values_buf.
309 *
310 * @param[in] idx_size
311 * The number of indexes corresponding to the values_buf.
312 *
313 * @param[in] AAt
314 * TODO
315 *
316 *******************************************************************************/
317static inline pastix_int_t
318bcsc_dhandle_recv_A( const spmatrix_t *spm,
319 const pastix_order_t *ord,
320 const SolverMatrix *solvmtx,
321 pastix_bcsc_t *bcsc,
322 double *values_buf,
323 pastix_int_t idx_cnt,
324 pastix_int_t idx_size,
325 pastix_int_t AAt )
326{
327 bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
328 pastix_int_t *dofs = spm->dofs;
329 pastix_int_t dof = spm->dof;
330 SolverCblk *cblk;
331 pastix_int_t *coltab;
332 pastix_int_t k, igp, jgp, ig, jg, igpe, jgpe;
333 pastix_int_t itercblk;
334 pastix_int_t idofi, idofj, dofi, dofj;
335 pastix_int_t colidx, rowidx, pos;
336 pastix_int_t clustnum = bcsc_comm->clustnum;
337 double *Values = bcsc->Lvalues;
338 pastix_int_t *indexes = ( AAt == 0 ) ? bcsc_comm->data_comm[clustnum].sendA.idxbuf :
339 bcsc_comm->data_comm[clustnum].sendAAt.idxbuf;
340 pastix_int_t nbelt = 0;
341
342 /*
343 * Goes through the received indexes in order to add the data
344 * received.
345 */
346 indexes += idx_cnt;
347 for ( k = 0; k < idx_size; k+=2, indexes+=2 ) {
348 /* Gets received indexes jgp and igp. */
349 igp = indexes[0];
350 jgp = indexes[1];
351
352 ig = ord->peritab[igp];
353 jg = ord->peritab[jgp];
354 igpe = (dof > 0) ? igp * dof : dofs[ ig ] - spm->baseval;
355 jgpe = (dof > 0) ? jgp * dof : dofs[ jg ] - spm->baseval;
356 dofi = (dof > 0) ? dof : dofs[ig+1] - dofs[ig];
357 dofj = (dof > 0) ? dof : dofs[jg+1] - dofs[jg];
358
359 itercblk = bcsc->col2cblk[ jgpe ];
360 assert( itercblk >= 0 );
361
362 /* Gets the block on which the data received will be added. */
363 cblk = solvmtx->cblktab + itercblk;
364 coltab = bcsc->cscftab[cblk->bcscnum].coltab;
365
366 /* Goes through the values at (igp, jgp). */
367 colidx = jgpe - cblk->fcolnum;
368 for ( idofj = 0; idofj < dofj; idofj++, colidx++ ) {
369 rowidx = igpe;
370 pos = coltab[ colidx ];
371 for ( idofi = 0; idofi < dofi; idofi++, rowidx++, pos++, values_buf++ ) {
372 /* Adds the row igp. */
373 assert( rowidx >= 0 );
374 assert( rowidx < spm->gNexp );
375 bcsc->rowtab[ pos ] = rowidx;
376 /* Adds the data at row igp and column jgp. */
377 Values[ pos ] = *values_buf;
378 nbelt++;
379 }
380 coltab[ colidx ] += dofi;
381 assert( coltab[colidx] <= coltab[colidx+1] );
382 }
383 }
384
385 return nbelt;
386}
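The expanded indexes computed in bcsc_dhandle_recv_A() follow a single rule: with a constant number of degrees of freedom (dof > 0) the expanded index is igp * dof, otherwise the dofs[] array gives the expanded start of each vertex. The standalone sketch below illustrates the variable-dof branch on a toy dofs[] array (an assumption for the example); note that the real code uses the permuted index igp in the constant-dof case and the original index ig with dofs[].

#include <stdio.h>

int main( void )
{
    int baseval = 0;
    int dofs[]  = { 0, 2, 5, 6 };  /* expanded start of vertices 0..2 (2, 3 and 1 dof) */
    int dof     = -1;              /* dof <= 0 means variable dof, described by dofs[] */
    int ig      = 1;               /* index of the vertex                              */

    /* Same expressions as in bcsc_dhandle_recv_A(). */
    int igpe = ( dof > 0 ) ? ig * dof : dofs[ig] - baseval;  /* expanded start index */
    int dofi = ( dof > 0 ) ? dof : dofs[ig+1] - dofs[ig];    /* number of dof of ig  */

    printf( "igpe = %d, dofi = %d\n", igpe, dofi );          /* prints: igpe = 2, dofi = 3 */
    return 0;
}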
387
388/**
389 *******************************************************************************
390 *
391 * @ingroup bcsc_internal
392 *
393 * @brief Adds the remote data of At to the bcsc structure.
394 *
395 *******************************************************************************
396 *
397 * @param[in] spm
398 * The initial sparse matrix in the spm format.
399 *
400 * @param[in] ord
401 * The ordering that needs to be applied on the spm to generate the
402 * block csc.
403 *
404 * @param[in] solvmtx
405 * The solver matrix structure which describes the data distribution.
406 *
407 * @param[inout] rowtab
408 * The row tab of the bcsc or the row tab associated to At.
409 *
410 * @param[inout] bcsc
411 * On entry, the pointer to an allocated bcsc.
412 * On exit, the bcsc fields are updated.
413 *
414 * @param[in] values_buf
415 * The array containing the remote values.
416 *
417 * @param[in] idx_cnt
418 * The index for the beginning of the indexes array buffer
419 * corresponding to the values_buf.
420 *
421 * @param[in] idx_size
422 * The number of indexes corresponding to the values_buf.
423 *
424 * @param[in] AAt
425 * TODO
426 *
427 *******************************************************************************/
428static inline pastix_int_t
429bcsc_dhandle_recv_At( const spmatrix_t *spm,
430 const pastix_order_t *ord,
431 const SolverMatrix *solvmtx,
432 pastix_int_t *rowtab,
433 pastix_bcsc_t *bcsc,
434 double *values_buf,
435 pastix_int_t idx_cnt,
436 pastix_int_t idx_size,
437 pastix_int_t AAt )
438{
439 bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
440 pastix_int_t *dofs = spm->dofs;
441 pastix_int_t dof = spm->dof;
442 SolverCblk *cblk;
443 pastix_int_t *coltab;
444 pastix_int_t k, igp, jgp, ig, jg, igpe, jgpe;
445 pastix_int_t itercblk;
446 pastix_int_t idofi, idofj, dofi, dofj;
447 pastix_int_t colidx, rowidx, pos;
448 pastix_int_t clustnum = bcsc_comm->clustnum;
449 pastix_int_t *indexes = ( AAt == 0 ) ? bcsc_comm->data_comm[clustnum].sendAt.idxbuf :
450 bcsc_comm->data_comm[clustnum].sendAAt.idxbuf;
451 double *Values;
452 double (*_bcsc_conj)(double) = __fct_id;
453 pastix_int_t nbelt = 0;
454
455 /* Gets the Uvalues in the general case and the Lvalues in the sym case. */
456 if ( spm->mtxtype == SpmGeneral ) {
457 _bcsc_conj = __fct_id;
458 Values = (double*)(bcsc->Uvalues);
459 }
460 else {
461 if( spm->mtxtype == SpmSymmetric ) {
462 _bcsc_conj = __fct_conj;
463 }
464 if( spm->mtxtype == SpmSymmetric ) {
465 _bcsc_conj = __fct_id;
466 }
467 Values = (double*)(bcsc->Lvalues);
468 }
469
470 /*
471 * Goes through the received indexes in order to add the data
472 * received.
473 */
474 indexes += idx_cnt;
475 for ( k = 0; k < idx_size; k+=2, indexes+=2 ) {
476 /* Gets received indexes igp and jgp. */
477 igp = indexes[0];
478 jgp = indexes[1];
479
480 ig = ord->peritab[igp];
481 jg = ord->peritab[jgp];
482 igpe = (dof > 0) ? igp * dof : dofs[ ig ] - spm->baseval;
483 jgpe = (dof > 0) ? jgp * dof : dofs[ jg ] - spm->baseval;
484 dofi = (dof > 0) ? dof : dofs[ig+1] - dofs[ig];
485 dofj = (dof > 0) ? dof : dofs[jg+1] - dofs[jg];
486
487 itercblk = bcsc->col2cblk[ igpe ];
488 assert( itercblk >= 0 );
489
490 /* Gets the block on which the data received will be added. */
491 cblk = solvmtx->cblktab + itercblk;
492 coltab = bcsc->cscftab[cblk->bcscnum].coltab;
493
494 /* Goes through the values at (igp, jgp). */
495 for ( idofj = 0; idofj < dofj; idofj++ ) {
496 rowidx = jgpe + idofj;
497 colidx = igpe - cblk->fcolnum;
498 for ( idofi = 0; idofi < dofi; idofi++, colidx++, values_buf++ ) {
499 pos = coltab[ colidx ];
500 /* Adds the row jgp. */
501 assert( rowidx >= 0 );
502 assert( rowidx < spm->gNexp );
503 rowtab[ pos ] = rowidx;
504
505 /* Adds the data at row jgp and column igp. */
506 Values[ pos ] = _bcsc_conj( *values_buf );
507 coltab[ colidx ] ++;
508 assert( coltab[colidx] <= coltab[colidx+1] );
509 nbelt++;
510 }
511 }
512 }
513 return nbelt;
514}
515
516/**
517 *******************************************************************************
518 *
519 * @ingroup bcsc_internal
520 *
521 * @brief Adds the remote data of A and At to the bcsc structure.
522 *
523 *******************************************************************************
524 *
525 * @param[in] spm
526 * The initial sparse matrix in the spm format.
527 *
528 * @param[in] ord
529 * The ordering that needs to be applied on the spm to generate the
530 * block csc.
531 *
532 * @param[in] solvmtx
533 * The solver matrix structure which describes the data distribution.
534 *
535 * @param[inout] rowtab
536 * The row tab of the bcsc or the row tab associated to At.
537 *
538 * @param[inout] bcsc
539 * On entry, the pointer to an allocated bcsc.
540 * On exit, the bcsc fields are updated.
541 *
542 * @param[in] values_buf
543 * The array containing the remote values.
544 *
545 * @param[in] idx_cnt
546 * The index for the beginning of the indexes array buffer
547 * corresponding to the values_buf.
548 *
549 * @param[in] idx_size
550 * The number of indexes corresponding to the values_buf.
551 *
552 *******************************************************************************/
553static inline pastix_int_t
554bcsc_dhandle_recv_AAt( const spmatrix_t *spm,
555 const pastix_order_t *ord,
556 const SolverMatrix *solvmtx,
557 pastix_int_t *rowtab,
558 pastix_bcsc_t *bcsc,
559 double *values_buf,
560 pastix_int_t idx_cnt,
561 pastix_int_t idx_size )
562{
563 bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
564 pastix_int_t *dofs = spm->dofs;
565 pastix_int_t dof = spm->dof;
566 SolverCblk *cblki, *cblkj;
567 pastix_int_t *coltabi, *coltabj;
568 pastix_int_t k, igp, jgp, ig, jg, igpe, jgpe;
569 pastix_int_t itercblki, itercblkj;
570 pastix_int_t idofi, idofj, dofi, dofj;
571 pastix_int_t colj, rowj, posj;
572 pastix_int_t coli, rowi, posi;
573 pastix_int_t clustnum = bcsc_comm->clustnum;
574 pastix_int_t *indexes = bcsc_comm->data_comm[clustnum].sendAAt.idxbuf;
575 double *ValuesA, *ValuesAt;
576 double (*_bcsc_conj)(double) = __fct_id;
577 pastix_int_t nbelt = 0;
578
579 /* Gets the Uvalues in the general case and the Lvalues in the sym case. */
580 if ( spm->mtxtype == SpmGeneral ) {
581 _bcsc_conj = __fct_id;
582 ValuesAt = (double*)(bcsc->Uvalues);
583 }
584 else {
585 if( spm->mtxtype == SpmSymmetric ) {
586 _bcsc_conj = __fct_conj;
587 }
588 if( spm->mtxtype == SpmSymmetric ) {
589 _bcsc_conj = __fct_id;
590 }
591 ValuesAt = (double*)(bcsc->Lvalues);
592 }
593 ValuesA = (double*)(bcsc->Lvalues);
594
595 /*
596 * Goes through the received indexes in order to add the data
597 * received.
598 */
599 indexes += idx_cnt;
600 for ( k = 0; k < idx_size; k+=2, indexes+=2 ) {
601 /* Gets received indexes igp and jgp. */
602 igp = indexes[0];
603 jgp = indexes[1];
604
605 ig = ord->peritab[igp];
606 jg = ord->peritab[jgp];
607 igpe = (dof > 0) ? igp * dof : dofs[ ig ] - spm->baseval;
608 jgpe = (dof > 0) ? jgp * dof : dofs[ jg ] - spm->baseval;
609 dofi = (dof > 0) ? dof : dofs[ig+1] - dofs[ig];
610 dofj = (dof > 0) ? dof : dofs[jg+1] - dofs[jg];
611
612 itercblki = bcsc->col2cblk[ igpe ];
613 itercblkj = bcsc->col2cblk[ jgpe ];
614 assert( itercblki >= 0 );
615 assert( itercblkj >= 0 );
616
617 /* Gets the block on which the data received will be added. */
618 cblki = solvmtx->cblktab + itercblki;
619 cblkj = solvmtx->cblktab + itercblkj;
620 coltabi = bcsc->cscftab[cblki->bcscnum].coltab;
621 coltabj = bcsc->cscftab[cblkj->bcscnum].coltab;
622
623 /* Goes through the values at (igp, jgp). */
624 colj = jgpe - cblkj->fcolnum;
625 for ( idofj = 0; idofj < dofj; idofj++ ) {
626 rowj = igpe;
627 posj = coltabj[ colj ];
628 rowi = jgpe + idofj;
629 coli = igpe - cblki->fcolnum;
630 for ( idofi = 0; idofi < dofi; idofi++, coli++, rowj++, posj++, values_buf++ ) {
631 posi = coltabi[ coli ];
632 /* Adds the rows jgp and igp. */
633 assert( rowi >= 0 );
634 assert( rowj >= 0 );
635 assert( rowi < spm->gNexp );
636 assert( rowj < spm->gNexp );
637
638 rowtab[ posi ] = rowi;
639 bcsc->rowtab[ posj ] = rowj;
640
641 /* Adds the data at row jgp and column igp. */
642 ValuesAt[ posi ] = _bcsc_conj( *values_buf );
643 ValuesA [ posj ] = *values_buf;
644 coltabi[ coli ] ++;
645 assert( coltabi[coli] <= coltabi[coli+1] );
646 nbelt++;
647 }
648 coltabj[ colj ] += dofi;
649 assert( coltabj[colj] <= coltabj[colj+1] );
650 }
651 }
652
653 return nbelt;
654}
655
656/**
657 *******************************************************************************
658 *
659 * @ingroup bcsc_internal
660 *
661 * @brief Exchanges and stores the remote values of A.
662 *
663 *******************************************************************************
664 *
665 * @param[in] spm
666 * The initial sparse matrix in the spm format.
667 *
668 * @param[in] ord
669 * The ordering which needs to be applied on the spm to generate the
670 * block csc.
671 *
672 * @param[in] solvmtx
673 * The solver matrix structure which describes the data distribution.
674 *
675 * @param[inout] bcsc
676 * On entry, the pointer to an allocated bcsc.
677 * On exit, the bcsc fields are updated with the remote values of A.
678 *
679 *******************************************************************************/
680static inline pastix_int_t
681bcsc_dexchange_values_A( const spmatrix_t *spm,
682 const pastix_order_t *ord,
683 const SolverMatrix *solvmtx,
684 pastix_bcsc_t *bcsc )
685{
686 bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
687 pastix_int_t clustnbr = bcsc_comm->clustnbr;
688 pastix_int_t clustnum = bcsc_comm->clustnum;
689 bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
690 bcsc_proc_comm_t *data_send = NULL;
691 bcsc_proc_comm_t *data_recv = NULL;
692 double *val_buf = NULL;
693 pastix_int_t idx_size = 0;
694 pastix_int_t counter_req = 0;
695 pastix_int_t nbelt_recv = 0;
696 pastix_int_t cnt = 0;
697 pastix_int_t idx_cnt[clustnbr];
698 MPI_Status statuses[clustnbr-1];
699 MPI_Request requests[clustnbr-1];
700 bcsc_data_amount_t *sends, *recvs;
701 pastix_int_t c_send, c_recv, k;
702
703 /* Allocates the receiving indexes and values buffers. */
704 if ( bcsc_comm->max_idx != 0 ) {
705 MALLOC_INTERN( val_buf, bcsc_comm->max_val, double );
706 }
707
708 for ( k = 0; k < clustnbr; k++ ) {
709 if ( k == clustnum ) {
710 idx_cnt[k] = 0;
711 continue;
712 }
713 idx_cnt[ k ] = cnt;
714 cnt += data_comm[k].recvA.idxcnt;
715 }
716
717 /* Sends the values. */
718 c_send = (clustnum+1) % clustnbr;
719 for ( k = 0; k < clustnbr-1; k++ ) {
720 data_send = data_comm + c_send;
721 sends = &( data_send->sendA.size );
722 if ( c_send == clustnum ) {
723 continue;
724 }
725
726 /* Posts the emissions of the values. */
727 if ( sends->valcnt != 0 ) {
728 MPI_Isend( data_send->sendA.valbuf, sends->valcnt, PASTIX_MPI_DOUBLE,
729 c_send, PastixTagValuesA, bcsc_comm->comm, &requests[counter_req++] );
730 }
731 c_send = (c_send+1) % clustnbr;
732 }
733
734 /* Receives the values. */
735 c_recv = (clustnum-1+clustnbr) % clustnbr;
736 for ( k = 0; k < clustnbr-1; k++ ) {
737 data_recv = data_comm + c_recv;
738 recvs = &( data_recv->recvA );
739 if ( c_recv == clustnum ) {
740 continue;
741 }
742
743 /* Posts the receptions of the values. */
744 if ( recvs->valcnt != 0 ) {
745 MPI_Recv( val_buf, recvs->valcnt, PASTIX_MPI_DOUBLE,
746 c_recv, PastixTagValuesA, bcsc_comm->comm, MPI_STATUS_IGNORE );
747 idx_size = recvs->idxcnt;
748 nbelt_recv += bcsc_dhandle_recv_A( spm, ord, solvmtx, bcsc,
749 val_buf, idx_cnt[c_recv], idx_size, 0 );
750 }
751 c_recv = (c_recv-1+clustnbr) % clustnbr;
752 }
753
754 MPI_Waitall( counter_req, requests, statuses );
755 free( val_buf );
756 assert( nbelt_recv == bcsc_comm->data_comm[clustnum].recvA.valcnt );
757 bcsc_free_buf( bcsc_comm, PastixTagMemSendValA );
758 bcsc_free_buf( bcsc_comm, PastixTagMemRecvIdxA );
759 return nbelt_recv;
760}
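The send and receive loops of bcsc_dexchange_values_A() walk the other processes in a ring: values are sent to clustnum+1, clustnum+2, ... and received from clustnum-1, clustnum-2, ... (modulo clustnbr). The standalone sketch below leaves the MPI calls out and only prints this schedule for an assumed clustnbr = 4 and clustnum = 1.

#include <stdio.h>

int main( void )
{
    int clustnbr = 4;   /* number of processes (assumption for the example) */
    int clustnum = 1;   /* rank of the current process                      */
    int k, c_send, c_recv;

    c_send = (clustnum + 1) % clustnbr;
    c_recv = (clustnum - 1 + clustnbr) % clustnbr;
    for ( k = 0; k < clustnbr - 1; k++ ) {
        printf( "step %d: send to %d, receive from %d\n", k, c_send, c_recv );
        c_send = (c_send + 1) % clustnbr;
        c_recv = (c_recv - 1 + clustnbr) % clustnbr;
    }
    return 0;
}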
761
762/**
763 *******************************************************************************
764 *
765 * @ingroup bcsc_internal
766 *
767 * @brief Exchanges and stores the remote values of At.
768 *
769 *******************************************************************************
770 *
771 * @param[in] spm
772 * The initial sparse matrix in the spm format.
773 *
774 * @param[in] ord
775 * The ordering which needs to be applied on the spm to generate the
776 * block csc.
777 *
778 * @param[in] solvmtx
779 * The solver matrix structure which describes the data distribution.
780 *
781 * @param[inout] rowtab
782 * The row tab of the bcsc or the row tab associated to At.
783 *
784 * @param[inout] bcsc
785 * On entry, the pointer to an allocated bcsc.
786 * On exit, the bcsc fields are updated with the remote values of At.
787 *
788 *******************************************************************************/
789static inline pastix_int_t
790bcsc_dexchange_values_At( const spmatrix_t *spm,
791 const pastix_order_t *ord,
792 const SolverMatrix *solvmtx,
793 pastix_int_t *rowtab,
794 pastix_bcsc_t *bcsc )
795{
796 bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
797 pastix_int_t clustnbr = bcsc_comm->clustnbr;
798 pastix_int_t clustnum = bcsc_comm->clustnum;
799 bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
800 bcsc_proc_comm_t *data_send = NULL;
801 bcsc_proc_comm_t *data_recv = NULL;
802 double *val_buf = NULL;
803 pastix_int_t idx_size = 0;
804 pastix_int_t counter_req = 0;
805 pastix_int_t nbelt_recv = 0;
806 pastix_int_t cnt = 0;
807 pastix_int_t idx_cnt[clustnbr];
808 MPI_Status statuses[clustnbr-1];
809 MPI_Request requests[clustnbr-1];
810 bcsc_data_amount_t *sends, *recvs;
811 pastix_int_t c_send, c_recv, k;
812
813 /* Allocates the receiving indexes and values buffers. */
814 if ( bcsc_comm->max_idx != 0 ) {
815 MALLOC_INTERN( val_buf, bcsc_comm->max_val, double );
816 }
817
818 for ( k = 0; k < clustnbr; k++ ) {
819 if ( k == clustnum ) {
820 idx_cnt[k] = 0;
821 continue;
822 }
823 idx_cnt[ k ] = cnt;
824 cnt += data_comm[k].recvAt.idxcnt;
825 }
826
827 /* Sends the values. */
828 c_send = (clustnum+1) % clustnbr;
829 for ( k = 0; k < clustnbr-1; k++ ) {
830 data_send = data_comm + c_send;
831 sends = &( data_send->sendAt.size );
832 if ( c_send == clustnum ) {
833 continue;
834 }
835
836 /* Posts the emissions of the values. */
837 if ( sends->valcnt != 0 ) {
838 MPI_Isend( data_send->sendAt.valbuf, sends->valcnt, PASTIX_MPI_DOUBLE,
839 c_send, PastixTagValuesAt, bcsc_comm->comm, &requests[counter_req++] );
840 }
841 c_send = (c_send+1) % clustnbr;
842 }
843
844 /* Receives the values. */
845 c_recv = (clustnum-1+clustnbr) % clustnbr;
846 for ( k = 0; k < clustnbr-1; k++ ) {
847 data_recv = data_comm + c_recv;
848 recvs = &( data_recv->recvAt );
849 if ( c_recv == clustnum ) {
850 continue;
851 }
852
853 /* Posts the receptions of the values. */
854 if ( recvs->valcnt != 0 ) {
855 MPI_Recv( val_buf, recvs->valcnt, PASTIX_MPI_DOUBLE,
856 c_recv, PastixTagValuesAt, bcsc_comm->comm, MPI_STATUS_IGNORE );
857 idx_size = recvs->idxcnt;
858 nbelt_recv += bcsc_dhandle_recv_At( spm, ord, solvmtx, rowtab, bcsc,
859 val_buf, idx_cnt[c_recv], idx_size, 0 );
860 }
861 c_recv = (c_recv-1+clustnbr) % clustnbr;
862 }
863
864 MPI_Waitall( counter_req, requests, statuses );
865 free( val_buf );
866 assert( nbelt_recv == bcsc_comm->data_comm[clustnum].recvAt.valcnt );
867 bcsc_free_buf( bcsc_comm, PastixTagMemSendValAt );
868 bcsc_free_buf( bcsc_comm, PastixTagMemRecvIdxAt );
869 return nbelt_recv;
870}
871
872/**
873 *******************************************************************************
874 *
875 * @ingroup bcsc_internal
876 *
877 * @brief Exchanges and stores the remote values of A and At.
878 *
879 *******************************************************************************
880 *
881 * @param[in] spm
882 * The initial sparse matrix in the spm format.
883 *
884 * @param[in] ord
885 * The ordering which needs to be applied on the spm to generate the
886 * block csc.
887 *
888 * @param[in] solvmtx
889 * The solver matrix structure which describes the data distribution.
890 *
891 * @param[inout] rowtab
892 * The row tab of the bcsc or the row tab associated to At.
893 *
894 * @param[inout] bcsc
895 * On entry, the pointer to an allocated bcsc.
896 * On exit, the bcsc fields are updated with the remote values of A and At.
897 *
898 *******************************************************************************/
899static inline pastix_int_t
900bcsc_dexchange_values_AAt( const spmatrix_t *spm,
901 const pastix_order_t *ord,
902 const SolverMatrix *solvmtx,
903 pastix_int_t *rowtabAt,
904 pastix_bcsc_t *bcsc )
905{
906 bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
907 pastix_int_t clustnbr = bcsc_comm->clustnbr;
908 pastix_int_t clustnum = bcsc_comm->clustnum;
909 bcsc_proc_comm_t *data_comm = bcsc_comm->data_comm;
910 bcsc_proc_comm_t *data_send = NULL;
911 bcsc_proc_comm_t *data_recv = NULL;
912 double *val_buf = NULL;
913 pastix_int_t idx_size = 0;
914 pastix_int_t counter_req = 0;
915 pastix_int_t nbelt_recv = 0;
916 pastix_int_t cnt = 0;
917 pastix_int_t idx_cnt[clustnbr];
918 MPI_Status statuses[clustnbr-1];
919 MPI_Request requests[clustnbr-1];
920 bcsc_data_amount_t *sends;
921 bcsc_exch_comm_t *recvs;
922 pastix_int_t c_send, c_recv, k;
923
924 /* Allocates the receiving indexes and values buffers. */
925 if ( bcsc_comm->max_idx != 0 ) {
926 if ( spm->mtxtype != SpmGeneral ) {
927 MALLOC_INTERN( val_buf, bcsc_comm->max_val, double );
928 }
929 else {
930 if ( rowtabAt == bcsc->rowtab ) {
931 bcsc_allocate_buf( bcsc_comm, PastixTagMemRecvValAAt);
932 }
933 }
934 }
935
936 for ( k = 0; k < clustnbr; k++ ) {
937 if ( k == clustnum ) {
938 idx_cnt[k] = 0;
939 continue;
940 }
941 idx_cnt[ k ] = cnt;
942 cnt += data_comm[k].recvAAt.size.idxcnt;
943 }
944
945 /* Sends the values. */
946 c_send = (clustnum+1) % clustnbr;
947 for ( k = 0; k < clustnbr-1; k++ ) {
948 if ( rowtabAt != bcsc->rowtab ) {
949 break;
950 }
951 data_send = data_comm + c_send;
952 sends = &( data_send->sendAAt.size );
953 if ( c_send == clustnum ) {
954 continue;
955 }
956
957 /* Posts the emissions of the values. */
958 if ( sends->valcnt != 0 ) {
959 MPI_Isend( data_send->sendAAt.valbuf, sends->valcnt, PASTIX_MPI_DOUBLE,
960 c_send, PastixTagValuesAAt, bcsc_comm->comm, &requests[counter_req++] );
961 }
962 c_send = (c_send+1) % clustnbr;
963 }
964
965 /* Receives the values. */
966 c_recv = (clustnum-1+clustnbr) % clustnbr;
967 for ( k = 0; k < clustnbr-1; k++ ) {
968 data_recv = data_comm + c_recv;
969 recvs = &( data_recv->recvAAt );
970 if ( c_recv == clustnum ) {
971 continue;
972 }
973
974 /* Posts the receptions of the values. */
975 if ( recvs->size.valcnt != 0 ) {
976 if ( spm->mtxtype == SpmGeneral ) {
977 val_buf = recvs->valbuf;
978 }
979 if ( rowtabAt == bcsc->rowtab ) {
980 MPI_Recv( val_buf, recvs->size.valcnt, PASTIX_MPI_DOUBLE,
981 c_recv, PastixTagValuesAAt, bcsc_comm->comm, MPI_STATUS_IGNORE );
982 }
983 idx_size = recvs->size.idxcnt;
984
985 if ( spm->mtxtype != SpmGeneral ) {
986 nbelt_recv += bcsc_dhandle_recv_AAt( spm, ord, solvmtx, rowtabAt, bcsc,
987 val_buf, idx_cnt[c_recv], idx_size );
988 }
989 else {
990 if ( rowtabAt == bcsc->rowtab ) {
991 nbelt_recv += bcsc_dhandle_recv_A( spm, ord, solvmtx, bcsc, val_buf,
992 idx_cnt[c_recv], idx_size, 1 );
993 }
994 else {
995 nbelt_recv += bcsc_dhandle_recv_At( spm, ord, solvmtx, rowtabAt, bcsc,
996 val_buf, idx_cnt[c_recv], idx_size, 1 );
997 }
998 }
999 }
1000 c_recv = (c_recv-1+clustnbr) % clustnbr;
1001 }
1002
1003 if ( rowtabAt == bcsc->rowtab ) {
1004 MPI_Waitall( counter_req, requests, statuses );
1005 }
1006 if ( spm->mtxtype != SpmGeneral ) {
1007 free( val_buf );
1008 }
1009 assert( nbelt_recv == bcsc_comm->data_comm[clustnum].recvAAt.size.valcnt );
1010 if ( ( spm->mtxtype != SpmGeneral ) || ( rowtabAt != bcsc->rowtab ) ) {
1011 bcsc_free_buf( bcsc_comm, PastixTagMemRecvIdxAAt );
1012 bcsc_free_buf( bcsc_comm, PastixTagMemSendValAAt );
1013 }
1014 if ( ( spm->mtxtype == SpmGeneral ) && ( rowtabAt != bcsc->rowtab ) ) {
1015 bcsc_free_buf( bcsc_comm, PastixTagMemRecvAAt );
1016 }
1017 return nbelt_recv;
1018}
1019#endif
1020
1021/**
1022 *******************************************************************************
1023 *
1024 * @ingroup bcsc_internal
1025 *
1026 * @brief Initializes the A values of the block csc stored in the given spm.
1027 *
1028 *******************************************************************************
1029 *
1030 * @param[in] spm
1031 * The initial sparse matrix in the spm format.
1032 *
1033 * @param[in] ord
1034 * The ordering which needs to be applied on the spm to generate the
1035 * block csc.
1036 *
1037 * @param[in] solvmtx
1038 * The solver matrix structure which describes the data distribution.
1039 *
1040 * @param[inout] bcsc
1041 * On entry, the pointer to an allocated bcsc.
1042 * On exit, the bcsc fields are updated.
1043 *
1044 *******************************************************************************
1045 *
1046 * @retval TODO
1047 *
1048 *******************************************************************************/
1049static inline pastix_int_t
1050bcsc_dinit_A( const spmatrix_t *spm,
1051 const pastix_order_t *ord,
1052 const SolverMatrix *solvmtx,
1053 pastix_bcsc_t *bcsc )
1054{
1055 double *values = (double*)(spm->values);
1056 double *Lvalues = (double*)(bcsc->Lvalues);
1057 pastix_int_t *loc2glob = spm->loc2glob;
1058 pastix_int_t *colptr = spm->colptr;
1059 pastix_int_t *rowptr = spm->rowptr;
1060 pastix_int_t *dofs = spm->dofs;
1061 pastix_int_t dof = spm->dof;
1062 pastix_int_t nbelt = 0;
1063 SolverCblk *cblk;
1064 pastix_int_t *coltab;
1065 pastix_int_t k, j, ig, jg, igp, jgp, igpe, jgpe;
1066 pastix_int_t itercblk, baseval;
1067 pastix_int_t idofi, idofj, dofi, dofj;
1068 pastix_int_t colidx, rowidx, pos;
1069
1070 baseval = spm->baseval;
1071 /*
1072 * Initializes the values of the matrix A in the blocked csc format. This
1073 * applies the permutation to the values array.
1074 *
1075 * Goes through the columns of the spm.
1076 * j is the initial local column index.
1077 * jg is the initial global column index.
1078 * jgp is the "new" jg index -> the one resulting from the permutation.
1079 */
1080 for ( j = 0; j < spm->n; j++, colptr++, loc2glob++ ) {
1081 jg = ( spm->loc2glob == NULL ) ? j : *loc2glob - baseval;
1082 jgp = ord->permtab[jg];
1083 jgpe = ( dof > 0 ) ? jgp * dof : dofs[ jg ] - baseval;
1084 dofj = ( dof > 0 ) ? dof : dofs[ jg+1 ] - dofs[ jg ];
1085
1086 itercblk = bcsc->col2cblk[ jgpe ];
1087
1088 /*
1089 * If MPI is used in shared memory, the block can belong to another
1090 * processor, in this case the column is skipped.
1091 */
1092 /* The block of the column jgp belongs to the current processor. */
1093 if ( itercblk >= 0 ) {
1094 /* Gets the block in which the data will be added. */
1095 cblk = solvmtx->cblktab + itercblk;
1096 coltab = bcsc->cscftab[cblk->bcscnum].coltab;
1097 /*
1098 * Goes through the row of the column j.
1099 * ig is the initial global row index (for the row index local = global).
1100 * igp is the "new" ig index -> the one resulting from the permutation.
1101 */
1102 for ( k = colptr[0]; k < colptr[1]; k++, rowptr++ ) {
1103 ig = *rowptr - baseval;
1104 igp = ord->permtab[ig];
1105 igpe = ( dof > 0 ) ? igp * dof : dofs[ ig ] - baseval;
1106 dofi = ( dof > 0 ) ? dof : dofs[ ig+1 ] - dofs[ ig ];
1107
1108 /* Copies the values from element (ig, jg) to position (igpe, jgpe) into expanded bcsc. */
1109 colidx = jgpe - cblk->fcolnum;
1110 for ( idofj = 0; idofj < dofj; idofj++, colidx++ ) {
1111 rowidx = igpe;
1112 pos = coltab[ colidx ];
1113 for ( idofi = 0; idofi < dofi; idofi++, rowidx++, pos++, values++ ) {
1114 assert( rowidx >= 0 );
1115 assert( rowidx < spm->gNexp );
1116 bcsc->rowtab[ pos ] = rowidx;
1117 Lvalues[ pos ] = *values;
1118 nbelt++;
1119 }
1120 coltab[ colidx ] += dofi;
1121 assert( coltab[ colidx ] <= coltab[ colidx+1 ] );
1122 }
1123 }
1124 }
1125 /* The block of the column jgp belongs to another processor. */
1126 else {
1127 for ( k = colptr[0]; k < colptr[1]; k++, rowptr++ ) {
1128 ig = *rowptr - baseval;
1129 dofi = ( dof > 0 ) ? dof : dofs[ ig+1 ] - dofs[ ig ];
1130
1131 values += dofi * dofj;
1132 }
1133 }
1134 }
1135
1136 return nbelt;
1137}
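In bcsc_dinit_A(), coltab[colidx] acts as a write cursor: it points to the next free slot of the column and is incremented as values are inserted, so that at the end coltab[c] holds the value the next column started with. The toy standalone sketch below shows that cursor pattern and a per-column restore step (an assumption about what bcsc_restore_coltab() amounts to for one column block, not the library routine itself).

#include <stdio.h>

int main( void )
{
    /* Toy coltab: column c owns the slots [ coltab[c], coltab[c+1] ) of rowtab. */
    int coltab[4] = { 0, 2, 5, 6 };
    int rowtab[6];
    int inserts[6][2] = { {0,4}, {1,2}, {0,7}, {1,9}, {2,3}, {1,1} };  /* (column, row) pairs */
    int c, k;

    /* Insertion pass: coltab[c] is the next free position of column c. */
    for ( k = 0; k < 6; k++ ) {
        c = inserts[k][0];
        rowtab[ coltab[c]++ ] = inserts[k][1];
    }

    /* Restore pass: shift the cursors back so coltab describes the columns again. */
    for ( c = 3; c > 0; c-- ) {
        coltab[c] = coltab[c-1];
    }
    coltab[0] = 0;

    for ( c = 0; c < 3; c++ ) {
        printf( "col %d: rows", c );
        for ( k = coltab[c]; k < coltab[c+1]; k++ ) {
            printf( " %d", rowtab[k] );
        }
        printf( "\n" );
    }
    return 0;
}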
1138
1139/**
1140 *******************************************************************************
1141 *
1142 * @ingroup bcsc_internal
1143 *
1144 * @brief Initializes the At values in the block csc stored in the given spm.
1145 *
1146 * This routine initializes either:
1147 * - the symmetric upper part (L^t) when the matrix is symmetric,
1148 * - the transpose part of A (A^t -> U) when the matrix is
1149 *   general.
1150 *
1151 *******************************************************************************
1152 *
1153 * @param[in] spm
1154 * The initial sparse matrix in the spm format.
1155 *
1156 * @param[in] ord
1157 * The ordering which needs to be applied on the spm to generate the
1158 * block csc.
1159 *
1160 * @param[in] solvmtx
1161 * The solver matrix structure which describes the data distribution.
1162 *
1163 * @param[inout] rowtab
1164 * The row tab of the bcsc or the row tab associated to At.
1165 *
1166 * @param[inout] bcsc
1167 * On entry, the pointer to an allocated bcsc.
1168 * On exit, the bcsc fields are updated.
1169 *
1170 *******************************************************************************
1171 *
1172 * @retval TODO
1173 *
1174 *******************************************************************************/
1175static inline pastix_int_t
1176bcsc_dinit_At( const spmatrix_t *spm,
1177 const pastix_order_t *ord,
1178 const SolverMatrix *solvmtx,
1179 pastix_int_t *rowtab,
1180 pastix_bcsc_t *bcsc )
1181{
1182 double *values = (double*)(spm->values);
1183 double *Uvalues;
1184 pastix_int_t *loc2glob = spm->loc2glob;
1185 pastix_int_t *colptr = spm->colptr;
1186 pastix_int_t *rowptr = spm->rowptr;
1187 pastix_int_t *dofs = spm->dofs;
1188 pastix_int_t dof = spm->dof;
1189 pastix_int_t nbelt = 0;
1190 SolverCblk *cblk;
1191 pastix_int_t *coltab;
1192 pastix_int_t k, j, ig, jg, igp, jgp, igpe, jgpe;
1193 pastix_int_t itercblk, baseval;
1194 pastix_int_t idofi, idofj, dofi, dofj;
1195 pastix_int_t colidx, rowidx, pos;
1196 double (*_bcsc_conj)(double) = NULL;
1197
1198 /* We're working on U. */
1199 if ( spm->mtxtype == SpmGeneral ) {
1200 _bcsc_conj = __fct_id;
1201 Uvalues = (double*)(bcsc->Uvalues);
1202 }
1203 /* L^[t|h] part of the matrix. */
1204 else {
1205 /*
1206 * precision_generator/sub.py replaces SpmHermitian with SpmSymmetric in the
1207 * real precisions, hence the two identical tests below. Don't use else or else if.
1208 */
1209 if( spm->mtxtype == SpmSymmetric ) {
1210 _bcsc_conj = __fct_conj;
1211 }
1212 if( spm->mtxtype == SpmSymmetric ) {
1213 _bcsc_conj = __fct_id;
1214 }
1215 Uvalues = (double*)(bcsc->Lvalues);
1216 }
1217 assert( _bcsc_conj != NULL );
1218
1219 baseval = spm->baseval;
1220 /*
1221 * Initializes the values of the matrix A^t in the blocked csc format. This
1222 * applies the permutation to the values array.
1223 *
1224 * Goes through the columns of the spm.
1225 * j is the initial local column index.
1226 * jg is the initial global column index.
1227 * jgp is the "new" jg index -> the one resulting from the permutation.
1228 */
1229 for ( j = 0; j < spm->n; j++, colptr++, loc2glob++ ) {
1230 jg = ( spm->loc2glob == NULL ) ? j : *loc2glob - baseval;
1231 jgp = ord->permtab[jg];
1232 jgpe = ( dof > 0 ) ? jgp * dof : dofs[ jg ] - baseval;
1233 dofj = ( dof > 0 ) ? dof : dofs[ jg+1 ] - dofs[ jg ];
1234
1235 /*
1236 * Goes through the row of the column j.
1237 * ig is the initial global row index (for the row index local = global).
1238 * igp is the "new" ig index -> the one resulting from the permutation.
1239 */
1240 for ( k = colptr[0]; k < colptr[1]; k++, rowptr++ ) {
1241 ig = *rowptr - baseval;
1242 igp = ord->permtab[ig];
1243 igpe = ( dof > 0 ) ? igp * dof : dofs[ ig ] - baseval;
1244 dofi = ( dof > 0 ) ? dof : dofs[ ig+1 ] - dofs[ ig ];
1245
1246 /* Diagonal block of a symmetric matrix. */
1247 if ( ( ig == jg ) && ( spm->mtxtype != SpmGeneral ) ) {
1248 values += dofi * dofj;
1249 continue;
1250 }
1251 itercblk = bcsc->col2cblk[ igpe ];
1252
1253 /*
1254 * If MPI is used in shared memory, the block can belong to another
1255 * processor, in this case the row is skipped.
1256 */
1257 /* The block of the row igp belongs to the current processor. */
1258 if ( itercblk >= 0 ) {
1259 /* Gets the block in which the data will be added. */
1260 cblk = solvmtx->cblktab + itercblk;
1261 coltab = bcsc->cscftab[cblk->bcscnum].coltab;
1262
1263 /* Copies the values from element (ig, jg) to position (igpe, jgpe) into expanded bcsc. */
1264 for ( idofj = 0; idofj < dofj; idofj++ ) {
1265 rowidx = jgpe + idofj;
1266 colidx = igpe - cblk->fcolnum;
1267 for ( idofi = 0; idofi < dofi; idofi++, colidx++, values++ ) {
1268 pos = coltab[ colidx ];
1269
1270 /* Copies the index/value */
1271 assert( rowidx >= 0 );
1272 assert( rowidx < spm->gNexp );
1273 rowtab[ pos ] = rowidx;
1274 Uvalues[ pos ] = _bcsc_conj( *values );
1275
1276 coltab[ colidx ]++;
1277 nbelt++;
1278 }
1279 }
1280 }
1281 /* The block of the row igp belongs to another processor. */
1282 else {
1283 values += dofi * dofj;
1284 continue;
1285 }
1286 }
1287 }
1288 return nbelt;
1289}
1290
1291/**
1292 *******************************************************************************
1293 *
1294 * @ingroup bcsc_internal
1295 *
1296 * @brief Sorts the block csc subarray associated with each column block.
1297 *
1298 *******************************************************************************
1299 *
1300 * @param[in] bcsc
1301 * On entry, the pointer to an allocated bcsc.
1302 *
1303 * @param[in] rowtab
1304 * The row array of the bcsc (or the one associated with At) to sort.
1305 *
1306 * @param[in] valtab
1307 * The value array (Lvalues or Uvalues) matching rowtab, sorted
1308 * simultaneously.
1309 *
1310 *******************************************************************************/
1311static inline void
1312bcsc_dsort( const pastix_bcsc_t *bcsc,
1313 pastix_int_t *rowtab,
1314 double *valtab )
1315{
1316 bcsc_cblk_t *block;
1317 pastix_int_t block_num, col, len_col_block, block_nbr, col_nbr;
1318 void *sortptr[2];
1319
1320 block_nbr = bcsc->cscfnbr;
1321 block = bcsc->cscftab;
1322 for ( block_num = 0; block_num < block_nbr; block_num++, block++ ) {
1323
1324 col_nbr = block->colnbr;
1325 for ( col = 0; col < col_nbr; col++ ) {
1326
1327 sortptr[0] = (void*)(rowtab + block->coltab[col]);
1328 sortptr[1] = (void*)(valtab + block->coltab[col]);
1329
1330 len_col_block = block->coltab[col+1] - block->coltab[col];
1331#if !defined(NDEBUG)
1332 {
1333 pastix_int_t gN = bcsc->gN;
1334 pastix_int_t i;
1335
1336 for ( i = 0; i < len_col_block; i++ ) {
1337 assert( rowtab[ block->coltab[col] + i ] >= 0 );
1338 assert( rowtab[ block->coltab[col] + i ] < gN );
1339 }
1340 }
1341#endif
1342
1343 d_qsortIntFloatAsc( sortptr, len_col_block );
1344 }
1345 }
1346}
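bcsc_dsort() delegates the actual work to d_qsortIntFloatAsc(), which sorts each column sub-range of (rowtab, valtab) jointly by row index. The standalone sketch below reproduces the effect on a single column with the standard qsort() and a temporary array of (row, value) pairs; it is an illustration of the operation, not the library routine.

#include <stdio.h>
#include <stdlib.h>

typedef struct { int row; double val; } entry_t;

static int
cmp_entry( const void *a, const void *b )
{
    const entry_t *ea = (const entry_t*)a;
    const entry_t *eb = (const entry_t*)b;
    return ( ea->row > eb->row ) - ( ea->row < eb->row );
}

int main( void )
{
    /* One column sub-array of (rowtab, values), as bcsc_dsort() sees it. */
    int     rowtab[4] = { 7, 2, 9, 4 };
    double  values[4] = { 7., 2., 9., 4. };
    entry_t tmp[4];
    int     i;

    for ( i = 0; i < 4; i++ ) { tmp[i].row = rowtab[i]; tmp[i].val = values[i]; }
    qsort( tmp, 4, sizeof(entry_t), cmp_entry );
    for ( i = 0; i < 4; i++ ) { rowtab[i] = tmp[i].row; values[i] = tmp[i].val; }

    for ( i = 0; i < 4; i++ ) {
        printf( "%d -> %g\n", rowtab[i], values[i] );
    }
    return 0;
}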
1347
1348/**
1349 *******************************************************************************
1350 *
1351 * @brief Initializes a centralized double block csc.
1352 *
1353 *******************************************************************************
1354 *
1355 * @param[in] spm
1356 * The initial sparse matrix in the spm format.
1357 *
1358 * @param[in] ord
1359 * The ordering that needs to be applied on the spm to generate the
1360 * block csc.
1361 *
1362 * @param[in] solvmtx
1363 * The solver matrix structure that describes the data distribution.
1364 *
1365 * @param[in] initAt
1366 * A flag to enable/disable the initialization of A'.
1367 *
1368 * @param[inout] bcsc
1369 * On entry, the pointer to an allocated bcsc.
1370 * On exit, the bcsc stores the input spm with the permutation applied
1371 * and grouped according to the distribution described in solvmtx.
1372 *
1373 * @param[in] valuesize
1374 * The number of non-zero values in the matrix.
1375 *
1376 *******************************************************************************/
1377void
1378bcsc_dinit( const spmatrix_t *spm,
1379 const pastix_order_t *ord,
1380 const SolverMatrix *solvmtx,
1381 int initAt,
1382 pastix_bcsc_t *bcsc,
1383 pastix_int_t valuesize )
1384{
1385 pastix_int_t nbelt = 0;
1386#if defined(PASTIX_WITH_MPI)
1387 pastix_int_t nbelt_recv = 0;
1388 bcsc_handle_comm_t *bcsc_comm = bcsc->bcsc_comm;
1389#endif
1390
1391 /* Exchanges the data if the spm is distributed and adds the received data of A. */
1392#if defined(PASTIX_WITH_MPI)
1393 if ( bcsc_comm != NULL ) {
1394 nbelt_recv = bcsc_dexchange_values_A( spm, ord, solvmtx, bcsc );
1395 nbelt_recv += bcsc_dexchange_values_AAt( spm, ord, solvmtx, bcsc->rowtab, bcsc );
1396 nbelt += nbelt_recv;
1397 }
1398#endif
1399
1400 /* Initializes the blocked structure of the matrix A. */
1401 nbelt += bcsc_dinit_A( spm, ord, solvmtx, bcsc );
1402
1403 /* Initializes the blocked structure of the matrix At if the matrix is not general. */
1404 if ( spm->mtxtype != SpmGeneral ) {
1405 /* Exchanges the data if the spm is distributed and adds the received data of At. */
1406#if defined(PASTIX_WITH_MPI)
1407 if ( bcsc_comm != NULL ) {
1408 nbelt_recv = bcsc_dexchange_values_At( spm, ord, solvmtx, bcsc->rowtab, bcsc );
1409 nbelt += nbelt_recv;
1410 }
1411#endif
1412 nbelt += bcsc_dinit_At( spm, ord, solvmtx, bcsc->rowtab, bcsc );
1413 }
1414
1415 /* Restores the correct coltab arrays. */
1416 bcsc_restore_coltab( bcsc );
1417
1418 /* Sorts the csc. */
1419 bcsc_dsort( bcsc, bcsc->rowtab, bcsc->Lvalues );
1420
1421 /* Initializes the blocked structure of the matrix At if the matrix is general. */
1422 if ( spm->mtxtype == SpmGeneral ) {
1423 /* If only the refinement is performed A^t is not required. */
1424 if ( initAt ) {
1425 pastix_int_t *trowtab, i;
1426 MALLOC_INTERN( bcsc->Uvalues, valuesize, double );
1427 MALLOC_INTERN( trowtab, valuesize, pastix_int_t );
1428
1429 for (i=0; i<valuesize; i++) {
1430 trowtab[i] = -1;
1431 }
1432 nbelt = bcsc_dinit_At( spm, ord, solvmtx, trowtab, bcsc );
1433#if defined(PASTIX_WITH_MPI)
1434 if ( bcsc_comm != NULL ) {
1435 nbelt_recv = bcsc_dexchange_values_At( spm, ord, solvmtx, trowtab, bcsc );
1436 nbelt_recv += bcsc_dexchange_values_AAt( spm, ord, solvmtx, trowtab, bcsc );
1437 nbelt += nbelt_recv;
1438 }
1439#endif
1440 /* Restores the correct coltab arrays */
1441 bcsc_restore_coltab( bcsc );
1442
1443 /* Sorts the transposed csc */
1444 bcsc_dsort( bcsc, trowtab, bcsc->Uvalues );
1445 memFree( trowtab );
1446 }
1447 }
1448 /* In the symmetric case, Uvalues shares the Lvalues array to save memory space. */
1449 else {
1450 bcsc->Uvalues = bcsc->Lvalues;
1451 }
1452
1453 (void) nbelt;
1454#if defined(PASTIX_WITH_MPI)
1455 (void) nbelt_recv;
1456#endif
1457}
BEGIN_C_DECLS typedef int pastix_int_t
Definition datatypes.h:51
static pastix_int_t bcsc_dinit_At(const spmatrix_t *spm, const pastix_order_t *ord, const SolverMatrix *solvmtx, pastix_int_t *rowtab, pastix_bcsc_t *bcsc)
Initializes the At values in the block csc stored in the given spm.
void bcsc_restore_coltab(pastix_bcsc_t *bcsc)
Restores the coltab array when it has been modified to initialize the row and values arrays.
Definition bcsc.c:1700
static void bcsc_dsort(const pastix_bcsc_t *bcsc, pastix_int_t *rowtab, double *valtab)
Sorts the block csc subarray associated with each column block.
static pastix_int_t bcsc_dinit_A(const spmatrix_t *spm, const pastix_order_t *ord, const SolverMatrix *solvmtx, pastix_bcsc_t *bcsc)
Initializes the A values of the block csc stored in the given spm.
void bcsc_dinit(const spmatrix_t *spm, const pastix_order_t *ord, const SolverMatrix *solvmtx, int initAt, pastix_bcsc_t *bcsc, pastix_int_t valuesize)
Initializes a centralized double block csc.
bcsc_data_amount_t recvA
Definition bcsc.h:83
bcsc_exch_comm_t recvAAt
Definition bcsc.h:85
pastix_int_t max_idx
Definition bcsc.h:97
bcsc_exch_comm_t sendAt
Definition bcsc.h:81
PASTIX_Comm comm
Definition bcsc.h:95
pastix_int_t idxcnt
Definition bcsc.h:57
pastix_int_t colnbr
Definition bcsc.h:111
bcsc_exch_comm_t sendA
Definition bcsc.h:80
bcsc_data_amount_t size
Definition bcsc.h:66
pastix_int_t clustnum
Definition bcsc.h:94
bcsc_proc_comm_t data_comm[1]
Definition bcsc.h:99
pastix_int_t * idxbuf
Definition bcsc.h:67
pastix_int_t * coltab
Definition bcsc.h:113
bcsc_data_amount_t recvAt
Definition bcsc.h:84
bcsc_exch_comm_t sendAAt
Definition bcsc.h:82
pastix_int_t valcnt
Definition bcsc.h:58
void * valbuf
Definition bcsc.h:70
pastix_int_t clustnbr
Definition bcsc.h:93
pastix_int_t max_val
Definition bcsc.h:98
Compressed colptr format for the bcsc.
Definition bcsc.h:110
Information about the amount of data.
Definition bcsc.h:56
Information about the sending data.
Definition bcsc.h:65
Structure to manage communications with distributed spm.
Definition bcsc.h:92
Informations of the data exchanged with other processors.
Definition bcsc.h:79
pastix_int_t * permtab
Definition order.h:51
pastix_int_t * peritab
Definition order.h:52
Order structure.
Definition order.h:47
pastix_int_t bcscnum
Definition solver.h:175
SolverCblk *restrict cblktab
Definition solver.h:228
pastix_int_t fcolnum
Definition solver.h:166
Solver column block structure.
Definition solver.h:161
Solver column block structure.
Definition solver.h:203