xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 2f613bf53f46f9356e00a2ca2bd69453be72fc31)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15    for communicators controlling multiple processes.  It is recommended that you call both of
16    the above preallocation routines for simplicity.
17 
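   Example usage (a minimal sketch of the recommendation above; comm is any MPI communicator, the sizes
   and preallocation counts are placeholders, and error checking is omitted):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,10,NULL);          /* takes effect when comm has a single process */
   MatMPIAIJSetPreallocation(A,10,NULL,5,NULL);   /* takes effect when comm has multiple processes */
.ve
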
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22    Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
23    switches over automatically to using inodes when enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
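   Example usage (a sketch, assuming the application calls MatSetFromOptions() on the matrix):
.vb
   ./myprog -mat_type aijcrl
.ve
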
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
66 {
67   PetscErrorCode ierr;
68   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
69 
70   PetscFunctionBegin;
71   if (mat->A) {
72     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
73     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
74   }
75   PetscFunctionReturn(0);
76 }
77 
78 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
79 {
80   PetscErrorCode  ierr;
81   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
82   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
83   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
84   const PetscInt  *ia,*ib;
85   const MatScalar *aa,*bb,*aav,*bav;
86   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
87   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
88 
89   PetscFunctionBegin;
90   *keptrows = NULL;
91 
92   ia   = a->i;
93   ib   = b->i;
94   ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
95   ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) {
100       cnt++;
101       goto ok1;
102     }
103     aa = aav + ia[i];
104     for (j=0; j<na; j++) {
105       if (aa[j] != 0.0) goto ok1;
106     }
107     bb = bav + ib[i];
108     for (j=0; j <nb; j++) {
109       if (bb[j] != 0.0) goto ok1;
110     }
111     cnt++;
112 ok1:;
113   }
114   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr);
115   if (!n0rows) {
116     ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
117     ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
118     PetscFunctionReturn(0);
119   }
120   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
121   cnt  = 0;
122   for (i=0; i<m; i++) {
123     na = ia[i+1] - ia[i];
124     nb = ib[i+1] - ib[i];
125     if (!na && !nb) continue;
126     aa = aav + ia[i];
127     for (j=0; j<na;j++) {
128       if (aa[j] != 0.0) {
129         rows[cnt++] = rstart + i;
130         goto ok2;
131       }
132     }
133     bb = bav + ib[i];
134     for (j=0; j<nb; j++) {
135       if (bb[j] != 0.0) {
136         rows[cnt++] = rstart + i;
137         goto ok2;
138       }
139     }
140 ok2:;
141   }
142   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
143   ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
144   ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
145   PetscFunctionReturn(0);
146 }
147 
148 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
149 {
150   PetscErrorCode    ierr;
151   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
152   PetscBool         cong;
153 
154   PetscFunctionBegin;
155   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
156   if (Y->assembled && cong) {
157     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
158   } else {
159     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
160   }
161   PetscFunctionReturn(0);
162 }
163 
164 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
165 {
166   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
167   PetscErrorCode ierr;
168   PetscInt       i,rstart,nrows,*rows;
169 
170   PetscFunctionBegin;
171   *zrows = NULL;
172   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
173   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
174   for (i=0; i<nrows; i++) rows[i] += rstart;
175   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
176   PetscFunctionReturn(0);
177 }
178 
179 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
180 {
181   PetscErrorCode    ierr;
182   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
183   PetscInt          i,m,n,*garray = aij->garray;
184   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
185   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
186   PetscReal         *work;
187   const PetscScalar *dummy;
188 
189   PetscFunctionBegin;
190   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
191   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
192   ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
193   ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
194   ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
195   ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
196   if (type == NORM_2) {
197     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
198       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
199     }
200     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
201       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
202     }
203   } else if (type == NORM_1) {
204     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
205       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
206     }
207     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
208       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
209     }
210   } else if (type == NORM_INFINITY) {
211     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
212       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
213     }
214     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
215       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
216     }
217   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
218     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
219       work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
220     }
221     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
222       work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
223     }
224   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
225     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
226       work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
227     }
228     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
229       work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
230     }
231   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
232   if (type == NORM_INFINITY) {
233     ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
234   } else {
235     ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
236   }
237   ierr = PetscFree(work);CHKERRQ(ierr);
238   if (type == NORM_2) {
239     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
240   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
241     for (i=0; i<n; i++) reductions[i] /= m;
242   }
243   PetscFunctionReturn(0);
244 }
245 
246 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
247 {
248   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
249   IS              sis,gis;
250   PetscErrorCode  ierr;
251   const PetscInt  *isis,*igis;
252   PetscInt        n,*iis,nsis,ngis,rstart,i;
253 
254   PetscFunctionBegin;
255   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
256   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
257   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
258   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
259   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
260   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
261 
262   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
263   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
264   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
265   n    = ngis + nsis;
266   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
267   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
268   for (i=0; i<n; i++) iis[i] += rstart;
269   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
270 
271   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
272   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
273   ierr = ISDestroy(&sis);CHKERRQ(ierr);
274   ierr = ISDestroy(&gis);CHKERRQ(ierr);
275   PetscFunctionReturn(0);
276 }
277 
278 /*
279   Local utility routine that creates a mapping from the global column
280 number to the local number in the off-diagonal part of the local
281 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
282 a slightly higher hash table cost; without it, it is not scalable (each processor
283 holds an order-N integer array) but is fast to access.
284 */
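/*
   Worked example (hypothetical data): if the off-diagonal block has garray = {3,7,12}, the colmap built
   below maps global column 7 to local column 1 (entries are stored shifted up by one so that 0 can mean
   "column not present"); MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() use this map to translate global
   column indices into local columns of aij->B.
*/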
285 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
286 {
287   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       n = aij->B->cmap->n,i;
290 
291   PetscFunctionBegin;
292   if (n && !aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
293 #if defined(PETSC_USE_CTABLE)
294   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
295   for (i=0; i<n; i++) {
296     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
297   }
298 #else
299   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
300   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
301   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
302 #endif
303   PetscFunctionReturn(0);
304 }
305 
306 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
307 { \
308     if (col <= lastcol1)  low1 = 0;     \
309     else                 high1 = nrow1; \
310     lastcol1 = col;\
311     while (high1-low1 > 5) { \
312       t = (low1+high1)/2; \
313       if (rp1[t] > col) high1 = t; \
314       else              low1  = t; \
315     } \
316       for (_i=low1; _i<high1; _i++) { \
317         if (rp1[_i] > col) break; \
318         if (rp1[_i] == col) { \
319           if (addv == ADD_VALUES) { \
320             ap1[_i] += value;   \
321             /* Not sure whether LogFlops will slow down the code or not */ \
322             (void)PetscLogFlops(1.0);   \
323            } \
324           else                    ap1[_i] = value; \
325           inserted = PETSC_TRUE; \
326           goto a_noinsert; \
327         } \
328       }  \
329       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
330       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
331       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
332       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
333       N = nrow1++ - 1; a->nz++; high1++; \
334       /* shift up all the later entries in this row */ \
335       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
336       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
337       rp1[_i] = col;  \
338       ap1[_i] = value;  \
339       A->nonzerostate++;\
340       a_noinsert: ; \
341       ailen[row] = nrow1; \
342 }
343 
344 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
345   { \
346     if (col <= lastcol2) low2 = 0;                        \
347     else high2 = nrow2;                                   \
348     lastcol2 = col;                                       \
349     while (high2-low2 > 5) {                              \
350       t = (low2+high2)/2;                                 \
351       if (rp2[t] > col) high2 = t;                        \
352       else             low2  = t;                         \
353     }                                                     \
354     for (_i=low2; _i<high2; _i++) {                       \
355       if (rp2[_i] > col) break;                           \
356       if (rp2[_i] == col) {                               \
357         if (addv == ADD_VALUES) {                         \
358           ap2[_i] += value;                               \
359           (void)PetscLogFlops(1.0);                       \
360         }                                                 \
361         else                    ap2[_i] = value;          \
362         inserted = PETSC_TRUE;                            \
363         goto b_noinsert;                                  \
364       }                                                   \
365     }                                                     \
366     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
367     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
368     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
369     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
370     N = nrow2++ - 1; b->nz++; high2++;                    \
371     /* shift up all the later entries in this row */      \
372     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
373     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
374     rp2[_i] = col;                                        \
375     ap2[_i] = value;                                      \
376     B->nonzerostate++;                                    \
377     b_noinsert: ;                                         \
378     bilen[row] = nrow2;                                   \
379   }
380 
381 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
382 {
383   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
384   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
385   PetscErrorCode ierr;
386   PetscInt       l,*garray = mat->garray,diag;
387 
388   PetscFunctionBegin;
389   /* code only works for square matrices A */
390 
391   /* find size of row to the left of the diagonal part */
392   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
393   row  = row - diag;
394   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
395     if (garray[b->j[b->i[row]+l]] > diag) break;
396   }
397   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
398 
399   /* diagonal part */
400   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
401 
402   /* right of diagonal part */
403   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
404 #if defined(PETSC_HAVE_DEVICE)
405   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
406 #endif
407   PetscFunctionReturn(0);
408 }
409 
410 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscScalar    value = 0.0;
414   PetscErrorCode ierr;
415   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
416   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
417   PetscBool      roworiented = aij->roworiented;
418 
419   /* Some Variables required in the macro */
420   Mat        A                    = aij->A;
421   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
422   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
423   PetscBool  ignorezeroentries    = a->ignorezeroentries;
424   Mat        B                    = aij->B;
425   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
426   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
427   MatScalar  *aa,*ba;
428   /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
429    * cannot use "#if defined" inside a macro. */
430   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
431 
432   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
433   PetscInt  nonew;
434   MatScalar *ap1,*ap2;
435 
436   PetscFunctionBegin;
437 #if defined(PETSC_HAVE_DEVICE)
438   if (A->offloadmask == PETSC_OFFLOAD_GPU) {
439     const PetscScalar *dummy;
440     ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr);
441     ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr);
442   }
443   if (B->offloadmask == PETSC_OFFLOAD_GPU) {
444     const PetscScalar *dummy;
445     ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr);
446     ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr);
447   }
448 #endif
449   aa = a->a;
450   ba = b->a;
451   for (i=0; i<m; i++) {
452     if (im[i] < 0) continue;
453     if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
454     if (im[i] >= rstart && im[i] < rend) {
455       row      = im[i] - rstart;
456       lastcol1 = -1;
457       rp1      = aj + ai[row];
458       ap1      = aa + ai[row];
459       rmax1    = aimax[row];
460       nrow1    = ailen[row];
461       low1     = 0;
462       high1    = nrow1;
463       lastcol2 = -1;
464       rp2      = bj + bi[row];
465       ap2      = ba + bi[row];
466       rmax2    = bimax[row];
467       nrow2    = bilen[row];
468       low2     = 0;
469       high2    = nrow2;
470 
471       for (j=0; j<n; j++) {
472         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
473         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
474         if (in[j] >= cstart && in[j] < cend) {
475           col   = in[j] - cstart;
476           nonew = a->nonew;
477           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
478 #if defined(PETSC_HAVE_DEVICE)
479           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
480 #endif
481         } else if (in[j] < 0) continue;
482         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
483         else {
484           if (mat->was_assembled) {
485             if (!aij->colmap) {
486               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
487             }
488 #if defined(PETSC_USE_CTABLE)
489             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
490             col--;
491 #else
492             col = aij->colmap[in[j]] - 1;
493 #endif
494             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
495               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
496               col  =  in[j];
497               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
498               B        = aij->B;
499               b        = (Mat_SeqAIJ*)B->data;
500               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
501               rp2      = bj + bi[row];
502               ap2      = ba + bi[row];
503               rmax2    = bimax[row];
504               nrow2    = bilen[row];
505               low2     = 0;
506               high2    = nrow2;
507               bm       = aij->B->rmap->n;
508               ba       = b->a;
509               inserted = PETSC_FALSE;
510             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
511               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
512                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
513               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
514             }
515           } else col = in[j];
516           nonew = b->nonew;
517           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
518 #if defined(PETSC_HAVE_DEVICE)
519           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
520 #endif
521         }
522       }
523     } else {
524       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
525       if (!aij->donotstash) {
526         mat->assembled = PETSC_FALSE;
527         if (roworiented) {
528           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
529         } else {
530           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
531         }
532       }
533     }
534   }
535   PetscFunctionReturn(0);
536 }
537 
538 /*
539     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
540     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
541     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
542 */
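/*
   Worked example (hypothetical numbers): with cstart = 4 and cend = 8, a local row whose sorted global
   columns are {1,5,7,9} is split by the loop below into diagonal-block columns {5-4,7-4} = {1,3}
   (so dnz = 2 is recorded in ailen) and off-diagonal-block columns {1,9} kept as global indices
   (so onz = 2 is recorded in bilen).
*/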
543 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
544 {
545   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
546   Mat            A           = aij->A; /* diagonal part of the matrix */
547   Mat            B           = aij->B; /* offdiagonal part of the matrix */
548   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
549   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
550   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
551   PetscInt       *ailen      = a->ilen,*aj = a->j;
552   PetscInt       *bilen      = b->ilen,*bj = b->j;
553   PetscInt       am          = aij->A->rmap->n,j;
554   PetscInt       diag_so_far = 0,dnz;
555   PetscInt       offd_so_far = 0,onz;
556 
557   PetscFunctionBegin;
558   /* Iterate over all rows of the matrix */
559   for (j=0; j<am; j++) {
560     dnz = onz = 0;
561     /*  Iterate over all non-zero columns of the current row */
562     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
563       /* If column is in the diagonal */
564       if (mat_j[col] >= cstart && mat_j[col] < cend) {
565         aj[diag_so_far++] = mat_j[col] - cstart;
566         dnz++;
567       } else { /* off-diagonal entries */
568         bj[offd_so_far++] = mat_j[col];
569         onz++;
570       }
571     }
572     ailen[j] = dnz;
573     bilen[j] = onz;
574   }
575   PetscFunctionReturn(0);
576 }
577 
578 /*
579     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
580     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
581     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
582     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
583     would not hold and the more complex MatSetValues_MPIAIJ has to be used.
584 */
585 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
586 {
587   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
588   Mat            A      = aij->A; /* diagonal part of the matrix */
589   Mat            B      = aij->B; /* offdiagonal part of the matrix */
590   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
591   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
592   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
593   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
594   PetscInt       *ailen = a->ilen,*aj = a->j;
595   PetscInt       *bilen = b->ilen,*bj = b->j;
596   PetscInt       am     = aij->A->rmap->n,j;
597   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
598   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
599   PetscScalar    *aa = a->a,*ba = b->a;
600 
601   PetscFunctionBegin;
602   /* Iterate over all rows of the matrix */
603   for (j=0; j<am; j++) {
604     dnz_row = onz_row = 0;
605     rowstart_offd = full_offd_i[j];
606     rowstart_diag = full_diag_i[j];
607     /*  Iterate over all non-zero columns of the current row */
608     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
609       /* If column is in the diagonal */
610       if (mat_j[col] >= cstart && mat_j[col] < cend) {
611         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
612         aa[rowstart_diag+dnz_row] = mat_a[col];
613         dnz_row++;
614       } else { /* off-diagonal entries */
615         bj[rowstart_offd+onz_row] = mat_j[col];
616         ba[rowstart_offd+onz_row] = mat_a[col];
617         onz_row++;
618       }
619     }
620     ailen[j] = dnz_row;
621     bilen[j] = onz_row;
622   }
623   PetscFunctionReturn(0);
624 }
625 
626 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
627 {
628   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
629   PetscErrorCode ierr;
630   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
631   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
632 
633   PetscFunctionBegin;
634   for (i=0; i<m; i++) {
635     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
636     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
637     if (idxm[i] >= rstart && idxm[i] < rend) {
638       row = idxm[i] - rstart;
639       for (j=0; j<n; j++) {
640         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
641         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
642         if (idxn[j] >= cstart && idxn[j] < cend) {
643           col  = idxn[j] - cstart;
644           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
645         } else {
646           if (!aij->colmap) {
647             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
648           }
649 #if defined(PETSC_USE_CTABLE)
650           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
651           col--;
652 #else
653           col = aij->colmap[idxn[j]] - 1;
654 #endif
655           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
656           else {
657             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
658           }
659         }
660       }
661     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
662   }
663   PetscFunctionReturn(0);
664 }
665 
666 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
667 {
668   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
669   PetscErrorCode ierr;
670   PetscInt       nstash,reallocs;
671 
672   PetscFunctionBegin;
673   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
674 
675   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
676   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
677   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
678   PetscFunctionReturn(0);
679 }
680 
681 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
682 {
683   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
684   PetscErrorCode ierr;
685   PetscMPIInt    n;
686   PetscInt       i,j,rstart,ncols,flg;
687   PetscInt       *row,*col;
688   PetscBool      other_disassembled;
689   PetscScalar    *val;
690 
691   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
692 
693   PetscFunctionBegin;
694   if (!aij->donotstash && !mat->nooffprocentries) {
695     while (1) {
696       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
697       if (!flg) break;
698 
699       for (i=0; i<n;) {
700         /* Now identify the consecutive vals belonging to the same row */
701         for (j=i,rstart=row[j]; j<n; j++) {
702           if (row[j] != rstart) break;
703         }
704         if (j < n) ncols = j-i;
705         else       ncols = n-i;
706         /* Now assemble all these values with a single function call */
707         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
708         i    = j;
709       }
710     }
711     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
712   }
713 #if defined(PETSC_HAVE_DEVICE)
714   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
715   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
716   if (mat->boundtocpu) {
717     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
718     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
719   }
720 #endif
721   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
722   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
723 
724   /* determine if any processor has disassembled; if so, we must
725      also disassemble ourselves, in order that we may reassemble. */
726   /*
727      if the nonzero structure of submatrix B cannot change then we know that
728      no processor disassembled, thus we can skip this step
729   */
730   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
731     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
732     if (mat->was_assembled && !other_disassembled) {
733 #if defined(PETSC_HAVE_DEVICE)
734       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
735 #endif
736       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
737     }
738   }
739   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
740     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
741   }
742   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
743 #if defined(PETSC_HAVE_DEVICE)
744   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
745 #endif
746   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
747   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
748 
749   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
750 
751   aij->rowvalues = NULL;
752 
753   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
754 
755   /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
756   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
757     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
758     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
759   }
760 #if defined(PETSC_HAVE_DEVICE)
761   mat->offloadmask = PETSC_OFFLOAD_BOTH;
762 #endif
763   PetscFunctionReturn(0);
764 }
765 
766 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
767 {
768   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
769   PetscErrorCode ierr;
770 
771   PetscFunctionBegin;
772   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
773   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
774   PetscFunctionReturn(0);
775 }
776 
777 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
778 {
779   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
780   PetscObjectState sA, sB;
781   PetscInt        *lrows;
782   PetscInt         r, len;
783   PetscBool        cong, lch, gch;
784   PetscErrorCode   ierr;
785 
786   PetscFunctionBegin;
787   /* get locally owned rows */
788   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
789   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
790   /* fix right hand side if needed */
791   if (x && b) {
792     const PetscScalar *xx;
793     PetscScalar       *bb;
794 
795     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
796     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
797     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
798     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
799     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
800     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
801   }
802 
803   sA = mat->A->nonzerostate;
804   sB = mat->B->nonzerostate;
805 
806   if (diag != 0.0 && cong) {
807     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
808     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
810     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
811     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
812     PetscInt   nnwA, nnwB;
813     PetscBool  nnzA, nnzB;
814 
815     nnwA = aijA->nonew;
816     nnwB = aijB->nonew;
817     nnzA = aijA->keepnonzeropattern;
818     nnzB = aijB->keepnonzeropattern;
819     if (!nnzA) {
820       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
821       aijA->nonew = 0;
822     }
823     if (!nnzB) {
824       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
825       aijB->nonew = 0;
826     }
827     /* Must zero here before the next loop */
828     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
829     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
830     for (r = 0; r < len; ++r) {
831       const PetscInt row = lrows[r] + A->rmap->rstart;
832       if (row >= A->cmap->N) continue;
833       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
834     }
835     aijA->nonew = nnwA;
836     aijB->nonew = nnwB;
837   } else {
838     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
839     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
840   }
841   ierr = PetscFree(lrows);CHKERRQ(ierr);
842   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
843   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
844 
845   /* reduce nonzerostate */
846   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
847   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
848   if (gch) A->nonzerostate++;
849   PetscFunctionReturn(0);
850 }
851 
852 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
853 {
854   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
855   PetscErrorCode    ierr;
856   PetscMPIInt       n = A->rmap->n;
857   PetscInt          i,j,r,m,len = 0;
858   PetscInt          *lrows,*owners = A->rmap->range;
859   PetscMPIInt       p = 0;
860   PetscSFNode       *rrows;
861   PetscSF           sf;
862   const PetscScalar *xx;
863   PetscScalar       *bb,*mask;
864   Vec               xmask,lmask;
865   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
866   const PetscInt    *aj, *ii,*ridx;
867   PetscScalar       *aa;
868 
869   PetscFunctionBegin;
870   /* Create SF where leaves are input rows and roots are owned rows */
871   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
872   for (r = 0; r < n; ++r) lrows[r] = -1;
873   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
874   for (r = 0; r < N; ++r) {
875     const PetscInt idx   = rows[r];
876     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
877     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
878       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
879     }
880     rrows[r].rank  = p;
881     rrows[r].index = rows[r] - owners[p];
882   }
883   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
884   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
885   /* Collect flags for rows to be zeroed */
886   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
887   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
888   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
889   /* Compress and put in row numbers */
890   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
891   /* zero diagonal part of matrix */
892   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
893   /* handle off-diagonal part of the matrix */
894   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
895   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
896   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
897   for (i=0; i<len; i++) bb[lrows[i]] = 1;
898   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
899   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
900   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
901   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
902   if (x && b) { /* this code is buggy when the row and column layouts don't match */
903     PetscBool cong;
904 
905     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
906     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
907     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
908     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
909     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
910     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
911   }
912   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
913   /* remove zeroed rows of the off-diagonal matrix */
914   ii = aij->i;
915   for (i=0; i<len; i++) {
916     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
917   }
918   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
919   if (aij->compressedrow.use) {
920     m    = aij->compressedrow.nrows;
921     ii   = aij->compressedrow.i;
922     ridx = aij->compressedrow.rindex;
923     for (i=0; i<m; i++) {
924       n  = ii[i+1] - ii[i];
925       aj = aij->j + ii[i];
926       aa = aij->a + ii[i];
927 
928       for (j=0; j<n; j++) {
929         if (PetscAbsScalar(mask[*aj])) {
930           if (b) bb[*ridx] -= *aa*xx[*aj];
931           *aa = 0.0;
932         }
933         aa++;
934         aj++;
935       }
936       ridx++;
937     }
938   } else { /* do not use compressed row format */
939     m = l->B->rmap->n;
940     for (i=0; i<m; i++) {
941       n  = ii[i+1] - ii[i];
942       aj = aij->j + ii[i];
943       aa = aij->a + ii[i];
944       for (j=0; j<n; j++) {
945         if (PetscAbsScalar(mask[*aj])) {
946           if (b) bb[i] -= *aa*xx[*aj];
947           *aa = 0.0;
948         }
949         aa++;
950         aj++;
951       }
952     }
953   }
954   if (x && b) {
955     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
956     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
957   }
958   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
959   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
960   ierr = PetscFree(lrows);CHKERRQ(ierr);
961 
962   /* only change matrix nonzero state if pattern was allowed to be changed */
963   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
964     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
965     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
966   }
967   PetscFunctionReturn(0);
968 }
969 
970 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
971 {
972   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
973   PetscErrorCode ierr;
974   PetscInt       nt;
975   VecScatter     Mvctx = a->Mvctx;
976 
977   PetscFunctionBegin;
978   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
979   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
980   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
981   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
982   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
983   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
984   PetscFunctionReturn(0);
985 }
986 
987 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
988 {
989   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
990   PetscErrorCode ierr;
991 
992   PetscFunctionBegin;
993   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
994   PetscFunctionReturn(0);
995 }
996 
997 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
998 {
999   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1000   PetscErrorCode ierr;
1001   VecScatter     Mvctx = a->Mvctx;
1002 
1003   PetscFunctionBegin;
1004   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1012 {
1013   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1014   PetscErrorCode ierr;
1015 
1016   PetscFunctionBegin;
1017   /* do nondiagonal part */
1018   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1019   /* do local part */
1020   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1021   /* add partial results together */
1022   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1023   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1028 {
1029   MPI_Comm       comm;
1030   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1031   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1032   IS             Me,Notme;
1033   PetscErrorCode ierr;
1034   PetscInt       M,N,first,last,*notme,i;
1035   PetscBool      lf;
1036   PetscMPIInt    size;
1037 
1038   PetscFunctionBegin;
1039   /* Easy test: symmetric diagonal block */
1040   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1041   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1042   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr);
1043   if (!*f) PetscFunctionReturn(0);
1044   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1045   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1046   if (size == 1) PetscFunctionReturn(0);
1047 
1048   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1049   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1050   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1051   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1052   for (i=0; i<first; i++) notme[i] = i;
1053   for (i=last; i<M; i++) notme[i-last+first] = i;
1054   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1055   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1056   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1057   Aoff = Aoffs[0];
1058   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1059   Boff = Boffs[0];
1060   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1061   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1062   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1063   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1064   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1065   ierr = PetscFree(notme);CHKERRQ(ierr);
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1070 {
1071   PetscErrorCode ierr;
1072 
1073   PetscFunctionBegin;
1074   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1079 {
1080   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1081   PetscErrorCode ierr;
1082 
1083   PetscFunctionBegin;
1084   /* do nondiagonal part */
1085   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1086   /* do local part */
1087   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1088   /* add partial results together */
1089   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1090   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 /*
1095   This only works correctly for square matrices where the subblock a->A is the
1096   diagonal block.
1097 */
1098 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1099 {
1100   PetscErrorCode ierr;
1101   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1102 
1103   PetscFunctionBegin;
1104   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1105   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1106   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1111 {
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1117   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1122 {
1123   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127 #if defined(PETSC_USE_LOG)
1128   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1129 #endif
1130   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1131   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1132   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1133   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1134 #if defined(PETSC_USE_CTABLE)
1135   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1136 #else
1137   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1138 #endif
1139   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1140   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1141   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1142   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1143   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1144   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1145 
1146   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1147   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1148 
1149   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1153   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1154   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1155   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1156   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1157   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1158   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1159 #if defined(PETSC_HAVE_CUDA)
1160   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1161 #endif
1162 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1164 #endif
1165   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
1166 #if defined(PETSC_HAVE_ELEMENTAL)
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1168 #endif
1169 #if defined(PETSC_HAVE_SCALAPACK)
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1171 #endif
1172 #if defined(PETSC_HAVE_HYPRE)
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1174   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1175 #endif
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1182 #if defined(PETSC_HAVE_MKL_SPARSE)
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1184 #endif
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1192 {
1193   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1194   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1195   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1196   const PetscInt    *garray = aij->garray;
1197   const PetscScalar *aa,*ba;
1198   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1199   PetscInt          *rowlens;
1200   PetscInt          *colidxs;
1201   PetscScalar       *matvals;
1202   PetscErrorCode    ierr;
1203 
1204   PetscFunctionBegin;
1205   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1206 
1207   M  = mat->rmap->N;
1208   N  = mat->cmap->N;
1209   m  = mat->rmap->n;
1210   rs = mat->rmap->rstart;
1211   cs = mat->cmap->rstart;
1212   nz = A->nz + B->nz;
1213 
1214   /* write matrix header */
1215   header[0] = MAT_FILE_CLASSID;
1216   header[1] = M; header[2] = N; header[3] = nz;
1217   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1218   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1219 
1220   /* fill in and store row lengths  */
1221   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1222   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1223   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1224   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1225 
1226   /* fill in and store column indices */
1227   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1228   for (cnt=0, i=0; i<m; i++) {
1229     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1230       if (garray[B->j[jb]] > cs) break;
1231       colidxs[cnt++] = garray[B->j[jb]];
1232     }
1233     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1234       colidxs[cnt++] = A->j[ja] + cs;
1235     for (; jb<B->i[i+1]; jb++)
1236       colidxs[cnt++] = garray[B->j[jb]];
1237   }
1238   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1239   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1240   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1241 
1242   /* fill in and store nonzero values */
1243   ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
1244   ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
1245   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1246   for (cnt=0, i=0; i<m; i++) {
1247     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1248       if (garray[B->j[jb]] > cs) break;
1249       matvals[cnt++] = ba[jb];
1250     }
1251     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1252       matvals[cnt++] = aa[ja];
1253     for (; jb<B->i[i+1]; jb++)
1254       matvals[cnt++] = ba[jb];
1255   }
1256   ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
1257   ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
1258   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1259   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1260   ierr = PetscFree(matvals);CHKERRQ(ierr);
1261 
1262   /* write block size option to the viewer's .info file */
1263   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1264   PetscFunctionReturn(0);
1265 }
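
/*
   Example (sketch, not part of the implementation): storing an MPIAIJ matrix with a binary
   viewer, which reaches MatView_MPIAIJ_Binary() above when the matrix lives on more than one
   process; "A" and the file name are placeholders.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/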
1266 
1267 #include <petscdraw.h>
1268 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1269 {
1270   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1271   PetscErrorCode    ierr;
1272   PetscMPIInt       rank = aij->rank,size = aij->size;
1273   PetscBool         isdraw,iascii,isbinary;
1274   PetscViewer       sviewer;
1275   PetscViewerFormat format;
1276 
1277   PetscFunctionBegin;
1278   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1279   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1280   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1281   if (iascii) {
1282     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1283     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1284       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1285       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1286       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1287       for (i=0; i<(PetscInt)size; i++) {
1288         nmax = PetscMax(nmax,nz[i]);
1289         nmin = PetscMin(nmin,nz[i]);
1290         navg += nz[i];
1291       }
1292       ierr = PetscFree(nz);CHKERRQ(ierr);
1293       navg = navg/size;
1294       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1295       PetscFunctionReturn(0);
1296     }
1297     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1298     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1299       MatInfo   info;
1300       PetscInt *inodes=NULL;
1301 
1302       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
1303       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1304       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1305       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1306       if (!inodes) {
1307         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1308                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1309       } else {
1310         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1311                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1312       }
1313       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1314       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1315       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1316       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1317       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1318       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1319       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1320       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1321       PetscFunctionReturn(0);
1322     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1323       PetscInt inodecount,inodelimit,*inodes;
1324       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1325       if (inodes) {
1326         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1327       } else {
1328         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1329       }
1330       PetscFunctionReturn(0);
1331     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1332       PetscFunctionReturn(0);
1333     }
1334   } else if (isbinary) {
1335     if (size == 1) {
1336       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1337       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1338     } else {
1339       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1340     }
1341     PetscFunctionReturn(0);
1342   } else if (iascii && size == 1) {
1343     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1344     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1345     PetscFunctionReturn(0);
1346   } else if (isdraw) {
1347     PetscDraw draw;
1348     PetscBool isnull;
1349     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1350     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1351     if (isnull) PetscFunctionReturn(0);
1352   }
1353 
1354   { /* assemble the entire matrix onto first processor */
1355     Mat A = NULL, Av;
1356     IS  isrow,iscol;
1357 
1358     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1359     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1360     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1361     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1362 /*  The commented code uses MatCreateSubMatrices instead */
1363 /*
1364     Mat *AA, A = NULL, Av;
1365     IS  isrow,iscol;
1366 
1367     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1368     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1369     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1370     if (rank == 0) {
1371        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1372        A    = AA[0];
1373        Av   = AA[0];
1374     }
1375     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1376 */
1377     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1378     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1379     /*
1380        Every process has to participate in viewing the matrix since the graphics waits are
1381        synchronized across all processes that share the PetscDraw object
1382     */
1383     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1384     if (rank == 0) {
1385       if (((PetscObject)mat)->name) {
1386         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1387       }
1388       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1389     }
1390     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1391     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1392     ierr = MatDestroy(&A);CHKERRQ(ierr);
1393   }
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1398 {
1399   PetscErrorCode ierr;
1400   PetscBool      iascii,isdraw,issocket,isbinary;
1401 
1402   PetscFunctionBegin;
1403   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1404   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1405   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1406   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1407   if (iascii || isdraw || isbinary || issocket) {
1408     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1409   }
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1414 {
1415   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1416   PetscErrorCode ierr;
1417   Vec            bb1 = NULL;
1418   PetscBool      hasop;
1419 
1420   PetscFunctionBegin;
1421   if (flag == SOR_APPLY_UPPER) {
1422     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1423     PetscFunctionReturn(0);
1424   }
1425 
1426   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1427     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1428   }
1429 
1430   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1431     if (flag & SOR_ZERO_INITIAL_GUESS) {
1432       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1433       its--;
1434     }
1435 
1436     while (its--) {
1437       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1438       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1439 
1440       /* update rhs: bb1 = bb - B*x */
1441       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1442       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1443 
1444       /* local sweep */
1445       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1446     }
1447   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1448     if (flag & SOR_ZERO_INITIAL_GUESS) {
1449       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1450       its--;
1451     }
1452     while (its--) {
1453       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1454       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1455 
1456       /* update rhs: bb1 = bb - B*x */
1457       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1458       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1459 
1460       /* local sweep */
1461       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1462     }
1463   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1464     if (flag & SOR_ZERO_INITIAL_GUESS) {
1465       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1466       its--;
1467     }
1468     while (its--) {
1469       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1470       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1471 
1472       /* update rhs: bb1 = bb - B*x */
1473       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1474       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1475 
1476       /* local sweep */
1477       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1478     }
1479   } else if (flag & SOR_EISENSTAT) {
1480     Vec xx1;
1481 
1482     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1483     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1484 
1485     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1486     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1487     if (!mat->diag) {
1488       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1489       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1490     }
1491     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1492     if (hasop) {
1493       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1494     } else {
1495       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1496     }
1497     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1498 
1499     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1500 
1501     /* local sweep */
1502     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1503     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1504     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1505   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1506 
1507   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1508 
1509   matin->factorerrortype = mat->A->factorerrortype;
1510   PetscFunctionReturn(0);
1511 }
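
/*
   Example (sketch): applying one local symmetric SOR sweep through the public interface; the
   vectors b and x and the relaxation factor 1.0 are placeholders. Only the SOR_LOCAL_* and
   SOR_EISENSTAT variants are supported in parallel, as enforced above.

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/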
1512 
1513 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1514 {
1515   Mat            aA,aB,Aperm;
1516   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1517   PetscScalar    *aa,*ba;
1518   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1519   PetscSF        rowsf,sf;
1520   IS             parcolp = NULL;
1521   PetscBool      done;
1522   PetscErrorCode ierr;
1523 
1524   PetscFunctionBegin;
1525   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1526   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1527   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1528   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1529 
1530   /* Invert row permutation to find out where my rows should go */
1531   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1532   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1533   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1534   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1535   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1536   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1537 
1538   /* Invert column permutation to find out where my columns should go */
1539   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1540   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1541   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1542   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1543   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1544   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1545   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1546 
1547   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1548   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1549   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1550 
1551   /* Find out where my gcols should go */
1552   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1553   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1554   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1555   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1556   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1557   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
1558   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
1559   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1560 
1561   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1562   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1563   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1564   for (i=0; i<m; i++) {
1565     PetscInt    row = rdest[i];
1566     PetscMPIInt rowner;
1567     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1568     for (j=ai[i]; j<ai[i+1]; j++) {
1569       PetscInt    col = cdest[aj[j]];
1570       PetscMPIInt cowner;
1571       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1572       if (rowner == cowner) dnnz[i]++;
1573       else onnz[i]++;
1574     }
1575     for (j=bi[i]; j<bi[i+1]; j++) {
1576       PetscInt    col = gcdest[bj[j]];
1577       PetscMPIInt cowner;
1578       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1579       if (rowner == cowner) dnnz[i]++;
1580       else onnz[i]++;
1581     }
1582   }
1583   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
1584   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
1585   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
1586   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
1587   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1588 
1589   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1590   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1591   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1592   for (i=0; i<m; i++) {
1593     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1594     PetscInt j0,rowlen;
1595     rowlen = ai[i+1] - ai[i];
1596     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1597       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1598       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1599     }
1600     rowlen = bi[i+1] - bi[i];
1601     for (j0=j=0; j<rowlen; j0=j) {
1602       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1603       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1604     }
1605   }
1606   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1607   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1608   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1609   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1610   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1611   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1612   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1613   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1614   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1615   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1616   *B = Aperm;
1617   PetscFunctionReturn(0);
1618 }
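
/*
   Example (sketch): permuting a parallel AIJ matrix with index sets describing the desired row
   and column orderings; rowp and colp are placeholder ISs distributed the same way as the rows
   and columns of A.

     Mat Aperm;
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/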
1619 
1620 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1621 {
1622   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1623   PetscErrorCode ierr;
1624 
1625   PetscFunctionBegin;
1626   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1627   if (ghosts) *ghosts = aij->garray;
1628   PetscFunctionReturn(0);
1629 }
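
/*
   Example (sketch): retrieving the global indices of the ghost (off-process) columns; the
   returned array is owned by the matrix and must not be freed by the caller.

     PetscInt       nghosts;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
*/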
1630 
1631 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1632 {
1633   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1634   Mat            A    = mat->A,B = mat->B;
1635   PetscErrorCode ierr;
1636   PetscLogDouble isend[5],irecv[5];
1637 
1638   PetscFunctionBegin;
1639   info->block_size = 1.0;
1640   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1641 
1642   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1643   isend[3] = info->memory;  isend[4] = info->mallocs;
1644 
1645   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1646 
1647   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1648   isend[3] += info->memory;  isend[4] += info->mallocs;
1649   if (flag == MAT_LOCAL) {
1650     info->nz_used      = isend[0];
1651     info->nz_allocated = isend[1];
1652     info->nz_unneeded  = isend[2];
1653     info->memory       = isend[3];
1654     info->mallocs      = isend[4];
1655   } else if (flag == MAT_GLOBAL_MAX) {
1656     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1657 
1658     info->nz_used      = irecv[0];
1659     info->nz_allocated = irecv[1];
1660     info->nz_unneeded  = irecv[2];
1661     info->memory       = irecv[3];
1662     info->mallocs      = irecv[4];
1663   } else if (flag == MAT_GLOBAL_SUM) {
1664     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1665 
1666     info->nz_used      = irecv[0];
1667     info->nz_allocated = irecv[1];
1668     info->nz_unneeded  = irecv[2];
1669     info->memory       = irecv[3];
1670     info->mallocs      = irecv[4];
1671   }
1672   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1673   info->fill_ratio_needed = 0;
1674   info->factor_mallocs    = 0;
1675   PetscFunctionReturn(0);
1676 }
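
/*
   Example (sketch): querying the storage information summed over all processes; the info fields
   are PetscLogDouble, hence the casts when printing.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"nz used %g allocated %g\n",(double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);
*/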
1677 
1678 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1679 {
1680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1681   PetscErrorCode ierr;
1682 
1683   PetscFunctionBegin;
1684   switch (op) {
1685   case MAT_NEW_NONZERO_LOCATIONS:
1686   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1687   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1688   case MAT_KEEP_NONZERO_PATTERN:
1689   case MAT_NEW_NONZERO_LOCATION_ERR:
1690   case MAT_USE_INODES:
1691   case MAT_IGNORE_ZERO_ENTRIES:
1692   case MAT_FORM_EXPLICIT_TRANSPOSE:
1693     MatCheckPreallocated(A,1);
1694     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1695     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1696     break;
1697   case MAT_ROW_ORIENTED:
1698     MatCheckPreallocated(A,1);
1699     a->roworiented = flg;
1700 
1701     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1702     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1703     break;
1704   case MAT_FORCE_DIAGONAL_ENTRIES:
1705   case MAT_SORTED_FULL:
1706     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1707     break;
1708   case MAT_IGNORE_OFF_PROC_ENTRIES:
1709     a->donotstash = flg;
1710     break;
1711   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1712   case MAT_SPD:
1713   case MAT_SYMMETRIC:
1714   case MAT_STRUCTURALLY_SYMMETRIC:
1715   case MAT_HERMITIAN:
1716   case MAT_SYMMETRY_ETERNAL:
1717     break;
1718   case MAT_SUBMAT_SINGLEIS:
1719     A->submat_singleis = flg;
1720     break;
1721   case MAT_STRUCTURE_ONLY:
1722     /* The option is handled directly by MatSetOption() */
1723     break;
1724   default:
1725     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1726   }
1727   PetscFunctionReturn(0);
1728 }
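
/*
   Example (sketch): telling the matrix to drop values set for rows owned by other processes,
   which avoids the stash communication during assembly; A is a placeholder.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/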
1729 
1730 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1731 {
1732   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1733   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1734   PetscErrorCode ierr;
1735   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1736   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1737   PetscInt       *cmap,*idx_p;
1738 
1739   PetscFunctionBegin;
1740   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1741   mat->getrowactive = PETSC_TRUE;
1742 
1743   if (!mat->rowvalues && (idx || v)) {
1744     /*
1745         allocate enough space to hold information from the longest row.
1746     */
1747     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1748     PetscInt   max = 1,tmp;
1749     for (i=0; i<matin->rmap->n; i++) {
1750       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1751       if (max < tmp) max = tmp;
1752     }
1753     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1754   }
1755 
1756   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1757   lrow = row - rstart;
1758 
1759   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1760   if (!v)   {pvA = NULL; pvB = NULL;}
1761   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1762   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1763   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1764   nztot = nzA + nzB;
1765 
1766   cmap = mat->garray;
1767   if (v  || idx) {
1768     if (nztot) {
1769       /* Sort by increasing column numbers, assuming A and B already sorted */
1770       PetscInt imark = -1;
1771       if (v) {
1772         *v = v_p = mat->rowvalues;
1773         for (i=0; i<nzB; i++) {
1774           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1775           else break;
1776         }
1777         imark = i;
1778         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1779         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1780       }
1781       if (idx) {
1782         *idx = idx_p = mat->rowindices;
1783         if (imark > -1) {
1784           for (i=0; i<imark; i++) {
1785             idx_p[i] = cmap[cworkB[i]];
1786           }
1787         } else {
1788           for (i=0; i<nzB; i++) {
1789             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1790             else break;
1791           }
1792           imark = i;
1793         }
1794         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1795         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1796       }
1797     } else {
1798       if (idx) *idx = NULL;
1799       if (v)   *v   = NULL;
1800     }
1801   }
1802   *nz  = nztot;
1803   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1804   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1805   PetscFunctionReturn(0);
1806 }
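
/*
   Example (sketch): iterating over the locally owned rows with MatGetRow()/MatRestoreRow(); only
   local rows may be requested, as enforced above.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       (use ncols, cols[], and vals[] here)
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/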
1807 
1808 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1809 {
1810   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1811 
1812   PetscFunctionBegin;
1813   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1814   aij->getrowactive = PETSC_FALSE;
1815   PetscFunctionReturn(0);
1816 }
1817 
1818 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1819 {
1820   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1821   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1822   PetscErrorCode ierr;
1823   PetscInt       i,j,cstart = mat->cmap->rstart;
1824   PetscReal      sum = 0.0;
1825   MatScalar      *v;
1826 
1827   PetscFunctionBegin;
1828   if (aij->size == 1) {
1829     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1830   } else {
1831     if (type == NORM_FROBENIUS) {
1832       v = amat->a;
1833       for (i=0; i<amat->nz; i++) {
1834         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1835       }
1836       v = bmat->a;
1837       for (i=0; i<bmat->nz; i++) {
1838         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1839       }
1840       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1841       *norm = PetscSqrtReal(*norm);
1842       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1843     } else if (type == NORM_1) { /* max column norm */
1844       PetscReal *tmp,*tmp2;
1845       PetscInt  *jj,*garray = aij->garray;
1846       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1847       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1848       *norm = 0.0;
1849       v     = amat->a; jj = amat->j;
1850       for (j=0; j<amat->nz; j++) {
1851         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1852       }
1853       v = bmat->a; jj = bmat->j;
1854       for (j=0; j<bmat->nz; j++) {
1855         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1856       }
1857       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1858       for (j=0; j<mat->cmap->N; j++) {
1859         if (tmp2[j] > *norm) *norm = tmp2[j];
1860       }
1861       ierr = PetscFree(tmp);CHKERRQ(ierr);
1862       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1863       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1864     } else if (type == NORM_INFINITY) { /* max row norm */
1865       PetscReal ntemp = 0.0;
1866       for (j=0; j<aij->A->rmap->n; j++) {
1867         v   = amat->a + amat->i[j];
1868         sum = 0.0;
1869         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1870           sum += PetscAbsScalar(*v); v++;
1871         }
1872         v = bmat->a + bmat->i[j];
1873         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1874           sum += PetscAbsScalar(*v); v++;
1875         }
1876         if (sum > ntemp) ntemp = sum;
1877       }
1878       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1879       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1880     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1881   }
1882   PetscFunctionReturn(0);
1883 }
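
/*
   Example (sketch): computing the Frobenius norm of the parallel matrix; NORM_1 and
   NORM_INFINITY are also supported, while the two norm is not.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/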
1884 
1885 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1886 {
1887   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1888   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1889   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1890   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1891   PetscErrorCode  ierr;
1892   Mat             B,A_diag,*B_diag;
1893   const MatScalar *pbv,*bv;
1894 
1895   PetscFunctionBegin;
1896   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1897   ai = Aloc->i; aj = Aloc->j;
1898   bi = Bloc->i; bj = Bloc->j;
1899   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1900     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1901     PetscSFNode          *oloc;
1902     PETSC_UNUSED PetscSF sf;
1903 
1904     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1905     /* compute d_nnz for preallocation */
1906     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
1907     for (i=0; i<ai[ma]; i++) {
1908       d_nnz[aj[i]]++;
1909     }
1910     /* compute local off-diagonal contributions */
1911     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
1912     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1913     /* map those to global */
1914     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1915     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1916     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1917     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
1918     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1919     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1920     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1921 
1922     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1923     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1924     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1925     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1926     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1927     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1928   } else {
1929     B    = *matout;
1930     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1931   }
1932 
1933   b           = (Mat_MPIAIJ*)B->data;
1934   A_diag      = a->A;
1935   B_diag      = &b->A;
1936   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1937   A_diag_ncol = A_diag->cmap->N;
1938   B_diag_ilen = sub_B_diag->ilen;
1939   B_diag_i    = sub_B_diag->i;
1940 
1941   /* Set ilen for diagonal of B */
1942   for (i=0; i<A_diag_ncol; i++) {
1943     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1944   }
1945 
1946   /* Transpose the diagonal part of the matrix. Unlike the off-diagonal part, this can be done
1947   very quickly (without using MatSetValues) because all writes are local. */
1948   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
1949 
1950   /* copy over the B part */
1951   ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
1952   ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
1953   pbv  = bv;
1954   row  = A->rmap->rstart;
1955   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1956   cols_tmp = cols;
1957   for (i=0; i<mb; i++) {
1958     ncol = bi[i+1]-bi[i];
1959     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
1960     row++;
1961     pbv += ncol; cols_tmp += ncol;
1962   }
1963   ierr = PetscFree(cols);CHKERRQ(ierr);
1964   ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);
1965 
1966   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1967   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1968   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1969     *matout = B;
1970   } else {
1971     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1972   }
1973   PetscFunctionReturn(0);
1974 }
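
/*
   Example (sketch): forming an explicit transpose, either as a new matrix or in place; A is a
   placeholder.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
*/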
1975 
1976 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1977 {
1978   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1979   Mat            a    = aij->A,b = aij->B;
1980   PetscErrorCode ierr;
1981   PetscInt       s1,s2,s3;
1982 
1983   PetscFunctionBegin;
1984   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1985   if (rr) {
1986     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1987     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1988     /* Overlap communication with computation. */
1989     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1990   }
1991   if (ll) {
1992     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1993     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1994     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1995   }
1996   /* scale the diagonal block */
1997   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1998 
1999   if (rr) {
2000     /* Do a scatter end and then right scale the off-diagonal block */
2001     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2002     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2003   }
2004   PetscFunctionReturn(0);
2005 }
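
/*
   Example (sketch): scaling A from the left by a vector conforming to the row layout and from
   the right by a vector conforming to the column layout; MatCreateVecs() produces vectors with
   the required layouts.

     Vec l,r;
     ierr = MatCreateVecs(A,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);
*/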
2006 
2007 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2008 {
2009   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2010   PetscErrorCode ierr;
2011 
2012   PetscFunctionBegin;
2013   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2014   PetscFunctionReturn(0);
2015 }
2016 
2017 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2018 {
2019   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2020   Mat            a,b,c,d;
2021   PetscBool      flg;
2022   PetscErrorCode ierr;
2023 
2024   PetscFunctionBegin;
2025   a = matA->A; b = matA->B;
2026   c = matB->A; d = matB->B;
2027 
2028   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2029   if (flg) {
2030     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2031   }
2032   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2033   PetscFunctionReturn(0);
2034 }
2035 
2036 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2037 {
2038   PetscErrorCode ierr;
2039   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2040   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2041 
2042   PetscFunctionBegin;
2043   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2044   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2045     /* Because of the column compression in the off-process part of the matrix a->B,
2046        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2047        directly on the two parts. If needed, a copy more efficient than MatCopy_Basic()
2048        could be provided by first uncompressing the a->B matrices and then copying the
2049        submatrices */
2050     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2051   } else {
2052     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2053     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2054   }
2055   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2056   PetscFunctionReturn(0);
2057 }
2058 
2059 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2060 {
2061   PetscErrorCode ierr;
2062 
2063   PetscFunctionBegin;
2064   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 /*
2069    Computes the number of nonzeros per row needed for preallocation when X and Y
2070    have different nonzero structure.
2071 */
2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2073 {
2074   PetscInt       i,j,k,nzx,nzy;
2075 
2076   PetscFunctionBegin;
2077   /* Set the number of nonzeros in the new matrix */
2078   for (i=0; i<m; i++) {
2079     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2080     nzx = xi[i+1] - xi[i];
2081     nzy = yi[i+1] - yi[i];
2082     nnz[i] = 0;
2083     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2084       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2085       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2086       nnz[i]++;
2087     }
2088     for (; k<nzy; k++) nnz[i]++;
2089   }
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2095 {
2096   PetscErrorCode ierr;
2097   PetscInt       m = Y->rmap->N;
2098   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2099   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2100 
2101   PetscFunctionBegin;
2102   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2103   PetscFunctionReturn(0);
2104 }
2105 
2106 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2107 {
2108   PetscErrorCode ierr;
2109   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2110 
2111   PetscFunctionBegin;
2112   if (str == SAME_NONZERO_PATTERN) {
2113     ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
2114     ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
2115   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2116     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2117   } else {
2118     Mat      B;
2119     PetscInt *nnz_d,*nnz_o;
2120 
2121     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2122     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2123     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2124     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2125     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2126     ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
2127     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2128     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2129     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2130     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2131     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2132     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2133     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2134   }
2135   PetscFunctionReturn(0);
2136 }
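
/*
   Example (sketch): Y <- Y + 2*X; when the nonzero patterns are known to coincide, passing
   SAME_NONZERO_PATTERN takes the fast path above that works on the diagonal and off-diagonal
   blocks separately.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/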
2137 
2138 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2139 
2140 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2141 {
2142 #if defined(PETSC_USE_COMPLEX)
2143   PetscErrorCode ierr;
2144   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2145 
2146   PetscFunctionBegin;
2147   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2148   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2149 #else
2150   PetscFunctionBegin;
2151 #endif
2152   PetscFunctionReturn(0);
2153 }
2154 
2155 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2156 {
2157   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2158   PetscErrorCode ierr;
2159 
2160   PetscFunctionBegin;
2161   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2162   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2167 {
2168   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2169   PetscErrorCode ierr;
2170 
2171   PetscFunctionBegin;
2172   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2173   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2178 {
2179   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2180   PetscErrorCode    ierr;
2181   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2182   PetscScalar       *va,*vv;
2183   Vec               vB,vA;
2184   const PetscScalar *vb;
2185 
2186   PetscFunctionBegin;
2187   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2188   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2189 
2190   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2191   if (idx) {
2192     for (i=0; i<m; i++) {
2193       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2194     }
2195   }
2196 
2197   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2198   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2199   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2200 
2201   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2202   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2203   for (i=0; i<m; i++) {
2204     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2205       vv[i] = vb[i];
2206       if (idx) idx[i] = a->garray[idxb[i]];
2207     } else {
2208       vv[i] = va[i];
2209       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2210         idx[i] = a->garray[idxb[i]];
2211     }
2212   }
2213   ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr);
2214   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2215   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2216   ierr = PetscFree(idxb);CHKERRQ(ierr);
2217   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2218   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2219   PetscFunctionReturn(0);
2220 }
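
/*
   Example (sketch): obtaining, for each locally owned row, the entry of largest absolute value
   and (optionally) its global column index; v must have the same parallel row layout as A.

     Vec      v;
     PetscInt m,*idx;
     ierr = MatCreateVecs(A,NULL,&v);CHKERRQ(ierr);
     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
     ierr = PetscFree(idx);CHKERRQ(ierr);
     ierr = VecDestroy(&v);CHKERRQ(ierr);
*/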
2221 
2222 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2223 {
2224   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2225   PetscInt          m = A->rmap->n,n = A->cmap->n;
2226   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2227   PetscInt          *cmap  = mat->garray;
2228   PetscInt          *diagIdx, *offdiagIdx;
2229   Vec               diagV, offdiagV;
2230   PetscScalar       *a, *diagA, *offdiagA;
2231   const PetscScalar *ba,*bav;
2232   PetscInt          r,j,col,ncols,*bi,*bj;
2233   PetscErrorCode    ierr;
2234   Mat               B = mat->B;
2235   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2236 
2237   PetscFunctionBegin;
2238   /* Handle the case where one process holds the entire matrix and the others have no entries */
2239   if (A->cmap->N == n) {
2240     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2241     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2242     ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2243     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2244     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2245     PetscFunctionReturn(0);
2246   } else if (n == 0) {
2247     if (m) {
2248       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2249       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2250       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2251     }
2252     PetscFunctionReturn(0);
2253   }
2254 
2255   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2256   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2257   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2258   ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2259 
2260   /* Get offdiagIdx[] for implicit 0.0 */
2261   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2262   ba   = bav;
2263   bi   = b->i;
2264   bj   = b->j;
2265   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2266   for (r = 0; r < m; r++) {
2267     ncols = bi[r+1] - bi[r];
2268     if (ncols == A->cmap->N - n) { /* Brow is dense */
2269       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2270     } else { /* Brow is sparse, so the minimum in absolute value is already known to be 0.0 (there is an implicit zero) */
2271       offdiagA[r] = 0.0;
2272 
2273       /* Find first hole in the cmap */
2274       for (j=0; j<ncols; j++) {
2275         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2276         if (col > j && j < cstart) {
2277           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2278           break;
2279         } else if (col > j + n && j >= cstart) {
2280           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2281           break;
2282         }
2283       }
2284       if (j == ncols && ncols < A->cmap->N - n) {
2285         /* a hole is outside compressed Bcols */
2286         if (ncols == 0) {
2287           if (cstart) {
2288             offdiagIdx[r] = 0;
2289           } else offdiagIdx[r] = cend;
2290         } else { /* ncols > 0 */
2291           offdiagIdx[r] = cmap[ncols-1] + 1;
2292           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2293         }
2294       }
2295     }
2296 
2297     for (j=0; j<ncols; j++) {
2298       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2299       ba++; bj++;
2300     }
2301   }
2302 
2303   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2304   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2305   for (r = 0; r < m; ++r) {
2306     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2307       a[r]   = diagA[r];
2308       if (idx) idx[r] = cstart + diagIdx[r];
2309     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2310       a[r] = diagA[r];
2311       if (idx) {
2312         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2313           idx[r] = cstart + diagIdx[r];
2314         } else idx[r] = offdiagIdx[r];
2315       }
2316     } else {
2317       a[r]   = offdiagA[r];
2318       if (idx) idx[r] = offdiagIdx[r];
2319     }
2320   }
2321   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2322   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2323   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2324   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2325   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2326   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2327   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2328   PetscFunctionReturn(0);
2329 }
2330 
2331 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2332 {
2333   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2334   PetscInt          m = A->rmap->n,n = A->cmap->n;
2335   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2336   PetscInt          *cmap  = mat->garray;
2337   PetscInt          *diagIdx, *offdiagIdx;
2338   Vec               diagV, offdiagV;
2339   PetscScalar       *a, *diagA, *offdiagA;
2340   const PetscScalar *ba,*bav;
2341   PetscInt          r,j,col,ncols,*bi,*bj;
2342   PetscErrorCode    ierr;
2343   Mat               B = mat->B;
2344   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2345 
2346   PetscFunctionBegin;
2347   /* Handle the case where one process holds the entire matrix and the others have no entries */
2348   if (A->cmap->N == n) {
2349     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2350     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2351     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2352     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2353     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2354     PetscFunctionReturn(0);
2355   } else if (n == 0) {
2356     if (m) {
2357       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2358       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2359       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2360     }
2361     PetscFunctionReturn(0);
2362   }
2363 
2364   ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2365   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2366   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2367   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2368 
2369   /* Get offdiagIdx[] for implicit 0.0 */
2370   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2371   ba   = bav;
2372   bi   = b->i;
2373   bj   = b->j;
2374   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2375   for (r = 0; r < m; r++) {
2376     ncols = bi[r+1] - bi[r];
2377     if (ncols == A->cmap->N - n) { /* Brow is dense */
2378       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2379     } else { /* Brow is sparse, so the minimum is already known to be 0.0 or lower (there is an implicit zero) */
2380       offdiagA[r] = 0.0;
2381 
2382       /* Find first hole in the cmap */
2383       for (j=0; j<ncols; j++) {
2384         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2385         if (col > j && j < cstart) {
2386           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2387           break;
2388         } else if (col > j + n && j >= cstart) {
2389           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2390           break;
2391         }
2392       }
2393       if (j == ncols && ncols < A->cmap->N - n) {
2394         /* a hole is outside compressed Bcols */
2395         if (ncols == 0) {
2396           if (cstart) {
2397             offdiagIdx[r] = 0;
2398           } else offdiagIdx[r] = cend;
2399         } else { /* ncols > 0 */
2400           offdiagIdx[r] = cmap[ncols-1] + 1;
2401           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2402         }
2403       }
2404     }
2405 
2406     for (j=0; j<ncols; j++) {
2407       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2408       ba++; bj++;
2409     }
2410   }
2411 
2412   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2413   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2414   for (r = 0; r < m; ++r) {
2415     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2416       a[r]   = diagA[r];
2417       if (idx) idx[r] = cstart + diagIdx[r];
2418     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2419       a[r] = diagA[r];
2420       if (idx) {
2421         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2422           idx[r] = cstart + diagIdx[r];
2423         } else idx[r] = offdiagIdx[r];
2424       }
2425     } else {
2426       a[r]   = offdiagA[r];
2427       if (idx) idx[r] = offdiagIdx[r];
2428     }
2429   }
2430   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2431   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2432   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2433   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2434   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2435   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2436   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2437   PetscFunctionReturn(0);
2438 }
2439 
2440 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2441 {
2442   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
2443   PetscInt          m = A->rmap->n,n = A->cmap->n;
2444   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2445   PetscInt          *cmap  = mat->garray;
2446   PetscInt          *diagIdx, *offdiagIdx;
2447   Vec               diagV, offdiagV;
2448   PetscScalar       *a, *diagA, *offdiagA;
2449   const PetscScalar *ba,*bav;
2450   PetscInt          r,j,col,ncols,*bi,*bj;
2451   PetscErrorCode    ierr;
2452   Mat               B = mat->B;
2453   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2454 
2455   PetscFunctionBegin;
2456   /* Handle the case where one process holds the entire matrix and the others have no entries */
2457   if (A->cmap->N == n) {
2458     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2459     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2460     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2461     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2462     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2463     PetscFunctionReturn(0);
2464   } else if (n == 0) {
2465     if (m) {
2466       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2467       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2468       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2469     }
2470     PetscFunctionReturn(0);
2471   }
2472 
2473   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2474   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2475   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2476   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2477 
2478   /* Get offdiagIdx[] for implicit 0.0 */
2479   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2480   ba   = bav;
2481   bi   = b->i;
2482   bj   = b->j;
2483   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2484   for (r = 0; r < m; r++) {
2485     ncols = bi[r+1] - bi[r];
2486     if (ncols == A->cmap->N - n) { /* Brow is dense */
2487       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2488     } else { /* Brow is sparse, so the maximum is already known to be 0.0 or higher (there is an implicit zero) */
2489       offdiagA[r] = 0.0;
2490 
2491       /* Find first hole in the cmap */
2492       for (j=0; j<ncols; j++) {
2493         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2494         if (col > j && j < cstart) {
2495           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2496           break;
2497         } else if (col > j + n && j >= cstart) {
2498           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2499           break;
2500         }
2501       }
2502       if (j == ncols && ncols < A->cmap->N - n) {
2503         /* a hole is outside compressed Bcols */
2504         if (ncols == 0) {
2505           if (cstart) {
2506             offdiagIdx[r] = 0;
2507           } else offdiagIdx[r] = cend;
2508         } else { /* ncols > 0 */
2509           offdiagIdx[r] = cmap[ncols-1] + 1;
2510           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2511         }
2512       }
2513     }
2514 
2515     for (j=0; j<ncols; j++) {
2516       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2517       ba++; bj++;
2518     }
2519   }
2520 
2521   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2522   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2523   for (r = 0; r < m; ++r) {
2524     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2525       a[r] = diagA[r];
2526       if (idx) idx[r] = cstart + diagIdx[r];
2527     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2528       a[r] = diagA[r];
2529       if (idx) {
2530         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2531           idx[r] = cstart + diagIdx[r];
2532         } else idx[r] = offdiagIdx[r];
2533       }
2534     } else {
2535       a[r] = offdiagA[r];
2536       if (idx) idx[r] = offdiagIdx[r];
2537     }
2538   }
2539   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2540   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2541   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2542   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2543   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2544   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2545   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2546   PetscFunctionReturn(0);
2547 }
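
/*
   The row-maximum routine above is reached through the public MatGetRowMax() interface.
   A minimal usage sketch (illustrative only, not part of this file; A is assumed to be an
   assembled parallel AIJ matrix created elsewhere):

      Vec      rowmax;
      PetscInt m,*cols;

      ierr = MatCreateVecs(A,NULL,&rowmax);CHKERRQ(ierr);
      ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
      ierr = PetscMalloc1(m,&cols);CHKERRQ(ierr);
      ierr = MatGetRowMax(A,rowmax,cols);CHKERRQ(ierr);
      ierr = PetscFree(cols);CHKERRQ(ierr);
      ierr = VecDestroy(&rowmax);CHKERRQ(ierr);

   Here rowmax is a left vector (one entry per local row) and cols optionally receives the
   global column index of each row maximum; cols may also be passed as NULL.
*/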
2548 
2549 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2550 {
2551   PetscErrorCode ierr;
2552   Mat            *dummy;
2553 
2554   PetscFunctionBegin;
2555   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2556   *newmat = *dummy;
2557   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2562 {
2563   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2564   PetscErrorCode ierr;
2565 
2566   PetscFunctionBegin;
2567   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2568   A->factorerrortype = a->A->factorerrortype;
2569   PetscFunctionReturn(0);
2570 }
2571 
2572 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2573 {
2574   PetscErrorCode ierr;
2575   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2576 
2577   PetscFunctionBegin;
2578   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2579   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2580   if (x->assembled) {
2581     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2582   } else {
2583     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2584   }
2585   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2586   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2587   PetscFunctionReturn(0);
2588 }
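
/*
   A minimal sketch of driving the routine above through the public MatSetRandom()
   interface (illustrative only; A is assumed to be a preallocated or assembled MATMPIAIJ):

      PetscRandom rctx;

      ierr = PetscRandomCreate(PetscObjectComm((PetscObject)A),&rctx);CHKERRQ(ierr);
      ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
      ierr = MatSetRandom(A,rctx);CHKERRQ(ierr);
      ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
*/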
2589 
2590 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2591 {
2592   PetscFunctionBegin;
2593   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2594   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2595   PetscFunctionReturn(0);
2596 }
2597 
2598 /*@
2599    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2600 
2601    Collective on Mat
2602 
2603    Input Parameters:
2604 +    A - the matrix
2605 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (by default the non-scalable algorithm is used)
2606 
2607  Level: advanced
2608 
2609 @*/
2610 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2611 {
2612   PetscErrorCode       ierr;
2613 
2614   PetscFunctionBegin;
2615   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2616   PetscFunctionReturn(0);
2617 }
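
/*
   A minimal sketch of selecting the scalable overlap algorithm before computing overlap
   (illustrative only; A is an assembled MATMPIAIJ and is[] an array of nis index sets
   created elsewhere, e.g. by a domain decomposition preconditioner):

      ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatIncreaseOverlap(A,nis,is,1);CHKERRQ(ierr);

   The same choice can be made from the command line with -mat_increase_overlap_scalable,
   handled by MatSetFromOptions_MPIAIJ() below.
*/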
2618 
2619 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2620 {
2621   PetscErrorCode       ierr;
2622   PetscBool            sc = PETSC_FALSE,flg;
2623 
2624   PetscFunctionBegin;
2625   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2626   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2627   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2628   if (flg) {
2629     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2630   }
2631   ierr = PetscOptionsTail();CHKERRQ(ierr);
2632   PetscFunctionReturn(0);
2633 }
2634 
2635 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2636 {
2637   PetscErrorCode ierr;
2638   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2639   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2640 
2641   PetscFunctionBegin;
2642   if (!Y->preallocated) {
2643     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2644   } else if (!aij->nz) {
2645     PetscInt nonew = aij->nonew;
2646     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2647     aij->nonew = nonew;
2648   }
2649   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2650   PetscFunctionReturn(0);
2651 }
2652 
2653 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2654 {
2655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2656   PetscErrorCode ierr;
2657 
2658   PetscFunctionBegin;
2659   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2660   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2661   if (d) {
2662     PetscInt rstart;
2663     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2664     *d += rstart;
2665 
2666   }
2667   PetscFunctionReturn(0);
2668 }
2669 
2670 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2671 {
2672   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2673   PetscErrorCode ierr;
2674 
2675   PetscFunctionBegin;
2676   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2677   PetscFunctionReturn(0);
2678 }
2679 
2680 /* -------------------------------------------------------------------*/
2681 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2682                                        MatGetRow_MPIAIJ,
2683                                        MatRestoreRow_MPIAIJ,
2684                                        MatMult_MPIAIJ,
2685                                 /* 4*/ MatMultAdd_MPIAIJ,
2686                                        MatMultTranspose_MPIAIJ,
2687                                        MatMultTransposeAdd_MPIAIJ,
2688                                        NULL,
2689                                        NULL,
2690                                        NULL,
2691                                 /*10*/ NULL,
2692                                        NULL,
2693                                        NULL,
2694                                        MatSOR_MPIAIJ,
2695                                        MatTranspose_MPIAIJ,
2696                                 /*15*/ MatGetInfo_MPIAIJ,
2697                                        MatEqual_MPIAIJ,
2698                                        MatGetDiagonal_MPIAIJ,
2699                                        MatDiagonalScale_MPIAIJ,
2700                                        MatNorm_MPIAIJ,
2701                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2702                                        MatAssemblyEnd_MPIAIJ,
2703                                        MatSetOption_MPIAIJ,
2704                                        MatZeroEntries_MPIAIJ,
2705                                 /*24*/ MatZeroRows_MPIAIJ,
2706                                        NULL,
2707                                        NULL,
2708                                        NULL,
2709                                        NULL,
2710                                 /*29*/ MatSetUp_MPIAIJ,
2711                                        NULL,
2712                                        NULL,
2713                                        MatGetDiagonalBlock_MPIAIJ,
2714                                        NULL,
2715                                 /*34*/ MatDuplicate_MPIAIJ,
2716                                        NULL,
2717                                        NULL,
2718                                        NULL,
2719                                        NULL,
2720                                 /*39*/ MatAXPY_MPIAIJ,
2721                                        MatCreateSubMatrices_MPIAIJ,
2722                                        MatIncreaseOverlap_MPIAIJ,
2723                                        MatGetValues_MPIAIJ,
2724                                        MatCopy_MPIAIJ,
2725                                 /*44*/ MatGetRowMax_MPIAIJ,
2726                                        MatScale_MPIAIJ,
2727                                        MatShift_MPIAIJ,
2728                                        MatDiagonalSet_MPIAIJ,
2729                                        MatZeroRowsColumns_MPIAIJ,
2730                                 /*49*/ MatSetRandom_MPIAIJ,
2731                                        NULL,
2732                                        NULL,
2733                                        NULL,
2734                                        NULL,
2735                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2736                                        NULL,
2737                                        MatSetUnfactored_MPIAIJ,
2738                                        MatPermute_MPIAIJ,
2739                                        NULL,
2740                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2741                                        MatDestroy_MPIAIJ,
2742                                        MatView_MPIAIJ,
2743                                        NULL,
2744                                        NULL,
2745                                 /*64*/ NULL,
2746                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2747                                        NULL,
2748                                        NULL,
2749                                        NULL,
2750                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2751                                        MatGetRowMinAbs_MPIAIJ,
2752                                        NULL,
2753                                        NULL,
2754                                        NULL,
2755                                        NULL,
2756                                 /*75*/ MatFDColoringApply_AIJ,
2757                                        MatSetFromOptions_MPIAIJ,
2758                                        NULL,
2759                                        NULL,
2760                                        MatFindZeroDiagonals_MPIAIJ,
2761                                 /*80*/ NULL,
2762                                        NULL,
2763                                        NULL,
2764                                 /*83*/ MatLoad_MPIAIJ,
2765                                        MatIsSymmetric_MPIAIJ,
2766                                        NULL,
2767                                        NULL,
2768                                        NULL,
2769                                        NULL,
2770                                 /*89*/ NULL,
2771                                        NULL,
2772                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2773                                        NULL,
2774                                        NULL,
2775                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2776                                        NULL,
2777                                        NULL,
2778                                        NULL,
2779                                        MatBindToCPU_MPIAIJ,
2780                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2781                                        NULL,
2782                                        NULL,
2783                                        MatConjugate_MPIAIJ,
2784                                        NULL,
2785                                 /*104*/MatSetValuesRow_MPIAIJ,
2786                                        MatRealPart_MPIAIJ,
2787                                        MatImaginaryPart_MPIAIJ,
2788                                        NULL,
2789                                        NULL,
2790                                 /*109*/NULL,
2791                                        NULL,
2792                                        MatGetRowMin_MPIAIJ,
2793                                        NULL,
2794                                        MatMissingDiagonal_MPIAIJ,
2795                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2796                                        NULL,
2797                                        MatGetGhosts_MPIAIJ,
2798                                        NULL,
2799                                        NULL,
2800                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2801                                        NULL,
2802                                        NULL,
2803                                        NULL,
2804                                        MatGetMultiProcBlock_MPIAIJ,
2805                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2806                                        MatGetColumnReductions_MPIAIJ,
2807                                        MatInvertBlockDiagonal_MPIAIJ,
2808                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2809                                        MatCreateSubMatricesMPI_MPIAIJ,
2810                                 /*129*/NULL,
2811                                        NULL,
2812                                        NULL,
2813                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2814                                        NULL,
2815                                 /*134*/NULL,
2816                                        NULL,
2817                                        NULL,
2818                                        NULL,
2819                                        NULL,
2820                                 /*139*/MatSetBlockSizes_MPIAIJ,
2821                                        NULL,
2822                                        NULL,
2823                                        MatFDColoringSetUp_MPIXAIJ,
2824                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2825                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2826                                 /*145*/NULL,
2827                                        NULL,
2828                                        NULL
2829 };
2830 
2831 /* ----------------------------------------------------------------------------------------*/
2832 
2833 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2834 {
2835   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2836   PetscErrorCode ierr;
2837 
2838   PetscFunctionBegin;
2839   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2840   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2841   PetscFunctionReturn(0);
2842 }
2843 
2844 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2845 {
2846   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2847   PetscErrorCode ierr;
2848 
2849   PetscFunctionBegin;
2850   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2851   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2852   PetscFunctionReturn(0);
2853 }
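
/*
   The two routines above simply delegate to the sequential diagonal (aij->A) and
   off-diagonal (aij->B) blocks.  A sketch of the typical usage pattern of the public
   interface (illustrative only; mat is an assembled MATMPIAIJ whose nonzero pattern is
   reused between solves):

      ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
      ierr = MatStoreValues(mat);CHKERRQ(ierr);
         ... modify the numerical values of mat and use it ...
      ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/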
2854 
2855 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2856 {
2857   Mat_MPIAIJ     *b;
2858   PetscErrorCode ierr;
2859   PetscMPIInt    size;
2860 
2861   PetscFunctionBegin;
2862   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2863   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2864   b = (Mat_MPIAIJ*)B->data;
2865 
2866 #if defined(PETSC_USE_CTABLE)
2867   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2868 #else
2869   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2870 #endif
2871   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2872   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2873   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2874 
2875   /* Because B may have been resized we simply destroy it and create a new one each time */
2876   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2877   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2878   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2879   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2880   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2881   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2882   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2883 
2884   if (!B->preallocated) {
2885     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2886     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2887     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2888     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2889     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2890   }
2891 
2892   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2893   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2894   B->preallocated  = PETSC_TRUE;
2895   B->was_assembled = PETSC_FALSE;
2896   B->assembled     = PETSC_FALSE;
2897   PetscFunctionReturn(0);
2898 }
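
/*
   The routine above is reached through MatMPIAIJSetPreallocation().  A minimal sketch of
   the calling sequence (illustrative only; the sizes and per-row counts are hypothetical):

      Mat B;

      ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(B,5,NULL,2,NULL);CHKERRQ(ierr);
         ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() ...
      ierr = MatDestroy(&B);CHKERRQ(ierr);

   Here 5,NULL preallocates at most 5 nonzeros per row in the diagonal block b->A and
   2,NULL at most 2 nonzeros per row in the off-diagonal block b->B.
*/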
2899 
2900 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2901 {
2902   Mat_MPIAIJ     *b;
2903   PetscErrorCode ierr;
2904 
2905   PetscFunctionBegin;
2906   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2907   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2908   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2909   b = (Mat_MPIAIJ*)B->data;
2910 
2911 #if defined(PETSC_USE_CTABLE)
2912   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2913 #else
2914   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2915 #endif
2916   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2917   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2918   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2919 
2920   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2921   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2922   B->preallocated  = PETSC_TRUE;
2923   B->was_assembled = PETSC_FALSE;
2924   B->assembled = PETSC_FALSE;
2925   PetscFunctionReturn(0);
2926 }
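
/*
   A short sketch of the public MatResetPreallocation() interface that reaches the routine
   above (illustrative only): after an assembled MATMPIAIJ has been used, the originally
   requested preallocation can be restored so the matrix may be refilled without new
   allocations:

      ierr = MatResetPreallocation(A);CHKERRQ(ierr);
         ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() again ...
*/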
2927 
2928 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2929 {
2930   Mat            mat;
2931   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2932   PetscErrorCode ierr;
2933 
2934   PetscFunctionBegin;
2935   *newmat = NULL;
2936   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2937   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2938   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2939   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2940   a       = (Mat_MPIAIJ*)mat->data;
2941 
2942   mat->factortype   = matin->factortype;
2943   mat->assembled    = matin->assembled;
2944   mat->insertmode   = NOT_SET_VALUES;
2945   mat->preallocated = matin->preallocated;
2946 
2947   a->size         = oldmat->size;
2948   a->rank         = oldmat->rank;
2949   a->donotstash   = oldmat->donotstash;
2950   a->roworiented  = oldmat->roworiented;
2951   a->rowindices   = NULL;
2952   a->rowvalues    = NULL;
2953   a->getrowactive = PETSC_FALSE;
2954 
2955   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2956   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2957 
2958   if (oldmat->colmap) {
2959 #if defined(PETSC_USE_CTABLE)
2960     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2961 #else
2962     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2963     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2964     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2965 #endif
2966   } else a->colmap = NULL;
2967   if (oldmat->garray) {
2968     PetscInt len;
2969     len  = oldmat->B->cmap->n;
2970     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2971     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2972     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2973   } else a->garray = NULL;
2974 
2975   /* It may happen that MatDuplicate is called with a non-assembled matrix;
2976      in fact, MatDuplicate only requires the matrix to be preallocated.
2977      This may happen inside a DMCreateMatrix_Shell */
2978   if (oldmat->lvec) {
2979     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2980     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2981   }
2982   if (oldmat->Mvctx) {
2983     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2984     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2985   }
2986   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2987   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2988   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2989   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2990   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2991   *newmat = mat;
2992   PetscFunctionReturn(0);
2993 }
2994 
2995 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2996 {
2997   PetscBool      isbinary, ishdf5;
2998   PetscErrorCode ierr;
2999 
3000   PetscFunctionBegin;
3001   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3002   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3003   /* force binary viewer to load .info file if it has not yet done so */
3004   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3005   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3006   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3007   if (isbinary) {
3008     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3009   } else if (ishdf5) {
3010 #if defined(PETSC_HAVE_HDF5)
3011     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3012 #else
3013     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3014 #endif
3015   } else {
3016     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3017   }
3018   PetscFunctionReturn(0);
3019 }
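
/*
   A minimal sketch of loading a MATMPIAIJ from a PETSc binary file through the dispatcher
   above (illustrative only; "matrix.dat" is a hypothetical file name):

      Mat         A;
      PetscViewer viewer;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/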
3020 
3021 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3022 {
3023   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3024   PetscInt       *rowidxs,*colidxs;
3025   PetscScalar    *matvals;
3026   PetscErrorCode ierr;
3027 
3028   PetscFunctionBegin;
3029   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3030 
3031   /* read in matrix header */
3032   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3033   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3034   M  = header[1]; N = header[2]; nz = header[3];
3035   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3036   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3037   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3038 
3039   /* set block sizes from the viewer's .info file */
3040   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3041   /* set global sizes if not set already */
3042   if (mat->rmap->N < 0) mat->rmap->N = M;
3043   if (mat->cmap->N < 0) mat->cmap->N = N;
3044   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3045   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3046 
3047   /* check if the matrix sizes are correct */
3048   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3049   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3050 
3051   /* read in row lengths and build row indices */
3052   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3053   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3054   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3055   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3056   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
3057   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3058   /* read in column indices and matrix values */
3059   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3060   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3061   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3062   /* store matrix indices and values */
3063   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3064   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3065   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3066   PetscFunctionReturn(0);
3067 }
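
/*
   Layout of the binary stream consumed by the routine above:
      header[4]        MAT_FILE_CLASSID, number of rows M, number of columns N, total nonzeros nz
      M   PetscInt     number of nonzeros in each row (converted to row offsets above)
      nz  PetscInt     column indices of all nonzeros, row by row
      nz  PetscScalar  numerical values of all nonzeros, row by row
   The rows are split across processes according to the matrix row layout and the resulting
   local CSR triple is handed to MatMPIAIJSetPreallocationCSR().
*/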
3068 
3069 /* Not scalable because of ISAllGather() unless getting all columns. */
3070 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3071 {
3072   PetscErrorCode ierr;
3073   IS             iscol_local;
3074   PetscBool      isstride;
3075   PetscMPIInt    lisstride=0,gisstride;
3076 
3077   PetscFunctionBegin;
3078   /* check if we are grabbing all columns */
3079   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3080 
3081   if (isstride) {
3082     PetscInt  start,len,mstart,mlen;
3083     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3084     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3085     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3086     if (mstart == start && mlen-mstart == len) lisstride = 1;
3087   }
3088 
3089   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3090   if (gisstride) {
3091     PetscInt N;
3092     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3093     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3094     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3095     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3096   } else {
3097     PetscInt cbs;
3098     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3099     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3100     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3101   }
3102 
3103   *isseq = iscol_local;
3104   PetscFunctionReturn(0);
3105 }
3106 
3107 /*
3108  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local with the global size of iscol
3109  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3110 
3111  Input Parameters:
3112    mat - matrix
3113    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3114            i.e., mat->rstart <= isrow[i] < mat->rend
3115    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3116            i.e., mat->cstart <= iscol[i] < mat->cend
3117  Output Parameters:
3118    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3119    iscol_o - sequential column index set for retrieving mat->B
3120    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3121  */
3122 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3123 {
3124   PetscErrorCode ierr;
3125   Vec            x,cmap;
3126   const PetscInt *is_idx;
3127   PetscScalar    *xarray,*cmaparray;
3128   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3129   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3130   Mat            B=a->B;
3131   Vec            lvec=a->lvec,lcmap;
3132   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3133   MPI_Comm       comm;
3134   VecScatter     Mvctx=a->Mvctx;
3135 
3136   PetscFunctionBegin;
3137   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3138   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3139 
3140   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3141   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3142   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3143   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3144   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3145 
3146   /* Get start indices */
3147   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3148   isstart -= ncols;
3149   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3150 
3151   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3152   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3153   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3154   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3155   for (i=0; i<ncols; i++) {
3156     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3157     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3158     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3159   }
3160   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3161   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3162   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3163 
3164   /* Get iscol_d */
3165   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3166   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3167   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3168 
3169   /* Get isrow_d */
3170   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3171   rstart = mat->rmap->rstart;
3172   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3173   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3174   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3175   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3176 
3177   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3178   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3179   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3180 
3181   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3182   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3183   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3184 
3185   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3186 
3187   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3188   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3189 
3190   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3191   /* off-process column indices */
3192   count = 0;
3193   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3194   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3195 
3196   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3197   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3198   for (i=0; i<Bn; i++) {
3199     if (PetscRealPart(xarray[i]) > -1.0) {
3200       idx[count]     = i;                   /* local column index in off-diagonal part B */
3201       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3202       count++;
3203     }
3204   }
3205   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3206   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3207 
3208   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3209   /* cannot ensure iscol_o has same blocksize as iscol! */
3210 
3211   ierr = PetscFree(idx);CHKERRQ(ierr);
3212   *garray = cmap1;
3213 
3214   ierr = VecDestroy(&x);CHKERRQ(ierr);
3215   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3216   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3217   PetscFunctionReturn(0);
3218 }
3219 
3220 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3221 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3222 {
3223   PetscErrorCode ierr;
3224   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3225   Mat            M = NULL;
3226   MPI_Comm       comm;
3227   IS             iscol_d,isrow_d,iscol_o;
3228   Mat            Asub = NULL,Bsub = NULL;
3229   PetscInt       n;
3230 
3231   PetscFunctionBegin;
3232   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3233 
3234   if (call == MAT_REUSE_MATRIX) {
3235     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3236     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3237     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3238 
3239     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3240     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3241 
3242     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3243     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3244 
3245     /* Update diagonal and off-diagonal portions of submat */
3246     asub = (Mat_MPIAIJ*)(*submat)->data;
3247     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3248     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3249     if (n) {
3250       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3251     }
3252     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3253     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3254 
3255   } else { /* call == MAT_INITIAL_MATRIX */
3256     const PetscInt *garray;
3257     PetscInt        BsubN;
3258 
3259     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3260     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3261 
3262     /* Create local submatrices Asub and Bsub */
3263     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3264     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3265 
3266     /* Create submatrix M */
3267     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3268 
3269     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3270     asub = (Mat_MPIAIJ*)M->data;
3271 
3272     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3273     n = asub->B->cmap->N;
3274     if (BsubN > n) {
3275       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3276       const PetscInt *idx;
3277       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3278       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3279 
3280       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3281       j = 0;
3282       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3283       for (i=0; i<n; i++) {
3284         if (j >= BsubN) break;
3285         while (subgarray[i] > garray[j]) j++;
3286 
3287         if (subgarray[i] == garray[j]) {
3288           idx_new[i] = idx[j++];
3289         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3290       }
3291       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3292 
3293       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3294       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3295 
3296     } else if (BsubN < n) {
3297       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than B's %D",BsubN,asub->B->cmap->N);
3298     }
3299 
3300     ierr = PetscFree(garray);CHKERRQ(ierr);
3301     *submat = M;
3302 
3303     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3304     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3305     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3306 
3307     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3308     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3309 
3310     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3311     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3312   }
3313   PetscFunctionReturn(0);
3314 }
3315 
3316 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3317 {
3318   PetscErrorCode ierr;
3319   IS             iscol_local=NULL,isrow_d;
3320   PetscInt       csize;
3321   PetscInt       n,i,j,start,end;
3322   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3323   MPI_Comm       comm;
3324 
3325   PetscFunctionBegin;
3326   /* If isrow has same processor distribution as mat,
3327      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3328   if (call == MAT_REUSE_MATRIX) {
3329     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3330     if (isrow_d) {
3331       sameRowDist  = PETSC_TRUE;
3332       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3333     } else {
3334       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3335       if (iscol_local) {
3336         sameRowDist  = PETSC_TRUE;
3337         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3338       }
3339     }
3340   } else {
3341     /* Check if isrow has same processor distribution as mat */
3342     sameDist[0] = PETSC_FALSE;
3343     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3344     if (!n) {
3345       sameDist[0] = PETSC_TRUE;
3346     } else {
3347       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3348       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3349       if (i >= start && j < end) {
3350         sameDist[0] = PETSC_TRUE;
3351       }
3352     }
3353 
3354     /* Check if iscol has same processor distribution as mat */
3355     sameDist[1] = PETSC_FALSE;
3356     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3357     if (!n) {
3358       sameDist[1] = PETSC_TRUE;
3359     } else {
3360       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3361       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3362       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3363     }
3364 
3365     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3366     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr);
3367     sameRowDist = tsameDist[0];
3368   }
3369 
3370   if (sameRowDist) {
3371     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3372       /* isrow and iscol have same processor distribution as mat */
3373       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3374       PetscFunctionReturn(0);
3375     } else { /* sameRowDist */
3376       /* isrow has same processor distribution as mat */
3377       if (call == MAT_INITIAL_MATRIX) {
3378         PetscBool sorted;
3379         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3380         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3381         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3382         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3383 
3384         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3385         if (sorted) {
3386           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3387           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3388           PetscFunctionReturn(0);
3389         }
3390       } else { /* call == MAT_REUSE_MATRIX */
3391         IS iscol_sub;
3392         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3393         if (iscol_sub) {
3394           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3395           PetscFunctionReturn(0);
3396         }
3397       }
3398     }
3399   }
3400 
3401   /* General case: iscol -> iscol_local which has global size of iscol */
3402   if (call == MAT_REUSE_MATRIX) {
3403     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3404     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3405   } else {
3406     if (!iscol_local) {
3407       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3408     }
3409   }
3410 
3411   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3412   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3413 
3414   if (call == MAT_INITIAL_MATRIX) {
3415     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3416     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3417   }
3418   PetscFunctionReturn(0);
3419 }
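
/*
   A minimal sketch of the public MatCreateSubMatrix() call dispatched by the routine above
   (illustrative only; the index sets are hypothetical and simply select the locally owned
   rows and columns, which would take the SameRowColDist path):

      IS       isrow,iscol;
      Mat      sub;
      PetscInt rstart,rend,cstart,cend;

      ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
      ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
         ... later calls may pass MAT_REUSE_MATRIX with the same index sets ...
      ierr = MatDestroy(&sub);CHKERRQ(ierr);
      ierr = ISDestroy(&isrow);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/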
3420 
3421 /*@C
3422      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3423          and "off-diagonal" parts of the matrix in CSR format.
3424 
3425    Collective
3426 
3427    Input Parameters:
3428 +  comm - MPI communicator
3429 .  A - "diagonal" portion of matrix
3430 .  B - "off-diagonal" portion of the matrix; it may have empty columns and will be destroyed by this routine
3431 -  garray - global index of B columns
3432 
3433    Output Parameter:
3434 .   mat - the matrix, with input A as its local diagonal matrix
3435    Level: advanced
3436 
3437    Notes:
3438        See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.
3439        A becomes part of the output mat, and B is destroyed by this routine. The user cannot use A or B afterwards.
3440 
3441 .seealso: MatCreateMPIAIJWithSplitArrays()
3442 @*/
3443 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3444 {
3445   PetscErrorCode    ierr;
3446   Mat_MPIAIJ        *maij;
3447   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3448   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3449   const PetscScalar *oa;
3450   Mat               Bnew;
3451   PetscInt          m,n,N;
3452 
3453   PetscFunctionBegin;
3454   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3455   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3456   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3457   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3458   /* the check below is disabled: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3459   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3460 
3461   /* Get global columns of mat */
3462   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3463 
3464   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3465   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3466   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3467   maij = (Mat_MPIAIJ*)(*mat)->data;
3468 
3469   (*mat)->preallocated = PETSC_TRUE;
3470 
3471   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3472   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3473 
3474   /* Set A as diagonal portion of *mat */
3475   maij->A = A;
3476 
3477   nz = oi[m];
3478   for (i=0; i<nz; i++) {
3479     col   = oj[i];
3480     oj[i] = garray[col];
3481   }
3482 
3483   /* Set Bnew as off-diagonal portion of *mat */
3484   ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr);
3485   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr);
3486   ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr);
3487   bnew        = (Mat_SeqAIJ*)Bnew->data;
3488   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3489   maij->B     = Bnew;
3490 
3491   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3492 
3493   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3494   b->free_a       = PETSC_FALSE;
3495   b->free_ij      = PETSC_FALSE;
3496   ierr = MatDestroy(&B);CHKERRQ(ierr);
3497 
3498   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3499   bnew->free_a       = PETSC_TRUE;
3500   bnew->free_ij      = PETSC_TRUE;
3501 
3502   /* condense columns of maij->B */
3503   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3504   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3505   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3506   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3507   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3508   PetscFunctionReturn(0);
3509 }
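
/*
   A schematic sketch of calling MatCreateMPIAIJWithSeqAIJ() (illustrative only; Asub, Bsub
   and garray stand for a diagonal block, an off-diagonal block and its global column map
   obtained elsewhere, for example as in MatCreateSubMatrix_MPIAIJ_SameRowColDist() above):

      Mat M;

      ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Asub,Bsub,garray,&M);CHKERRQ(ierr);
         Asub is now owned by M and Bsub has been destroyed; only M may be used from here on
      ierr = MatDestroy(&M);CHKERRQ(ierr);
*/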
3510 
3511 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3512 
3513 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3514 {
3515   PetscErrorCode ierr;
3516   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3517   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3518   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3519   Mat            M,Msub,B=a->B;
3520   MatScalar      *aa;
3521   Mat_SeqAIJ     *aij;
3522   PetscInt       *garray = a->garray,*colsub,Ncols;
3523   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3524   IS             iscol_sub,iscmap;
3525   const PetscInt *is_idx,*cmap;
3526   PetscBool      allcolumns=PETSC_FALSE;
3527   MPI_Comm       comm;
3528 
3529   PetscFunctionBegin;
3530   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3531   if (call == MAT_REUSE_MATRIX) {
3532     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3533     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3534     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3535 
3536     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3537     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3538 
3539     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3540     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3541 
3542     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3543 
3544   } else { /* call == MAT_INITIAL_MATRIX */
3545     PetscBool flg;
3546 
3547     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3548     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3549 
3550     /* (1) iscol -> nonscalable iscol_local */
3551     /* Check for special case: each processor gets entire matrix columns */
3552     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3553     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3554     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3555     if (allcolumns) {
3556       iscol_sub = iscol_local;
3557       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3558       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3559 
3560     } else {
3561       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3562       PetscInt *idx,*cmap1,k;
3563       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3564       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3565       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3566       count = 0;
3567       k     = 0;
3568       for (i=0; i<Ncols; i++) {
3569         j = is_idx[i];
3570         if (j >= cstart && j < cend) {
3571           /* diagonal part of mat */
3572           idx[count]     = j;
3573           cmap1[count++] = i; /* column index in submat */
3574         } else if (Bn) {
3575           /* off-diagonal part of mat */
3576           if (j == garray[k]) {
3577             idx[count]     = j;
3578             cmap1[count++] = i;  /* column index in submat */
3579           } else if (j > garray[k]) {
3580             while (j > garray[k] && k < Bn-1) k++;
3581             if (j == garray[k]) {
3582               idx[count]     = j;
3583               cmap1[count++] = i; /* column index in submat */
3584             }
3585           }
3586         }
3587       }
3588       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3589 
3590       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3591       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3592       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3593 
3594       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3595     }
3596 
3597     /* (3) Create sequential Msub */
3598     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3599   }
3600 
3601   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3602   aij  = (Mat_SeqAIJ*)(Msub)->data;
3603   ii   = aij->i;
3604   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3605 
3606   /*
3607       m - number of local rows
3608       Ncols - number of columns (same on all processors)
3609       rstart - first row in new global matrix generated
3610   */
3611   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3612 
3613   if (call == MAT_INITIAL_MATRIX) {
3614     /* (4) Create parallel newmat */
3615     PetscMPIInt    rank,size;
3616     PetscInt       csize;
3617 
3618     ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3619     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3620 
3621     /*
3622         Determine the number of non-zeros in the diagonal and off-diagonal
3623         portions of the matrix in order to do correct preallocation
3624     */
3625 
3626     /* first get start and end of "diagonal" columns */
3627     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3628     if (csize == PETSC_DECIDE) {
3629       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3630       if (mglobal == Ncols) { /* square matrix */
3631         nlocal = m;
3632       } else {
3633         nlocal = Ncols/size + ((Ncols % size) > rank);
3634       }
3635     } else {
3636       nlocal = csize;
3637     }
3638     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3639     rstart = rend - nlocal;
3640     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3641 
3642     /* next, compute all the lengths */
3643     jj    = aij->j;
3644     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3645     olens = dlens + m;
3646     for (i=0; i<m; i++) {
3647       jend = ii[i+1] - ii[i];
3648       olen = 0;
3649       dlen = 0;
3650       for (j=0; j<jend; j++) {
3651         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3652         else dlen++;
3653         jj++;
3654       }
3655       olens[i] = olen;
3656       dlens[i] = dlen;
3657     }
3658 
3659     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3660     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3661 
3662     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3663     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3664     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3665     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3666     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3667     ierr = PetscFree(dlens);CHKERRQ(ierr);
3668 
3669   } else { /* call == MAT_REUSE_MATRIX */
3670     M    = *newmat;
3671     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3672     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3673     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3674     /*
3675          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3676        rather than the slower MatSetValues().
3677     */
3678     M->was_assembled = PETSC_TRUE;
3679     M->assembled     = PETSC_FALSE;
3680   }
3681 
3682   /* (5) Set values of Msub to *newmat */
3683   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3684   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3685 
3686   jj   = aij->j;
3687   ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3688   for (i=0; i<m; i++) {
3689     row = rstart + i;
3690     nz  = ii[i+1] - ii[i];
3691     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3692     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3693     jj += nz; aa += nz;
3694   }
3695   ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3696   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3697 
3698   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3699   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3700 
3701   ierr = PetscFree(colsub);CHKERRQ(ierr);
3702 
3703   /* save Msub, iscol_sub and iscmap used in processor for next request */
3704   if (call == MAT_INITIAL_MATRIX) {
3705     *newmat = M;
3706     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3707     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3708 
3709     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3710     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3711 
3712     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3713     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3714 
3715     if (iscol_local) {
3716       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3717       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3718     }
3719   }
3720   PetscFunctionReturn(0);
3721 }
3722 
3723 /*
3724     Not great since it makes two copies of the submatrix: first a SeqAIJ
3725   on each process, and then the end result obtained by concatenating the local matrices.
3726   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3727 
3728   Note: This requires a sequential iscol with all indices.
3729 */
3730 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3731 {
3732   PetscErrorCode ierr;
3733   PetscMPIInt    rank,size;
3734   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3735   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3736   Mat            M,Mreuse;
3737   MatScalar      *aa,*vwork;
3738   MPI_Comm       comm;
3739   Mat_SeqAIJ     *aij;
3740   PetscBool      colflag,allcolumns=PETSC_FALSE;
3741 
3742   PetscFunctionBegin;
3743   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3744   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3745   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3746 
3747   /* Check for special case: each processor gets entire matrix columns */
3748   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3749   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3750   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3751   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3752 
3753   if (call ==  MAT_REUSE_MATRIX) {
3754     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3755     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3756     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3757   } else {
3758     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3759   }
3760 
3761   /*
3762       m - number of local rows
3763       n - number of columns (same on all processors)
3764       rstart - first row in new global matrix generated
3765   */
3766   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3767   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3768   if (call == MAT_INITIAL_MATRIX) {
3769     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3770     ii  = aij->i;
3771     jj  = aij->j;
3772 
3773     /*
3774         Determine the number of non-zeros in the diagonal and off-diagonal
3775         portions of the matrix in order to do correct preallocation
3776     */
3777 
3778     /* first get start and end of "diagonal" columns */
3779     if (csize == PETSC_DECIDE) {
3780       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3781       if (mglobal == n) { /* square matrix */
3782         nlocal = m;
3783       } else {
3784         nlocal = n/size + ((n % size) > rank);
3785       }
3786     } else {
3787       nlocal = csize;
3788     }
3789     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3790     rstart = rend - nlocal;
3791     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3792 
3793     /* next, compute all the lengths */
3794     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3795     olens = dlens + m;
3796     for (i=0; i<m; i++) {
3797       jend = ii[i+1] - ii[i];
3798       olen = 0;
3799       dlen = 0;
3800       for (j=0; j<jend; j++) {
3801         if (*jj < rstart || *jj >= rend) olen++;
3802         else dlen++;
3803         jj++;
3804       }
3805       olens[i] = olen;
3806       dlens[i] = dlen;
3807     }
3808     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3809     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3810     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3811     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3812     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3813     ierr = PetscFree(dlens);CHKERRQ(ierr);
3814   } else {
3815     PetscInt ml,nl;
3816 
3817     M    = *newmat;
3818     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3819     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3820     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3821     /*
3822          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3823        rather than the slower MatSetValues().
3824     */
3825     M->was_assembled = PETSC_TRUE;
3826     M->assembled     = PETSC_FALSE;
3827   }
3828   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3829   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3830   ii   = aij->i;
3831   jj   = aij->j;
3832 
3833   /* trigger copy to CPU if needed */
3834   ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3835   for (i=0; i<m; i++) {
3836     row   = rstart + i;
3837     nz    = ii[i+1] - ii[i];
3838     cwork = jj; jj += nz;
3839     vwork = aa; aa += nz;
3840     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3841   }
3842   ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3843 
3844   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3845   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3846   *newmat = M;
3847 
3848   /* save submatrix used in processor for next request */
3849   if (call ==  MAT_INITIAL_MATRIX) {
3850     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3851     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3852   }
3853   PetscFunctionReturn(0);
3854 }
3855 
3856 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3857 {
3858   PetscInt       m,cstart, cend,j,nnz,i,d;
3859   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3860   const PetscInt *JJ;
3861   PetscErrorCode ierr;
3862   PetscBool      nooffprocentries;
3863 
3864   PetscFunctionBegin;
3865   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3866 
3867   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3868   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3869   m      = B->rmap->n;
3870   cstart = B->cmap->rstart;
3871   cend   = B->cmap->rend;
3872   rstart = B->rmap->rstart;
3873 
3874   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3875 
3876   if (PetscDefined(USE_DEBUG)) {
3877     for (i=0; i<m; i++) {
3878       nnz = Ii[i+1]- Ii[i];
3879       JJ  = J + Ii[i];
3880       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3881       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3882       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (must be less than %D)",i,JJ[nnz-1],B->cmap->N);
3883     }
3884   }
3885 
3886   for (i=0; i<m; i++) {
3887     nnz     = Ii[i+1]- Ii[i];
3888     JJ      = J + Ii[i];
3889     nnz_max = PetscMax(nnz_max,nnz);
3890     d       = 0;
3891     for (j=0; j<nnz; j++) {
3892       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3893     }
3894     d_nnz[i] = d;
3895     o_nnz[i] = nnz - d;
3896   }
3897   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3898   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3899 
3900   for (i=0; i<m; i++) {
3901     ii   = i + rstart;
3902     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3903   }
3904   nooffprocentries    = B->nooffprocentries;
3905   B->nooffprocentries = PETSC_TRUE;
3906   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3907   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3908   B->nooffprocentries = nooffprocentries;
3909 
3910   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3911   PetscFunctionReturn(0);
3912 }
3913 
3914 /*@
3915    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3916    (the default parallel PETSc format).
3917 
3918    Collective
3919 
3920    Input Parameters:
3921 +  B - the matrix
3922 .  i - the indices into j for the start of each local row (starts with zero)
3923 .  j - the column indices for each local row (starts with zero)
3924 -  v - optional values in the matrix
3925 
3926    Level: developer
3927 
3928    Notes:
3929        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3930      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3931      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3932 
3933        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3934 
3935        The format used for the sparse matrix input is equivalent to a
3936     row-major ordering, i.e. for the following matrix the expected input data is
3937     as shown:
3938 
3939 $        1 0 0
3940 $        2 0 3     P0
3941 $       -------
3942 $        4 5 6     P1
3943 $
3944 $     Process0 [P0]: rows_owned=[0,1]
3945 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3946 $        j =  {0,0,2}  [size = 3]
3947 $        v =  {1,2,3}  [size = 3]
3948 $
3949 $     Process1 [P1]: rows_owned=[2]
3950 $        i =  {0,3}    [size = nrow+1  = 1+1]
3951 $        j =  {0,1,2}  [size = 3]
3952 $        v =  {4,5,6}  [size = 3]
3953 
3954 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3955           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3956 @*/
3957 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3958 {
3959   PetscErrorCode ierr;
3960 
3961   PetscFunctionBegin;
3962   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3963   PetscFunctionReturn(0);
3964 }
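
/*
   A minimal usage sketch for MatMPIAIJSetPreallocationCSR(), assuming exactly two MPI ranks and
   the hypothetical local CSR arrays from the layout shown in the manual page above (rank 0 owns
   rows 0-1, rank 1 owns row 2).  As the implementation above shows, the routine both preallocates
   and fills the matrix, so no further MatAssemblyBegin()/MatAssemblyEnd() calls are needed:

     Mat               B;
     PetscMPIInt       rank;
     PetscErrorCode    ierr;
     const PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2}, i1[] = {0,3}, j1[] = {0,1,2};
     const PetscScalar v0[] = {1,2,3}, v1[] = {4,5,6};

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRMPI(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,rank ? i1 : i0,rank ? j1 : j0,rank ? v1 : v0);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/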
3965 
3966 /*@C
3967    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3968    (the default parallel PETSc format).  For good matrix assembly performance
3969    the user should preallocate the matrix storage by setting the parameters
3970    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3971    performance can be increased by more than a factor of 50.
3972 
3973    Collective
3974 
3975    Input Parameters:
3976 +  B - the matrix
3977 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3978            (same value is used for all local rows)
3979 .  d_nnz - array containing the number of nonzeros in the various rows of the
3980            DIAGONAL portion of the local submatrix (possibly different for each row)
3981            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3982            The size of this array is equal to the number of local rows, i.e 'm'.
3983            For matrices that will be factored, you must leave room for (and set)
3984            the diagonal entry even if it is zero.
3985 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3986            submatrix (same value is used for all local rows).
3987 -  o_nnz - array containing the number of nonzeros in the various rows of the
3988            OFF-DIAGONAL portion of the local submatrix (possibly different for
3989            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3990            structure. The size of this array is equal to the number
3991            of local rows, i.e 'm'.
3992 
3993    If the *_nnz parameter is given then the *_nz parameter is ignored
3994 
3995    The AIJ format (also called the Yale sparse matrix format or
3996    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3997    storage.  The stored row and column indices begin with zero.
3998    See Users-Manual: ch_mat for details.
3999 
4000    The parallel matrix is partitioned such that the first m0 rows belong to
4001    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4002    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4003 
4004    The DIAGONAL portion of the local submatrix of a processor can be defined
4005    as the submatrix obtained by extracting the part corresponding to
4006    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4007    first row that belongs to the processor, r2 is the last row belonging to
4008    this processor, and c1-c2 is the range of indices of the local part of a
4009    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4010    common case of a square matrix, the row and column ranges are the same and
4011    the DIAGONAL part is also square. The remaining portion of the local
4012    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4013 
4014    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4015 
4016    You can call MatGetInfo() to get information on how effective the preallocation was;
4017    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4018    You can also run with the option -info and look for messages with the string
4019    malloc in them to see if additional memory allocation was needed.
4020 
4021    Example usage:
4022 
4023    Consider the following 8x8 matrix with 34 non-zero values, that is
4024    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4025    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4026    as follows:
4027 
4028 .vb
4029             1  2  0  |  0  3  0  |  0  4
4030     Proc0   0  5  6  |  7  0  0  |  8  0
4031             9  0 10  | 11  0  0  | 12  0
4032     -------------------------------------
4033            13  0 14  | 15 16 17  |  0  0
4034     Proc1   0 18  0  | 19 20 21  |  0  0
4035             0  0  0  | 22 23  0  | 24  0
4036     -------------------------------------
4037     Proc2  25 26 27  |  0  0 28  | 29  0
4038            30  0  0  | 31 32 33  |  0 34
4039 .ve
4040 
4041    This can be represented as a collection of submatrices as:
4042 
4043 .vb
4044       A B C
4045       D E F
4046       G H I
4047 .ve
4048 
4049    Where the submatrices A,B,C are owned by proc0, D,E,F are
4050    owned by proc1, G,H,I are owned by proc2.
4051 
4052    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4053    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4054    The 'M','N' parameters are 8,8, and have the same values on all procs.
4055 
4056    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4057    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4058    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4059    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4060    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4061    matrix, and [DF] as another SeqAIJ matrix.
4062 
4063    When d_nz, o_nz parameters are specified, d_nz storage elements are
4064    allocated for every row of the local diagonal submatrix, and o_nz
4065    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4066    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local
4067    rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4068    In this case, the values of d_nz,o_nz are:
4069 .vb
4070      proc0 : dnz = 2, o_nz = 2
4071      proc1 : dnz = 3, o_nz = 2
4072      proc2 : dnz = 1, o_nz = 4
4073 .ve
4074    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4075    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4076    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4077    34 values.
4078 
4079    When d_nnz, o_nnz parameters are specified, the storage is specified
4080    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4081    In the above case the values for d_nnz,o_nnz are:
4082 .vb
4083      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4084      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4085      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4086 .ve
4087    Here the space allocated is the sum of all the above values, i.e. 34, and
4088    hence the preallocation is perfect.
4089 
4090    Level: intermediate
4091 
4092 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4093           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4094 @*/
4095 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4096 {
4097   PetscErrorCode ierr;
4098 
4099   PetscFunctionBegin;
4100   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4101   PetscValidType(B,1);
4102   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4103   PetscFunctionReturn(0);
4104 }
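
/*
   A minimal sketch of the recommended MatCreate()/MatSetType()/MatMPIAIJSetPreallocation()
   sequence, assuming the 8x8 example above and showing only what rank 0 would do; the
   hard-coded sizes and d_nnz/o_nnz values are the hypothetical rank-0 numbers from that
   example, and the other ranks would use their own:

     Mat            A;
     PetscErrorCode ierr;
     PetscInt       d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     ...  insert entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd()  ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/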
4105 
4106 /*@
4107      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4108          CSR format.
4109 
4110    Collective
4111 
4112    Input Parameters:
4113 +  comm - MPI communicator
4114 .  m - number of local rows (Cannot be PETSC_DECIDE)
4115 .  n - This value should be the same as the local size used in creating the
4116        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4117        calculated if N is given). For square matrices n is almost always m.
4118 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4119 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4120 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4121 .   j - column indices
4122 -   a - matrix values
4123 
4124    Output Parameter:
4125 .   mat - the matrix
4126 
4127    Level: intermediate
4128 
4129    Notes:
4130        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4131      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4132      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4133 
4134        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4135 
4136        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4137
4138        The format used for the sparse matrix input is equivalent to a
4139     row-major ordering, i.e. for the following matrix the expected input data is
4140     as shown:
4141 
4142 $        1 0 0
4143 $        2 0 3     P0
4144 $       -------
4145 $        4 5 6     P1
4146 $
4147 $     Process0 [P0]: rows_owned=[0,1]
4148 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4149 $        j =  {0,0,2}  [size = 3]
4150 $        v =  {1,2,3}  [size = 3]
4151 $
4152 $     Process1 [P1]: rows_owned=[2]
4153 $        i =  {0,3}    [size = nrow+1  = 1+1]
4154 $        j =  {0,1,2}  [size = 3]
4155 $        v =  {4,5,6}  [size = 3]
4156 
4157 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4158           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4159 @*/
4160 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4161 {
4162   PetscErrorCode ierr;
4163 
4164   PetscFunctionBegin;
4165   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4166   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4167   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4168   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4169   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4170   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4171   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4172   PetscFunctionReturn(0);
4173 }
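
/*
   A minimal sketch, assuming two MPI ranks and the hypothetical CSR arrays of the layout shown
   in the manual page above.  The arrays are copied by the call, so stack storage is fine:

     Mat               A;
     PetscMPIInt       rank;
     PetscErrorCode    ierr;
     const PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2}, i1[] = {0,3}, j1[] = {0,1,2};
     const PetscScalar a0[] = {1,2,3}, a1[] = {4,5,6};

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRMPI(ierr);
     if (!rank) {
       ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i0,j0,a0,&A);CHKERRQ(ierr);
     } else {
       ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,i1,j1,a1,&A);CHKERRQ(ierr);
     }
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/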
4174 
4175 /*@
4176      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
4177          CSR format. Only the numerical values are updated; the other arrays must be identical.
4178 
4179    Collective
4180 
4181    Input Parameters:
4182 +  mat - the matrix
4183 .  m - number of local rows (Cannot be PETSC_DECIDE)
4184 .  n - This value should be the same as the local size used in creating the
4185        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4186        calculated if N is given). For square matrices n is almost always m.
4187 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4188 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4189 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4190 .  J - column indices
4191 -  v - matrix values
4192 
4193    Level: intermediate
4194 
4195 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4196           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4197 @*/
4198 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4199 {
4200   PetscErrorCode ierr;
4201   PetscInt       cstart,nnz,i,j;
4202   PetscInt       *ld;
4203   PetscBool      nooffprocentries;
4204   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4205   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4206   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4207   const PetscInt *Adi = Ad->i;
4208   PetscInt       ldi,Iii,md;
4209 
4210   PetscFunctionBegin;
4211   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4212   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4213   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4214   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4215 
4216   cstart = mat->cmap->rstart;
4217   if (!Aij->ld) {
4218     /* count number of entries below block diagonal */
4219     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4220     Aij->ld = ld;
4221     for (i=0; i<m; i++) {
4222       nnz  = Ii[i+1]- Ii[i];
4223       j     = 0;
4224       while (j < nnz && J[j] < cstart) j++; /* check j < nnz before reading J[j] */
4225       J    += nnz;
4226       ld[i] = j;
4227     }
4228   } else {
4229     ld = Aij->ld;
4230   }
4231 
4232   for (i=0; i<m; i++) {
4233     nnz  = Ii[i+1]- Ii[i];
4234     Iii  = Ii[i];
4235     ldi  = ld[i];
4236     md   = Adi[i+1]-Adi[i];
4237     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4238     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4239     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4240     ad  += md;
4241     ao  += nnz - md;
4242   }
4243   nooffprocentries      = mat->nooffprocentries;
4244   mat->nooffprocentries = PETSC_TRUE;
4245   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4246   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4247   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4248   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4249   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4250   mat->nooffprocentries = nooffprocentries;
4251   PetscFunctionReturn(0);
4252 }
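
/*
   A minimal sketch of the create-then-update pattern, assuming hypothetical local arrays ii,jj
   plus two value arrays v_old,v_new that share exactly the same nonzero structure:

     ierr = MatCreateMPIAIJWithArrays(comm,m,n,M,N,ii,jj,v_old,&A);CHKERRQ(ierr);
     ...  use A  ...
     ierr = MatUpdateMPIAIJWithArrays(A,m,n,M,N,ii,jj,v_new);CHKERRQ(ierr);
     ...  A now holds the new numerical values with the unchanged nonzero pattern  ...
*/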
4253 
4254 /*@C
4255    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4256    (the default parallel PETSc format).  For good matrix assembly performance
4257    the user should preallocate the matrix storage by setting the parameters
4258    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4259    performance can be increased by more than a factor of 50.
4260 
4261    Collective
4262 
4263    Input Parameters:
4264 +  comm - MPI communicator
4265 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4266            This value should be the same as the local size used in creating the
4267            y vector for the matrix-vector product y = Ax.
4268 .  n - This value should be the same as the local size used in creating the
4269        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4270        calculated if N is given). For square matrices n is almost always m.
4271 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4272 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4273 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4274            (same value is used for all local rows)
4275 .  d_nnz - array containing the number of nonzeros in the various rows of the
4276            DIAGONAL portion of the local submatrix (possibly different for each row)
4277            or NULL, if d_nz is used to specify the nonzero structure.
4278            The size of this array is equal to the number of local rows, i.e 'm'.
4279 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4280            submatrix (same value is used for all local rows).
4281 -  o_nnz - array containing the number of nonzeros in the various rows of the
4282            OFF-DIAGONAL portion of the local submatrix (possibly different for
4283            each row) or NULL, if o_nz is used to specify the nonzero
4284            structure. The size of this array is equal to the number
4285            of local rows, i.e 'm'.
4286 
4287    Output Parameter:
4288 .  A - the matrix
4289 
4290    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4291    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4292    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4293 
4294    Notes:
4295    If the *_nnz parameter is given then the *_nz parameter is ignored
4296 
4297    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4298    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4299    storage requirements for this matrix.
4300 
4301    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4302    processor then it must be used on all processors that share the object for
4303    that argument.
4304 
4305    The user MUST specify either the local or global matrix dimensions
4306    (possibly both).
4307 
4308    The parallel matrix is partitioned across processors such that the
4309    first m0 rows belong to process 0, the next m1 rows belong to
4310    process 1, the next m2 rows belong to process 2, etc., where
4311    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4312    values corresponding to an [m x N] submatrix.
4313 
4314    The columns are logically partitioned with the n0 columns belonging
4315    to 0th partition, the next n1 columns belonging to the next
4316    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4317 
4318    The DIAGONAL portion of the local submatrix on any given processor
4319    is the submatrix formed by the rows and columns m,n
4320    owned by that processor, i.e. the diagonal matrix on
4321    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4322    etc. The remaining portion of the local submatrix [m x (N-n)]
4323    constitutes the OFF-DIAGONAL portion. The example below better
4324    illustrates this concept.
4325 
4326    For a square global matrix we define each processor's diagonal portion
4327    to be its local rows and the corresponding columns (a square submatrix);
4328    each processor's off-diagonal portion encompasses the remainder of the
4329    local matrix (a rectangular submatrix).
4330 
4331    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4332 
4333    When calling this routine with a single process communicator, a matrix of
4334    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4335    type of communicator, use the construction mechanism
4336 .vb
4337      MatCreate(...,&A);
4338      MatSetType(A,MATMPIAIJ);
4339      MatSetSizes(A, m,n,M,N);
4340      MatMPIAIJSetPreallocation(A,...);
4341 .ve
4344 
4345    By default, this format uses inodes (identical nodes) when possible.
4346    We search for consecutive rows with the same nonzero structure, thereby
4347    reusing matrix information to achieve increased efficiency.
4348 
4349    Options Database Keys:
4350 +  -mat_no_inode  - Do not use inodes
4351 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4352 
4353    Example usage:
4354 
4355    Consider the following 8x8 matrix with 34 non-zero values, that is
4356    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4357    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4358    as follows
4359 
4360 .vb
4361             1  2  0  |  0  3  0  |  0  4
4362     Proc0   0  5  6  |  7  0  0  |  8  0
4363             9  0 10  | 11  0  0  | 12  0
4364     -------------------------------------
4365            13  0 14  | 15 16 17  |  0  0
4366     Proc1   0 18  0  | 19 20 21  |  0  0
4367             0  0  0  | 22 23  0  | 24  0
4368     -------------------------------------
4369     Proc2  25 26 27  |  0  0 28  | 29  0
4370            30  0  0  | 31 32 33  |  0 34
4371 .ve
4372 
4373    This can be represented as a collection of submatrices as
4374 
4375 .vb
4376       A B C
4377       D E F
4378       G H I
4379 .ve
4380 
4381    Where the submatrices A,B,C are owned by proc0, D,E,F are
4382    owned by proc1, G,H,I are owned by proc2.
4383 
4384    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4385    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4386    The 'M','N' parameters are 8,8, and have the same values on all procs.
4387 
4388    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4389    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4390    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4391    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4392    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4393    matrix, and [DF] as another SeqAIJ matrix.
4394 
4395    When d_nz, o_nz parameters are specified, d_nz storage elements are
4396    allocated for every row of the local diagonal submatrix, and o_nz
4397    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4398    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local
4399    rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4400    In this case, the values of d_nz,o_nz are
4401 .vb
4402      proc0 : dnz = 2, o_nz = 2
4403      proc1 : dnz = 3, o_nz = 2
4404      proc2 : dnz = 1, o_nz = 4
4405 .ve
4406    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4407    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4408    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4409    34 values.
4410 
4411    When d_nnz, o_nnz parameters are specified, the storage is specified
4412    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4413    In the above case the values for d_nnz,o_nnz are
4414 .vb
4415      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4416      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4417      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4418 .ve
4419    Here the space allocated is the sum of all the above values, i.e. 34, and
4420    hence the preallocation is perfect.
4421 
4422    Level: intermediate
4423 
4424 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4425           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4426 @*/
4427 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4428 {
4429   PetscErrorCode ierr;
4430   PetscMPIInt    size;
4431 
4432   PetscFunctionBegin;
4433   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4434   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4435   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4436   if (size > 1) {
4437     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4438     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4439   } else {
4440     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4441     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4442   }
4443   PetscFunctionReturn(0);
4444 }
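
/*
   A minimal sketch, assuming the 8x8 example above on three MPI ranks and using the scalar
   per-rank preallocation values d_nz/o_nz listed there (the hard-coded numbers are the
   hypothetical values from that example):

     Mat            A;
     PetscErrorCode ierr;
     PetscMPIInt    rank;
     PetscInt       m,d_nz,o_nz;

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRMPI(ierr);
     if (rank == 0)      {m = 3; d_nz = 2; o_nz = 2;}
     else if (rank == 1) {m = 3; d_nz = 3; o_nz = 2;}
     else                {m = 2; d_nz = 1; o_nz = 4;}
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,PETSC_DECIDE,8,8,d_nz,NULL,o_nz,NULL,&A);CHKERRQ(ierr);
     ...  insert entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd()  ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/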
4445 
4446 /*@C
4447   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4448 
4449   Not collective
4450 
4451   Input Parameter:
4452 . A - The MPIAIJ matrix
4453 
4454   Output Parameters:
4455 + Ad - The local diagonal block as a SeqAIJ matrix
4456 . Ao - The local off-diagonal block as a SeqAIJ matrix
4457 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4458 
4459   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4460   in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4461   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4462   local column numbers to global column numbers in the original matrix.
4463 
4464   Level: intermediate
4465 
4466 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4467 @*/
4468 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4469 {
4470   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4471   PetscBool      flg;
4472   PetscErrorCode ierr;
4473 
4474   PetscFunctionBegin;
4475   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4476   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4477   if (Ad)     *Ad     = a->A;
4478   if (Ao)     *Ao     = a->B;
4479   if (colmap) *colmap = a->garray;
4480   PetscFunctionReturn(0);
4481 }
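
/*
   A minimal sketch of inspecting the two local blocks of an already assembled MATMPIAIJ matrix A
   (A is assumed to exist).  Ad, Ao and colmap are borrowed references owned by A and must not be
   destroyed or freed by the caller:

     Mat            Ad,Ao;
     const PetscInt *colmap;
     PetscInt       nco;
     PetscErrorCode ierr;

     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
     ierr = MatGetSize(Ao,NULL,&nco);CHKERRQ(ierr);
     ...  colmap[k] is the global column of local column k of Ao, for k = 0,...,nco-1  ...
*/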
4482 
4483 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4484 {
4485   PetscErrorCode ierr;
4486   PetscInt       m,N,i,rstart,nnz,Ii;
4487   PetscInt       *indx;
4488   PetscScalar    *values;
4489 
4490   PetscFunctionBegin;
4491   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4492   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4493     PetscInt       *dnz,*onz,sum,bs,cbs;
4494 
4495     if (n == PETSC_DECIDE) {
4496       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4497     }
4498     /* Check sum(n) = N */
4499     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
4500     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4501 
4502     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
4503     rstart -= m;
4504 
4505     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4506     for (i=0; i<m; i++) {
4507       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4508       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4509       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4510     }
4511 
4512     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4513     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4514     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4515     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4516     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4517     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4518     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4519     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4520     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
4521   }
4522 
4523   /* numeric phase */
4524   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4525   for (i=0; i<m; i++) {
4526     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4527     Ii   = i + rstart;
4528     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4529     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4530   }
4531   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4532   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4533   PetscFunctionReturn(0);
4534 }
4535 
4536 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4537 {
4538   PetscErrorCode    ierr;
4539   PetscMPIInt       rank;
4540   PetscInt          m,N,i,rstart,nnz;
4541   size_t            len;
4542   const PetscInt    *indx;
4543   PetscViewer       out;
4544   char              *name;
4545   Mat               B;
4546   const PetscScalar *values;
4547 
4548   PetscFunctionBegin;
4549   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4550   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4551   /* Should this be the type of the diagonal block of A? */
4552   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4553   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4554   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4555   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4556   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4557   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4558   for (i=0; i<m; i++) {
4559     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4560     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4561     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4562   }
4563   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4564   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4565 
4566   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
4567   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4568   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4569   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4570   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4571   ierr = PetscFree(name);CHKERRQ(ierr);
4572   ierr = MatView(B,out);CHKERRQ(ierr);
4573   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4574   ierr = MatDestroy(&B);CHKERRQ(ierr);
4575   PetscFunctionReturn(0);
4576 }
4577 
4578 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4579 {
4580   PetscErrorCode      ierr;
4581   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4582 
4583   PetscFunctionBegin;
4584   if (!merge) PetscFunctionReturn(0);
4585   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4586   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4587   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4588   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4589   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4590   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4591   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4592   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4593   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4594   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4595   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4596   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4597   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4598   ierr = PetscFree(merge);CHKERRQ(ierr);
4599   PetscFunctionReturn(0);
4600 }
4601 
4602 #include <../src/mat/utils/freespace.h>
4603 #include <petscbt.h>
4604 
4605 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4606 {
4607   PetscErrorCode      ierr;
4608   MPI_Comm            comm;
4609   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4610   PetscMPIInt         size,rank,taga,*len_s;
4611   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4612   PetscInt            proc,m;
4613   PetscInt            **buf_ri,**buf_rj;
4614   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4615   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4616   MPI_Request         *s_waits,*r_waits;
4617   MPI_Status          *status;
4618   MatScalar           *aa=a->a;
4619   MatScalar           **abuf_r,*ba_i;
4620   Mat_Merge_SeqsToMPI *merge;
4621   PetscContainer      container;
4622 
4623   PetscFunctionBegin;
4624   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4625   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4626 
4627   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4628   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4629 
4630   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4631   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4632   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4633 
4634   bi     = merge->bi;
4635   bj     = merge->bj;
4636   buf_ri = merge->buf_ri;
4637   buf_rj = merge->buf_rj;
4638 
4639   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4640   owners = merge->rowmap->range;
4641   len_s  = merge->len_s;
4642 
4643   /* send and recv matrix values */
4644   /*-----------------------------*/
4645   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4646   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4647 
4648   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4649   for (proc=0,k=0; proc<size; proc++) {
4650     if (!len_s[proc]) continue;
4651     i    = owners[proc];
4652     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
4653     k++;
4654   }
4655 
4656   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
4657   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
4658   ierr = PetscFree(status);CHKERRQ(ierr);
4659 
4660   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4661   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4662 
4663   /* insert mat values of mpimat */
4664   /*----------------------------*/
4665   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4666   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4667 
4668   for (k=0; k<merge->nrecv; k++) {
4669     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4670     nrows       = *(buf_ri_k[k]);
4671     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4672     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4673   }
4674 
4675   /* set values of ba */
4676   m = merge->rowmap->n;
4677   for (i=0; i<m; i++) {
4678     arow = owners[rank] + i;
4679     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4680     bnzi = bi[i+1] - bi[i];
4681     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4682 
4683     /* add local non-zero vals of this proc's seqmat into ba */
4684     anzi   = ai[arow+1] - ai[arow];
4685     aj     = a->j + ai[arow];
4686     aa     = a->a + ai[arow];
4687     nextaj = 0;
4688     for (j=0; nextaj<anzi; j++) {
4689       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4690         ba_i[j] += aa[nextaj++];
4691       }
4692     }
4693 
4694     /* add received vals into ba */
4695     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4696       /* i-th row */
4697       if (i == *nextrow[k]) {
4698         anzi   = *(nextai[k]+1) - *nextai[k];
4699         aj     = buf_rj[k] + *(nextai[k]);
4700         aa     = abuf_r[k] + *(nextai[k]);
4701         nextaj = 0;
4702         for (j=0; nextaj<anzi; j++) {
4703           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4704             ba_i[j] += aa[nextaj++];
4705           }
4706         }
4707         nextrow[k]++; nextai[k]++;
4708       }
4709     }
4710     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4711   }
4712   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4713   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4714 
4715   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4716   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4717   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4718   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4719   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4720   PetscFunctionReturn(0);
4721 }
4722 
4723 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4724 {
4725   PetscErrorCode      ierr;
4726   Mat                 B_mpi;
4727   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4728   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4729   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4730   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4731   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4732   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4733   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4734   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4735   MPI_Status          *status;
4736   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4737   PetscBT             lnkbt;
4738   Mat_Merge_SeqsToMPI *merge;
4739   PetscContainer      container;
4740 
4741   PetscFunctionBegin;
4742   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4743 
4744   /* make sure it is a PETSc comm */
4745   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4746   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4747   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4748 
4749   ierr = PetscNew(&merge);CHKERRQ(ierr);
4750   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4751 
4752   /* determine row ownership */
4753   /*---------------------------------------------------------*/
4754   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4755   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4756   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4757   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4758   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4759   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4760   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4761 
4762   m      = merge->rowmap->n;
4763   owners = merge->rowmap->range;
4764 
4765   /* determine the number of messages to send, their lengths */
4766   /*---------------------------------------------------------*/
4767   len_s = merge->len_s;
4768 
4769   len          = 0; /* length of buf_si[] */
4770   merge->nsend = 0;
4771   for (proc=0; proc<size; proc++) {
4772     len_si[proc] = 0;
4773     if (proc == rank) {
4774       len_s[proc] = 0;
4775     } else {
4776       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4777       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4778     }
4779     if (len_s[proc]) {
4780       merge->nsend++;
4781       nrows = 0;
4782       for (i=owners[proc]; i<owners[proc+1]; i++) {
4783         if (ai[i+1] > ai[i]) nrows++;
4784       }
4785       len_si[proc] = 2*(nrows+1);
4786       len         += len_si[proc];
4787     }
4788   }
4789 
4790   /* determine the number and length of messages to receive for ij-structure */
4791   /*-------------------------------------------------------------------------*/
4792   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4793   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4794 
4795   /* post the Irecv of j-structure */
4796   /*-------------------------------*/
4797   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4798   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4799 
4800   /* post the Isend of j-structure */
4801   /*--------------------------------*/
4802   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4803 
4804   for (proc=0, k=0; proc<size; proc++) {
4805     if (!len_s[proc]) continue;
4806     i    = owners[proc];
4807     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
4808     k++;
4809   }
4810 
4811   /* receives and sends of j-structure are complete */
4812   /*------------------------------------------------*/
4813   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
4814   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}
4815 
4816   /* send and recv i-structure */
4817   /*---------------------------*/
4818   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4819   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4820 
4821   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4822   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4823   for (proc=0,k=0; proc<size; proc++) {
4824     if (!len_s[proc]) continue;
4825     /* form outgoing message for i-structure:
4826          buf_si[0]:                 nrows to be sent
4827                [1:nrows]:           row index (global)
4828                [nrows+1:2*nrows+1]: i-structure index
4829     */
4830     /*-------------------------------------------*/
4831     nrows       = len_si[proc]/2 - 1;
4832     buf_si_i    = buf_si + nrows+1;
4833     buf_si[0]   = nrows;
4834     buf_si_i[0] = 0;
4835     nrows       = 0;
4836     for (i=owners[proc]; i<owners[proc+1]; i++) {
4837       anzi = ai[i+1] - ai[i];
4838       if (anzi) {
4839         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4840         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4841         nrows++;
4842       }
4843     }
4844     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
4845     k++;
4846     buf_si += len_si[proc];
4847   }
4848 
4849   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
4850   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}
4851 
4852   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4853   for (i=0; i<merge->nrecv; i++) {
4854     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4855   }
4856 
4857   ierr = PetscFree(len_si);CHKERRQ(ierr);
4858   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4859   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4860   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4861   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4862   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4863   ierr = PetscFree(status);CHKERRQ(ierr);
4864 
4865   /* compute a local seq matrix in each processor */
4866   /*----------------------------------------------*/
4867   /* allocate bi array and free space for accumulating nonzero column info */
4868   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4869   bi[0] = 0;
4870 
4871   /* create and initialize a linked list */
4872   nlnk = N+1;
4873   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4874 
4875   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4876   len  = ai[owners[rank+1]] - ai[owners[rank]];
4877   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4878 
4879   current_space = free_space;
4880 
4881   /* determine symbolic info for each local row */
4882   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4883 
4884   for (k=0; k<merge->nrecv; k++) {
4885     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4886     nrows       = *buf_ri_k[k];
4887     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4888     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4889   }
4890 
4891   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4892   len  = 0;
4893   for (i=0; i<m; i++) {
4894     bnzi = 0;
4895     /* add local non-zero cols of this proc's seqmat into lnk */
4896     arow  = owners[rank] + i;
4897     anzi  = ai[arow+1] - ai[arow];
4898     aj    = a->j + ai[arow];
4899     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4900     bnzi += nlnk;
4901     /* add received col data into lnk */
4902     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4903       if (i == *nextrow[k]) { /* i-th row */
4904         anzi  = *(nextai[k]+1) - *nextai[k];
4905         aj    = buf_rj[k] + *nextai[k];
4906         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4907         bnzi += nlnk;
4908         nextrow[k]++; nextai[k]++;
4909       }
4910     }
4911     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4912 
4913     /* if free space is not available, make more free space */
4914     if (current_space->local_remaining<bnzi) {
4915       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4916       nspacedouble++;
4917     }
4918     /* copy data into free space, then initialize lnk */
4919     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4920     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4921 
4922     current_space->array           += bnzi;
4923     current_space->local_used      += bnzi;
4924     current_space->local_remaining -= bnzi;
4925 
4926     bi[i+1] = bi[i] + bnzi;
4927   }
4928 
4929   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4930 
4931   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4932   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4933   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4934 
4935   /* create symbolic parallel matrix B_mpi */
4936   /*---------------------------------------*/
4937   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4938   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4939   if (n==PETSC_DECIDE) {
4940     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4941   } else {
4942     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4943   }
4944   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4945   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4946   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4947   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4948   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4949 
4950   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4951   B_mpi->assembled  = PETSC_FALSE;
4952   merge->bi         = bi;
4953   merge->bj         = bj;
4954   merge->buf_ri     = buf_ri;
4955   merge->buf_rj     = buf_rj;
4956   merge->coi        = NULL;
4957   merge->coj        = NULL;
4958   merge->owners_co  = NULL;
4959 
4960   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4961 
4962   /* attach the supporting struct to B_mpi for reuse */
4963   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4964   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4965   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4966   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4967   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4968   *mpimat = B_mpi;
4969 
4970   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4971   PetscFunctionReturn(0);
4972 }
4973 
4974 /*@C
4975       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4976                  matrices from each processor
4977 
4978     Collective
4979 
4980    Input Parameters:
4981 +    comm - the communicator the parallel matrix will live on
4982 .    seqmat - the input sequential matrix on each process
4983 .    m - number of local rows (or PETSC_DECIDE)
4984 .    n - number of local columns (or PETSC_DECIDE)
4985 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4986 
4987    Output Parameter:
4988 .    mpimat - the parallel matrix generated
4989 
4990     Level: advanced
4991 
4992    Notes:
4993      The dimensions of the sequential matrix on each process MUST be the same.
4994      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4995      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4996 @*/
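/*
   A minimal usage sketch (not part of the PETSc source; "seqmat" is assumed to be a fully
   assembled MATSEQAIJ of the same dimensions on every process): the contributions are summed
   into a parallel matrix, and a second call with MAT_REUSE_MATRIX updates only the values.

     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     .... change numerical values in seqmat, keeping its nonzero pattern ....
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/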
4997 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4998 {
4999   PetscErrorCode ierr;
5000   PetscMPIInt    size;
5001 
5002   PetscFunctionBegin;
5003   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5004   if (size == 1) {
5005     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5006     if (scall == MAT_INITIAL_MATRIX) {
5007       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5008     } else {
5009       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5010     }
5011     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5012     PetscFunctionReturn(0);
5013   }
5014   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5015   if (scall == MAT_INITIAL_MATRIX) {
5016     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5017   }
5018   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5019   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5020   PetscFunctionReturn(0);
5021 }
5022 
5023 /*@
5024      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5025           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5026           with MatGetSize()
5027 
5028     Not Collective
5029 
5030    Input Parameters:
5031 +    A - the matrix
5032 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5033 
5034    Output Parameter:
5035 .    A_loc - the local sequential matrix generated
5036 
5037     Level: developer
5038 
5039    Notes:
5040      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5041      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5042      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5043      modify the values of the returned A_loc.
5044 
5045 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5046 @*/
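/*
   A minimal usage sketch (assuming "A" is an assembled MATMPIAIJ): obtain the local rows of A
   as a sequential matrix, later refresh only the numerical values with MAT_REUSE_MATRIX, and
   destroy the local matrix when done.

     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     .... use Aloc; after the values of A change (same nonzero pattern) ....
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/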
5047 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5048 {
5049   PetscErrorCode    ierr;
5050   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5051   Mat_SeqAIJ        *mat,*a,*b;
5052   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5053   const PetscScalar *aa,*ba,*aav,*bav;
5054   PetscScalar       *ca,*cam;
5055   PetscMPIInt       size;
5056   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5057   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5058   PetscBool         match;
5059 
5060   PetscFunctionBegin;
5061   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5062   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5063   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5064   if (size == 1) {
5065     if (scall == MAT_INITIAL_MATRIX) {
5066       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5067       *A_loc = mpimat->A;
5068     } else if (scall == MAT_REUSE_MATRIX) {
5069       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5070     }
5071     PetscFunctionReturn(0);
5072   }
5073 
5074   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5075   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5076   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5077   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5078   ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5079   ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5080   aa   = aav;
5081   ba   = bav;
5082   if (scall == MAT_INITIAL_MATRIX) {
5083     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5084     ci[0] = 0;
5085     for (i=0; i<am; i++) {
5086       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5087     }
5088     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5089     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5090     k    = 0;
5091     for (i=0; i<am; i++) {
5092       ncols_o = bi[i+1] - bi[i];
5093       ncols_d = ai[i+1] - ai[i];
5094       /* off-diagonal portion of A */
5095       for (jo=0; jo<ncols_o; jo++) {
5096         col = cmap[*bj];
5097         if (col >= cstart) break;
5098         cj[k]   = col; bj++;
5099         ca[k++] = *ba++;
5100       }
5101       /* diagonal portion of A */
5102       for (j=0; j<ncols_d; j++) {
5103         cj[k]   = cstart + *aj++;
5104         ca[k++] = *aa++;
5105       }
5106       /* off-diagonal portion of A */
5107       for (j=jo; j<ncols_o; j++) {
5108         cj[k]   = cmap[*bj++];
5109         ca[k++] = *ba++;
5110       }
5111     }
5112     /* put together the new matrix */
5113     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5114     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5115     /* Since these are PETSc arrays, change flags to free them as necessary. */
5116     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5117     mat->free_a  = PETSC_TRUE;
5118     mat->free_ij = PETSC_TRUE;
5119     mat->nonew   = 0;
5120   } else if (scall == MAT_REUSE_MATRIX) {
5121     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5122 #if defined(PETSC_HAVE_DEVICE)
5123     (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5124 #endif
5125     ci = mat->i; cj = mat->j; cam = mat->a;
5126     for (i=0; i<am; i++) {
5127       /* off-diagonal portion of A */
5128       ncols_o = bi[i+1] - bi[i];
5129       for (jo=0; jo<ncols_o; jo++) {
5130         col = cmap[*bj];
5131         if (col >= cstart) break;
5132         *cam++ = *ba++; bj++;
5133       }
5134       /* diagonal portion of A */
5135       ncols_d = ai[i+1] - ai[i];
5136       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5137       /* off-diagonal portion of A */
5138       for (j=jo; j<ncols_o; j++) {
5139         *cam++ = *ba++; bj++;
5140       }
5141     }
5142   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5143   ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5144   ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5145   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5146   PetscFunctionReturn(0);
5147 }
5148 
5149 /*@
5150      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5151           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5152 
5153     Not Collective
5154 
5155    Input Parameters:
5156 +    A - the matrix
5157 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5158 
5159    Output Parameters:
5160 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5161 -    A_loc - the local sequential matrix generated
5162 
5163     Level: developer
5164 
5165    Notes:
5166      This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
5167 
5168 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5169 
5170 @*/
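/*
   A minimal usage sketch (assuming "A" is an assembled MATMPIAIJ): the returned matrix has the
   diagonal-block columns first and the off-diagonal columns after them; "glob" maps those local
   columns back to global column indices.

     Mat Aloc;
     IS  glob;
     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&Aloc);CHKERRQ(ierr);
     .... use Aloc and glob ....
     ierr = ISDestroy(&glob);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/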
5171 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5172 {
5173   PetscErrorCode ierr;
5174   Mat            Ao,Ad;
5175   const PetscInt *cmap;
5176   PetscMPIInt    size;
5177   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5178 
5179   PetscFunctionBegin;
5180   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5181   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5182   if (size == 1) {
5183     if (scall == MAT_INITIAL_MATRIX) {
5184       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5185       *A_loc = Ad;
5186     } else if (scall == MAT_REUSE_MATRIX) {
5187       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5188     }
5189     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5190     PetscFunctionReturn(0);
5191   }
5192   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5193   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5194   if (f) {
5195     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5196   } else {
5197     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5198     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5199     Mat_SeqAIJ        *c;
5200     PetscInt          *ai = a->i, *aj = a->j;
5201     PetscInt          *bi = b->i, *bj = b->j;
5202     PetscInt          *ci,*cj;
5203     const PetscScalar *aa,*ba;
5204     PetscScalar       *ca;
5205     PetscInt          i,j,am,dn,on;
5206 
5207     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5208     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5209     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5210     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5211     if (scall == MAT_INITIAL_MATRIX) {
5212       PetscInt k;
5213       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5214       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5215       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5216       ci[0] = 0;
5217       for (i=0,k=0; i<am; i++) {
5218         const PetscInt ncols_o = bi[i+1] - bi[i];
5219         const PetscInt ncols_d = ai[i+1] - ai[i];
5220         ci[i+1] = ci[i] + ncols_o + ncols_d;
5221         /* diagonal portion of A */
5222         for (j=0; j<ncols_d; j++,k++) {
5223           cj[k] = *aj++;
5224           ca[k] = *aa++;
5225         }
5226         /* off-diagonal portion of A */
5227         for (j=0; j<ncols_o; j++,k++) {
5228           cj[k] = dn + *bj++;
5229           ca[k] = *ba++;
5230         }
5231       }
5232       /* put together the new matrix */
5233       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5234       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5235       /* Since these are PETSc arrays, change flags to free them as necessary. */
5236       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5237       c->free_a  = PETSC_TRUE;
5238       c->free_ij = PETSC_TRUE;
5239       c->nonew   = 0;
5240       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5241     } else if (scall == MAT_REUSE_MATRIX) {
5242 #if defined(PETSC_HAVE_DEVICE)
5243       (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5244 #endif
5245       c  = (Mat_SeqAIJ*)(*A_loc)->data;
5246       ca = c->a;
5247       for (i=0; i<am; i++) {
5248         const PetscInt ncols_d = ai[i+1] - ai[i];
5249         const PetscInt ncols_o = bi[i+1] - bi[i];
5250         /* diagonal portion of A */
5251         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5252         /* off-diagonal portion of A */
5253         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5254       }
5255     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5256     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5257     ierr = MatSeqAIJRestoreArrayRead(Ao,&ba);CHKERRQ(ierr);
5258     if (glob) {
5259       PetscInt cst, *gidx;
5260 
5261       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5262       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5263       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5264       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5265       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5266     }
5267   }
5268   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5269   PetscFunctionReturn(0);
5270 }
5271 
5272 /*@C
5273      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5274 
5275     Not Collective
5276 
5277    Input Parameters:
5278 +    A - the matrix
5279 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5280 -    row, col - index sets of rows and columns to extract (or NULL)
5281 
5282    Output Parameter:
5283 .    A_loc - the local sequential matrix generated
5284 
5285     Level: developer
5286 
5287 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5288 
5289 @*/
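/*
   A minimal usage sketch (assuming "A" is an assembled MATMPIAIJ): passing NULL for row and col
   extracts all local rows and only the columns that have nonzeros in the local part of A.

     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
     .... use Aloc ....
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/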
5290 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5291 {
5292   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5293   PetscErrorCode ierr;
5294   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5295   IS             isrowa,iscola;
5296   Mat            *aloc;
5297   PetscBool      match;
5298 
5299   PetscFunctionBegin;
5300   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5301   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5302   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5303   if (!row) {
5304     start = A->rmap->rstart; end = A->rmap->rend;
5305     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5306   } else {
5307     isrowa = *row;
5308   }
5309   if (!col) {
5310     start = A->cmap->rstart;
5311     cmap  = a->garray;
5312     nzA   = a->A->cmap->n;
5313     nzB   = a->B->cmap->n;
5314     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5315     ncols = 0;
5316     for (i=0; i<nzB; i++) {
5317       if (cmap[i] < start) idx[ncols++] = cmap[i];
5318       else break;
5319     }
5320     imark = i;
5321     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5322     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5323     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5324   } else {
5325     iscola = *col;
5326   }
5327   if (scall != MAT_INITIAL_MATRIX) {
5328     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5329     aloc[0] = *A_loc;
5330   }
5331   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5332   if (!col) { /* attach global id of condensed columns */
5333     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5334   }
5335   *A_loc = aloc[0];
5336   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5337   if (!row) {
5338     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5339   }
5340   if (!col) {
5341     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5342   }
5343   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5344   PetscFunctionReturn(0);
5345 }
5346 
5347 /*
5348  * Create a sequential AIJ matrix based on row indices; all columns of a row are extracted once the row is matched.
5349  * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5350  * on a global size.
5351  * */
5352 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5353 {
5354   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5355   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5356   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5357   PetscMPIInt              owner;
5358   PetscSFNode              *iremote,*oiremote;
5359   const PetscInt           *lrowindices;
5360   PetscErrorCode           ierr;
5361   PetscSF                  sf,osf;
5362   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5363   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5364   MPI_Comm                 comm;
5365   ISLocalToGlobalMapping   mapping;
5366 
5367   PetscFunctionBegin;
5368   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5369   /* plocalsize is the number of roots
5370    * nrows is the number of leaves
5371    * */
5372   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5373   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5374   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5375   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5376   for (i=0;i<nrows;i++) {
5377     /* Find a remote index and an owner for a row
5378      * The row could be local or remote
5379      * */
5380     owner = 0;
5381     lidx  = 0;
5382     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5383     iremote[i].index = lidx;
5384     iremote[i].rank  = owner;
5385   }
5386   /* Create SF to communicate how many nonzero columns for each row */
5387   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5388    * SF will figure out the number of nonzero columns for each row, and their
5389    * offsets
5390    * */
5391   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5392   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5393   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5394 
5395   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5396   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5397   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5398   roffsets[0] = 0;
5399   roffsets[1] = 0;
5400   for (i=0;i<plocalsize;i++) {
5401     /* diag */
5402     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5403     /* off diag */
5404     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5405     /* compute offsets so that we know the relative location of each row */
5406     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5407     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5408   }
5409   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5410   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5411   /* 'r' means root, and 'l' means leaf */
5412   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
5413   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
5414   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
5415   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
5416   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5417   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5418   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5419   dntotalcols = 0;
5420   ontotalcols = 0;
5421   ncol = 0;
5422   for (i=0;i<nrows;i++) {
5423     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5424     ncol = PetscMax(pnnz[i],ncol);
5425     /* diag */
5426     dntotalcols += nlcols[i*2+0];
5427     /* off diag */
5428     ontotalcols += nlcols[i*2+1];
5429   }
5430   /* We do not need to figure out the right number of columns
5431    * since all the calculations will be done by going through the raw data
5432    * */
5433   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5434   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5435   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5436   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5437   /* diag */
5438   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5439   /* off diag */
5440   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5441   /* diag */
5442   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5443   /* off diag */
5444   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5445   dntotalcols = 0;
5446   ontotalcols = 0;
5447   ntotalcols  = 0;
5448   for (i=0;i<nrows;i++) {
5449     owner = 0;
5450     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5451     /* Set iremote for diag matrix */
5452     for (j=0;j<nlcols[i*2+0];j++) {
5453       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5454       iremote[dntotalcols].rank    = owner;
5455       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5456       ilocal[dntotalcols++]        = ntotalcols++;
5457     }
5458     /* off diag */
5459     for (j=0;j<nlcols[i*2+1];j++) {
5460       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5461       oiremote[ontotalcols].rank    = owner;
5462       oilocal[ontotalcols++]        = ntotalcols++;
5463     }
5464   }
5465   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5466   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5467   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5468   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5469   /* P serves as roots and P_oth is leaves
5470    * Diag matrix
5471    * */
5472   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5473   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5474   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5475 
5476   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5477   /* Off diag */
5478   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5479   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5480   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5481   /* We operate on the matrix internal data to save memory */
5482   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5483   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5484   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5485   /* Convert to global indices for diag matrix */
5486   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5487   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5488   /* We want P_oth to store global indices */
5489   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5490   /* Use memory scalable approach */
5491   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5492   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5493   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5494   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5495   /* Convert back to local indices */
5496   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5497   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5498   nout = 0;
5499   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5500   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5501   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5502   /* Exchange values */
5503   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5504   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5505   /* Stop PETSc from shrinking memory */
5506   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5507   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5508   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5509   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5510   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5511   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5512   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5513   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5514   PetscFunctionReturn(0);
5515 }
5516 
5517 /*
5518  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5519  * This supports MPIAIJ and MAIJ
5520  * */
5521 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5522 {
5523   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5524   Mat_SeqAIJ            *p_oth;
5525   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5526   IS                    rows,map;
5527   PetscHMapI            hamp;
5528   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5529   MPI_Comm              comm;
5530   PetscSF               sf,osf;
5531   PetscBool             has;
5532   PetscErrorCode        ierr;
5533 
5534   PetscFunctionBegin;
5535   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5536   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5537   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5538    *  and then create a submatrix (that often is an overlapping matrix)
5539    * */
5540   if (reuse == MAT_INITIAL_MATRIX) {
5541     /* Use a hash table to figure out unique keys */
5542     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5543     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5544     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5545     count = 0;
5546     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5547     for (i=0;i<a->B->cmap->n;i++) {
5548       key  = a->garray[i]/dof;
5549       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5550       if (!has) {
5551         mapping[i] = count;
5552         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5553       } else {
5554         /* Current 'i' has the same key as the previous step */
5555         mapping[i] = count-1;
5556       }
5557     }
5558     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5559     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5560     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);
5561     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5562     off = 0;
5563     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5564     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5565     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5566     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5567     /* In case the matrix was already created and the user wants to recreate it */
5568     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5569     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5570     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5571     ierr = ISDestroy(&map);CHKERRQ(ierr);
5572     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5573   } else if (reuse == MAT_REUSE_MATRIX) {
5574     /* If the matrix was already created, we simply update the values using the SF objects
5575      * that were attached to the matrix earlier.
5576      *  */
5577     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5578     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5579     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5580     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5581     /* Update values in place */
5582     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5583     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5584     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5585     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5586   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5587   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5588   PetscFunctionReturn(0);
5589 }
5590 
5591 /*@C
5592     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5593 
5594     Collective on Mat
5595 
5596    Input Parameters:
5597 +    A - the first matrix in mpiaij format
5598 .    B - the second matrix in mpiaij format
5599 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5600 
5601    Input/Output Parameters:
5602 +    rowb - index set of rows of B to extract (or NULL), modified on output
5603 -    colb - index set of columns of B to extract (or NULL), modified on output
5604 
5605    Output Parameter:
5606 .    B_seq - the sequential matrix generated
5607 
5608     Level: developer
5609 
5610 @*/
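/*
   A minimal usage sketch (assuming "A" and "B" are MATMPIAIJ matrices with compatible layouts):
   with MAT_INITIAL_MATRIX and NULL-initialized index sets the routine builds rowb/colb itself
   and returns them so they can be passed back for a later MAT_REUSE_MATRIX call.

     Mat B_seq = NULL;
     IS  rowb  = NULL,colb = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     .... the values of B change (same nonzero pattern) ....
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/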
5611 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5612 {
5613   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5614   PetscErrorCode ierr;
5615   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5616   IS             isrowb,iscolb;
5617   Mat            *bseq=NULL;
5618 
5619   PetscFunctionBegin;
5620   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5621     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5622   }
5623   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5624 
5625   if (scall == MAT_INITIAL_MATRIX) {
5626     start = A->cmap->rstart;
5627     cmap  = a->garray;
5628     nzA   = a->A->cmap->n;
5629     nzB   = a->B->cmap->n;
5630     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5631     ncols = 0;
5632     for (i=0; i<nzB; i++) {  /* row < local row index */
5633       if (cmap[i] < start) idx[ncols++] = cmap[i];
5634       else break;
5635     }
5636     imark = i;
5637     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5638     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5639     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5640     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5641   } else {
5642     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5643     isrowb  = *rowb; iscolb = *colb;
5644     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5645     bseq[0] = *B_seq;
5646   }
5647   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5648   *B_seq = bseq[0];
5649   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5650   if (!rowb) {
5651     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5652   } else {
5653     *rowb = isrowb;
5654   }
5655   if (!colb) {
5656     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5657   } else {
5658     *colb = iscolb;
5659   }
5660   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5661   PetscFunctionReturn(0);
5662 }
5663 
5664 /*
5665     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5666     of the OFF-DIAGONAL portion of local A
5667 
5668     Collective on Mat
5669 
5670    Input Parameters:
5671 +    A,B - the matrices in mpiaij format
5672 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5673 
5674    Output Parameters:
5675 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5676 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5677 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5678 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5679 
5680     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5681      for this matrix. This is not desirable.
5682 
5683     Level: developer
5684 
5685 */
5686 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5687 {
5688   PetscErrorCode         ierr;
5689   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5690   Mat_SeqAIJ             *b_oth;
5691   VecScatter             ctx;
5692   MPI_Comm               comm;
5693   const PetscMPIInt      *rprocs,*sprocs;
5694   const PetscInt         *srow,*rstarts,*sstarts;
5695   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5696   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5697   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5698   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5699   PetscMPIInt            size,tag,rank,nreqs;
5700 
5701   PetscFunctionBegin;
5702   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5703   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5704 
5705   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5706     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5707   }
5708   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5709   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
5710 
5711   if (size == 1) {
5712     startsj_s = NULL;
5713     bufa_ptr  = NULL;
5714     *B_oth    = NULL;
5715     PetscFunctionReturn(0);
5716   }
5717 
5718   ctx = a->Mvctx;
5719   tag = ((PetscObject)ctx)->tag;
5720 
5721   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5722   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5723   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5724   ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr);
5725   ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr);
5726   rwaits = reqs;
5727   swaits = reqs + nrecvs;
5728 
5729   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5730   if (scall == MAT_INITIAL_MATRIX) {
5731     /* i-array */
5732     /*---------*/
5733     /*  post receives */
5734     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5735     for (i=0; i<nrecvs; i++) {
5736       rowlen = rvalues + rstarts[i]*rbs;
5737       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5738       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5739     }
5740 
5741     /* pack the outgoing message */
5742     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5743 
5744     sstartsj[0] = 0;
5745     rstartsj[0] = 0;
5746     len         = 0; /* total length of j or a array to be sent */
5747     if (nsends) {
5748       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5749       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5750     }
5751     for (i=0; i<nsends; i++) {
5752       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5753       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5754       for (j=0; j<nrows; j++) {
5755         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5756         for (l=0; l<sbs; l++) {
5757           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5758 
5759           rowlen[j*sbs+l] = ncols;
5760 
5761           len += ncols;
5762           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5763         }
5764         k++;
5765       }
5766       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5767 
5768       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5769     }
5770     /* recvs and sends of i-array are completed */
5771     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5772     ierr = PetscFree(svalues);CHKERRQ(ierr);
5773 
5774     /* allocate buffers for sending j and a arrays */
5775     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5776     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5777 
5778     /* create i-array of B_oth */
5779     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5780 
5781     b_othi[0] = 0;
5782     len       = 0; /* total length of j or a array to be received */
5783     k         = 0;
5784     for (i=0; i<nrecvs; i++) {
5785       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5786       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5787       for (j=0; j<nrows; j++) {
5788         b_othi[k+1] = b_othi[k] + rowlen[j];
5789         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5790         k++;
5791       }
5792       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5793     }
5794     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5795 
5796     /* allocate space for the j and a arrays of B_oth */
5797     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5798     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5799 
5800     /* j-array */
5801     /*---------*/
5802     /*  post receives of j-array */
5803     for (i=0; i<nrecvs; i++) {
5804       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5805       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5806     }
5807 
5808     /* pack the outgoing message j-array */
5809     if (nsends) k = sstarts[0];
5810     for (i=0; i<nsends; i++) {
5811       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5812       bufJ  = bufj+sstartsj[i];
5813       for (j=0; j<nrows; j++) {
5814         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5815         for (ll=0; ll<sbs; ll++) {
5816           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5817           for (l=0; l<ncols; l++) {
5818             *bufJ++ = cols[l];
5819           }
5820           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5821         }
5822       }
5823       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5824     }
5825 
5826     /* recvs and sends of j-array are completed */
5827     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5828   } else if (scall == MAT_REUSE_MATRIX) {
5829     sstartsj = *startsj_s;
5830     rstartsj = *startsj_r;
5831     bufa     = *bufa_ptr;
5832     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5833     b_otha   = b_oth->a;
5834 #if defined(PETSC_HAVE_DEVICE)
5835     (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU;
5836 #endif
5837   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Unsupported MatReuse value");
5838 
5839   /* a-array */
5840   /*---------*/
5841   /*  post receives of a-array */
5842   for (i=0; i<nrecvs; i++) {
5843     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5844     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5845   }
5846 
5847   /* pack the outgoing message a-array */
5848   if (nsends) k = sstarts[0];
5849   for (i=0; i<nsends; i++) {
5850     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5851     bufA  = bufa+sstartsj[i];
5852     for (j=0; j<nrows; j++) {
5853       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5854       for (ll=0; ll<sbs; ll++) {
5855         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5856         for (l=0; l<ncols; l++) {
5857           *bufA++ = vals[l];
5858         }
5859         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5860       }
5861     }
5862     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5863   }
5864   /* recvs and sends of a-array are completed */
5865   if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5866   ierr = PetscFree(reqs);CHKERRQ(ierr);
5867 
5868   if (scall == MAT_INITIAL_MATRIX) {
5869     /* put together the new matrix */
5870     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5871 
5872     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5873     /* Since these are PETSc arrays, change flags to free them as necessary. */
5874     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5875     b_oth->free_a  = PETSC_TRUE;
5876     b_oth->free_ij = PETSC_TRUE;
5877     b_oth->nonew   = 0;
5878 
5879     ierr = PetscFree(bufj);CHKERRQ(ierr);
5880     if (!startsj_s || !bufa_ptr) {
5881       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5882       ierr = PetscFree(bufa);CHKERRQ(ierr);
5883     } else {
5884       *startsj_s = sstartsj;
5885       *startsj_r = rstartsj;
5886       *bufa_ptr  = bufa;
5887     }
5888   }
5889 
5890   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5891   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5892   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5893   PetscFunctionReturn(0);
5894 }
5895 
5896 /*@C
5897   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5898 
5899   Not Collective
5900 
5901   Input Parameter:
5902 . A - The matrix in mpiaij format
5903 
5904   Output Parameters:
5905 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5906 . colmap - A map from global column index to local index into lvec
5907 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5908 
5909   Level: developer
5910 
5911 @*/
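/*
   A minimal usage sketch (assuming "A" is a MATMPIAIJ and PETSc was configured without
   PETSC_USE_CTABLE, so colmap is a PetscInt array; with PETSC_USE_CTABLE it is a PetscTable).
   The returned objects are A's internal data and must not be destroyed by the caller.

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/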
5912 #if defined(PETSC_USE_CTABLE)
5913 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5914 #else
5915 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5916 #endif
5917 {
5918   Mat_MPIAIJ *a;
5919 
5920   PetscFunctionBegin;
5921   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5922   PetscValidPointer(lvec, 2);
5923   PetscValidPointer(colmap, 3);
5924   PetscValidPointer(multScatter, 4);
5925   a = (Mat_MPIAIJ*) A->data;
5926   if (lvec) *lvec = a->lvec;
5927   if (colmap) *colmap = a->colmap;
5928   if (multScatter) *multScatter = a->Mvctx;
5929   PetscFunctionReturn(0);
5930 }
5931 
5932 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5933 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5935 #if defined(PETSC_HAVE_MKL_SPARSE)
5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5937 #endif
5938 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5939 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5940 #if defined(PETSC_HAVE_ELEMENTAL)
5941 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5942 #endif
5943 #if defined(PETSC_HAVE_SCALAPACK)
5944 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5945 #endif
5946 #if defined(PETSC_HAVE_HYPRE)
5947 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5948 #endif
5949 #if defined(PETSC_HAVE_CUDA)
5950 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5951 #endif
5952 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5953 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5954 #endif
5955 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5956 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5957 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5958 
5959 /*
5960     Computes (B'*A')' since computing B*A directly is untenable
5961 
5962                n                       p                          p
5963         [             ]       [             ]         [                 ]
5964       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5965         [             ]       [             ]         [                 ]
5966 
5967 */
5968 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5969 {
5970   PetscErrorCode ierr;
5971   Mat            At,Bt,Ct;
5972 
5973   PetscFunctionBegin;
5974   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5975   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5976   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5977   ierr = MatDestroy(&At);CHKERRQ(ierr);
5978   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5979   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5980   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5981   PetscFunctionReturn(0);
5982 }
5983 
5984 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5985 {
5986   PetscErrorCode ierr;
5987   PetscBool      cisdense;
5988 
5989   PetscFunctionBegin;
5990   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5991   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5992   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5993   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5994   if (!cisdense) {
5995     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5996   }
5997   ierr = MatSetUp(C);CHKERRQ(ierr);
5998 
5999   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6000   PetscFunctionReturn(0);
6001 }
6002 
6003 /* ----------------------------------------------------------------*/
6004 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6005 {
6006   Mat_Product *product = C->product;
6007   Mat         A = product->A,B=product->B;
6008 
6009   PetscFunctionBegin;
6010   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6011     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6012 
6013   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6014   C->ops->productsymbolic = MatProductSymbolic_AB;
6015   PetscFunctionReturn(0);
6016 }
6017 
6018 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6019 {
6020   PetscErrorCode ierr;
6021   Mat_Product    *product = C->product;
6022 
6023   PetscFunctionBegin;
6024   if (product->type == MATPRODUCT_AB) {
6025     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6026   }
6027   PetscFunctionReturn(0);
6028 }
6029 /* ----------------------------------------------------------------*/
6030 
6031 /*MC
6032    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6033 
6034    Options Database Keys:
6035 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6036 
6037    Level: beginner
6038 
6039    Notes:
6040     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6041     in this case the values associated with the rows and columns one passes in are set to zero
6042     in the matrix
6043 
6044     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6045     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6046 
6047 .seealso: MatCreateAIJ()
6048 M*/
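/*
   A minimal creation sketch (hypothetical 100x100 diagonal example, not part of the PETSc
   source): create an "mpiaij" matrix explicitly, preallocate it, insert one value per local
   row, and assemble.

     Mat         A;
     PetscInt    i,rstart,rend;
     PetscScalar v = 1.0;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,1,NULL,0,NULL);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       ierr = MatSetValues(A,1,&i,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/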
6049 
6050 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6051 {
6052   Mat_MPIAIJ     *b;
6053   PetscErrorCode ierr;
6054   PetscMPIInt    size;
6055 
6056   PetscFunctionBegin;
6057   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
6058 
6059   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6060   B->data       = (void*)b;
6061   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6062   B->assembled  = PETSC_FALSE;
6063   B->insertmode = NOT_SET_VALUES;
6064   b->size       = size;
6065 
6066   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
6067 
6068   /* build cache for off array entries formed */
6069   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6070 
6071   b->donotstash  = PETSC_FALSE;
6072   b->colmap      = NULL;
6073   b->garray      = NULL;
6074   b->roworiented = PETSC_TRUE;
6075 
6076   /* stuff used for matrix vector multiply */
6077   b->lvec  = NULL;
6078   b->Mvctx = NULL;
6079 
6080   /* stuff for MatGetRow() */
6081   b->rowindices   = NULL;
6082   b->rowvalues    = NULL;
6083   b->getrowactive = PETSC_FALSE;
6084 
6085   /* flexible pointer used in CUSPARSE classes */
6086   b->spptr = NULL;
6087 
6088   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6090   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6092   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6093   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6094   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6095   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6096   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6098 #if defined(PETSC_HAVE_CUDA)
6099   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
6100 #endif
6101 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6102   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
6103 #endif
6104 #if defined(PETSC_HAVE_MKL_SPARSE)
6105   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6106 #endif
6107   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6108   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6109   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6110   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
6111 #if defined(PETSC_HAVE_ELEMENTAL)
6112   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6113 #endif
6114 #if defined(PETSC_HAVE_SCALAPACK)
6115   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6116 #endif
6117   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6118   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6119 #if defined(PETSC_HAVE_HYPRE)
6120   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6121   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6122 #endif
6123   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6124   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6125   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6126   PetscFunctionReturn(0);
6127 }
6128 
6129 /*@C
6130      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6131          and "off-diagonal" part of the matrix in CSR format.
6132 
6133    Collective
6134 
6135    Input Parameters:
6136 +  comm - MPI communicator
6137 .  m - number of local rows (Cannot be PETSC_DECIDE)
6138 .  n - This value should be the same as the local size used in creating the
6139        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
6140        calculated if N is given). For square matrices n is almost always m.
6141 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6142 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6143 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6144 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6145 .   a - matrix values
6146 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6147 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6148 -   oa - matrix values
6149 
6150    Output Parameter:
6151 .   mat - the matrix
6152 
6153    Level: advanced
6154 
6155    Notes:
6156        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6157        must free the arrays once the matrix has been destroyed and not before.
6158 
6159        The i and j indices are 0 based
6160 
6161        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6162 
6163        This sets local rows and cannot be used to set off-processor values.
6164 
6165        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6166        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6167        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6168        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6169        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6170        communication if it is known that only local entries will be set.
6171 
6172 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6173           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6174 @*/
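/*
   A minimal usage sketch (hypothetical data: one local row per process with a single entry in
   the "diagonal" block and an empty "off-diagonal" block; the dummy oj/oa entries are never
   read because oi reports zero nonzeros). The arrays are owned by the caller and, per the
   Notes above, must outlive the matrix.

     PetscInt    i[2]  = {0,1}, j[1]  = {0};
     PetscScalar a[1]  = {2.0};
     PetscInt    oi[2] = {0,0}, oj[1] = {0};
     PetscScalar oa[1] = {0.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/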
6175 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6176 {
6177   PetscErrorCode ierr;
6178   Mat_MPIAIJ     *maij;
6179 
6180   PetscFunctionBegin;
6181   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6182   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6183   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6184   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6185   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6186   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6187   maij = (Mat_MPIAIJ*) (*mat)->data;
6188 
6189   (*mat)->preallocated = PETSC_TRUE;
6190 
6191   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6192   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6193 
6194   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6195   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6196 
6197   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6198   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6199   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6200   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6201   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6202   PetscFunctionReturn(0);
6203 }
6204 
6205 /*
6206     Special version for direct calls from Fortran
6207 */
6208 #include <petsc/private/fortranimpl.h>
6209 
6210 /* Change these macros so they can be used in a void function */
6211 #undef CHKERRQ
6212 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6213 #undef SETERRQ2
6214 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6215 #undef SETERRQ3
6216 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6217 #undef SETERRQ
6218 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6219 
6220 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6221 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6222 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6223 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6224 #else
6225 #endif
6226 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6227 {
6228   Mat            mat  = *mmat;
6229   PetscInt       m    = *mm, n = *mn;
6230   InsertMode     addv = *maddv;
6231   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6232   PetscScalar    value;
6233   PetscErrorCode ierr;
6234 
6235   MatCheckPreallocated(mat,1);
6236   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6237   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6238   {
6239     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6240     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6241     PetscBool roworiented = aij->roworiented;
6242 
6243     /* Some variables required by the MatSetValues_SeqAIJ_[A|B]_Private() macros */
6244     Mat        A                    = aij->A;
6245     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6246     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6247     MatScalar  *aa                  = a->a;
6248     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6249     Mat        B                    = aij->B;
6250     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6251     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6252     MatScalar  *ba                  = b->a;
6253     /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6254      * cannot use "#if defined" inside a macro. */
6255     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6256 
6257     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6258     PetscInt  nonew = a->nonew;
6259     MatScalar *ap1,*ap2;
6260 
6261     PetscFunctionBegin;
6262     for (i=0; i<m; i++) {
6263       if (im[i] < 0) continue;
6264       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6265       if (im[i] >= rstart && im[i] < rend) {
6266         row      = im[i] - rstart;
6267         lastcol1 = -1;
6268         rp1      = aj + ai[row];
6269         ap1      = aa + ai[row];
6270         rmax1    = aimax[row];
6271         nrow1    = ailen[row];
6272         low1     = 0;
6273         high1    = nrow1;
6274         lastcol2 = -1;
6275         rp2      = bj + bi[row];
6276         ap2      = ba + bi[row];
6277         rmax2    = bimax[row];
6278         nrow2    = bilen[row];
6279         low2     = 0;
6280         high2    = nrow2;
6281 
6282         for (j=0; j<n; j++) {
6283           if (roworiented) value = v[i*n+j];
6284           else value = v[i+j*m];
6285           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6286           if (in[j] >= cstart && in[j] < cend) {
6287             col = in[j] - cstart;
6288             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6289 #if defined(PETSC_HAVE_DEVICE)
6290             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6291 #endif
6292           } else if (in[j] < 0) continue;
6293           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6294             /* the extra braces around SETERRQ2() are required for --with-errorchecking=0 because of the next 'else' clause */
6295             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6296           } else {
6297             if (mat->was_assembled) {
6298               if (!aij->colmap) {
6299                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6300               }
6301 #if defined(PETSC_USE_CTABLE)
6302               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6303               col--;
6304 #else
6305               col = aij->colmap[in[j]] - 1;
6306 #endif
6307               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6308                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6309                 col  =  in[j];
6310                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6311                 B        = aij->B;
6312                 b        = (Mat_SeqAIJ*)B->data;
6313                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6314                 rp2      = bj + bi[row];
6315                 ap2      = ba + bi[row];
6316                 rmax2    = bimax[row];
6317                 nrow2    = bilen[row];
6318                 low2     = 0;
6319                 high2    = nrow2;
6320                 bm       = aij->B->rmap->n;
6321                 ba       = b->a;
6322                 inserted = PETSC_FALSE;
6323               }
6324             } else col = in[j];
6325             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6326 #if defined(PETSC_HAVE_DEVICE)
6327             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6328 #endif
6329           }
6330         }
6331       } else if (!aij->donotstash) {
6332         if (roworiented) {
6333           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6334         } else {
6335           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6336         }
6337       }
6338     }
6339   }
6340   PetscFunctionReturnVoid();
6341 }
6342 
6343 typedef struct {
6344   Mat       *mp;    /* intermediate products */
6345   PetscBool *mptmp; /* is the intermediate product temporary? */
6346   PetscInt  cp;     /* number of intermediate products */
6347 
6348   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6349   PetscInt    *startsj_s,*startsj_r;
6350   PetscScalar *bufa;
6351   Mat         P_oth;
6352 
6353   /* may take advantage of merging product->B */
6354   Mat Bloc; /* local B obtained by merging the diagonal and off-diagonal parts */
6355 
6356   /* cusparse does not support splitting the symbolic and numeric phases.
6357      When api_user is true, we do not need to update the numerical values
6358      of the temporary storage */
6359   PetscBool reusesym;
6360 
6361   /* support for COO value insertion */
6362   PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars; also used as MPI recv/send buffers, respectively */
6363   PetscInt     **own; /* own[i] points to the start of the on-process COO indices for Mat mp[i] */
6364   PetscInt     **off; /* off[i] points to the start of the off-process COO indices for Mat mp[i] */
6365   PetscBool    hasoffproc; /* if true, there is off-process value insertion (i.e. AtB or PtAP) */
6366   PetscSF      sf; /* used for off-process value insertion and memory allocation */
6367   PetscMemType mtype;
6368 
6369   /* customization */
6370   PetscBool abmerge;
6371   PetscBool P_oth_bind;
6372 } MatMatMPIAIJBACKEND;
6373 
6374 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6375 {
6376   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6377   PetscInt            i;
6378   PetscErrorCode      ierr;
6379 
6380   PetscFunctionBegin;
6381   ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
6382   ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
6383   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
6384   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
6385   ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
6386   ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
6387   ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
6388   for (i = 0; i < mmdata->cp; i++) {
6389     ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
6390   }
6391   ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr);
6392   ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
6393   ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
6394   ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
6395   ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
6396   ierr = PetscFree(mmdata);CHKERRQ(ierr);
6397   PetscFunctionReturn(0);
6398 }
6399 
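/* Copy selected values of a SeqAIJ matrix into v[]: if idx[] is provided, v[k] = a[idx[k]] for the n
   requested positions in the value array, otherwise the first n values are copied; a type-specific
   fast path is used when one is composed as "MatSeqAIJCopySubArray_C" */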
6400 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6401 {
6402   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6403   PetscErrorCode ierr;
6404 
6405   PetscFunctionBegin;
6406   ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
6407   if (f) {
6408     ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
6409   } else {
6410     const PetscScalar *vv;
6411 
6412     ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
6413     if (n && idx) {
6414       PetscScalar    *w = v;
6415       const PetscInt *oi = idx;
6416       PetscInt       j;
6417 
6418       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6419     } else {
6420       ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
6421     }
6422     ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
6423   }
6424   PetscFunctionReturn(0);
6425 }
6426 
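/* Numeric phase of the backend product: refresh the temporary matrices when needed, run the numeric
   phase of each intermediate product, copy their values into the COO buffers (sending off-process
   contributions to their owners through mmdata->sf), and finally insert everything into C with
   MatSetValuesCOO() */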
6427 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6428 {
6429   MatMatMPIAIJBACKEND *mmdata;
6430   PetscInt            i,n_d,n_o;
6431   PetscErrorCode      ierr;
6432 
6433   PetscFunctionBegin;
6434   MatCheckProduct(C,1);
6435   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6436   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6437   if (!mmdata->reusesym) { /* update temporary matrices */
6438     if (mmdata->P_oth) {
6439       ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6440     }
6441     if (mmdata->Bloc) {
6442       ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
6443     }
6444   }
6445   mmdata->reusesym = PETSC_FALSE;
6446 
6447   for (i = 0; i < mmdata->cp; i++) {
6448     if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6449     ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
6450   }
6451   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6452     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6453 
6454     if (mmdata->mptmp[i]) continue;
6455     if (noff) {
6456       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6457 
6458       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
6459       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
6460       n_o += noff;
6461       n_d += nown;
6462     } else {
6463       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6464 
6465       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
6466       n_d += mm->nz;
6467     }
6468   }
6469   if (mmdata->hasoffproc) { /* offprocess insertion */
6470     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6471     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6472   }
6473   ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
6474   PetscFunctionReturn(0);
6475 }
6476 
6477 /* Support for Pt * A, A * P, or Pt * A * P */
6478 #define MAX_NUMBER_INTERMEDIATE 4
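/*
   A sketch of how this symbolic phase is reached through the generic MatProduct API; the matrices
   A, P, and C below are illustrative names only:

     MatProductCreate(A,P,NULL,&C);
     MatProductSetType(C,MATPRODUCT_PtAP);   (or MATPRODUCT_AB, MATPRODUCT_AtB)
     MatProductSetFromOptions(C);            (may select MatProductSymbolic_MPIAIJBACKEND)
     MatProductSymbolic(C);
     MatProductNumeric(C);
*/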
6479 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6480 {
6481   Mat_Product            *product = C->product;
6482   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6483   Mat_MPIAIJ             *a,*p;
6484   MatMatMPIAIJBACKEND    *mmdata;
6485   ISLocalToGlobalMapping P_oth_l2g = NULL;
6486   IS                     glob = NULL;
6487   const char             *prefix;
6488   char                   pprefix[256];
6489   const PetscInt         *globidx,*P_oth_idx;
6490   PetscInt               i,j,cp,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j;
6491   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6492                                                                                         /* type-0: consecutive, starting from 0; type-1: consecutive with */
6493                                                                                         /* a base offset; type-2: sparse, with a local-to-global map table */
6494   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6495 
6496   MatProductType         ptype;
6497   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6498   PetscMPIInt            size;
6499   PetscErrorCode         ierr;
6500 
6501   PetscFunctionBegin;
6502   MatCheckProduct(C,1);
6503   if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6504   ptype = product->type;
6505   if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = MATPRODUCT_AB;
6506   switch (ptype) {
6507   case MATPRODUCT_AB:
6508     A = product->A;
6509     P = product->B;
6510     m = A->rmap->n;
6511     n = P->cmap->n;
6512     M = A->rmap->N;
6513     N = P->cmap->N;
6514     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6515     break;
6516   case MATPRODUCT_AtB:
6517     P = product->A;
6518     A = product->B;
6519     m = P->cmap->n;
6520     n = A->cmap->n;
6521     M = P->cmap->N;
6522     N = A->cmap->N;
6523     hasoffproc = PETSC_TRUE;
6524     break;
6525   case MATPRODUCT_PtAP:
6526     A = product->A;
6527     P = product->B;
6528     m = P->cmap->n;
6529     n = P->cmap->n;
6530     M = P->cmap->N;
6531     N = P->cmap->N;
6532     hasoffproc = PETSC_TRUE;
6533     break;
6534   default:
6535     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6536   }
6537   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
6538   if (size == 1) hasoffproc = PETSC_FALSE;
6539 
6540   /* defaults */
6541   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6542     mp[i]    = NULL;
6543     mptmp[i] = PETSC_FALSE;
6544     rmapt[i] = -1;
6545     cmapt[i] = -1;
6546     rmapa[i] = NULL;
6547     cmapa[i] = NULL;
6548   }
6549 
6550   /* customization */
6551   ierr = PetscNew(&mmdata);CHKERRQ(ierr);
6552   mmdata->reusesym = product->api_user;
6553   if (ptype == MATPRODUCT_AB) {
6554     if (product->api_user) {
6555       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6556       ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6557       ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6558       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6559     } else {
6560       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6561       ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6562       ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6563       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6564     }
6565   } else if (ptype == MATPRODUCT_PtAP) {
6566     if (product->api_user) {
6567       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6568       ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6569       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6570     } else {
6571       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6572       ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6573       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6574     }
6575   }
6576   a = (Mat_MPIAIJ*)A->data;
6577   p = (Mat_MPIAIJ*)P->data;
6578   ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
6579   ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
6580   ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
6581   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6582   ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);
6583 
6584   cp   = 0;
6585   switch (ptype) {
6586   case MATPRODUCT_AB: /* A * P */
6587     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6588 
6589     /* A_diag * P_local (merged or not) */
6590     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
6591       /* P is product->B */
6592       ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6593       ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6594       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6595       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6596       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6597       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6598       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6599       mp[cp]->product->api_user = product->api_user;
6600       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6601       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6602       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6603       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6604       rmapt[cp] = 1;
6605       cmapt[cp] = 2;
6606       cmapa[cp] = globidx;
6607       mptmp[cp] = PETSC_FALSE;
6608       cp++;
6609     } else { /* A_diag * P_diag and A_diag * P_off */
6610       ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
6611       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6612       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6613       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6614       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6615       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6616       mp[cp]->product->api_user = product->api_user;
6617       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6618       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6619       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6620       rmapt[cp] = 1;
6621       cmapt[cp] = 1;
6622       mptmp[cp] = PETSC_FALSE;
6623       cp++;
6624       ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
6625       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6626       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6627       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6628       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6629       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6630       mp[cp]->product->api_user = product->api_user;
6631       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6632       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6633       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6634       rmapt[cp] = 1;
6635       cmapt[cp] = 2;
6636       cmapa[cp] = p->garray;
6637       mptmp[cp] = PETSC_FALSE;
6638       cp++;
6639     }
6640 
6641     /* A_off * P_other */
6642     if (mmdata->P_oth) {
6643       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */
6644       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6645       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6646       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6647       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6648       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6649       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6650       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6651       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6652       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6653       mp[cp]->product->api_user = product->api_user;
6654       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6655       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6656       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6657       rmapt[cp] = 1;
6658       cmapt[cp] = 2;
6659       cmapa[cp] = P_oth_idx;
6660       mptmp[cp] = PETSC_FALSE;
6661       cp++;
6662     }
6663     break;
6664 
6665   case MATPRODUCT_AtB: /* (P^t * A): P_diag^t * A_loc + P_off^t * A_loc */
6666     /* A is product->B */
6667     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6668     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
6669       ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6670       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6671       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6672       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6673       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6674       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6675       mp[cp]->product->api_user = product->api_user;
6676       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6677       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6678       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6679       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6680       rmapt[cp] = 2;
6681       rmapa[cp] = globidx;
6682       cmapt[cp] = 2;
6683       cmapa[cp] = globidx;
6684       mptmp[cp] = PETSC_FALSE;
6685       cp++;
6686     } else {
6687       ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6688       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6689       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6690       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6691       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6692       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6693       mp[cp]->product->api_user = product->api_user;
6694       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6695       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6696       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6697       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6698       rmapt[cp] = 1;
6699       cmapt[cp] = 2;
6700       cmapa[cp] = globidx;
6701       mptmp[cp] = PETSC_FALSE;
6702       cp++;
6703       ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6704       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6705       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6706       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6707       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6708       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6709       mp[cp]->product->api_user = product->api_user;
6710       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6711       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6712       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6713       rmapt[cp] = 2;
6714       rmapa[cp] = p->garray;
6715       cmapt[cp] = 2;
6716       cmapa[cp] = globidx;
6717       mptmp[cp] = PETSC_FALSE;
6718       cp++;
6719     }
6720     break;
6721   case MATPRODUCT_PtAP:
6722     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6723     /* P is product->B */
6724     ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6725     ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6726     ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
6727     ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6728     ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6729     ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6730     ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6731     mp[cp]->product->api_user = product->api_user;
6732     ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6733     if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6734     ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6735     ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6736     rmapt[cp] = 2;
6737     rmapa[cp] = globidx;
6738     cmapt[cp] = 2;
6739     cmapa[cp] = globidx;
6740     mptmp[cp] = PETSC_FALSE;
6741     cp++;
6742     if (mmdata->P_oth) {
6743       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6744       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6745       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6746       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6747       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6748       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6749       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6750       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6751       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6752       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6753       mp[cp]->product->api_user = product->api_user;
6754       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6755       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6756       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6757       mptmp[cp] = PETSC_TRUE;
6758       cp++;
6759       ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
6760       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6761       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6762       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6763       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6764       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6765       mp[cp]->product->api_user = product->api_user;
6766       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6767       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6768       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6769       rmapt[cp] = 2;
6770       rmapa[cp] = globidx;
6771       cmapt[cp] = 2;
6772       cmapa[cp] = P_oth_idx;
6773       mptmp[cp] = PETSC_FALSE;
6774       cp++;
6775     }
6776     break;
6777   default:
6778     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6779   }
6780   /* sanity check */
6781   if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);
6782 
6783   ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr);
6784   for (i = 0; i < cp; i++) {
6785     mmdata->mp[i]    = mp[i];
6786     mmdata->mptmp[i] = mptmp[i];
6787   }
6788   mmdata->cp = cp;
6789   C->product->data       = mmdata;
6790   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
6791   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
6792 
6793   /* memory type */
6794   mmdata->mtype = PETSC_MEMTYPE_HOST;
6795   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
6796   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
6797   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
6798   // enable the line below once MatSeqAIJCopySubArray_SeqAIJKokkos is implemented
6799   //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;
6800 
6801   /* prepare coo coordinates for values insertion */
6802 
6803   /* count the total nonzeros of the intermediate seqaij Mats
6804     ncoo_d:    # of nonzeros of matrices that do not have off-process entries
6805     ncoo_o:    # of nonzeros (of matrices that might have off-process entries) that will be inserted on remote processes
6806     ncoo_oown: # of nonzeros (of matrices that might have off-process entries) that will be inserted locally
6807   */
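  /* A hypothetical example, assuming hasoffproc is true and this process owns global rows [rs,re) = [4,8) of C:
     if mp[0] has rmapt[0] == 1 (consecutive local rows), all of its nonzeros count towards ncoo_d;
     if mp[1] has rmapt[1] == 2 with rmapa[1] = {2,5,9}, the nonzeros of its rows mapping to global rows
     2 and 9 count towards ncoo_o, while those mapping to 5 count towards ncoo_oown */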
6808   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
6809     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6810     if (mptmp[cp]) continue;
6811     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
6812       const PetscInt *rmap = rmapa[cp];
6813       const PetscInt mr = mp[cp]->rmap->n;
6814       const PetscInt rs = C->rmap->rstart;
6815       const PetscInt re = C->rmap->rend;
6816       const PetscInt *ii  = mm->i;
6817       for (i = 0; i < mr; i++) {
6818         const PetscInt gr = rmap[i];
6819         const PetscInt nz = ii[i+1] - ii[i];
6820         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
6821         else ncoo_oown += nz; /* this row is local */
6822       }
6823     } else ncoo_d += mm->nz;
6824   }
6825 
6826   /*
6827     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
6828 
6829     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted on me by other processes.
6830 
6831     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].
6832 
6833     off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert on other processes
6834     own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally
6835     so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to other processes.
6836 
6837     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this process.
6838     E.g. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
6839   */
6840   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */
6841   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);
6842 
6843   /* gather (i,j) of nonzeros inserted by remote procs */
6844   if (hasoffproc) {
6845     PetscSF  msf;
6846     PetscInt ncoo2,*coo_i2,*coo_j2;
6847 
6848     ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
6849     ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
6850     ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */
6851 
6852     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
6853       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6854       PetscInt   *idxoff = mmdata->off[cp];
6855       PetscInt   *idxown = mmdata->own[cp];
6856       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
6857         const PetscInt *rmap = rmapa[cp];
6858         const PetscInt *cmap = cmapa[cp];
6859         const PetscInt *ii  = mm->i;
6860         PetscInt       *coi = coo_i + ncoo_o;
6861         PetscInt       *coj = coo_j + ncoo_o;
6862         const PetscInt mr = mp[cp]->rmap->n;
6863         const PetscInt rs = C->rmap->rstart;
6864         const PetscInt re = C->rmap->rend;
6865         const PetscInt cs = C->cmap->rstart;
6866         for (i = 0; i < mr; i++) {
6867           const PetscInt *jj = mm->j + ii[i];
6868           const PetscInt gr  = rmap[i];
6869           const PetscInt nz  = ii[i+1] - ii[i];
6870           if (gr < rs || gr >= re) { /* this is an offproc row */
6871             for (j = ii[i]; j < ii[i+1]; j++) {
6872               *coi++ = gr;
6873               *idxoff++ = j;
6874             }
6875             if (!cmapt[cp]) { /* already global */
6876               for (j = 0; j < nz; j++) *coj++ = jj[j];
6877             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6878               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6879             } else { /* offdiag */
6880               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6881             }
6882             ncoo_o += nz;
6883           } else { /* this is a local row */
6884             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
6885           }
6886         }
6887       }
6888       mmdata->off[cp + 1] = idxoff;
6889       mmdata->own[cp + 1] = idxown;
6890     }
6891 
6892     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6893     ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
6894     ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
6895     ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr);
6896     ncoo = ncoo_d + ncoo_oown + ncoo2;
6897     ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
6898     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */
6899     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6900     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6901     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6902     ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6903     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
6904     ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
6905     coo_i = coo_i2;
6906     coo_j = coo_j2;
6907   } else { /* no offproc values insertion */
6908     ncoo = ncoo_d;
6909     ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);
6910 
6911     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6912     ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
6913     ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
6914   }
6915   mmdata->hasoffproc = hasoffproc;
6916 
6917    /* gather (i,j) of nonzeros inserted locally */
6918   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
6919     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
6920     PetscInt       *coi = coo_i + ncoo_d;
6921     PetscInt       *coj = coo_j + ncoo_d;
6922     const PetscInt *jj  = mm->j;
6923     const PetscInt *ii  = mm->i;
6924     const PetscInt *cmap = cmapa[cp];
6925     const PetscInt *rmap = rmapa[cp];
6926     const PetscInt mr = mp[cp]->rmap->n;
6927     const PetscInt rs = C->rmap->rstart;
6928     const PetscInt re = C->rmap->rend;
6929     const PetscInt cs = C->cmap->rstart;
6930 
6931     if (mptmp[cp]) continue;
6932     if (rmapt[cp] == 1) { /* consecutive rows */
6933       /* fill coo_i */
6934       for (i = 0; i < mr; i++) {
6935         const PetscInt gr = i + rs;
6936         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
6937       }
6938       /* fill coo_j */
6939       if (!cmapt[cp]) { /* type-0, already global */
6940         ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
6941       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
6942         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
6943       } else { /* type-2, local to global for sparse columns */
6944         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
6945       }
6946       ncoo_d += mm->nz;
6947     } else if (rmapt[cp] == 2) { /* sparse rows */
6948       for (i = 0; i < mr; i++) {
6949         const PetscInt *jj = mm->j + ii[i];
6950         const PetscInt gr  = rmap[i];
6951         const PetscInt nz  = ii[i+1] - ii[i];
6952         if (gr >= rs && gr < re) { /* local rows */
6953           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
6954           if (!cmapt[cp]) { /* type-0, already global */
6955             for (j = 0; j < nz; j++) *coj++ = jj[j];
6956           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6957             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6958           } else { /* type-2, local to global for sparse columns */
6959             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6960           }
6961           ncoo_d += nz;
6962         }
6963       }
6964     }
6965   }
6966   if (glob) {
6967     ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
6968   }
6969   ierr = ISDestroy(&glob);CHKERRQ(ierr);
6970   if (P_oth_l2g) {
6971     ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6972   }
6973   ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
6974   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
6975   ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);
6976 
6977   /* preallocate with COO data */
6978   ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
6979   ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6980   PetscFunctionReturn(0);
6981 }
6982 
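/*
   Decide whether the backend symbolic/numeric routines above are used or whether we fall back to the
   standard MPIAIJ implementation; the CPU fallback can also be forced at run time with the options
   registered below, for example (illustrative command lines):

     -matmatmult_backend_cpu    (MatMatMult() uses the CPU code path)
     -matptap_backend_cpu       (MatPtAP() uses the CPU code path)
*/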
6983 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
6984 {
6985   Mat_Product    *product = mat->product;
6986   PetscErrorCode ierr;
6987 #if defined(PETSC_HAVE_DEVICE)
6988   PetscBool      match = PETSC_FALSE;
6989   PetscBool      usecpu = PETSC_FALSE;
6990 #else
6991   PetscBool      match = PETSC_TRUE;
6992 #endif
6993 
6994   PetscFunctionBegin;
6995   MatCheckProduct(mat,1);
6996 #if defined(PETSC_HAVE_DEVICE)
6997   if (!product->A->boundtocpu && !product->B->boundtocpu) {
6998     ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
6999   }
7000   if (match) { /* we can always fall back to the CPU if requested */
7001     switch (product->type) {
7002     case MATPRODUCT_AB:
7003       if (product->api_user) {
7004         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
7005         ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7006         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7007       } else {
7008         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
7009         ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7010         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7011       }
7012       break;
7013     case MATPRODUCT_AtB:
7014       if (product->api_user) {
7015         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
7016         ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7017         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7018       } else {
7019         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
7020         ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7021         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7022       }
7023       break;
7024     case MATPRODUCT_PtAP:
7025       if (product->api_user) {
7026         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
7027         ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7028         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7029       } else {
7030         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
7031         ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7032         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7033       }
7034       break;
7035     default:
7036       break;
7037     }
7038     match = (PetscBool)!usecpu;
7039   }
7040 #endif
7041   if (match) {
7042     switch (product->type) {
7043     case MATPRODUCT_AB:
7044     case MATPRODUCT_AtB:
7045     case MATPRODUCT_PtAP:
7046       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7047       break;
7048     default:
7049       break;
7050     }
7051   }
7052   /* fall back to MPIAIJ ops */
7053   if (!mat->ops->productsymbolic) {
7054     ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
7055   }
7056   PetscFunctionReturn(0);
7057 }
7058