xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ff83db7b6187e6e76e718e6806a94af28628dfd2)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
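
   Example usage (a minimal sketch; the communicator, sizes, and preallocation counts below are
   placeholders and error checking is omitted):
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,nz,NULL);            /* used when the communicator has a single process */
     MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);  /* used when the communicator has multiple processes */
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve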
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to use inodes when
23    enough of them exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb,*aav,*bav;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = NULL;
92 
93   ia   = a->i;
94   ib   = b->i;
95   ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
96   ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) {
101       cnt++;
102       goto ok1;
103     }
104     aa = aav + ia[i];
105     for (j=0; j<na; j++) {
106       if (aa[j] != 0.0) goto ok1;
107     }
108     bb = bav + ib[i];
109     for (j=0; j <nb; j++) {
110       if (bb[j] != 0.0) goto ok1;
111     }
112     cnt++;
113 ok1:;
114   }
115   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
116   if (!n0rows) {
117     ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
118     ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
119     PetscFunctionReturn(0);
120   }
121   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
122   cnt  = 0;
123   for (i=0; i<m; i++) {
124     na = ia[i+1] - ia[i];
125     nb = ib[i+1] - ib[i];
126     if (!na && !nb) continue;
127     aa = aav + ia[i];
128     for (j=0; j<na;j++) {
129       if (aa[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134     bb = bav + ib[i];
135     for (j=0; j<nb; j++) {
136       if (bb[j] != 0.0) {
137         rows[cnt++] = rstart + i;
138         goto ok2;
139       }
140     }
141 ok2:;
142   }
143   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
144   ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
145   ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
150 {
151   PetscErrorCode    ierr;
152   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
153   PetscBool         cong;
154 
155   PetscFunctionBegin;
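156   /* When the matrix is assembled and the row and column layouts are congruent, every diagonal
157      entry of an owned row lies in the local diagonal block aij->A, so the diagonal can be set
158      there directly; otherwise fall back to the generic implementation. */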
156   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
157   if (Y->assembled && cong) {
158     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
159   } else {
160     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
161   }
162   PetscFunctionReturn(0);
163 }
164 
165 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
166 {
167   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
168   PetscErrorCode ierr;
169   PetscInt       i,rstart,nrows,*rows;
170 
171   PetscFunctionBegin;
172   *zrows = NULL;
173   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
174   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
175   for (i=0; i<nrows; i++) rows[i] += rstart;
176   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
181 {
182   PetscErrorCode    ierr;
183   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
184   PetscInt          i,n,*garray = aij->garray;
185   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
186   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
187   PetscReal         *work;
188   const PetscScalar *dummy;
189 
190   PetscFunctionBegin;
191   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
192   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
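  /* The get/restore pairs below are not used for their array values; for device matrix types
     they ensure the latest values have been copied to the host before a_aij->a and b_aij->a
     are read directly in the loops that follow. */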
193   ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
194   ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
195   ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
196   ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
197   if (type == NORM_2) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
203     }
204   } else if (type == NORM_1) {
205     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
206       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
207     }
208     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
209       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
210     }
211   } else if (type == NORM_INFINITY) {
212     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
213       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
214     }
215     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
216       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
217     }
218 
219   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
220   if (type == NORM_INFINITY) {
221     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
222   } else {
223     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
224   }
225   ierr = PetscFree(work);CHKERRQ(ierr);
226   if (type == NORM_2) {
227     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
228   }
229   PetscFunctionReturn(0);
230 }
231 
232 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
233 {
234   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
235   IS              sis,gis;
236   PetscErrorCode  ierr;
237   const PetscInt  *isis,*igis;
238   PetscInt        n,*iis,nsis,ngis,rstart,i;
239 
240   PetscFunctionBegin;
241   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
242   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
243   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
244   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
245   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
246   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
247 
248   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
249   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
250   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
251   n    = ngis + nsis;
252   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
253   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
254   for (i=0; i<n; i++) iis[i] += rstart;
255   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
256 
257   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
258   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
259   ierr = ISDestroy(&sis);CHKERRQ(ierr);
260   ierr = ISDestroy(&gis);CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 /*
265   Local utility routine that creates a mapping from the global column
266 number to the local number in the off-diagonal part of the local
267 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
268 a slightly higher hash-table lookup cost; without it, it is not scalable (each process
269 stores an order-N integer array) but access is fast.
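
  As an illustration (not taken from any particular matrix): if garray = {3,7,12}, then
  global column 7 of the off-diagonal block corresponds to local column 1, and the map
  records the value 2 (entries are stored 1-based so that 0 can mean "column not present").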
270 */
271 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
272 {
273   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
274   PetscErrorCode ierr;
275   PetscInt       n = aij->B->cmap->n,i;
276 
277   PetscFunctionBegin;
278   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
279 #if defined(PETSC_USE_CTABLE)
280   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
281   for (i=0; i<n; i++) {
282     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
283   }
284 #else
285   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
286   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
287   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
288 #endif
289   PetscFunctionReturn(0);
290 }
291 
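/*
  The two macros below insert or add a single value at (row,col) of the diagonal (A) or
  off-diagonal (B) sequential block.  Each does a short binary search followed by a linear
  scan within the row; if the location already exists the value is added or overwritten,
  otherwise the row is grown with MatSeqXAIJReallocateAIJ() and later entries are shifted up,
  unless insertions are disabled (nonew == 1 skips the entry, nonew == -1 raises an error) or
  the value is an ignorable zero.
*/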
292 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
293 { \
294     if (col <= lastcol1)  low1 = 0;     \
295     else                 high1 = nrow1; \
296     lastcol1 = col;\
297     while (high1-low1 > 5) { \
298       t = (low1+high1)/2; \
299       if (rp1[t] > col) high1 = t; \
300       else              low1  = t; \
301     } \
302       for (_i=low1; _i<high1; _i++) { \
303         if (rp1[_i] > col) break; \
304         if (rp1[_i] == col) { \
305           if (addv == ADD_VALUES) { \
306             ap1[_i] += value;   \
307             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
308             (void)PetscLogFlops(1.0);   \
309            } \
310           else                    ap1[_i] = value; \
311           inserted = PETSC_TRUE; \
312           goto a_noinsert; \
313         } \
314       }  \
315       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
316       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
317       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
318       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
319       N = nrow1++ - 1; a->nz++; high1++; \
320       /* shift up all the later entries in this row */ \
321       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
322       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
323       rp1[_i] = col;  \
324       ap1[_i] = value;  \
325       A->nonzerostate++;\
326       a_noinsert: ; \
327       ailen[row] = nrow1; \
328 }
329 
330 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
331   { \
332     if (col <= lastcol2) low2 = 0;                        \
333     else high2 = nrow2;                                   \
334     lastcol2 = col;                                       \
335     while (high2-low2 > 5) {                              \
336       t = (low2+high2)/2;                                 \
337       if (rp2[t] > col) high2 = t;                        \
338       else             low2  = t;                         \
339     }                                                     \
340     for (_i=low2; _i<high2; _i++) {                       \
341       if (rp2[_i] > col) break;                           \
342       if (rp2[_i] == col) {                               \
343         if (addv == ADD_VALUES) {                         \
344           ap2[_i] += value;                               \
345           (void)PetscLogFlops(1.0);                       \
346         }                                                 \
347         else                    ap2[_i] = value;          \
348         inserted = PETSC_TRUE;                            \
349         goto b_noinsert;                                  \
350       }                                                   \
351     }                                                     \
352     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
353     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
354     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
355     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
356     N = nrow2++ - 1; b->nz++; high2++;                    \
357     /* shift up all the later entries in this row */      \
358     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
359     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
360     rp2[_i] = col;                                        \
361     ap2[_i] = value;                                      \
362     B->nonzerostate++;                                    \
363     b_noinsert: ;                                         \
364     bilen[row] = nrow2;                                   \
365   }
366 
367 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
368 {
369   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
370   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
371   PetscErrorCode ierr;
372   PetscInt       l,*garray = mat->garray,diag;
373 
374   PetscFunctionBegin;
375   /* code only works for square matrices A */
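  /* v holds the values for the entire locally owned row, ordered by global column number:
     first the off-diagonal (B) entries to the left of the diagonal block, then the diagonal
     (A) block entries, then the remaining off-diagonal entries to the right */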
376 
377   /* find size of row to the left of the diagonal part */
378   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
379   row  = row - diag;
380   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
381     if (garray[b->j[b->i[row]+l]] > diag) break;
382   }
383   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
384 
385   /* diagonal part */
386   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
387 
388   /* right of diagonal part */
389   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
390 #if defined(PETSC_HAVE_DEVICE)
391   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
392 #endif
393   PetscFunctionReturn(0);
394 }
395 
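/*
  MatSetValues_MPIAIJ() routes each entry to one of three destinations: the diagonal block
  aij->A for locally owned rows and columns, the off-diagonal block aij->B for locally owned
  rows with off-process columns (translating global to local column numbers through the
  colmap), or the stash for rows owned by other processes, which are communicated later by
  MatAssemblyBegin/End().
*/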
396 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
397 {
398   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
399   PetscScalar    value = 0.0;
400   PetscErrorCode ierr;
401   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
402   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
403   PetscBool      roworiented = aij->roworiented;
404 
405   /* Some Variables required in the macro */
406   Mat        A                    = aij->A;
407   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
408   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
409   PetscBool  ignorezeroentries    = a->ignorezeroentries;
410   Mat        B                    = aij->B;
411   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
412   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
413   MatScalar  *aa,*ba;
414   /* The variable below is only needed in the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
415    * cannot use "#if defined" inside a macro. */
416   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
417 
418   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
419   PetscInt  nonew;
420   MatScalar *ap1,*ap2;
421 
422   PetscFunctionBegin;
423 #if defined(PETSC_HAVE_DEVICE)
424   if (A->offloadmask == PETSC_OFFLOAD_GPU) {
425     const PetscScalar *dummy;
426     ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr);
427     ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr);
428   }
429   if (B->offloadmask == PETSC_OFFLOAD_GPU) {
430     const PetscScalar *dummy;
431     ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr);
432     ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr);
433   }
434 #endif
435   aa = a->a;
436   ba = b->a;
437   for (i=0; i<m; i++) {
438     if (im[i] < 0) continue;
439     if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
440     if (im[i] >= rstart && im[i] < rend) {
441       row      = im[i] - rstart;
442       lastcol1 = -1;
443       rp1      = aj + ai[row];
444       ap1      = aa + ai[row];
445       rmax1    = aimax[row];
446       nrow1    = ailen[row];
447       low1     = 0;
448       high1    = nrow1;
449       lastcol2 = -1;
450       rp2      = bj + bi[row];
451       ap2      = ba + bi[row];
452       rmax2    = bimax[row];
453       nrow2    = bilen[row];
454       low2     = 0;
455       high2    = nrow2;
456 
457       for (j=0; j<n; j++) {
458         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
459         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
460         if (in[j] >= cstart && in[j] < cend) {
461           col   = in[j] - cstart;
462           nonew = a->nonew;
463           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
464 #if defined(PETSC_HAVE_DEVICE)
465           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
466 #endif
467         } else if (in[j] < 0) continue;
468         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
469         else {
470           if (mat->was_assembled) {
471             if (!aij->colmap) {
472               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
473             }
474 #if defined(PETSC_USE_CTABLE)
475             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
476             col--;
477 #else
478             col = aij->colmap[in[j]] - 1;
479 #endif
480             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
481               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
482               col  =  in[j];
483               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
484               B        = aij->B;
485               b        = (Mat_SeqAIJ*)B->data;
486               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
487               rp2      = bj + bi[row];
488               ap2      = ba + bi[row];
489               rmax2    = bimax[row];
490               nrow2    = bilen[row];
491               low2     = 0;
492               high2    = nrow2;
493               bm       = aij->B->rmap->n;
494               ba       = b->a;
495               inserted = PETSC_FALSE;
496             } else if (col < 0) {
497               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
498                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
499               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
500             }
501           } else col = in[j];
502           nonew = b->nonew;
503           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
504 #if defined(PETSC_HAVE_DEVICE)
505           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
506 #endif
507         }
508       }
509     } else {
510       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
511       if (!aij->donotstash) {
512         mat->assembled = PETSC_FALSE;
513         if (roworiented) {
514           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
515         } else {
516           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
517         }
518       }
519     }
520   }
521   PetscFunctionReturn(0);
522 }
523 
524 /*
525     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
526     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
527     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
528 */
529 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
530 {
531   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
532   Mat            A           = aij->A; /* diagonal part of the matrix */
533   Mat            B           = aij->B; /* offdiagonal part of the matrix */
534   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
535   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
536   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
537   PetscInt       *ailen      = a->ilen,*aj = a->j;
538   PetscInt       *bilen      = b->ilen,*bj = b->j;
539   PetscInt       am          = aij->A->rmap->n,j;
540   PetscInt       diag_so_far = 0,dnz;
541   PetscInt       offd_so_far = 0,onz;
542 
543   PetscFunctionBegin;
544   /* Iterate over all rows of the matrix */
545   for (j=0; j<am; j++) {
546     dnz = onz = 0;
547     /*  Iterate over all non-zero columns of the current row */
548     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
549       /* If column is in the diagonal */
550       if (mat_j[col] >= cstart && mat_j[col] < cend) {
551         aj[diag_so_far++] = mat_j[col] - cstart;
552         dnz++;
553       } else { /* off-diagonal entries */
554         bj[offd_so_far++] = mat_j[col];
555         onz++;
556       }
557     }
558     ailen[j] = dnz;
559     bilen[j] = onz;
560   }
561   PetscFunctionReturn(0);
562 }
563 
564 /*
565     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
566     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
567     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
568     Also, mat->was_assembled has to be false, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
569     would not be valid and the more complex MatSetValues_MPIAIJ() has to be used.
570 */
571 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
572 {
573   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
574   Mat            A      = aij->A; /* diagonal part of the matrix */
575   Mat            B      = aij->B; /* offdiagonal part of the matrix */
576   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
577   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
578   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
579   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
580   PetscInt       *ailen = a->ilen,*aj = a->j;
581   PetscInt       *bilen = b->ilen,*bj = b->j;
582   PetscInt       am     = aij->A->rmap->n,j;
583   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
584   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
585   PetscScalar    *aa = a->a,*ba = b->a;
586 
587   PetscFunctionBegin;
588   /* Iterate over all rows of the matrix */
589   for (j=0; j<am; j++) {
590     dnz_row = onz_row = 0;
591     rowstart_offd = full_offd_i[j];
592     rowstart_diag = full_diag_i[j];
593     /*  Iterate over all non-zero columns of the current row */
594     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
595       /* If column is in the diagonal */
596       if (mat_j[col] >= cstart && mat_j[col] < cend) {
597         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
598         aa[rowstart_diag+dnz_row] = mat_a[col];
599         dnz_row++;
600       } else { /* off-diagonal entries */
601         bj[rowstart_offd+onz_row] = mat_j[col];
602         ba[rowstart_offd+onz_row] = mat_a[col];
603         onz_row++;
604       }
605     }
606     ailen[j] = dnz_row;
607     bilen[j] = onz_row;
608   }
609   PetscFunctionReturn(0);
610 }
611 
612 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
613 {
614   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
615   PetscErrorCode ierr;
616   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
617   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
618 
619   PetscFunctionBegin;
620   for (i=0; i<m; i++) {
621     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
622     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
623     if (idxm[i] >= rstart && idxm[i] < rend) {
624       row = idxm[i] - rstart;
625       for (j=0; j<n; j++) {
626         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
627         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
628         if (idxn[j] >= cstart && idxn[j] < cend) {
629           col  = idxn[j] - cstart;
630           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
631         } else {
632           if (!aij->colmap) {
633             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
634           }
635 #if defined(PETSC_USE_CTABLE)
636           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
637           col--;
638 #else
639           col = aij->colmap[idxn[j]] - 1;
640 #endif
641           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
642           else {
643             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
644           }
645         }
646       }
647     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
648   }
649   PetscFunctionReturn(0);
650 }
651 
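/*
  MatAssemblyBegin_MPIAIJ() starts communicating the stashed entries destined for rows owned
  by other processes; the incoming messages are drained and applied in MatAssemblyEnd_MPIAIJ().
*/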
652 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
653 {
654   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
655   PetscErrorCode ierr;
656   PetscInt       nstash,reallocs;
657 
658   PetscFunctionBegin;
659   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
660 
661   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
662   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
663   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
668 {
669   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
670   PetscErrorCode ierr;
671   PetscMPIInt    n;
672   PetscInt       i,j,rstart,ncols,flg;
673   PetscInt       *row,*col;
674   PetscBool      other_disassembled;
675   PetscScalar    *val;
676 
677   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
678 
679   PetscFunctionBegin;
680   if (!aij->donotstash && !mat->nooffprocentries) {
681     while (1) {
682       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
683       if (!flg) break;
684 
685       for (i=0; i<n;) {
686         /* Now identify the consecutive vals belonging to the same row */
687         for (j=i,rstart=row[j]; j<n; j++) {
688           if (row[j] != rstart) break;
689         }
690         if (j < n) ncols = j-i;
691         else       ncols = n-i;
692         /* Now assemble all these values with a single function call */
693         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
694         i    = j;
695       }
696     }
697     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
698   }
699 #if defined(PETSC_HAVE_DEVICE)
700   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
701   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
702   if (mat->boundtocpu) {
703     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
704     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
705   }
706 #endif
707   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
708   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
709 
710   /* determine if any processor has disassembled; if so, we must
711      also disassemble ourselves so that we may reassemble. */
712   /*
713      if nonzero structure of submatrix B cannot change then we know that
714      no processor disassembled thus we can skip this stuff
715   */
716   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
717     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
718     if (mat->was_assembled && !other_disassembled) {
719 #if defined(PETSC_HAVE_DEVICE)
720       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
721 #endif
722       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
723     }
724   }
725   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
726     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
727   }
728   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
729 #if defined(PETSC_HAVE_DEVICE)
730   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
731 #endif
732   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
733   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
734 
735   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
736 
737   aij->rowvalues = NULL;
738 
739   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
740 
741   /* if no new nonzero locations are allowed in the matrix then only set the matrix nonzero state the first time through */
742   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
743     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
744     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
745   }
746 #if defined(PETSC_HAVE_DEVICE)
747   mat->offloadmask = PETSC_OFFLOAD_BOTH;
748 #endif
749   PetscFunctionReturn(0);
750 }
751 
752 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
753 {
754   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
755   PetscErrorCode ierr;
756 
757   PetscFunctionBegin;
758   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
759   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
760   PetscFunctionReturn(0);
761 }
762 
763 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
764 {
765   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
766   PetscObjectState sA, sB;
767   PetscInt        *lrows;
768   PetscInt         r, len;
769   PetscBool        cong, lch, gch;
770   PetscErrorCode   ierr;
771 
772   PetscFunctionBegin;
773   /* get locally owned rows */
774   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
775   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
776   /* fix right hand side if needed */
777   if (x && b) {
778     const PetscScalar *xx;
779     PetscScalar       *bb;
780 
781     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788 
789   sA = mat->A->nonzerostate;
790   sB = mat->B->nonzerostate;
791 
792   if (diag != 0.0 && cong) {
793     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
794     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
795   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertions */
796     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
797     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
798     PetscInt   nnwA, nnwB;
799     PetscBool  nnzA, nnzB;
800 
801     nnwA = aijA->nonew;
802     nnwB = aijB->nonew;
803     nnzA = aijA->keepnonzeropattern;
804     nnzB = aijB->keepnonzeropattern;
805     if (!nnzA) {
806       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
807       aijA->nonew = 0;
808     }
809     if (!nnzB) {
810       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
811       aijB->nonew = 0;
812     }
813     /* Must zero here before the next loop */
814     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
815     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
816     for (r = 0; r < len; ++r) {
817       const PetscInt row = lrows[r] + A->rmap->rstart;
818       if (row >= A->cmap->N) continue;
819       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
820     }
821     aijA->nonew = nnwA;
822     aijB->nonew = nnwB;
823   } else {
824     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
826   }
827   ierr = PetscFree(lrows);CHKERRQ(ierr);
828   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
829   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
830 
831   /* reduce nonzerostate */
832   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
833   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
834   if (gch) A->nonzerostate++;
835   PetscFunctionReturn(0);
836 }
837 
838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
839 {
840   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode    ierr;
842   PetscMPIInt       n = A->rmap->n;
843   PetscInt          i,j,r,m,len = 0;
844   PetscInt          *lrows,*owners = A->rmap->range;
845   PetscMPIInt       p = 0;
846   PetscSFNode       *rrows;
847   PetscSF           sf;
848   const PetscScalar *xx;
849   PetscScalar       *bb,*mask;
850   Vec               xmask,lmask;
851   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
852   const PetscInt    *aj, *ii,*ridx;
853   PetscScalar       *aa;
854 
855   PetscFunctionBegin;
856   /* Create SF where leaves are input rows and roots are owned rows */
857   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
858   for (r = 0; r < n; ++r) lrows[r] = -1;
859   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
860   for (r = 0; r < N; ++r) {
861     const PetscInt idx   = rows[r];
862     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
863     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
864       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
865     }
866     rrows[r].rank  = p;
867     rrows[r].index = rows[r] - owners[p];
868   }
869   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
870   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
871   /* Collect flags for rows to be zeroed */
872   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
873   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
874   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
875   /* Compress and put in row numbers */
876   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
877   /* zero diagonal part of matrix */
878   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
879   /* handle off diagonal part of matrix */
880   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
881   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
882   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
883   for (i=0; i<len; i++) bb[lrows[i]] = 1;
884   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
885   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
888   if (x && b) { /* this code is buggy when the row and column layouts don't match */
889     PetscBool cong;
890 
891     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
892     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
893     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
894     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
895     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
896     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
897   }
898   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
899   /* remove zeroed rows of off diagonal matrix */
900   ii = aij->i;
901   for (i=0; i<len; i++) {
902     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
903   }
904   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
905   if (aij->compressedrow.use) {
906     m    = aij->compressedrow.nrows;
907     ii   = aij->compressedrow.i;
908     ridx = aij->compressedrow.rindex;
909     for (i=0; i<m; i++) {
910       n  = ii[i+1] - ii[i];
911       aj = aij->j + ii[i];
912       aa = aij->a + ii[i];
913 
914       for (j=0; j<n; j++) {
915         if (PetscAbsScalar(mask[*aj])) {
916           if (b) bb[*ridx] -= *aa*xx[*aj];
917           *aa = 0.0;
918         }
919         aa++;
920         aj++;
921       }
922       ridx++;
923     }
924   } else { /* do not use compressed row format */
925     m = l->B->rmap->n;
926     for (i=0; i<m; i++) {
927       n  = ii[i+1] - ii[i];
928       aj = aij->j + ii[i];
929       aa = aij->a + ii[i];
930       for (j=0; j<n; j++) {
931         if (PetscAbsScalar(mask[*aj])) {
932           if (b) bb[i] -= *aa*xx[*aj];
933           *aa = 0.0;
934         }
935         aa++;
936         aj++;
937       }
938     }
939   }
940   if (x && b) {
941     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
942     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
943   }
944   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
945   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
946   ierr = PetscFree(lrows);CHKERRQ(ierr);
947 
948   /* only change matrix nonzero state if pattern was allowed to be changed */
949   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
950     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
951     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
952   }
953   PetscFunctionReturn(0);
954 }
955 
956 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
957 {
958   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
959   PetscErrorCode ierr;
960   PetscInt       nt;
961   VecScatter     Mvctx = a->Mvctx;
962 
963   PetscFunctionBegin;
964   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
965   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
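  /* overlap communication with computation: start scattering the needed ghost values of xx,
     multiply by the local diagonal block while the messages are in flight, complete the
     scatter, then add in the off-diagonal (ghost) contribution */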
966   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
967   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
968   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
969   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 
973 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
974 {
975   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
976   PetscErrorCode ierr;
977 
978   PetscFunctionBegin;
979   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
980   PetscFunctionReturn(0);
981 }
982 
983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987   VecScatter     Mvctx = a->Mvctx;
988 
989   PetscFunctionBegin;
990   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
991   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
992   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
993   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
994   PetscFunctionReturn(0);
995 }
996 
997 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
998 {
999   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1000   PetscErrorCode ierr;
1001 
1002   PetscFunctionBegin;
1003   /* do nondiagonal part */
1004   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1005   /* do local part */
1006   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1007   /* add partial results together */
1008   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1009   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1014 {
1015   MPI_Comm       comm;
1016   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1017   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1018   IS             Me,Notme;
1019   PetscErrorCode ierr;
1020   PetscInt       M,N,first,last,*notme,i;
1021   PetscBool      lf;
1022   PetscMPIInt    size;
1023 
1024   PetscFunctionBegin;
1025   /* Easy test: the diagonal blocks must be transposes of each other */
1026   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1027   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1028   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1029   if (!*f) PetscFunctionReturn(0);
1030   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1031   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1032   if (size == 1) PetscFunctionReturn(0);
1033 
1034   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1035   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1036   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1037   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1038   for (i=0; i<first; i++) notme[i] = i;
1039   for (i=last; i<M; i++) notme[i-last+first] = i;
1040   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1041   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1042   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1043   Aoff = Aoffs[0];
1044   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1045   Boff = Boffs[0];
1046   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1047   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1048   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1049   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1050   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1051   ierr = PetscFree(notme);CHKERRQ(ierr);
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1056 {
1057   PetscErrorCode ierr;
1058 
1059   PetscFunctionBegin;
1060   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1065 {
1066   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1067   PetscErrorCode ierr;
1068 
1069   PetscFunctionBegin;
1070   /* do nondiagonal part */
1071   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1072   /* do local part */
1073   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1074   /* add partial results together */
1075   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1076   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 /*
1081   This only works correctly for square matrices where the subblock A->A is the
1082    diagonal block
1083 */
1084 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1085 {
1086   PetscErrorCode ierr;
1087   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1088 
1089   PetscFunctionBegin;
1090   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1091   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1092   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1097 {
1098   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1103   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1108 {
1109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1110   PetscErrorCode ierr;
1111 
1112   PetscFunctionBegin;
1113 #if defined(PETSC_USE_LOG)
1114   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1115 #endif
1116   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1117   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1118   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1119   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1120 #if defined(PETSC_USE_CTABLE)
1121   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1122 #else
1123   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1124 #endif
1125   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1126   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1127   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1128   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1129   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1130   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1131 
1132   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1133   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1134 
1135   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1136   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1137   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1139   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1140   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1141   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1142   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1143   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1144   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1145 #if defined(PETSC_HAVE_CUDA)
1146   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1147 #endif
1148 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1150 #endif
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
1152 #if defined(PETSC_HAVE_ELEMENTAL)
1153   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1154 #endif
1155 #if defined(PETSC_HAVE_SCALAPACK)
1156   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1157 #endif
1158 #if defined(PETSC_HAVE_HYPRE)
1159   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1160   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1161 #endif
1162   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1164   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1165   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1168 #if defined(PETSC_HAVE_MKL_SPARSE)
1169   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1170 #endif
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1178 {
1179   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1180   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1181   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1182   const PetscInt    *garray = aij->garray;
1183   const PetscScalar *aa,*ba;
1184   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1185   PetscInt          *rowlens;
1186   PetscInt          *colidxs;
1187   PetscScalar       *matvals;
1188   PetscErrorCode    ierr;
1189 
1190   PetscFunctionBegin;
1191   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1192 
1193   M  = mat->rmap->N;
1194   N  = mat->cmap->N;
1195   m  = mat->rmap->n;
1196   rs = mat->rmap->rstart;
1197   cs = mat->cmap->rstart;
1198   nz = A->nz + B->nz;
1199 
1200   /* write matrix header */
1201   header[0] = MAT_FILE_CLASSID;
1202   header[1] = M; header[2] = N; header[3] = nz;
1203   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1204   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1205 
1206   /* fill in and store row lengths  */
1207   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1208   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1209   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1210   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1211 
1212   /* fill in and store column indices */
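  /* for each row, write the off-diagonal (B) columns whose global index precedes the owned
     column range, then the diagonal (A) block columns shifted to global numbering, then the
     remaining B columns, so that each row's column indices appear in ascending global order */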
1213   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1214   for (cnt=0, i=0; i<m; i++) {
1215     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1216       if (garray[B->j[jb]] > cs) break;
1217       colidxs[cnt++] = garray[B->j[jb]];
1218     }
1219     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1220       colidxs[cnt++] = A->j[ja] + cs;
1221     for (; jb<B->i[i+1]; jb++)
1222       colidxs[cnt++] = garray[B->j[jb]];
1223   }
1224   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1225   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1226   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1227 
1228   /* fill in and store nonzero values */
1229   ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
1230   ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
1231   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1232   for (cnt=0, i=0; i<m; i++) {
1233     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1234       if (garray[B->j[jb]] > cs) break;
1235       matvals[cnt++] = ba[jb];
1236     }
1237     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1238       matvals[cnt++] = aa[ja];
1239     for (; jb<B->i[i+1]; jb++)
1240       matvals[cnt++] = ba[jb];
1241   }
1242   ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
1243   ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
1244   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1245   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1246   ierr = PetscFree(matvals);CHKERRQ(ierr);
1247 
1248   /* write block size option to the viewer's .info file */
1249   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #include <petscdraw.h>
1254 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1255 {
1256   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1257   PetscErrorCode    ierr;
1258   PetscMPIInt       rank = aij->rank,size = aij->size;
1259   PetscBool         isdraw,iascii,isbinary;
1260   PetscViewer       sviewer;
1261   PetscViewerFormat format;
1262 
1263   PetscFunctionBegin;
1264   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1265   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1266   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1267   if (iascii) {
1268     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1269     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1270       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1271       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1272       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1273       for (i=0; i<(PetscInt)size; i++) {
1274         nmax = PetscMax(nmax,nz[i]);
1275         nmin = PetscMin(nmin,nz[i]);
1276         navg += nz[i];
1277       }
1278       ierr = PetscFree(nz);CHKERRQ(ierr);
1279       navg = navg/size;
1280       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1281       PetscFunctionReturn(0);
1282     }
1283     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1284     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1285       MatInfo   info;
1286       PetscBool inodes;
1287 
1288       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
1289       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1290       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1291       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1292       if (!inodes) {
1293         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1294                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1295       } else {
1296         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1297                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1298       }
1299       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1300       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1301       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1302       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1303       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1304       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1305       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1306       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1307       PetscFunctionReturn(0);
1308     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1309       PetscInt inodecount,inodelimit,*inodes;
1310       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1311       if (inodes) {
1312         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1313       } else {
1314         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1315       }
1316       PetscFunctionReturn(0);
1317     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1318       PetscFunctionReturn(0);
1319     }
1320   } else if (isbinary) {
1321     if (size == 1) {
1322       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1323       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1324     } else {
1325       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1326     }
1327     PetscFunctionReturn(0);
1328   } else if (iascii && size == 1) {
1329     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1330     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1331     PetscFunctionReturn(0);
1332   } else if (isdraw) {
1333     PetscDraw draw;
1334     PetscBool isnull;
1335     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1336     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1337     if (isnull) PetscFunctionReturn(0);
1338   }
1339 
1340   { /* assemble the entire matrix onto first processor */
1341     Mat A = NULL, Av;
1342     IS  isrow,iscol;
1343 
1344     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1345     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1346     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1347     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1348 /*  The commented-out code below uses MatCreateSubMatrices() instead */
1349 /*
1350     Mat *AA, A = NULL, Av;
1351     IS  isrow,iscol;
1352 
1353     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1354     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1355     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1356     if (!rank) {
1357        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1358        A    = AA[0];
1359        Av   = AA[0];
1360     }
1361     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1362 */
1363     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1364     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1365     /*
1366        Every process has to participate in drawing the matrix, since the graphics waits are
1367        synchronized across all processes that share the PetscDraw object
1368     */
1369     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1370     if (!rank) {
1371       if (((PetscObject)mat)->name) {
1372         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1373       }
1374       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1375     }
1376     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1377     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1378     ierr = MatDestroy(&A);CHKERRQ(ierr);
1379   }
1380   PetscFunctionReturn(0);
1381 }
1382 
1383 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1384 {
1385   PetscErrorCode ierr;
1386   PetscBool      iascii,isdraw,issocket,isbinary;
1387 
1388   PetscFunctionBegin;
1389   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1390   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1391   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1392   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1393   if (iascii || isdraw || isbinary || issocket) {
1394     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1395   }
1396   PetscFunctionReturn(0);
1397 }
1398 
1399 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1400 {
1401   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1402   PetscErrorCode ierr;
1403   Vec            bb1 = NULL;
1404   PetscBool      hasop;
1405 
1406   PetscFunctionBegin;
1407   if (flag == SOR_APPLY_UPPER) {
1408     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1409     PetscFunctionReturn(0);
1410   }
1411 
1412   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1413     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1414   }
1415 
1416   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1417     if (flag & SOR_ZERO_INITIAL_GUESS) {
1418       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1419       its--;
1420     }
1421 
1422     while (its--) {
1423       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1424       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1425 
1426       /* update rhs: bb1 = bb - B*x */
1427       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1428       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1429 
1430       /* local sweep */
1431       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1432     }
1433   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1434     if (flag & SOR_ZERO_INITIAL_GUESS) {
1435       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1436       its--;
1437     }
1438     while (its--) {
1439       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1440       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1441 
1442       /* update rhs: bb1 = bb - B*x */
1443       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1444       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1445 
1446       /* local sweep */
1447       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1448     }
1449   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1450     if (flag & SOR_ZERO_INITIAL_GUESS) {
1451       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1452       its--;
1453     }
1454     while (its--) {
1455       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1456       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1457 
1458       /* update rhs: bb1 = bb - B*x */
1459       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1460       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1461 
1462       /* local sweep */
1463       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1464     }
1465   } else if (flag & SOR_EISENSTAT) {
1466     Vec xx1;
1467 
1468     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1469     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1470 
1471     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1472     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1473     if (!mat->diag) {
1474       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1475       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1476     }
1477     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1478     if (hasop) {
1479       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1480     } else {
1481       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1482     }
1483     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1484 
1485     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1486 
1487     /* local sweep */
1488     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1489     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1490     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1491   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1492 
1493   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1494 
1495   matin->factorerrortype = mat->A->factorerrortype;
1496   PetscFunctionReturn(0);
1497 }
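
/*
   Illustrative usage sketch (not part of this source file): the routine above is reached
   through the public MatSOR() interface.  The call below, assuming an assembled MATMPIAIJ
   matrix A and conforming vectors b and x, performs two outer iterations of processor-local
   symmetric SOR with omega = 1.0, one local sweep per iteration, and a zero initial guess.

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);
*/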
1498 
1499 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1500 {
1501   Mat            aA,aB,Aperm;
1502   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1503   PetscScalar    *aa,*ba;
1504   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1505   PetscSF        rowsf,sf;
1506   IS             parcolp = NULL;
1507   PetscBool      done;
1508   PetscErrorCode ierr;
1509 
1510   PetscFunctionBegin;
1511   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1512   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1513   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1514   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1515 
1516   /* Invert row permutation to find out where my rows should go */
1517   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1518   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1519   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1520   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1521   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1522   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1523 
1524   /* Invert column permutation to find out where my columns should go */
1525   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1526   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1527   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1528   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1529   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1530   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1531   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1532 
1533   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1534   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1535   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1536 
1537   /* Find out where my gcols should go */
1538   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1539   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1540   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1541   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1542   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1543   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
1544   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
1545   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1546 
1547   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1548   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1549   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1550   for (i=0; i<m; i++) {
1551     PetscInt    row = rdest[i];
1552     PetscMPIInt rowner;
1553     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1554     for (j=ai[i]; j<ai[i+1]; j++) {
1555       PetscInt    col = cdest[aj[j]];
1556       PetscMPIInt cowner;
1557       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1558       if (rowner == cowner) dnnz[i]++;
1559       else onnz[i]++;
1560     }
1561     for (j=bi[i]; j<bi[i+1]; j++) {
1562       PetscInt    col = gcdest[bj[j]];
1563       PetscMPIInt cowner;
1564       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1565       if (rowner == cowner) dnnz[i]++;
1566       else onnz[i]++;
1567     }
1568   }
1569   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
1570   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
1571   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
1572   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
1573   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1574 
1575   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1576   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1577   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1578   for (i=0; i<m; i++) {
1579     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1580     PetscInt j0,rowlen;
1581     rowlen = ai[i+1] - ai[i];
1582     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of local rows m (the scratch array length), so insert in batches */
1583       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1584       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1585     }
1586     rowlen = bi[i+1] - bi[i];
1587     for (j0=j=0; j<rowlen; j0=j) {
1588       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1589       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1590     }
1591   }
1592   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1593   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1594   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1595   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1596   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1597   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1598   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1599   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1600   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1601   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1602   *B = Aperm;
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1607 {
1608   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1609   PetscErrorCode ierr;
1610 
1611   PetscFunctionBegin;
1612   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1613   if (ghosts) *ghosts = aij->garray;
1614   PetscFunctionReturn(0);
1615 }
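
/*
   Illustrative usage sketch (not part of this source file): retrieving the ghost
   (off-process) column indices exposed by the routine above, assuming an assembled
   MATMPIAIJ matrix A.  The returned array is owned by the matrix and must not be freed.

     PetscInt       nghosts;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
*/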
1616 
1617 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1618 {
1619   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1620   Mat            A    = mat->A,B = mat->B;
1621   PetscErrorCode ierr;
1622   PetscLogDouble isend[5],irecv[5];
1623 
1624   PetscFunctionBegin;
1625   info->block_size = 1.0;
1626   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1627 
1628   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1629   isend[3] = info->memory;  isend[4] = info->mallocs;
1630 
1631   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1632 
1633   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1634   isend[3] += info->memory;  isend[4] += info->mallocs;
1635   if (flag == MAT_LOCAL) {
1636     info->nz_used      = isend[0];
1637     info->nz_allocated = isend[1];
1638     info->nz_unneeded  = isend[2];
1639     info->memory       = isend[3];
1640     info->mallocs      = isend[4];
1641   } else if (flag == MAT_GLOBAL_MAX) {
1642     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1643 
1644     info->nz_used      = irecv[0];
1645     info->nz_allocated = irecv[1];
1646     info->nz_unneeded  = irecv[2];
1647     info->memory       = irecv[3];
1648     info->mallocs      = irecv[4];
1649   } else if (flag == MAT_GLOBAL_SUM) {
1650     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1651 
1652     info->nz_used      = irecv[0];
1653     info->nz_allocated = irecv[1];
1654     info->nz_unneeded  = irecv[2];
1655     info->memory       = irecv[3];
1656     info->mallocs      = irecv[4];
1657   }
1658   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1659   info->fill_ratio_needed = 0;
1660   info->factor_mallocs    = 0;
1661   PetscFunctionReturn(0);
1662 }
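
/*
   Illustrative usage sketch (not part of this source file): querying global nonzero counts
   through the public MatGetInfo() interface, which dispatches to the routine above for
   MATMPIAIJ matrices.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/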
1663 
1664 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1665 {
1666   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1667   PetscErrorCode ierr;
1668 
1669   PetscFunctionBegin;
1670   switch (op) {
1671   case MAT_NEW_NONZERO_LOCATIONS:
1672   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1673   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1674   case MAT_KEEP_NONZERO_PATTERN:
1675   case MAT_NEW_NONZERO_LOCATION_ERR:
1676   case MAT_USE_INODES:
1677   case MAT_IGNORE_ZERO_ENTRIES:
1678     MatCheckPreallocated(A,1);
1679     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1680     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1681     break;
1682   case MAT_ROW_ORIENTED:
1683     MatCheckPreallocated(A,1);
1684     a->roworiented = flg;
1685 
1686     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1687     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1688     break;
1689   case MAT_FORCE_DIAGONAL_ENTRIES:
1690   case MAT_SORTED_FULL:
1691     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1692     break;
1693   case MAT_IGNORE_OFF_PROC_ENTRIES:
1694     a->donotstash = flg;
1695     break;
1696   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1697   case MAT_SPD:
1698   case MAT_SYMMETRIC:
1699   case MAT_STRUCTURALLY_SYMMETRIC:
1700   case MAT_HERMITIAN:
1701   case MAT_SYMMETRY_ETERNAL:
1702     break;
1703   case MAT_SUBMAT_SINGLEIS:
1704     A->submat_singleis = flg;
1705     break;
1706   case MAT_STRUCTURE_ONLY:
1707     /* The option is handled directly by MatSetOption() */
1708     break;
1709   default:
1710     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1711   }
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1716 {
1717   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1718   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1719   PetscErrorCode ierr;
1720   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1721   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1722   PetscInt       *cmap,*idx_p;
1723 
1724   PetscFunctionBegin;
1725   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1726   mat->getrowactive = PETSC_TRUE;
1727 
1728   if (!mat->rowvalues && (idx || v)) {
1729     /*
1730         allocate enough space to hold information from the longest row.
1731     */
1732     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1733     PetscInt   max = 1,tmp;
1734     for (i=0; i<matin->rmap->n; i++) {
1735       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1736       if (max < tmp) max = tmp;
1737     }
1738     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1739   }
1740 
1741   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1742   lrow = row - rstart;
1743 
1744   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1745   if (!v)   {pvA = NULL; pvB = NULL;}
1746   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1747   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1748   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1749   nztot = nzA + nzB;
1750 
1751   cmap = mat->garray;
1752   if (v  || idx) {
1753     if (nztot) {
1754       /* Sort by increasing column numbers, assuming A and B already sorted */
1755       PetscInt imark = -1;
1756       if (v) {
1757         *v = v_p = mat->rowvalues;
1758         for (i=0; i<nzB; i++) {
1759           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1760           else break;
1761         }
1762         imark = i;
1763         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1764         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1765       }
1766       if (idx) {
1767         *idx = idx_p = mat->rowindices;
1768         if (imark > -1) {
1769           for (i=0; i<imark; i++) {
1770             idx_p[i] = cmap[cworkB[i]];
1771           }
1772         } else {
1773           for (i=0; i<nzB; i++) {
1774             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1775             else break;
1776           }
1777           imark = i;
1778         }
1779         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1780         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1781       }
1782     } else {
1783       if (idx) *idx = NULL;
1784       if (v)   *v   = NULL;
1785     }
1786   }
1787   *nz  = nztot;
1788   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1789   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1790   PetscFunctionReturn(0);
1791 }
1792 
1793 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1794 {
1795   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1796 
1797   PetscFunctionBegin;
1798   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1799   aij->getrowactive = PETSC_FALSE;
1800   PetscFunctionReturn(0);
1801 }
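
/*
   Illustrative usage sketch (not part of this source file): the standard pairing of
   MatGetRow()/MatRestoreRow() served by the two routines above.  Only locally owned rows
   may be requested, and each MatGetRow() must be matched by a MatRestoreRow() before the
   next row is fetched.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       (use ncols, cols[], vals[] here)
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/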
1802 
1803 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1804 {
1805   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1806   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1807   PetscErrorCode ierr;
1808   PetscInt       i,j,cstart = mat->cmap->rstart;
1809   PetscReal      sum = 0.0;
1810   MatScalar      *v;
1811 
1812   PetscFunctionBegin;
1813   if (aij->size == 1) {
1814     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1815   } else {
1816     if (type == NORM_FROBENIUS) {
1817       v = amat->a;
1818       for (i=0; i<amat->nz; i++) {
1819         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1820       }
1821       v = bmat->a;
1822       for (i=0; i<bmat->nz; i++) {
1823         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1824       }
1825       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1826       *norm = PetscSqrtReal(*norm);
1827       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1828     } else if (type == NORM_1) { /* max column norm */
1829       PetscReal *tmp,*tmp2;
1830       PetscInt  *jj,*garray = aij->garray;
1831       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1832       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1833       *norm = 0.0;
1834       v     = amat->a; jj = amat->j;
1835       for (j=0; j<amat->nz; j++) {
1836         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1837       }
1838       v = bmat->a; jj = bmat->j;
1839       for (j=0; j<bmat->nz; j++) {
1840         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1841       }
1842       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1843       for (j=0; j<mat->cmap->N; j++) {
1844         if (tmp2[j] > *norm) *norm = tmp2[j];
1845       }
1846       ierr = PetscFree(tmp);CHKERRQ(ierr);
1847       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1848       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1849     } else if (type == NORM_INFINITY) { /* max row norm */
1850       PetscReal ntemp = 0.0;
1851       for (j=0; j<aij->A->rmap->n; j++) {
1852         v   = amat->a + amat->i[j];
1853         sum = 0.0;
1854         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1855           sum += PetscAbsScalar(*v); v++;
1856         }
1857         v = bmat->a + bmat->i[j];
1858         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1859           sum += PetscAbsScalar(*v); v++;
1860         }
1861         if (sum > ntemp) ntemp = sum;
1862       }
1863       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1864       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1865     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1866   }
1867   PetscFunctionReturn(0);
1868 }
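
/*
   Illustrative usage sketch (not part of this source file): the norms supported by the
   routine above, requested through the public MatNorm() interface (the 2-norm is not
   supported for this type).

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/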
1869 
1870 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1871 {
1872   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1873   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1874   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1875   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1876   PetscErrorCode  ierr;
1877   Mat             B,A_diag,*B_diag;
1878   const MatScalar *pbv,*bv;
1879 
1880   PetscFunctionBegin;
1881   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1882   ai = Aloc->i; aj = Aloc->j;
1883   bi = Bloc->i; bj = Bloc->j;
1884   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1885     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1886     PetscSFNode          *oloc;
1887     PETSC_UNUSED PetscSF sf;
1888 
1889     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1890     /* compute d_nnz for preallocation */
1891     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
1892     for (i=0; i<ai[ma]; i++) {
1893       d_nnz[aj[i]]++;
1894     }
1895     /* compute local off-diagonal contributions */
1896     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
1897     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1898     /* map those to global */
1899     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1900     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1901     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1902     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
1903     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1904     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1905     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1906 
1907     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1908     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1909     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1910     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1911     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1912     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1913   } else {
1914     B    = *matout;
1915     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1916   }
1917 
1918   b           = (Mat_MPIAIJ*)B->data;
1919   A_diag      = a->A;
1920   B_diag      = &b->A;
1921   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1922   A_diag_ncol = A_diag->cmap->N;
1923   B_diag_ilen = sub_B_diag->ilen;
1924   B_diag_i    = sub_B_diag->i;
1925 
1926   /* Set ilen for diagonal of B */
1927   for (i=0; i<A_diag_ncol; i++) {
1928     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1929   }
1930 
1931   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1932      very quickly (i.e., without using MatSetValues()), because all writes are local. */
1933   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
1934 
1935   /* copy over the B part */
1936   ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
1937   ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
1938   pbv  = bv;
1939   row  = A->rmap->rstart;
1940   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1941   cols_tmp = cols;
1942   for (i=0; i<mb; i++) {
1943     ncol = bi[i+1]-bi[i];
1944     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
1945     row++;
1946     pbv += ncol; cols_tmp += ncol;
1947   }
1948   ierr = PetscFree(cols);CHKERRQ(ierr);
1949   ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);
1950 
1951   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1952   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1953   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1954     *matout = B;
1955   } else {
1956     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1957   }
1958   PetscFunctionReturn(0);
1959 }
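
/*
   Illustrative usage sketch (not part of this source file): the two ways the routine above
   is reached through the public MatTranspose() interface.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
   creates a new matrix At = A^T, while
     ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
   replaces A by its transpose in place.
*/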
1960 
1961 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1962 {
1963   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1964   Mat            a    = aij->A,b = aij->B;
1965   PetscErrorCode ierr;
1966   PetscInt       s1,s2,s3;
1967 
1968   PetscFunctionBegin;
1969   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1970   if (rr) {
1971     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1972     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1973     /* Overlap communication with computation. */
1974     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1975   }
1976   if (ll) {
1977     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1978     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1979     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1980   }
1981   /* scale  the diagonal block */
1982   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1983 
1984   if (rr) {
1985     /* Do a scatter end and then right scale the off-diagonal block */
1986     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1987     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
1988   }
1989   PetscFunctionReturn(0);
1990 }
1991 
1992 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1993 {
1994   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1995   PetscErrorCode ierr;
1996 
1997   PetscFunctionBegin;
1998   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2003 {
2004   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2005   Mat            a,b,c,d;
2006   PetscBool      flg;
2007   PetscErrorCode ierr;
2008 
2009   PetscFunctionBegin;
2010   a = matA->A; b = matA->B;
2011   c = matB->A; d = matB->B;
2012 
2013   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2014   if (flg) {
2015     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2016   }
2017   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2022 {
2023   PetscErrorCode ierr;
2024   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2025   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2026 
2027   PetscFunctionBegin;
2028   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2029   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2030     /* Because of the column compression in the off-process part of the matrix a->B,
2031        the number of columns in a->B and b->B may differ, hence we cannot call
2032        MatCopy() directly on the two parts. If need be, a copy more efficient than
2033        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2034        then copying the submatrices */
2035     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2036   } else {
2037     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2038     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2039   }
2040   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2041   PetscFunctionReturn(0);
2042 }
2043 
2044 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2045 {
2046   PetscErrorCode ierr;
2047 
2048   PetscFunctionBegin;
2049   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2050   PetscFunctionReturn(0);
2051 }
2052 
2053 /*
2054    Computes the number of nonzeros per row needed for preallocation when X and Y
2055    have different nonzero structure.
2056 */
2057 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2058 {
2059   PetscInt       i,j,k,nzx,nzy;
2060 
2061   PetscFunctionBegin;
2062   /* Set the number of nonzeros in the new matrix */
2063   for (i=0; i<m; i++) {
2064     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2065     nzx = xi[i+1] - xi[i];
2066     nzy = yi[i+1] - yi[i];
2067     nnz[i] = 0;
2068     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2069       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2070       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2071       nnz[i]++;
2072     }
2073     for (; k<nzy; k++) nnz[i]++;
2074   }
2075   PetscFunctionReturn(0);
2076 }
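
/*
   Worked example for the merge count above (illustrative, not part of this source file):
   if row i of X has global columns {1,4,7} and row i of Y has global columns {2,4,9},
   the merged set is {1,2,4,7,9}, so nnz[i] = 5; the shared column 4 is counted only once.
*/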
2077 
2078 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2079 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2080 {
2081   PetscErrorCode ierr;
2082   PetscInt       m = Y->rmap->N;
2083   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2084   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2085 
2086   PetscFunctionBegin;
2087   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2088   PetscFunctionReturn(0);
2089 }
2090 
2091 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2092 {
2093   PetscErrorCode ierr;
2094   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2095 
2096   PetscFunctionBegin;
2097   if (str == SAME_NONZERO_PATTERN) {
2098     ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
2099     ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
2100   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
2101     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2102   } else {
2103     Mat      B;
2104     PetscInt *nnz_d,*nnz_o;
2105 
2106     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2107     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2108     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2109     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2110     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2111     ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
2112     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2113     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2114     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2115     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2116     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2117     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2118     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2119   }
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2124 
2125 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2126 {
2127 #if defined(PETSC_USE_COMPLEX)
2128   PetscErrorCode ierr;
2129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2130 
2131   PetscFunctionBegin;
2132   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2133   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2134 #else
2135   PetscFunctionBegin;
2136 #endif
2137   PetscFunctionReturn(0);
2138 }
2139 
2140 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2141 {
2142   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2143   PetscErrorCode ierr;
2144 
2145   PetscFunctionBegin;
2146   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2147   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2152 {
2153   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2154   PetscErrorCode ierr;
2155 
2156   PetscFunctionBegin;
2157   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2158   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2159   PetscFunctionReturn(0);
2160 }
2161 
2162 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2163 {
2164   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2165   PetscErrorCode    ierr;
2166   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2167   PetscScalar       *va,*vv;
2168   Vec               vB,vA;
2169   const PetscScalar *vb;
2170 
2171   PetscFunctionBegin;
2172   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2173   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2174 
2175   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2176   if (idx) {
2177     for (i=0; i<m; i++) {
2178       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2179     }
2180   }
2181 
2182   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2183   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2184   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2185 
2186   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2187   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2188   for (i=0; i<m; i++) {
2189     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2190       vv[i] = vb[i];
2191       if (idx) idx[i] = a->garray[idxb[i]];
2192     } else {
2193       vv[i] = va[i];
2194       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2195         idx[i] = a->garray[idxb[i]];
2196     }
2197   }
2198   ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr);
2199   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2200   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2201   ierr = PetscFree(idxb);CHKERRQ(ierr);
2202   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2203   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2204   PetscFunctionReturn(0);
2205 }
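
/*
   Illustrative usage sketch (not part of this source file): calling the routine above
   through the public MatGetRowMaxAbs() interface, assuming an assembled MATMPIAIJ matrix A.
   The vector v must have the same parallel row layout as A; idx[] is optional.

     Vec      v;
     PetscInt m,*idx;
     ierr = MatCreateVecs(A,NULL,&v);CHKERRQ(ierr);
     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
     ierr = PetscFree(idx);CHKERRQ(ierr);
     ierr = VecDestroy(&v);CHKERRQ(ierr);
*/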
2206 
2207 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2208 {
2209   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2210   PetscInt          m = A->rmap->n,n = A->cmap->n;
2211   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2212   PetscInt          *cmap  = mat->garray;
2213   PetscInt          *diagIdx, *offdiagIdx;
2214   Vec               diagV, offdiagV;
2215   PetscScalar       *a, *diagA, *offdiagA;
2216   const PetscScalar *ba,*bav;
2217   PetscInt          r,j,col,ncols,*bi,*bj;
2218   PetscErrorCode    ierr;
2219   Mat               B = mat->B;
2220   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2221 
2222   PetscFunctionBegin;
2223   /* When a single process holds the entire matrix A and the other processes have no entries */
2224   if (A->cmap->N == n) {
2225     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2226     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2227     ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2228     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2229     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2230     PetscFunctionReturn(0);
2231   } else if (n == 0) {
2232     if (m) {
2233       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2234       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2235       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2236     }
2237     PetscFunctionReturn(0);
2238   }
2239 
2240   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2241   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2242   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2243   ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2244 
2245   /* Get offdiagIdx[] for implicit 0.0 */
2246   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2247   ba   = bav;
2248   bi   = b->i;
2249   bj   = b->j;
2250   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2251   for (r = 0; r < m; r++) {
2252     ncols = bi[r+1] - bi[r];
2253     if (ncols == A->cmap->N - n) { /* Brow is dense */
2254       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2255     } else { /* Brow is sparse, so we already KNOW the minimum absolute value in this row is 0.0 (there is an implicit zero) */
2256       offdiagA[r] = 0.0;
2257 
2258       /* Find first hole in the cmap */
2259       for (j=0; j<ncols; j++) {
2260         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2261         if (col > j && j < cstart) {
2262           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2263           break;
2264         } else if (col > j + n && j >= cstart) {
2265           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2266           break;
2267         }
2268       }
2269       if (j == ncols && ncols < A->cmap->N - n) {
2270         /* a hole is outside compressed Bcols */
2271         if (ncols == 0) {
2272           if (cstart) {
2273             offdiagIdx[r] = 0;
2274           } else offdiagIdx[r] = cend;
2275         } else { /* ncols > 0 */
2276           offdiagIdx[r] = cmap[ncols-1] + 1;
2277           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2278         }
2279       }
2280     }
2281 
2282     for (j=0; j<ncols; j++) {
2283       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2284       ba++; bj++;
2285     }
2286   }
2287 
2288   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2289   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2290   for (r = 0; r < m; ++r) {
2291     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2292       a[r]   = diagA[r];
2293       if (idx) idx[r] = cstart + diagIdx[r];
2294     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2295       a[r] = diagA[r];
2296       if (idx) {
2297         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2298           idx[r] = cstart + diagIdx[r];
2299         } else idx[r] = offdiagIdx[r];
2300       }
2301     } else {
2302       a[r]   = offdiagA[r];
2303       if (idx) idx[r] = offdiagIdx[r];
2304     }
2305   }
2306   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2307   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2308   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2309   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2310   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2311   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2312   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2313   PetscFunctionReturn(0);
2314 }
2315 
2316 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2317 {
2318   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2319   PetscInt          m = A->rmap->n,n = A->cmap->n;
2320   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2321   PetscInt          *cmap  = mat->garray;
2322   PetscInt          *diagIdx, *offdiagIdx;
2323   Vec               diagV, offdiagV;
2324   PetscScalar       *a, *diagA, *offdiagA;
2325   const PetscScalar *ba,*bav;
2326   PetscInt          r,j,col,ncols,*bi,*bj;
2327   PetscErrorCode    ierr;
2328   Mat               B = mat->B;
2329   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2330 
2331   PetscFunctionBegin;
2332   /* When a single process holds the entire matrix A and the other processes have no entries */
2333   if (A->cmap->N == n) {
2334     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2335     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2336     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2337     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2338     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2339     PetscFunctionReturn(0);
2340   } else if (n == 0) {
2341     if (m) {
2342       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2343       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2344       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2345     }
2346     PetscFunctionReturn(0);
2347   }
2348 
2349   ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2350   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2351   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2352   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2353 
2354   /* Get offdiagIdx[] for implicit 0.0 */
2355   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2356   ba   = bav;
2357   bi   = b->i;
2358   bj   = b->j;
2359   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2360   for (r = 0; r < m; r++) {
2361     ncols = bi[r+1] - bi[r];
2362     if (ncols == A->cmap->N - n) { /* Brow is dense */
2363       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2364     } else { /* Brow is sparse, so we already KNOW the minimum in this row is 0.0 or lower (there is an implicit zero) */
2365       offdiagA[r] = 0.0;
2366 
2367       /* Find first hole in the cmap */
2368       for (j=0; j<ncols; j++) {
2369         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2370         if (col > j && j < cstart) {
2371           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2372           break;
2373         } else if (col > j + n && j >= cstart) {
2374           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2375           break;
2376         }
2377       }
2378       if (j == ncols && ncols < A->cmap->N - n) {
2379         /* a hole is outside compressed Bcols */
2380         if (ncols == 0) {
2381           if (cstart) {
2382             offdiagIdx[r] = 0;
2383           } else offdiagIdx[r] = cend;
2384         } else { /* ncols > 0 */
2385           offdiagIdx[r] = cmap[ncols-1] + 1;
2386           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2387         }
2388       }
2389     }
2390 
2391     for (j=0; j<ncols; j++) {
2392       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2393       ba++; bj++;
2394     }
2395   }
2396 
2397   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2398   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2399   for (r = 0; r < m; ++r) {
2400     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2401       a[r]   = diagA[r];
2402       if (idx) idx[r] = cstart + diagIdx[r];
2403     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2404       a[r] = diagA[r];
2405       if (idx) {
2406         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2407           idx[r] = cstart + diagIdx[r];
2408         } else idx[r] = offdiagIdx[r];
2409       }
2410     } else {
2411       a[r]   = offdiagA[r];
2412       if (idx) idx[r] = offdiagIdx[r];
2413     }
2414   }
2415   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2416   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2417   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2418   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2419   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2420   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2421   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2422   PetscFunctionReturn(0);
2423 }
2424 
2425 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2426 {
2427   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
2428   PetscInt          m = A->rmap->n,n = A->cmap->n;
2429   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2430   PetscInt          *cmap  = mat->garray;
2431   PetscInt          *diagIdx, *offdiagIdx;
2432   Vec               diagV, offdiagV;
2433   PetscScalar       *a, *diagA, *offdiagA;
2434   const PetscScalar *ba,*bav;
2435   PetscInt          r,j,col,ncols,*bi,*bj;
2436   PetscErrorCode    ierr;
2437   Mat               B = mat->B;
2438   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2439 
2440   PetscFunctionBegin;
2441   /* When a single process holds the entire matrix A and the other processes have no entries */
2442   if (A->cmap->N == n) {
2443     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2444     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2445     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2446     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2447     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2448     PetscFunctionReturn(0);
2449   } else if (n == 0) {
2450     if (m) {
2451       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2452       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2453       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2454     }
2455     PetscFunctionReturn(0);
2456   }
2457 
2458   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2459   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2460   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2461   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2462 
2463   /* Get offdiagIdx[] for implicit 0.0 */
2464   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2465   ba   = bav;
2466   bi   = b->i;
2467   bj   = b->j;
2468   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2469   for (r = 0; r < m; r++) {
2470     ncols = bi[r+1] - bi[r];
2471     if (ncols == A->cmap->N - n) { /* Brow is dense */
2472       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2473     } else { /* Brow is sparse, so we already KNOW the maximum in this row is 0.0 or higher (there is an implicit zero) */
2474       offdiagA[r] = 0.0;
2475 
2476       /* Find first hole in the cmap */
2477       for (j=0; j<ncols; j++) {
2478         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2479         if (col > j && j < cstart) {
2480           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2481           break;
2482         } else if (col > j + n && j >= cstart) {
2483           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2484           break;
2485         }
2486       }
2487       if (j == ncols && ncols < A->cmap->N - n) {
2488         /* a hole is outside compressed Bcols */
2489         if (ncols == 0) {
2490           if (cstart) {
2491             offdiagIdx[r] = 0;
2492           } else offdiagIdx[r] = cend;
2493         } else { /* ncols > 0 */
2494           offdiagIdx[r] = cmap[ncols-1] + 1;
2495           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2496         }
2497       }
2498     }
2499 
2500     for (j=0; j<ncols; j++) {
2501       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2502       ba++; bj++;
2503     }
2504   }
2505 
2506   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2507   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2508   for (r = 0; r < m; ++r) {
2509     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2510       a[r] = diagA[r];
2511       if (idx) idx[r] = cstart + diagIdx[r];
2512     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2513       a[r] = diagA[r];
2514       if (idx) {
2515         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2516           idx[r] = cstart + diagIdx[r];
2517         } else idx[r] = offdiagIdx[r];
2518       }
2519     } else {
2520       a[r] = offdiagA[r];
2521       if (idx) idx[r] = offdiagIdx[r];
2522     }
2523   }
2524   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2525   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2526   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2527   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2528   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2529   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2530   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2535 {
2536   PetscErrorCode ierr;
2537   Mat            *dummy;
2538 
2539   PetscFunctionBegin;
2540   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2541   *newmat = *dummy;
2542   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2543   PetscFunctionReturn(0);
2544 }
2545 
2546 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2547 {
2548   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2549   PetscErrorCode ierr;
2550 
2551   PetscFunctionBegin;
2552   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2553   A->factorerrortype = a->A->factorerrortype;
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2558 {
2559   PetscErrorCode ierr;
2560   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2561 
2562   PetscFunctionBegin;
2563   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2564   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2565   if (x->assembled) {
2566     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2567   } else {
2568     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2569   }
2570   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2571   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2572   PetscFunctionReturn(0);
2573 }
2574 
2575 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2576 {
2577   PetscFunctionBegin;
2578   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2579   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2580   PetscFunctionReturn(0);
2581 }
2582 
2583 /*@
2584    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2585 
2586    Collective on Mat
2587 
2588    Input Parameters:
2589 +    A - the matrix
2590 -    sc - PETSC_TRUE indicates use of the scalable algorithm (the default is not to use it)
2591 
2592    Level: advanced
2593 
2594 @*/
2595 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2596 {
2597   PetscErrorCode       ierr;
2598 
2599   PetscFunctionBegin;
2600   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2601   PetscFunctionReturn(0);
2602 }
2603 
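/*
   Example usage -- a minimal sketch, not taken from the PETSc sources; the
   overlap level (2) and the stride index set of the first 10 rows are
   illustrative assumptions. Assuming A is an already assembled MATMPIAIJ
   matrix:

     IS             is[1];
     PetscErrorCode ierr;

     ierr = ISCreateStride(PETSC_COMM_SELF,10,0,1,&is[0]);CHKERRQ(ierr);
     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,1,is,2);CHKERRQ(ierr);
     ierr = ISDestroy(&is[0]);CHKERRQ(ierr);

   The same choice can be made at runtime with -mat_increase_overlap_scalable,
   which is read when MatSetFromOptions() is called on A (see
   MatSetFromOptions_MPIAIJ() below).
*/
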
2604 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2605 {
2606   PetscErrorCode       ierr;
2607   PetscBool            sc = PETSC_FALSE,flg;
2608 
2609   PetscFunctionBegin;
2610   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2611   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2612   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2613   if (flg) {
2614     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2615   }
2616   ierr = PetscOptionsTail();CHKERRQ(ierr);
2617   PetscFunctionReturn(0);
2618 }
2619 
2620 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2621 {
2622   PetscErrorCode ierr;
2623   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2624   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2625 
2626   PetscFunctionBegin;
2627   if (!Y->preallocated) {
2628     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2629   } else if (!aij->nz) {
2630     PetscInt nonew = aij->nonew;
2631     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2632     aij->nonew = nonew;
2633   }
2634   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2635   PetscFunctionReturn(0);
2636 }
2637 
2638 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2639 {
2640   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2641   PetscErrorCode ierr;
2642 
2643   PetscFunctionBegin;
2644   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2645   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2646   if (d) {
2647     PetscInt rstart;
2648     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2649     *d += rstart;
2650 
2651   }
2652   PetscFunctionReturn(0);
2653 }
2654 
2655 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2656 {
2657   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2658   PetscErrorCode ierr;
2659 
2660   PetscFunctionBegin;
2661   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2662   PetscFunctionReturn(0);
2663 }
2664 
2665 /* -------------------------------------------------------------------*/
2666 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2667                                        MatGetRow_MPIAIJ,
2668                                        MatRestoreRow_MPIAIJ,
2669                                        MatMult_MPIAIJ,
2670                                 /* 4*/ MatMultAdd_MPIAIJ,
2671                                        MatMultTranspose_MPIAIJ,
2672                                        MatMultTransposeAdd_MPIAIJ,
2673                                        NULL,
2674                                        NULL,
2675                                        NULL,
2676                                 /*10*/ NULL,
2677                                        NULL,
2678                                        NULL,
2679                                        MatSOR_MPIAIJ,
2680                                        MatTranspose_MPIAIJ,
2681                                 /*15*/ MatGetInfo_MPIAIJ,
2682                                        MatEqual_MPIAIJ,
2683                                        MatGetDiagonal_MPIAIJ,
2684                                        MatDiagonalScale_MPIAIJ,
2685                                        MatNorm_MPIAIJ,
2686                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2687                                        MatAssemblyEnd_MPIAIJ,
2688                                        MatSetOption_MPIAIJ,
2689                                        MatZeroEntries_MPIAIJ,
2690                                 /*24*/ MatZeroRows_MPIAIJ,
2691                                        NULL,
2692                                        NULL,
2693                                        NULL,
2694                                        NULL,
2695                                 /*29*/ MatSetUp_MPIAIJ,
2696                                        NULL,
2697                                        NULL,
2698                                        MatGetDiagonalBlock_MPIAIJ,
2699                                        NULL,
2700                                 /*34*/ MatDuplicate_MPIAIJ,
2701                                        NULL,
2702                                        NULL,
2703                                        NULL,
2704                                        NULL,
2705                                 /*39*/ MatAXPY_MPIAIJ,
2706                                        MatCreateSubMatrices_MPIAIJ,
2707                                        MatIncreaseOverlap_MPIAIJ,
2708                                        MatGetValues_MPIAIJ,
2709                                        MatCopy_MPIAIJ,
2710                                 /*44*/ MatGetRowMax_MPIAIJ,
2711                                        MatScale_MPIAIJ,
2712                                        MatShift_MPIAIJ,
2713                                        MatDiagonalSet_MPIAIJ,
2714                                        MatZeroRowsColumns_MPIAIJ,
2715                                 /*49*/ MatSetRandom_MPIAIJ,
2716                                        NULL,
2717                                        NULL,
2718                                        NULL,
2719                                        NULL,
2720                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2721                                        NULL,
2722                                        MatSetUnfactored_MPIAIJ,
2723                                        MatPermute_MPIAIJ,
2724                                        NULL,
2725                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2726                                        MatDestroy_MPIAIJ,
2727                                        MatView_MPIAIJ,
2728                                        NULL,
2729                                        NULL,
2730                                 /*64*/ NULL,
2731                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2732                                        NULL,
2733                                        NULL,
2734                                        NULL,
2735                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2736                                        MatGetRowMinAbs_MPIAIJ,
2737                                        NULL,
2738                                        NULL,
2739                                        NULL,
2740                                        NULL,
2741                                 /*75*/ MatFDColoringApply_AIJ,
2742                                        MatSetFromOptions_MPIAIJ,
2743                                        NULL,
2744                                        NULL,
2745                                        MatFindZeroDiagonals_MPIAIJ,
2746                                 /*80*/ NULL,
2747                                        NULL,
2748                                        NULL,
2749                                 /*83*/ MatLoad_MPIAIJ,
2750                                        MatIsSymmetric_MPIAIJ,
2751                                        NULL,
2752                                        NULL,
2753                                        NULL,
2754                                        NULL,
2755                                 /*89*/ NULL,
2756                                        NULL,
2757                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2758                                        NULL,
2759                                        NULL,
2760                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2761                                        NULL,
2762                                        NULL,
2763                                        NULL,
2764                                        MatBindToCPU_MPIAIJ,
2765                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2766                                        NULL,
2767                                        NULL,
2768                                        MatConjugate_MPIAIJ,
2769                                        NULL,
2770                                 /*104*/MatSetValuesRow_MPIAIJ,
2771                                        MatRealPart_MPIAIJ,
2772                                        MatImaginaryPart_MPIAIJ,
2773                                        NULL,
2774                                        NULL,
2775                                 /*109*/NULL,
2776                                        NULL,
2777                                        MatGetRowMin_MPIAIJ,
2778                                        NULL,
2779                                        MatMissingDiagonal_MPIAIJ,
2780                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2781                                        NULL,
2782                                        MatGetGhosts_MPIAIJ,
2783                                        NULL,
2784                                        NULL,
2785                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2786                                        NULL,
2787                                        NULL,
2788                                        NULL,
2789                                        MatGetMultiProcBlock_MPIAIJ,
2790                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2791                                        MatGetColumnNorms_MPIAIJ,
2792                                        MatInvertBlockDiagonal_MPIAIJ,
2793                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2794                                        MatCreateSubMatricesMPI_MPIAIJ,
2795                                 /*129*/NULL,
2796                                        NULL,
2797                                        NULL,
2798                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2799                                        NULL,
2800                                 /*134*/NULL,
2801                                        NULL,
2802                                        NULL,
2803                                        NULL,
2804                                        NULL,
2805                                 /*139*/MatSetBlockSizes_MPIAIJ,
2806                                        NULL,
2807                                        NULL,
2808                                        MatFDColoringSetUp_MPIXAIJ,
2809                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2810                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2811                                 /*145*/NULL,
2812                                        NULL,
2813                                        NULL
2814 };
2815 
2816 /* ----------------------------------------------------------------------------------------*/
2817 
2818 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2819 {
2820   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2821   PetscErrorCode ierr;
2822 
2823   PetscFunctionBegin;
2824   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2825   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2826   PetscFunctionReturn(0);
2827 }
2828 
2829 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2830 {
2831   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2832   PetscErrorCode ierr;
2833 
2834   PetscFunctionBegin;
2835   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2836   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2837   PetscFunctionReturn(0);
2838 }
2839 
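/*
   Typical use of the store/retrieve pair above -- a hedged sketch, not taken
   from the PETSc sources. The values of an assembled matrix mat are saved once
   and restored before each re-assembly, so that contributions can repeatedly
   be added to the same nonzero pattern:

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     then, for each new problem:
       ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
       add the changing entries with MatSetValues(), assemble, and solve
*/
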
2840 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2841 {
2842   Mat_MPIAIJ     *b;
2843   PetscErrorCode ierr;
2844   PetscMPIInt    size;
2845 
2846   PetscFunctionBegin;
2847   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2848   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2849   b = (Mat_MPIAIJ*)B->data;
2850 
2851 #if defined(PETSC_USE_CTABLE)
2852   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2853 #else
2854   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2855 #endif
2856   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2857   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2858   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2859 
2860   /* Because B may have been resized we simply destroy it and create a new one each time */
2861   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2862   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2863   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2864   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2865   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2866   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2867   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2868 
2869   if (!B->preallocated) {
2870     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2871     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2872     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2873     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2874     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2875   }
2876 
2877   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2878   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2879   B->preallocated  = PETSC_TRUE;
2880   B->was_assembled = PETSC_FALSE;
2881   B->assembled     = PETSC_FALSE;
2882   PetscFunctionReturn(0);
2883 }
2884 
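/*
   Example of the user-level call that dispatches to the routine above -- a
   hedged sketch, not taken from the PETSc sources; the global size 128 and the
   per-row estimates (5 diagonal and 2 off-diagonal nonzeros) are illustrative
   assumptions:

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,128,128);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/
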
2885 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2886 {
2887   Mat_MPIAIJ     *b;
2888   PetscErrorCode ierr;
2889 
2890   PetscFunctionBegin;
2891   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2892   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2893   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2894   b = (Mat_MPIAIJ*)B->data;
2895 
2896 #if defined(PETSC_USE_CTABLE)
2897   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2898 #else
2899   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2900 #endif
2901   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2902   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2903   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2904 
2905   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2906   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2907   B->preallocated  = PETSC_TRUE;
2908   B->was_assembled = PETSC_FALSE;
2909   B->assembled = PETSC_FALSE;
2910   PetscFunctionReturn(0);
2911 }
2912 
2913 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2914 {
2915   Mat            mat;
2916   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2917   PetscErrorCode ierr;
2918 
2919   PetscFunctionBegin;
2920   *newmat = NULL;
2921   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2922   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2923   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2924   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2925   a       = (Mat_MPIAIJ*)mat->data;
2926 
2927   mat->factortype   = matin->factortype;
2928   mat->assembled    = matin->assembled;
2929   mat->insertmode   = NOT_SET_VALUES;
2930   mat->preallocated = matin->preallocated;
2931 
2932   a->size         = oldmat->size;
2933   a->rank         = oldmat->rank;
2934   a->donotstash   = oldmat->donotstash;
2935   a->roworiented  = oldmat->roworiented;
2936   a->rowindices   = NULL;
2937   a->rowvalues    = NULL;
2938   a->getrowactive = PETSC_FALSE;
2939 
2940   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2941   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2942 
2943   if (oldmat->colmap) {
2944 #if defined(PETSC_USE_CTABLE)
2945     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2946 #else
2947     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2948     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2949     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2950 #endif
2951   } else a->colmap = NULL;
2952   if (oldmat->garray) {
2953     PetscInt len;
2954     len  = oldmat->B->cmap->n;
2955     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2956     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2957     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2958   } else a->garray = NULL;
2959 
2960   /* MatDuplicate() may be called with a non-assembled matrix;
2961      it only requires the matrix to be preallocated.
2962      This may happen, for example, inside DMCreateMatrix_Shell() */
2963   if (oldmat->lvec) {
2964     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2965     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2966   }
2967   if (oldmat->Mvctx) {
2968     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2969     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2970   }
2971   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2972   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2973   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2974   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2975   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2976   *newmat = mat;
2977   PetscFunctionReturn(0);
2978 }
2979 
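/*
   Example call reaching the routine above through the public interface -- a
   hedged sketch, not taken from the PETSc sources. Assuming A is a
   preallocated (not necessarily assembled) MATMPIAIJ matrix:

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);

   MAT_DO_NOT_COPY_VALUES or MAT_SHARE_NONZERO_PATTERN can be used instead of
   MAT_COPY_VALUES when only the nonzero structure is needed.
*/
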
2980 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2981 {
2982   PetscBool      isbinary, ishdf5;
2983   PetscErrorCode ierr;
2984 
2985   PetscFunctionBegin;
2986   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2987   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2988   /* force binary viewer to load .info file if it has not yet done so */
2989   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2990   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2991   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2992   if (isbinary) {
2993     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2994   } else if (ishdf5) {
2995 #if defined(PETSC_HAVE_HDF5)
2996     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2997 #else
2998     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2999 #endif
3000   } else {
3001     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3002   }
3003   PetscFunctionReturn(0);
3004 }
3005 
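/*
   Example of loading an MPIAIJ matrix through the routine above -- a hedged
   sketch, not taken from the PETSc sources; the file name "matrix.dat" is a
   hypothetical placeholder:

     Mat            A;
     PetscViewer    viewer;
     PetscErrorCode ierr;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
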
3006 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3007 {
3008   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3009   PetscInt       *rowidxs,*colidxs;
3010   PetscScalar    *matvals;
3011   PetscErrorCode ierr;
3012 
3013   PetscFunctionBegin;
3014   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3015 
3016   /* read in matrix header */
3017   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3018   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3019   M  = header[1]; N = header[2]; nz = header[3];
3020   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3021   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3022   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3023 
3024   /* set block sizes from the viewer's .info file */
3025   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3026   /* set global sizes if not set already */
3027   if (mat->rmap->N < 0) mat->rmap->N = M;
3028   if (mat->cmap->N < 0) mat->cmap->N = N;
3029   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3030   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3031 
3032   /* check if the matrix sizes are correct */
3033   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3034   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3035 
3036   /* read in row lengths and build row indices */
3037   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3038   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3039   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3040   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3041   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3042   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D",nz,sum);
3043   /* read in column indices and matrix values */
3044   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3045   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3046   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3047   /* store matrix indices and values */
3048   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3049   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3050   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3051   PetscFunctionReturn(0);
3052 }
3053 
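/*
   The on-disk layout consumed by MatLoad_MPIAIJ_Binary() above, summarized
   from the reads it performs:

     PetscInt    header[4]      {MAT_FILE_CLASSID, M, N, nz}
     PetscInt    rowlens[M]     number of nonzeros in each row
     PetscInt    colidxs[nz]    column indices, stored row by row
     PetscScalar matvals[nz]    numerical values, stored row by row
*/
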
3054 /* Not scalable because of ISAllGather() unless getting all columns. */
3055 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3056 {
3057   PetscErrorCode ierr;
3058   IS             iscol_local;
3059   PetscBool      isstride;
3060   PetscMPIInt    lisstride=0,gisstride;
3061 
3062   PetscFunctionBegin;
3063   /* Check if we are grabbing all columns */
3064   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3065 
3066   if (isstride) {
3067     PetscInt  start,len,mstart,mlen;
3068     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3069     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3070     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3071     if (mstart == start && mlen-mstart == len) lisstride = 1;
3072   }
3073 
3074   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3075   if (gisstride) {
3076     PetscInt N;
3077     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3078     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3079     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3080     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3081   } else {
3082     PetscInt cbs;
3083     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3084     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3085     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3086   }
3087 
3088   *isseq = iscol_local;
3089   PetscFunctionReturn(0);
3090 }
3091 
3092 /*
3093  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local with the global size of iscol
3094  (see MatCreateSubMatrix_MPIAIJ_nonscalable())
3095 
3096  Input Parameters:
3097    mat - matrix
3098    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3099            i.e., mat->rstart <= isrow[i] < mat->rend
3100    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3101            i.e., mat->cstart <= iscol[i] < mat->cend
3102  Output Parameters:
3103    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3104    iscol_o - sequential column index set for retrieving mat->B
3105    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3106  */
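/*
   A small worked example of the mapping above (hypothetical sizes, not taken
   from the sources): suppose this process owns global columns 4..7 of mat, the
   compressed columns of mat->B correspond to global columns {2,9,11}, and the
   parallel iscol selects global columns {2,4,5,9} overall. Then iscol_o
   selects local B columns {0,1}, and garray = {0,3}: the positions of global
   columns 2 and 9 within iscol.
*/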
3107 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3108 {
3109   PetscErrorCode ierr;
3110   Vec            x,cmap;
3111   const PetscInt *is_idx;
3112   PetscScalar    *xarray,*cmaparray;
3113   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3114   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3115   Mat            B=a->B;
3116   Vec            lvec=a->lvec,lcmap;
3117   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3118   MPI_Comm       comm;
3119   VecScatter     Mvctx=a->Mvctx;
3120 
3121   PetscFunctionBegin;
3122   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3123   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3124 
3125   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3126   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3127   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3128   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3129   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3130 
3131   /* Get start indices */
3132   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3133   isstart -= ncols;
3134   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3135 
3136   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3137   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3138   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3139   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3140   for (i=0; i<ncols; i++) {
3141     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3142     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3143     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3144   }
3145   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3146   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3147   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3148 
3149   /* Get iscol_d */
3150   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3151   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3152   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3153 
3154   /* Get isrow_d */
3155   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3156   rstart = mat->rmap->rstart;
3157   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3158   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3159   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3160   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3161 
3162   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3163   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3164   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3165 
3166   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3167   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3168   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3169 
3170   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3171 
3172   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3173   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3174 
3175   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3176   /* off-process column indices */
3177   count = 0;
3178   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3179   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3180 
3181   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3182   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3183   for (i=0; i<Bn; i++) {
3184     if (PetscRealPart(xarray[i]) > -1.0) {
3185       idx[count]     = i;                   /* local column index in off-diagonal part B */
3186       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3187       count++;
3188     }
3189   }
3190   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3191   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3192 
3193   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3194   /* cannot ensure iscol_o has same blocksize as iscol! */
3195 
3196   ierr = PetscFree(idx);CHKERRQ(ierr);
3197   *garray = cmap1;
3198 
3199   ierr = VecDestroy(&x);CHKERRQ(ierr);
3200   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3201   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3202   PetscFunctionReturn(0);
3203 }
3204 
3205 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3206 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3207 {
3208   PetscErrorCode ierr;
3209   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3210   Mat            M = NULL;
3211   MPI_Comm       comm;
3212   IS             iscol_d,isrow_d,iscol_o;
3213   Mat            Asub = NULL,Bsub = NULL;
3214   PetscInt       n;
3215 
3216   PetscFunctionBegin;
3217   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3218 
3219   if (call == MAT_REUSE_MATRIX) {
3220     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3221     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3222     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3223 
3224     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3225     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3226 
3227     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3228     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3229 
3230     /* Update diagonal and off-diagonal portions of submat */
3231     asub = (Mat_MPIAIJ*)(*submat)->data;
3232     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3233     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3234     if (n) {
3235       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3236     }
3237     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3238     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3239 
3240   } else { /* call == MAT_INITIAL_MATRIX) */
3241     const PetscInt *garray;
3242     PetscInt        BsubN;
3243 
3244     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3245     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3246 
3247     /* Create local submatrices Asub and Bsub */
3248     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3249     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3250 
3251     /* Create submatrix M */
3252     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3253 
3254     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3255     asub = (Mat_MPIAIJ*)M->data;
3256 
3257     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3258     n = asub->B->cmap->N;
3259     if (BsubN > n) {
3260       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3261       const PetscInt *idx;
3262       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3263       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3264 
3265       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3266       j = 0;
3267       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3268       for (i=0; i<n; i++) {
3269         if (j >= BsubN) break;
3270         while (subgarray[i] > garray[j]) j++;
3271 
3272         if (subgarray[i] == garray[j]) {
3273           idx_new[i] = idx[j++];
3274         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3275       }
3276       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3277 
3278       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3279       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3280 
3281     } else if (BsubN < n) {
3282       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3283     }
3284 
3285     ierr = PetscFree(garray);CHKERRQ(ierr);
3286     *submat = M;
3287 
3288     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3289     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3290     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3291 
3292     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3293     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3294 
3295     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3296     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3297   }
3298   PetscFunctionReturn(0);
3299 }
3300 
3301 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3302 {
3303   PetscErrorCode ierr;
3304   IS             iscol_local=NULL,isrow_d;
3305   PetscInt       csize;
3306   PetscInt       n,i,j,start,end;
3307   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3308   MPI_Comm       comm;
3309 
3310   PetscFunctionBegin;
3311   /* If isrow has same processor distribution as mat,
3312      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3313   if (call == MAT_REUSE_MATRIX) {
3314     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3315     if (isrow_d) {
3316       sameRowDist  = PETSC_TRUE;
3317       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3318     } else {
3319       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3320       if (iscol_local) {
3321         sameRowDist  = PETSC_TRUE;
3322         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3323       }
3324     }
3325   } else {
3326     /* Check if isrow has same processor distribution as mat */
3327     sameDist[0] = PETSC_FALSE;
3328     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3329     if (!n) {
3330       sameDist[0] = PETSC_TRUE;
3331     } else {
3332       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3333       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3334       if (i >= start && j < end) {
3335         sameDist[0] = PETSC_TRUE;
3336       }
3337     }
3338 
3339     /* Check if iscol has same processor distribution as mat */
3340     sameDist[1] = PETSC_FALSE;
3341     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3342     if (!n) {
3343       sameDist[1] = PETSC_TRUE;
3344     } else {
3345       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3346       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3347       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3348     }
3349 
3350     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3351     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3352     sameRowDist = tsameDist[0];
3353   }
3354 
3355   if (sameRowDist) {
3356     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3357       /* isrow and iscol have same processor distribution as mat */
3358       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3359       PetscFunctionReturn(0);
3360     } else { /* sameRowDist */
3361       /* isrow has same processor distribution as mat */
3362       if (call == MAT_INITIAL_MATRIX) {
3363         PetscBool sorted;
3364         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3365         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3366         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3367         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3368 
3369         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3370         if (sorted) {
3371           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3372           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3373           PetscFunctionReturn(0);
3374         }
3375       } else { /* call == MAT_REUSE_MATRIX */
3376         IS iscol_sub;
3377         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3378         if (iscol_sub) {
3379           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3380           PetscFunctionReturn(0);
3381         }
3382       }
3383     }
3384   }
3385 
3386   /* General case: iscol -> iscol_local which has global size of iscol */
3387   if (call == MAT_REUSE_MATRIX) {
3388     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3389     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3390   } else {
3391     if (!iscol_local) {
3392       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3393     }
3394   }
3395 
3396   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3397   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3398 
3399   if (call == MAT_INITIAL_MATRIX) {
3400     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3401     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3402   }
3403   PetscFunctionReturn(0);
3404 }
3405 
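/*
   Example call reaching the dispatcher above through the public interface -- a
   hedged sketch, not taken from the PETSc sources. The stride index sets
   select each process's own rows and columns of an assembled MATMPIAIJ matrix
   A, which exercises the SameRowColDist path:

     IS             isrow,iscol;
     Mat            B;
     PetscInt       rstart,rend,cstart,cend;
     PetscErrorCode ierr;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/
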
3406 /*@C
3407      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3408          and "off-diagonal" parts of the matrix in CSR format.
3409 
3410    Collective
3411 
3412    Input Parameters:
3413 +  comm - MPI communicator
3414 .  A - "diagonal" portion of matrix
3415 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3416 -  garray - global index of B columns
3417 
3418    Output Parameter:
3419 .   mat - the matrix, with input A as its local diagonal matrix
3420    Level: advanced
3421 
3422    Notes:
3423        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3424        A becomes part of the output mat and B is destroyed by this routine; the user may not use A or B afterwards.
3425 
3426 .seealso: MatCreateMPIAIJWithSplitArrays()
3427 @*/
3428 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3429 {
3430   PetscErrorCode    ierr;
3431   Mat_MPIAIJ        *maij;
3432   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3433   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3434   const PetscScalar *oa;
3435   Mat               Bnew;
3436   PetscInt          m,n,N;
3437 
3438   PetscFunctionBegin;
3439   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3440   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3441   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3442   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3443   /* The check below is removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3444   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3445 
3446   /* Get global columns of mat */
3447   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3448 
3449   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3450   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3451   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3452   maij = (Mat_MPIAIJ*)(*mat)->data;
3453 
3454   (*mat)->preallocated = PETSC_TRUE;
3455 
3456   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3457   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3458 
3459   /* Set A as diagonal portion of *mat */
3460   maij->A = A;
3461 
3462   nz = oi[m];
3463   for (i=0; i<nz; i++) {
3464     col   = oj[i];
3465     oj[i] = garray[col];
3466   }
3467 
3468   /* Set Bnew as off-diagonal portion of *mat */
3469   ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr);
3470   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr);
3471   ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr);
3472   bnew        = (Mat_SeqAIJ*)Bnew->data;
3473   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3474   maij->B     = Bnew;
3475 
3476   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3477 
3478   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3479   b->free_a       = PETSC_FALSE;
3480   b->free_ij      = PETSC_FALSE;
3481   ierr = MatDestroy(&B);CHKERRQ(ierr);
3482 
3483   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3484   bnew->free_a       = PETSC_TRUE;
3485   bnew->free_ij      = PETSC_TRUE;
3486 
3487   /* condense columns of maij->B */
3488   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3489   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3490   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3491   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3492   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3493   PetscFunctionReturn(0);
3494 }
3495 
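/*
   Example use of MatCreateMPIAIJWithSeqAIJ() above -- a hedged sketch, not
   taken from the PETSc sources; Asub, Bsub and garray are assumed to have been
   built already, e.g. by MatCreateSubMatrix_SeqAIJ() and
   ISGetSeqIS_SameColDist_Private() as in MatCreateSubMatrix_MPIAIJ_SameRowColDist():

     Mat C;
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Asub,Bsub,garray,&C);CHKERRQ(ierr);

   After the call Asub is owned by C and Bsub has been destroyed, so only C may
   be used (and eventually destroyed) by the caller.
*/
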
3496 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3497 
3498 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3499 {
3500   PetscErrorCode ierr;
3501   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3502   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3503   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3504   Mat            M,Msub,B=a->B;
3505   MatScalar      *aa;
3506   Mat_SeqAIJ     *aij;
3507   PetscInt       *garray = a->garray,*colsub,Ncols;
3508   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3509   IS             iscol_sub,iscmap;
3510   const PetscInt *is_idx,*cmap;
3511   PetscBool      allcolumns=PETSC_FALSE;
3512   MPI_Comm       comm;
3513 
3514   PetscFunctionBegin;
3515   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3516   if (call == MAT_REUSE_MATRIX) {
3517     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3518     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3519     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3520 
3521     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3522     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3523 
3524     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3525     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3526 
3527     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3528 
3529   } else { /* call == MAT_INITIAL_MATRIX) */
3530     PetscBool flg;
3531 
3532     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3533     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3534 
3535     /* (1) iscol -> nonscalable iscol_local */
3536     /* Check for special case: each processor gets entire matrix columns */
3537     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3538     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3539     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3540     if (allcolumns) {
3541       iscol_sub = iscol_local;
3542       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3543       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3544 
3545     } else {
3546       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3547       PetscInt *idx,*cmap1,k;
3548       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3549       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3550       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3551       count = 0;
3552       k     = 0;
3553       for (i=0; i<Ncols; i++) {
3554         j = is_idx[i];
3555         if (j >= cstart && j < cend) {
3556           /* diagonal part of mat */
3557           idx[count]     = j;
3558           cmap1[count++] = i; /* column index in submat */
3559         } else if (Bn) {
3560           /* off-diagonal part of mat */
3561           if (j == garray[k]) {
3562             idx[count]     = j;
3563             cmap1[count++] = i;  /* column index in submat */
3564           } else if (j > garray[k]) {
3565             while (j > garray[k] && k < Bn-1) k++;
3566             if (j == garray[k]) {
3567               idx[count]     = j;
3568               cmap1[count++] = i; /* column index in submat */
3569             }
3570           }
3571         }
3572       }
3573       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3574 
3575       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3576       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3577       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3578 
3579       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3580     }
3581 
3582     /* (3) Create sequential Msub */
3583     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3584   }
3585 
3586   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3587   aij  = (Mat_SeqAIJ*)(Msub)->data;
3588   ii   = aij->i;
3589   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3590 
3591   /*
3592       m - number of local rows
3593       Ncols - number of columns (same on all processors)
3594       rstart - first row in new global matrix generated
3595   */
3596   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3597 
3598   if (call == MAT_INITIAL_MATRIX) {
3599     /* (4) Create parallel newmat */
3600     PetscMPIInt    rank,size;
3601     PetscInt       csize;
3602 
3603     ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3604     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3605 
3606     /*
3607         Determine the number of non-zeros in the diagonal and off-diagonal
3608         portions of the matrix in order to do correct preallocation
3609     */
3610 
3611     /* first get start and end of "diagonal" columns */
3612     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3613     if (csize == PETSC_DECIDE) {
3614       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3615       if (mglobal == Ncols) { /* square matrix */
3616         nlocal = m;
3617       } else {
3618         nlocal = Ncols/size + ((Ncols % size) > rank);
3619       }
3620     } else {
3621       nlocal = csize;
3622     }
3623     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3624     rstart = rend - nlocal;
3625     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3626 
3627     /* next, compute all the lengths */
3628     jj    = aij->j;
3629     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3630     olens = dlens + m;
3631     for (i=0; i<m; i++) {
3632       jend = ii[i+1] - ii[i];
3633       olen = 0;
3634       dlen = 0;
3635       for (j=0; j<jend; j++) {
3636         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3637         else dlen++;
3638         jj++;
3639       }
3640       olens[i] = olen;
3641       dlens[i] = dlen;
3642     }
3643 
3644     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3645     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3646 
3647     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3648     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3649     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3650     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3651     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3652     ierr = PetscFree(dlens);CHKERRQ(ierr);
3653 
3654   } else { /* call == MAT_REUSE_MATRIX */
3655     M    = *newmat;
3656     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3657     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3658     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3659     /*
3660          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3661        rather than the slower MatSetValues().
3662     */
3663     M->was_assembled = PETSC_TRUE;
3664     M->assembled     = PETSC_FALSE;
3665   }
3666 
3667   /* (5) Set values of Msub to *newmat */
3668   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3669   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3670 
3671   jj   = aij->j;
3672   ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3673   for (i=0; i<m; i++) {
3674     row = rstart + i;
3675     nz  = ii[i+1] - ii[i];
3676     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3677     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3678     jj += nz; aa += nz;
3679   }
3680   ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3681   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3682 
3683   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3684   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3685 
3686   ierr = PetscFree(colsub);CHKERRQ(ierr);
3687 
3688   /* save Msub, iscol_sub and iscmap used in processor for next request */
3689   if (call == MAT_INITIAL_MATRIX) {
3690     *newmat = M;
3691     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3692     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3693 
3694     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3695     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3696 
3697     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3698     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3699 
3700     if (iscol_local) {
3701       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3702       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3703     }
3704   }
3705   PetscFunctionReturn(0);
3706 }
3707 
3708 /*
3709     Not great since it makes two copies of the submatrix: first a local SeqAIJ,
3710   and then the end result obtained by concatenating the local matrices.
3711   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3712 
3713   Note: This requires a sequential iscol with all indices.
3714 */
3715 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3716 {
3717   PetscErrorCode ierr;
3718   PetscMPIInt    rank,size;
3719   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3720   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3721   Mat            M,Mreuse;
3722   MatScalar      *aa,*vwork;
3723   MPI_Comm       comm;
3724   Mat_SeqAIJ     *aij;
3725   PetscBool      colflag,allcolumns=PETSC_FALSE;
3726 
3727   PetscFunctionBegin;
3728   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3729   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3730   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3731 
3732   /* Check for special case: each processor gets entire matrix columns */
3733   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3734   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3735   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3736   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3737 
3738   if (call ==  MAT_REUSE_MATRIX) {
3739     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3740     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3741     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3742   } else {
3743     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3744   }
3745 
3746   /*
3747       m - number of local rows
3748       n - number of columns (same on all processors)
3749       rstart - first row in new global matrix generated
3750   */
3751   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3752   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3753   if (call == MAT_INITIAL_MATRIX) {
3754     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3755     ii  = aij->i;
3756     jj  = aij->j;
3757 
3758     /*
3759         Determine the number of non-zeros in the diagonal and off-diagonal
3760         portions of the matrix in order to do correct preallocation
3761     */
3762 
3763     /* first get start and end of "diagonal" columns */
3764     if (csize == PETSC_DECIDE) {
3765       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3766       if (mglobal == n) { /* square matrix */
3767         nlocal = m;
3768       } else {
3769         nlocal = n/size + ((n % size) > rank);
3770       }
3771     } else {
3772       nlocal = csize;
3773     }
3774     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3775     rstart = rend - nlocal;
3776     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3777 
3778     /* next, compute all the lengths */
3779     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3780     olens = dlens + m;
3781     for (i=0; i<m; i++) {
3782       jend = ii[i+1] - ii[i];
3783       olen = 0;
3784       dlen = 0;
3785       for (j=0; j<jend; j++) {
3786         if (*jj < rstart || *jj >= rend) olen++;
3787         else dlen++;
3788         jj++;
3789       }
3790       olens[i] = olen;
3791       dlens[i] = dlen;
3792     }
3793     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3794     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3795     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3796     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3797     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3798     ierr = PetscFree(dlens);CHKERRQ(ierr);
3799   } else {
3800     PetscInt ml,nl;
3801 
3802     M    = *newmat;
3803     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3804     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3805     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3806     /*
3807          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3808        rather than the slower MatSetValues().
3809     */
3810     M->was_assembled = PETSC_TRUE;
3811     M->assembled     = PETSC_FALSE;
3812   }
3813   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3814   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3815   ii   = aij->i;
3816   jj   = aij->j;
3817 
3818   /* trigger copy to CPU if needed */
3819   ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3820   for (i=0; i<m; i++) {
3821     row   = rstart + i;
3822     nz    = ii[i+1] - ii[i];
3823     cwork = jj; jj += nz;
3824     vwork = aa; aa += nz;
3825     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3826   }
3827   ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3828 
3829   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3830   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3831   *newmat = M;
3832 
3833   /* save submatrix used on this process for the next request */
3834   if (call ==  MAT_INITIAL_MATRIX) {
3835     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3836     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3837   }
3838   PetscFunctionReturn(0);
3839 }
3840 
3841 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3842 {
3843   PetscInt       m,cstart, cend,j,nnz,i,d;
3844   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3845   const PetscInt *JJ;
3846   PetscErrorCode ierr;
3847   PetscBool      nooffprocentries;
3848 
3849   PetscFunctionBegin;
3850   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3851 
3852   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3853   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3854   m      = B->rmap->n;
3855   cstart = B->cmap->rstart;
3856   cend   = B->cmap->rend;
3857   rstart = B->rmap->rstart;
3858 
3859   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3860 
3861   if (PetscDefined(USE_DEBUG)) {
3862     for (i=0; i<m; i++) {
3863       nnz = Ii[i+1]- Ii[i];
3864       JJ  = J + Ii[i];
3865       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3866       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3867       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3868     }
3869   }
3870 
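  /* Count, for each local row, how many entries fall in the diagonal block (global columns in [cstart,cend))
     and how many fall outside it, so the two blocks can be preallocated exactly */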
3871   for (i=0; i<m; i++) {
3872     nnz     = Ii[i+1]- Ii[i];
3873     JJ      = J + Ii[i];
3874     nnz_max = PetscMax(nnz_max,nnz);
3875     d       = 0;
3876     for (j=0; j<nnz; j++) {
3877       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3878     }
3879     d_nnz[i] = d;
3880     o_nnz[i] = nnz - d;
3881   }
3882   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3883   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3884 
3885   for (i=0; i<m; i++) {
3886     ii   = i + rstart;
3887     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3888   }
3889   nooffprocentries    = B->nooffprocentries;
3890   B->nooffprocentries = PETSC_TRUE;
3891   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3892   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3893   B->nooffprocentries = nooffprocentries;
3894 
3895   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3896   PetscFunctionReturn(0);
3897 }
3898 
3899 /*@
3900    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3901    (the default parallel PETSc format).
3902 
3903    Collective
3904 
3905    Input Parameters:
3906 +  B - the matrix
3907 .  i - the indices into j for the start of each local row (starts with zero)
3908 .  j - the column indices for each local row (starts with zero)
3909 -  v - optional values in the matrix
3910 
3911    Level: developer
3912 
3913    Notes:
3914        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3915      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3916      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3917 
3918        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3919 
3920        The format used for the sparse matrix input is equivalent to a
3921     row-major ordering, i.e., for the following matrix the expected input data is
3922     as shown below.
3923 
3924 $        1 0 0
3925 $        2 0 3     P0
3926 $       -------
3927 $        4 5 6     P1
3928 $
3929 $     Process0 [P0]: rows_owned=[0,1]
3930 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3931 $        j =  {0,0,2}  [size = 3]
3932 $        v =  {1,2,3}  [size = 3]
3933 $
3934 $     Process1 [P1]: rows_owned=[2]
3935 $        i =  {0,3}    [size = nrow+1  = 1+1]
3936 $        j =  {0,1,2}  [size = 3]
3937 $        v =  {4,5,6}  [size = 3]
3938 
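       A minimal sketch of a typical calling sequence (assuming comm, the local sizes m and n, and the
    local CSR arrays i, j, v have been set up by the caller as above; error checking omitted):

$     MatCreate(comm,&B);
$     MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
$     MatSetType(B,MATMPIAIJ);
$     MatMPIAIJSetPreallocationCSR(B,i,j,v);
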
3939 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3940           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3941 @*/
3942 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3943 {
3944   PetscErrorCode ierr;
3945 
3946   PetscFunctionBegin;
3947   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3948   PetscFunctionReturn(0);
3949 }
3950 
3951 /*@C
3952    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3953    (the default parallel PETSc format).  For good matrix assembly performance
3954    the user should preallocate the matrix storage by setting the parameters
3955    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3956    performance can be increased by more than a factor of 50.
3957 
3958    Collective
3959 
3960    Input Parameters:
3961 +  B - the matrix
3962 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3963            (same value is used for all local rows)
3964 .  d_nnz - array containing the number of nonzeros in the various rows of the
3965            DIAGONAL portion of the local submatrix (possibly different for each row)
3966            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3967            The size of this array is equal to the number of local rows, i.e 'm'.
3968            For matrices that will be factored, you must leave room for (and set)
3969            the diagonal entry even if it is zero.
3970 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3971            submatrix (same value is used for all local rows).
3972 -  o_nnz - array containing the number of nonzeros in the various rows of the
3973            OFF-DIAGONAL portion of the local submatrix (possibly different for
3974            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3975            structure. The size of this array is equal to the number
3976            of local rows, i.e 'm'.
3977 
3978    If the *_nnz parameter is given then the *_nz parameter is ignored.
3979 
3980    The AIJ format (also called the Yale sparse matrix format or
3981    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3982    storage.  The stored row and column indices begin with zero.
3983    See Users-Manual: ch_mat for details.
3984 
3985    The parallel matrix is partitioned such that the first m0 rows belong to
3986    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3987    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
3988 
3989    The DIAGONAL portion of the local submatrix of a processor can be defined
3990    as the submatrix which is obtained by extracting the part corresponding to
3991    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3992    first row that belongs to the processor, r2 is the last row belonging to
3993    this processor, and c1-c2 is the range of indices of the local part of a
3994    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3995    common case of a square matrix, the row and column ranges are the same and
3996    the DIAGONAL part is also square. The remaining portion of the local
3997    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3998 
3999    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4000 
4001    You can call MatGetInfo() to get information on how effective the preallocation was;
4002    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4003    You can also run with the option -info and look for messages with the string
4004    malloc in them to see if additional memory allocation was needed.
4005 
4006    Example usage:
4007 
4008    Consider the following 8x8 matrix with 34 non-zero values, that is
4009    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4010    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4011    as follows:
4012 
4013 .vb
4014             1  2  0  |  0  3  0  |  0  4
4015     Proc0   0  5  6  |  7  0  0  |  8  0
4016             9  0 10  | 11  0  0  | 12  0
4017     -------------------------------------
4018            13  0 14  | 15 16 17  |  0  0
4019     Proc1   0 18  0  | 19 20 21  |  0  0
4020             0  0  0  | 22 23  0  | 24  0
4021     -------------------------------------
4022     Proc2  25 26 27  |  0  0 28  | 29  0
4023            30  0  0  | 31 32 33  |  0 34
4024 .ve
4025 
4026    This can be represented as a collection of submatrices as:
4027 
4028 .vb
4029       A B C
4030       D E F
4031       G H I
4032 .ve
4033 
4034    Where the submatrices A,B,C are owned by proc0, D,E,F are
4035    owned by proc1, G,H,I are owned by proc2.
4036 
4037    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4038    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4039    The 'M','N' parameters are 8,8, and have the same values on all procs.
4040 
4041    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4042    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4043    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4044    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4045    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4046    matrix, and [DF] as another SeqAIJ matrix.
4047 
4048    When d_nz, o_nz parameters are specified, d_nz storage elements are
4049    allocated for every row of the local diagonal submatrix, and o_nz
4050    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4051    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4052    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4053    In this case, the values of d_nz,o_nz are:
4054 .vb
4055      proc0 : dnz = 2, o_nz = 2
4056      proc1 : dnz = 3, o_nz = 2
4057      proc2 : dnz = 1, o_nz = 4
4058 .ve
4059    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4060    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4061    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4062    34 values.
4063 
4064    When d_nnz, o_nnz parameters are specified, the storage is specified
4065    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4066    In the above case the values for d_nnz,o_nnz are:
4067 .vb
4068      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4069      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4070      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4071 .ve
4072    Here the space allocated is the sum of all the above values, i.e. 34, and
4073    hence preallocation is perfect.
4074 
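   A minimal calling-sequence sketch for the example above (assuming comm, the local sizes m and n,
   and the per-row arrays d_nnz, o_nnz have been filled as listed; error checking omitted):

.vb
     MatCreate(comm,&B);
     MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
     /* then insert entries with MatSetValues() and call MatAssemblyBegin()/MatAssemblyEnd() */
.ve
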
4075    Level: intermediate
4076 
4077 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4078           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4079 @*/
4080 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4081 {
4082   PetscErrorCode ierr;
4083 
4084   PetscFunctionBegin;
4085   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4086   PetscValidType(B,1);
4087   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4088   PetscFunctionReturn(0);
4089 }
4090 
4091 /*@
4092      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4093          in standard CSR format.
4094 
4095    Collective
4096 
4097    Input Parameters:
4098 +  comm - MPI communicator
4099 .  m - number of local rows (Cannot be PETSC_DECIDE)
4100 .  n - This value should be the same as the local size used in creating the
4101        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4102        calculated if N is given) For square matrices n is almost always m.
4103 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4104 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4105 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4106 .   j - column indices
4107 -   a - matrix values
4108 
4109    Output Parameter:
4110 .   mat - the matrix
4111 
4112    Level: intermediate
4113 
4114    Notes:
4115        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4116      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4117      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4118 
4119        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4120 
4121        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4122 
4123        The format used for the sparse matrix input is equivalent to a
4124     row-major ordering, i.e., for the following matrix the expected input data is
4125     as shown below.
4126 
4127 $        1 0 0
4128 $        2 0 3     P0
4129 $       -------
4130 $        4 5 6     P1
4131 $
4132 $     Process0 [P0]: rows_owned=[0,1]
4133 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4134 $        j =  {0,0,2}  [size = 3]
4135 $        v =  {1,2,3}  [size = 3]
4136 $
4137 $     Process1 [P1]: rows_owned=[2]
4138 $        i =  {0,3}    [size = nrow+1  = 1+1]
4139 $        j =  {0,1,2}  [size = 3]
4140 $        v =  {4,5,6}  [size = 3]
4141 
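       A minimal sketch of the corresponding call on each process (assuming comm and the local
    arrays i, j, v above, with m the number of locally owned rows and 3 the global column count):

$     Mat A;
$     MatCreateMPIAIJWithArrays(comm,m,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
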
4142 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4143           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4144 @*/
4145 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4146 {
4147   PetscErrorCode ierr;
4148 
4149   PetscFunctionBegin;
4150   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4151   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4152   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4153   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4154   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4155   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4156   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4157   PetscFunctionReturn(0);
4158 }
4159 
4160 /*@
4161      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
4162          in standard CSR format. Only the numerical values are updated; the row and column index arrays must be identical to those used to create the matrix
4163 
4164    Collective
4165 
4166    Input Parameters:
4167 +  mat - the matrix
4168 .  m - number of local rows (Cannot be PETSC_DECIDE)
4169 .  n - This value should be the same as the local size used in creating the
4170        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4171        calculated if N is given) For square matrices n is almost always m.
4172 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4173 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4174 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4175 .  J - column indices
4176 -  v - matrix values
4177 
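   Notes:
       A minimal usage sketch (assuming A was created with MatCreateMPIAIJWithArrays() from the arrays
    Ii and J, m and n are the same local sizes used at creation, and new numerical values have been
    placed in v; the sparsity pattern must be unchanged):

.vb
     MatUpdateMPIAIJWithArrays(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE,Ii,J,v);
.ve
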
4178    Level: intermediate
4179 
4180 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4181           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4182 @*/
4183 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4184 {
4185   PetscErrorCode ierr;
4186   PetscInt       cstart,nnz,i,j;
4187   PetscInt       *ld;
4188   PetscBool      nooffprocentries;
4189   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4190   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4191   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4192   const PetscInt *Adi = Ad->i;
4193   PetscInt       ldi,Iii,md;
4194 
4195   PetscFunctionBegin;
4196   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4197   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4198   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4199   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4200 
4201   cstart = mat->cmap->rstart;
4202   if (!Aij->ld) {
4203     /* count number of entries below block diagonal */
4204     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4205     Aij->ld = ld;
4206     for (i=0; i<m; i++) {
4207       nnz  = Ii[i+1]- Ii[i];
4208       j     = 0;
4209       while (j < nnz && J[j] < cstart) {j++;}
4210       J    += nnz;
4211       ld[i] = j;
4212     }
4213   } else {
4214     ld = Aij->ld;
4215   }
4216 
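  /* For each local row the values in v are ordered by global column: first the ld[i] entries to the left
     of the diagonal block (copied into the off-diagonal matrix B), then the md entries of the diagonal
     block (copied into A), then the remaining entries to the right of the diagonal block (copied into B) */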
4217   for (i=0; i<m; i++) {
4218     nnz  = Ii[i+1]- Ii[i];
4219     Iii  = Ii[i];
4220     ldi  = ld[i];
4221     md   = Adi[i+1]-Adi[i];
4222     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4223     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4224     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4225     ad  += md;
4226     ao  += nnz - md;
4227   }
4228   nooffprocentries      = mat->nooffprocentries;
4229   mat->nooffprocentries = PETSC_TRUE;
4230   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4231   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4232   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4233   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4234   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4235   mat->nooffprocentries = nooffprocentries;
4236   PetscFunctionReturn(0);
4237 }
4238 
4239 /*@C
4240    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4241    (the default parallel PETSc format).  For good matrix assembly performance
4242    the user should preallocate the matrix storage by setting the parameters
4243    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4244    performance can be increased by more than a factor of 50.
4245 
4246    Collective
4247 
4248    Input Parameters:
4249 +  comm - MPI communicator
4250 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4251            This value should be the same as the local size used in creating the
4252            y vector for the matrix-vector product y = Ax.
4253 .  n - This value should be the same as the local size used in creating the
4254        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4255        calculated if N is given) For square matrices n is almost always m.
4256 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4257 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4258 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4259            (same value is used for all local rows)
4260 .  d_nnz - array containing the number of nonzeros in the various rows of the
4261            DIAGONAL portion of the local submatrix (possibly different for each row)
4262            or NULL, if d_nz is used to specify the nonzero structure.
4263            The size of this array is equal to the number of local rows, i.e 'm'.
4264 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4265            submatrix (same value is used for all local rows).
4266 -  o_nnz - array containing the number of nonzeros in the various rows of the
4267            OFF-DIAGONAL portion of the local submatrix (possibly different for
4268            each row) or NULL, if o_nz is used to specify the nonzero
4269            structure. The size of this array is equal to the number
4270            of local rows, i.e 'm'.
4271 
4272    Output Parameter:
4273 .  A - the matrix
4274 
4275    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4276    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4277    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4278 
4279    Notes:
4280    If the *_nnz parameter is given then the *_nz parameter is ignored
4281 
4282    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4283    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4284    storage requirements for this matrix.
4285 
4286    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4287    processor then it must be used on all processors that share the object for
4288    that argument.
4289 
4290    The user MUST specify either the local or global matrix dimensions
4291    (possibly both).
4292 
4293    The parallel matrix is partitioned across processors such that the
4294    first m0 rows belong to process 0, the next m1 rows belong to
4295    process 1, the next m2 rows belong to process 2 etc., where
4296    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4297    values corresponding to an [m x N] submatrix.
4298 
4299    The columns are logically partitioned with the n0 columns belonging
4300    to the 0th partition, the next n1 columns belonging to the next
4301    partition etc., where n0,n1,n2,... are the input parameter 'n'.
4302 
4303    The DIAGONAL portion of the local submatrix on any given processor
4304    is the submatrix corresponding to the rows and columns m,n
4305    corresponding to the given processor, i.e. the diagonal matrix on
4306    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4307    etc. The remaining portion of the local submatrix [m x (N-n)]
4308    constitutes the OFF-DIAGONAL portion. The example below better
4309    illustrates this concept.
4310 
4311    For a square global matrix we define each processor's diagonal portion
4312    to be its local rows and the corresponding columns (a square submatrix);
4313    each processor's off-diagonal portion encompasses the remainder of the
4314    local matrix (a rectangular submatrix).
4315 
4316    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4317 
4318    When calling this routine with a single process communicator, a matrix of
4319    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4320    type of communicator, use the construction mechanism
4321 .vb
4322      MatCreate(...,&A);
4323      MatSetType(A,MATMPIAIJ);
4324      MatSetSizes(A, m,n,M,N);
4325      MatMPIAIJSetPreallocation(A,...);
4326 .ve
4329 
4330    By default, this format uses inodes (identical nodes) when possible.
4331    We search for consecutive rows with the same nonzero structure, thereby
4332    reusing matrix information to achieve increased efficiency.
4333 
4334    Options Database Keys:
4335 +  -mat_no_inode  - Do not use inodes
4336 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4337 
4340    Example usage:
4341 
4342    Consider the following 8x8 matrix with 34 non-zero values, that is
4343    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4344    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4345    as follows:
4346 
4347 .vb
4348             1  2  0  |  0  3  0  |  0  4
4349     Proc0   0  5  6  |  7  0  0  |  8  0
4350             9  0 10  | 11  0  0  | 12  0
4351     -------------------------------------
4352            13  0 14  | 15 16 17  |  0  0
4353     Proc1   0 18  0  | 19 20 21  |  0  0
4354             0  0  0  | 22 23  0  | 24  0
4355     -------------------------------------
4356     Proc2  25 26 27  |  0  0 28  | 29  0
4357            30  0  0  | 31 32 33  |  0 34
4358 .ve
4359 
4360    This can be represented as a collection of submatrices as
4361 
4362 .vb
4363       A B C
4364       D E F
4365       G H I
4366 .ve
4367 
4368    Where the submatrices A,B,C are owned by proc0, D,E,F are
4369    owned by proc1, G,H,I are owned by proc2.
4370 
4371    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4372    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4373    The 'M','N' parameters are 8,8, and have the same values on all procs.
4374 
4375    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4376    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4377    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4378    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4379    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4380    matrix, and [DF] as another SeqAIJ matrix.
4381 
4382    When d_nz, o_nz parameters are specified, d_nz storage elements are
4383    allocated for every row of the local diagonal submatrix, and o_nz
4384    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4385    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4386    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4387    In this case, the values of d_nz,o_nz are
4388 .vb
4389      proc0 : dnz = 2, o_nz = 2
4390      proc1 : dnz = 3, o_nz = 2
4391      proc2 : dnz = 1, o_nz = 4
4392 .ve
4393    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4394    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4395    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4396    34 values.
4397 
4398    When d_nnz, o_nnz parameters are specified, the storage is specified
4399    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4400    In the above case the values for d_nnz,o_nnz are
4401 .vb
4402      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4403      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4404      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4405 .ve
4406    Here the space allocated is the sum of all the above values, i.e. 34, and
4407    hence preallocation is perfect.
4408 
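   A minimal sketch of the corresponding call for the example above (assuming comm, the local sizes
   m and n, and the per-row arrays d_nnz, o_nnz have been set up on each process; error checking omitted):

.vb
     MatCreateAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);
     /* then insert entries with MatSetValues() and call MatAssemblyBegin()/MatAssemblyEnd() */
.ve
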
4409    Level: intermediate
4410 
4411 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4412           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4413 @*/
4414 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4415 {
4416   PetscErrorCode ierr;
4417   PetscMPIInt    size;
4418 
4419   PetscFunctionBegin;
4420   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4421   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4422   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4423   if (size > 1) {
4424     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4425     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4426   } else {
4427     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4428     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4429   }
4430   PetscFunctionReturn(0);
4431 }
4432 
4433 /*@C
4434   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4435 
4436   Not collective
4437 
4438   Input Parameter:
4439 . A - The MPIAIJ matrix
4440 
4441   Output Parameters:
4442 + Ad - The local diagonal block as a SeqAIJ matrix
4443 . Ao - The local off-diagonal block as a SeqAIJ matrix
4444 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4445 
4446   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4447   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4448   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4449   local column numbers to global column numbers in the original matrix.
4450 
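  A minimal usage sketch (assuming A is a MATMPIAIJ matrix):

.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     /* colmap[k] is the global column number of local column k of Ao */
.ve
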
4451   Level: intermediate
4452 
4453 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4454 @*/
4455 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4456 {
4457   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4458   PetscBool      flg;
4459   PetscErrorCode ierr;
4460 
4461   PetscFunctionBegin;
4462   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4463   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4464   if (Ad)     *Ad     = a->A;
4465   if (Ao)     *Ao     = a->B;
4466   if (colmap) *colmap = a->garray;
4467   PetscFunctionReturn(0);
4468 }
4469 
4470 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4471 {
4472   PetscErrorCode ierr;
4473   PetscInt       m,N,i,rstart,nnz,Ii;
4474   PetscInt       *indx;
4475   PetscScalar    *values;
4476 
4477   PetscFunctionBegin;
4478   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4479   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4480     PetscInt       *dnz,*onz,sum,bs,cbs;
4481 
4482     if (n == PETSC_DECIDE) {
4483       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4484     }
4485     /* Check sum(n) = N */
4486     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4487     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4488 
4489     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
4490     rstart -= m;
4491 
4492     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4493     for (i=0; i<m; i++) {
4494       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4495       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4496       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4497     }
4498 
4499     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4500     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4501     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4502     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4503     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4504     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4505     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4506     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4507     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
4508   }
4509 
4510   /* numeric phase */
4511   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4512   for (i=0; i<m; i++) {
4513     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4514     Ii   = i + rstart;
4515     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4516     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4517   }
4518   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4519   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4520   PetscFunctionReturn(0);
4521 }
4522 
4523 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4524 {
4525   PetscErrorCode    ierr;
4526   PetscMPIInt       rank;
4527   PetscInt          m,N,i,rstart,nnz;
4528   size_t            len;
4529   const PetscInt    *indx;
4530   PetscViewer       out;
4531   char              *name;
4532   Mat               B;
4533   const PetscScalar *values;
4534 
4535   PetscFunctionBegin;
4536   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4537   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4538   /* Should this be the type of the diagonal block of A? */
4539   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4540   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4541   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4542   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4543   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4544   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4545   for (i=0; i<m; i++) {
4546     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4547     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4548     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4549   }
4550   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4551   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4552 
4553   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
4554   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4555   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4556   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4557   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4558   ierr = PetscFree(name);CHKERRQ(ierr);
4559   ierr = MatView(B,out);CHKERRQ(ierr);
4560   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4561   ierr = MatDestroy(&B);CHKERRQ(ierr);
4562   PetscFunctionReturn(0);
4563 }
4564 
4565 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4566 {
4567   PetscErrorCode      ierr;
4568   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4569 
4570   PetscFunctionBegin;
4571   if (!merge) PetscFunctionReturn(0);
4572   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4573   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4574   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4575   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4576   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4577   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4578   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4579   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4580   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4581   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4582   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4583   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4584   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4585   ierr = PetscFree(merge);CHKERRQ(ierr);
4586   PetscFunctionReturn(0);
4587 }
4588 
4589 #include <../src/mat/utils/freespace.h>
4590 #include <petscbt.h>
4591 
4592 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4593 {
4594   PetscErrorCode      ierr;
4595   MPI_Comm            comm;
4596   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4597   PetscMPIInt         size,rank,taga,*len_s;
4598   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4599   PetscInt            proc,m;
4600   PetscInt            **buf_ri,**buf_rj;
4601   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4602   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4603   MPI_Request         *s_waits,*r_waits;
4604   MPI_Status          *status;
4605   MatScalar           *aa=a->a;
4606   MatScalar           **abuf_r,*ba_i;
4607   Mat_Merge_SeqsToMPI *merge;
4608   PetscContainer      container;
4609 
4610   PetscFunctionBegin;
4611   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4612   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4613 
4614   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4615   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4616 
4617   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4618   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4619   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4620 
4621   bi     = merge->bi;
4622   bj     = merge->bj;
4623   buf_ri = merge->buf_ri;
4624   buf_rj = merge->buf_rj;
4625 
4626   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4627   owners = merge->rowmap->range;
4628   len_s  = merge->len_s;
4629 
4630   /* send and recv matrix values */
4631   /*-----------------------------*/
4632   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4633   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4634 
4635   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4636   for (proc=0,k=0; proc<size; proc++) {
4637     if (!len_s[proc]) continue;
4638     i    = owners[proc];
4639     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
4640     k++;
4641   }
4642 
4643   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
4644   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
4645   ierr = PetscFree(status);CHKERRQ(ierr);
4646 
4647   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4648   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4649 
4650   /* insert mat values of mpimat */
4651   /*----------------------------*/
4652   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4653   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4654 
4655   for (k=0; k<merge->nrecv; k++) {
4656     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4657     nrows       = *(buf_ri_k[k]);
4658     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4659     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4660   }
4661 
4662   /* set values of ba */
4663   m = merge->rowmap->n;
4664   for (i=0; i<m; i++) {
4665     arow = owners[rank] + i;
4666     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4667     bnzi = bi[i+1] - bi[i];
4668     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4669 
4670     /* add local non-zero vals of this proc's seqmat into ba */
4671     anzi   = ai[arow+1] - ai[arow];
4672     aj     = a->j + ai[arow];
4673     aa     = a->a + ai[arow];
4674     nextaj = 0;
4675     for (j=0; nextaj<anzi; j++) {
4676       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4677         ba_i[j] += aa[nextaj++];
4678       }
4679     }
4680 
4681     /* add received vals into ba */
4682     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4683       /* i-th row */
4684       if (i == *nextrow[k]) {
4685         anzi   = *(nextai[k]+1) - *nextai[k];
4686         aj     = buf_rj[k] + *(nextai[k]);
4687         aa     = abuf_r[k] + *(nextai[k]);
4688         nextaj = 0;
4689         for (j=0; nextaj<anzi; j++) {
4690           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4691             ba_i[j] += aa[nextaj++];
4692           }
4693         }
4694         nextrow[k]++; nextai[k]++;
4695       }
4696     }
4697     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4698   }
4699   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4700   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4701 
4702   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4703   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4704   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4705   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4706   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4707   PetscFunctionReturn(0);
4708 }
4709 
4710 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4711 {
4712   PetscErrorCode      ierr;
4713   Mat                 B_mpi;
4714   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4715   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4716   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4717   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4718   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4719   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4720   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4721   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4722   MPI_Status          *status;
4723   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4724   PetscBT             lnkbt;
4725   Mat_Merge_SeqsToMPI *merge;
4726   PetscContainer      container;
4727 
4728   PetscFunctionBegin;
4729   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4730 
4731   /* make sure it is a PETSc comm */
4732   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4733   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4734   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4735 
4736   ierr = PetscNew(&merge);CHKERRQ(ierr);
4737   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4738 
4739   /* determine row ownership */
4740   /*---------------------------------------------------------*/
4741   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4742   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4743   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4744   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4745   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4746   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4747   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4748 
4749   m      = merge->rowmap->n;
4750   owners = merge->rowmap->range;
4751 
4752   /* determine the number of messages to send, their lengths */
4753   /*---------------------------------------------------------*/
4754   len_s = merge->len_s;
4755 
4756   len          = 0; /* length of buf_si[] */
4757   merge->nsend = 0;
4758   for (proc=0; proc<size; proc++) {
4759     len_si[proc] = 0;
4760     if (proc == rank) {
4761       len_s[proc] = 0;
4762     } else {
4763       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4764       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4765     }
4766     if (len_s[proc]) {
4767       merge->nsend++;
4768       nrows = 0;
4769       for (i=owners[proc]; i<owners[proc+1]; i++) {
4770         if (ai[i+1] > ai[i]) nrows++;
4771       }
4772       len_si[proc] = 2*(nrows+1);
4773       len         += len_si[proc];
4774     }
4775   }
4776 
4777   /* determine the number and length of messages to receive for ij-structure */
4778   /*-------------------------------------------------------------------------*/
4779   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4780   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4781 
4782   /* post the Irecv of j-structure */
4783   /*-------------------------------*/
4784   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4785   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4786 
4787   /* post the Isend of j-structure */
4788   /*--------------------------------*/
4789   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4790 
4791   for (proc=0, k=0; proc<size; proc++) {
4792     if (!len_s[proc]) continue;
4793     i    = owners[proc];
4794     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
4795     k++;
4796   }
4797 
4798   /* receives and sends of j-structure are complete */
4799   /*------------------------------------------------*/
4800   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
4801   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}
4802 
4803   /* send and recv i-structure */
4804   /*---------------------------*/
4805   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4806   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4807 
4808   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4809   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4810   for (proc=0,k=0; proc<size; proc++) {
4811     if (!len_s[proc]) continue;
4812     /* form outgoing message for i-structure:
4813          buf_si[0]:                 nrows to be sent
4814                [1:nrows]:           row index (global)
4815                [nrows+1:2*nrows+1]: i-structure index
4816     */
4817     /*-------------------------------------------*/
4818     nrows       = len_si[proc]/2 - 1;
4819     buf_si_i    = buf_si + nrows+1;
4820     buf_si[0]   = nrows;
4821     buf_si_i[0] = 0;
4822     nrows       = 0;
4823     for (i=owners[proc]; i<owners[proc+1]; i++) {
4824       anzi = ai[i+1] - ai[i];
4825       if (anzi) {
4826         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4827         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4828         nrows++;
4829       }
4830     }
4831     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
4832     k++;
4833     buf_si += len_si[proc];
4834   }
4835 
4836   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
4837   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}
4838 
4839   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4840   for (i=0; i<merge->nrecv; i++) {
4841     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4842   }
4843 
4844   ierr = PetscFree(len_si);CHKERRQ(ierr);
4845   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4846   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4847   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4848   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4849   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4850   ierr = PetscFree(status);CHKERRQ(ierr);
4851 
4852   /* compute a local seq matrix in each processor */
4853   /*----------------------------------------------*/
4854   /* allocate bi array and free space for accumulating nonzero column info */
4855   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4856   bi[0] = 0;
4857 
4858   /* create and initialize a linked list */
4859   nlnk = N+1;
4860   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4861 
4862   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4863   len  = ai[owners[rank+1]] - ai[owners[rank]];
4864   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4865 
4866   current_space = free_space;
4867 
4868   /* determine symbolic info for each local row */
4869   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4870 
4871   for (k=0; k<merge->nrecv; k++) {
4872     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4873     nrows       = *buf_ri_k[k];
4874     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4875     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4876   }
4877 
4878   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4879   len  = 0;
4880   for (i=0; i<m; i++) {
4881     bnzi = 0;
4882     /* add local non-zero cols of this proc's seqmat into lnk */
4883     arow  = owners[rank] + i;
4884     anzi  = ai[arow+1] - ai[arow];
4885     aj    = a->j + ai[arow];
4886     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4887     bnzi += nlnk;
4888     /* add received col data into lnk */
4889     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4890       if (i == *nextrow[k]) { /* i-th row */
4891         anzi  = *(nextai[k]+1) - *nextai[k];
4892         aj    = buf_rj[k] + *nextai[k];
4893         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4894         bnzi += nlnk;
4895         nextrow[k]++; nextai[k]++;
4896       }
4897     }
4898     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4899 
4900     /* if free space is not available, make more free space */
4901     if (current_space->local_remaining<bnzi) {
4902       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4903       nspacedouble++;
4904     }
4905     /* copy data into free space, then initialize lnk */
4906     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4907     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4908 
4909     current_space->array           += bnzi;
4910     current_space->local_used      += bnzi;
4911     current_space->local_remaining -= bnzi;
4912 
4913     bi[i+1] = bi[i] + bnzi;
4914   }
4915 
4916   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4917 
4918   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4919   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4920   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4921 
4922   /* create symbolic parallel matrix B_mpi */
4923   /*---------------------------------------*/
4924   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4925   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4926   if (n==PETSC_DECIDE) {
4927     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4928   } else {
4929     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4930   }
4931   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4932   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4933   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4934   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4935   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4936 
4937   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4938   B_mpi->assembled  = PETSC_FALSE;
4939   merge->bi         = bi;
4940   merge->bj         = bj;
4941   merge->buf_ri     = buf_ri;
4942   merge->buf_rj     = buf_rj;
4943   merge->coi        = NULL;
4944   merge->coj        = NULL;
4945   merge->owners_co  = NULL;
4946 
4947   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4948 
4949   /* attach the supporting struct to B_mpi for reuse */
4950   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4951   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4952   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4953   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4954   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4955   *mpimat = B_mpi;
4956 
4957   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4958   PetscFunctionReturn(0);
4959 }
4960 
4961 /*@C
4962       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4963                  matrices from each processor
4964 
4965     Collective
4966 
4967    Input Parameters:
4968 +    comm - the communicator the parallel matrix will live on
4969 .    seqmat - the input sequential matrix
4970 .    m - number of local rows (or PETSC_DECIDE)
4971 .    n - number of local columns (or PETSC_DECIDE)
4972 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4973 
4974    Output Parameter:
4975 .    mpimat - the parallel matrix generated
4976 
4977     Level: advanced
4978 
4979    Notes:
4980      The dimensions of the sequential matrix in each processor MUST be the same.
4981      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4982      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
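
     A minimal usage sketch (assuming each process holds its own seqmat of identical global dimensions;
     error checking omitted):
.vb
     Mat mpimat;
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     /* later, after only the numerical values in seqmat have changed */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve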
4983 @*/
4984 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4985 {
4986   PetscErrorCode ierr;
4987   PetscMPIInt    size;
4988 
4989   PetscFunctionBegin;
4990   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4991   if (size == 1) {
4992     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4993     if (scall == MAT_INITIAL_MATRIX) {
4994       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4995     } else {
4996       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4997     }
4998     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4999     PetscFunctionReturn(0);
5000   }
5001   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5002   if (scall == MAT_INITIAL_MATRIX) {
5003     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5004   }
5005   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5006   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5007   PetscFunctionReturn(0);
5008 }
5009 
5010 /*@
5011      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5012           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
5013           with MatGetSize()
5014 
5015     Not Collective
5016 
5017    Input Parameters:
5018 +    A - the matrix
5019 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5020 
5021    Output Parameter:
5022 .    A_loc - the local sequential matrix generated
5023 
5024     Level: developer
5025 
5026    Notes:
5027      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5028      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5029      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5030      modify the values of the returned A_loc.
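
   Example Usage (a minimal sketch; A, Aloc, and ierr are assumed locals, with A an assembled MATMPIAIJ matrix):
.vb
   Mat Aloc;
   ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
   /* ... use Aloc, then refresh its values after the values of A have changed ... */
   ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
   ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve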
5031 
5032 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5033 @*/
5034 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5035 {
5036   PetscErrorCode    ierr;
5037   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5038   Mat_SeqAIJ        *mat,*a,*b;
5039   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5040   const PetscScalar *aa,*ba,*aav,*bav;
5041   PetscScalar       *ca,*cam;
5042   PetscMPIInt       size;
5043   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5044   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5045   PetscBool         match;
5046 
5047   PetscFunctionBegin;
5048   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5049   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5050   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5051   if (size == 1) {
5052     if (scall == MAT_INITIAL_MATRIX) {
5053       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5054       *A_loc = mpimat->A;
5055     } else if (scall == MAT_REUSE_MATRIX) {
5056       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5057     }
5058     PetscFunctionReturn(0);
5059   }
5060 
5061   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5062   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5063   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5064   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5065   ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5066   ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5067   aa   = aav;
5068   ba   = bav;
5069   if (scall == MAT_INITIAL_MATRIX) {
5070     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5071     ci[0] = 0;
5072     for (i=0; i<am; i++) {
5073       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5074     }
5075     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5076     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5077     k    = 0;
5078     for (i=0; i<am; i++) {
5079       ncols_o = bi[i+1] - bi[i];
5080       ncols_d = ai[i+1] - ai[i];
5081       /* off-diagonal portion of A */
5082       for (jo=0; jo<ncols_o; jo++) {
5083         col = cmap[*bj];
5084         if (col >= cstart) break;
5085         cj[k]   = col; bj++;
5086         ca[k++] = *ba++;
5087       }
5088       /* diagonal portion of A */
5089       for (j=0; j<ncols_d; j++) {
5090         cj[k]   = cstart + *aj++;
5091         ca[k++] = *aa++;
5092       }
5093       /* off-diagonal portion of A */
5094       for (j=jo; j<ncols_o; j++) {
5095         cj[k]   = cmap[*bj++];
5096         ca[k++] = *ba++;
5097       }
5098     }
5099     /* put together the new matrix */
5100     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5101     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5102     /* Since these are PETSc arrays, change flags to free them as necessary. */
5103     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5104     mat->free_a  = PETSC_TRUE;
5105     mat->free_ij = PETSC_TRUE;
5106     mat->nonew   = 0;
5107   } else if (scall == MAT_REUSE_MATRIX) {
5108     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5109 #if defined(PETSC_HAVE_DEVICE)
5110     (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5111 #endif
5112     ci = mat->i; cj = mat->j; cam = mat->a;
5113     for (i=0; i<am; i++) {
5114       /* off-diagonal portion of A */
5115       ncols_o = bi[i+1] - bi[i];
5116       for (jo=0; jo<ncols_o; jo++) {
5117         col = cmap[*bj];
5118         if (col >= cstart) break;
5119         *cam++ = *ba++; bj++;
5120       }
5121       /* diagonal portion of A */
5122       ncols_d = ai[i+1] - ai[i];
5123       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5124       /* off-diagonal portion of A */
5125       for (j=jo; j<ncols_o; j++) {
5126         *cam++ = *ba++; bj++;
5127       }
5128     }
5129   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5130   ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5131   ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5132   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5133   PetscFunctionReturn(0);
5134 }
5135 
5136 /*@
5137      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5138           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5139 
5140     Not Collective
5141 
5142    Input Parameters:
5143 +    A - the matrix
5144 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5145 
5146    Output Parameter:
5147 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5148 -    A_loc - the local sequential matrix generated
5149 
5150     Level: developer
5151 
5152    Notes:
5153      This is different from MatMPIAIJGetLocalMat() since the first columns of the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
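
   Example Usage (a minimal sketch; A, Aloc, glob, and ierr are assumed locals; pass NULL for glob if the global column indices are not needed):
.vb
   Mat Aloc;
   IS  glob;
   ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&Aloc);CHKERRQ(ierr);
   /* local column c of Aloc corresponds to the c-th entry of glob, a global column index of A */
   ierr = ISDestroy(&glob);CHKERRQ(ierr);
   ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve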
5154 
5155 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5156 
5157 @*/
5158 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5159 {
5160   PetscErrorCode ierr;
5161   Mat            Ao,Ad;
5162   const PetscInt *cmap;
5163   PetscMPIInt    size;
5164   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5165 
5166   PetscFunctionBegin;
5167   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5168   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5169   if (size == 1) {
5170     if (scall == MAT_INITIAL_MATRIX) {
5171       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5172       *A_loc = Ad;
5173     } else if (scall == MAT_REUSE_MATRIX) {
5174       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5175     }
5176     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5177     PetscFunctionReturn(0);
5178   }
5179   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5180   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5181   if (f) {
5182     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5183   } else {
5184     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5185     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5186     Mat_SeqAIJ        *c;
5187     PetscInt          *ai = a->i, *aj = a->j;
5188     PetscInt          *bi = b->i, *bj = b->j;
5189     PetscInt          *ci,*cj;
5190     const PetscScalar *aa,*ba;
5191     PetscScalar       *ca;
5192     PetscInt          i,j,am,dn,on;
5193 
5194     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5195     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5196     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5197     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5198     if (scall == MAT_INITIAL_MATRIX) {
5199       PetscInt k;
5200       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5201       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5202       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5203       ci[0] = 0;
5204       for (i=0,k=0; i<am; i++) {
5205         const PetscInt ncols_o = bi[i+1] - bi[i];
5206         const PetscInt ncols_d = ai[i+1] - ai[i];
5207         ci[i+1] = ci[i] + ncols_o + ncols_d;
5208         /* diagonal portion of A */
5209         for (j=0; j<ncols_d; j++,k++) {
5210           cj[k] = *aj++;
5211           ca[k] = *aa++;
5212         }
5213         /* off-diagonal portion of A */
5214         for (j=0; j<ncols_o; j++,k++) {
5215           cj[k] = dn + *bj++;
5216           ca[k] = *ba++;
5217         }
5218       }
5219       /* put together the new matrix */
5220       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5221       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5222       /* Since these are PETSc arrays, change flags to free them as necessary. */
5223       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5224       c->free_a  = PETSC_TRUE;
5225       c->free_ij = PETSC_TRUE;
5226       c->nonew   = 0;
5227       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5228     } else if (scall == MAT_REUSE_MATRIX) {
5229 #if defined(PETSC_HAVE_DEVICE)
5230       (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5231 #endif
5232       c  = (Mat_SeqAIJ*)(*A_loc)->data;
5233       ca = c->a;
5234       for (i=0; i<am; i++) {
5235         const PetscInt ncols_d = ai[i+1] - ai[i];
5236         const PetscInt ncols_o = bi[i+1] - bi[i];
5237         /* diagonal portion of A */
5238         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5239         /* off-diagonal portion of A */
5240         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5241       }
5242     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5243     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5244     ierr = MatSeqAIJRestoreArrayRead(Ao,&ba);CHKERRQ(ierr);
5245     if (glob) {
5246       PetscInt cst, *gidx;
5247 
5248       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5249       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5250       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5251       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5252       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5253     }
5254   }
5255   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5256   PetscFunctionReturn(0);
5257 }
5258 
5259 /*@C
5260      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5261 
5262     Not Collective
5263 
5264    Input Parameters:
5265 +    A - the matrix
5266 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5267 -    row, col - index sets of rows and columns to extract (or NULL)
5268 
5269    Output Parameter:
5270 .    A_loc - the local sequential matrix generated
5271 
5272     Level: developer
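
   Example Usage (a minimal sketch; A, Aloc, and ierr are assumed locals; passing NULL for row and col selects all local rows and the nonzero columns):
.vb
   Mat Aloc;
   ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
   /* ... use Aloc ... */
   ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve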
5273 
5274 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5275 
5276 @*/
5277 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5278 {
5279   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5280   PetscErrorCode ierr;
5281   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5282   IS             isrowa,iscola;
5283   Mat            *aloc;
5284   PetscBool      match;
5285 
5286   PetscFunctionBegin;
5287   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5288   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5289   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5290   if (!row) {
5291     start = A->rmap->rstart; end = A->rmap->rend;
5292     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5293   } else {
5294     isrowa = *row;
5295   }
5296   if (!col) {
5297     start = A->cmap->rstart;
5298     cmap  = a->garray;
5299     nzA   = a->A->cmap->n;
5300     nzB   = a->B->cmap->n;
5301     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5302     ncols = 0;
5303     for (i=0; i<nzB; i++) {
5304       if (cmap[i] < start) idx[ncols++] = cmap[i];
5305       else break;
5306     }
5307     imark = i;
5308     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5309     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5310     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5311   } else {
5312     iscola = *col;
5313   }
5314   if (scall != MAT_INITIAL_MATRIX) {
5315     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5316     aloc[0] = *A_loc;
5317   }
5318   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5319   if (!col) { /* attach global id of condensed columns */
5320     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5321   }
5322   *A_loc = aloc[0];
5323   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5324   if (!row) {
5325     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5326   }
5327   if (!col) {
5328     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5329   }
5330   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5331   PetscFunctionReturn(0);
5332 }
5333 
5334 /*
5335  * Create a sequential AIJ matrix based on row indices: once a row is matched, all of its columns are extracted.
5336  * A row could be local or remote. The routine is designed to be memory scalable, so that nothing is sized by
5337  * a global dimension.
5338  * */
5339 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5340 {
5341   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5342   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5343   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5344   PetscMPIInt              owner;
5345   PetscSFNode              *iremote,*oiremote;
5346   const PetscInt           *lrowindices;
5347   PetscErrorCode           ierr;
5348   PetscSF                  sf,osf;
5349   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5350   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5351   MPI_Comm                 comm;
5352   ISLocalToGlobalMapping   mapping;
5353 
5354   PetscFunctionBegin;
5355   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5356   /* plocalsize is the number of roots
5357    * nrows is the number of leaves
5358    * */
5359   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5360   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5361   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5362   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5363   for (i=0;i<nrows;i++) {
5364     /* Find a remote index and an owner for a row
5365      * The row could be local or remote
5366      * */
5367     owner = 0;
5368     lidx  = 0;
5369     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5370     iremote[i].index = lidx;
5371     iremote[i].rank  = owner;
5372   }
5373   /* Create SF to communicate how many nonzero columns for each row */
5374   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5375   /* SF will figure out the number of nonzero columns for each row, and their
5376    * offsets
5377    * */
5378   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5379   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5380   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5381 
5382   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5383   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5384   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5385   roffsets[0] = 0;
5386   roffsets[1] = 0;
5387   for (i=0;i<plocalsize;i++) {
5388     /* diag */
5389     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5390     /* off diag */
5391     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5392     /* compute offsets so that we know the relative location of each row */
5393     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5394     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5395   }
5396   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5397   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5398   /* 'r' means root, and 'l' means leaf */
5399   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
5400   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
5401   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
5402   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
5403   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5404   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5405   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5406   dntotalcols = 0;
5407   ontotalcols = 0;
5408   ncol = 0;
5409   for (i=0;i<nrows;i++) {
5410     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5411     ncol = PetscMax(pnnz[i],ncol);
5412     /* diag */
5413     dntotalcols += nlcols[i*2+0];
5414     /* off diag */
5415     ontotalcols += nlcols[i*2+1];
5416   }
5417   /* We do not need to figure out the exact number of columns
5418    * since all the calculations are done by going through the raw data
5419    * */
5420   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5421   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5422   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5423   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5424   /* diag */
5425   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5426   /* off diag */
5427   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5428   /* diag */
5429   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5430   /* off diag */
5431   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5432   dntotalcols = 0;
5433   ontotalcols = 0;
5434   ntotalcols  = 0;
5435   for (i=0;i<nrows;i++) {
5436     owner = 0;
5437     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5438     /* Set iremote for diag matrix */
5439     for (j=0;j<nlcols[i*2+0];j++) {
5440       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5441       iremote[dntotalcols].rank    = owner;
5442       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5443       ilocal[dntotalcols++]        = ntotalcols++;
5444     }
5445     /* off diag */
5446     for (j=0;j<nlcols[i*2+1];j++) {
5447       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5448       oiremote[ontotalcols].rank    = owner;
5449       oilocal[ontotalcols++]        = ntotalcols++;
5450     }
5451   }
5452   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5453   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5454   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5455   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5456   /* P serves as the roots and P_oth as the leaves
5457    * Diag matrix
5458    * */
5459   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5460   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5461   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5462 
5463   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5464   /* Off diag */
5465   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5466   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5467   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5468   /* We operate on the matrix internal data for saving memory */
5469   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5470   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5471   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5472   /* Convert to global indices for diag matrix */
5473   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5474   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5475   /* We want P_oth to store global indices */
5476   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5477   /* Use memory scalable approach */
5478   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5479   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5480   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5481   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5482   /* Convert back to local indices */
5483   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5484   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5485   nout = 0;
5486   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5487   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
5488   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5489   /* Exchange values */
5490   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5491   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5492   /* Stop PETSc from shrinking memory */
5493   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5494   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5495   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5496   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5497   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5498   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5499   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5500   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5501   PetscFunctionReturn(0);
5502 }
5503 
5504 /*
5505  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5506  * This supports MPIAIJ and MAIJ
5507  * */
5508 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5509 {
5510   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5511   Mat_SeqAIJ            *p_oth;
5512   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5513   IS                    rows,map;
5514   PetscHMapI            hamp;
5515   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5516   MPI_Comm              comm;
5517   PetscSF               sf,osf;
5518   PetscBool             has;
5519   PetscErrorCode        ierr;
5520 
5521   PetscFunctionBegin;
5522   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5523   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5524   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5525    *  and then create a submatrix (that often is an overlapping matrix)
5526    * */
5527   if (reuse == MAT_INITIAL_MATRIX) {
5528     /* Use a hash table to figure out unique keys */
5529     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5530     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5531     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5532     count = 0;
5533     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5534     for (i=0;i<a->B->cmap->n;i++) {
5535       key  = a->garray[i]/dof;
5536       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5537       if (!has) {
5538         mapping[i] = count;
5539         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5540       } else {
5541         /* Current 'i' has the same key as the previous step */
5542         mapping[i] = count-1;
5543       }
5544     }
5545     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5546     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5547     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr);
5548     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5549     off = 0;
5550     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5551     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5552     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5553     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5554     /* In case the matrix was already created and the user wants to recreate it */
5555     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5556     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5557     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5558     ierr = ISDestroy(&map);CHKERRQ(ierr);
5559     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5560   } else if (reuse == MAT_REUSE_MATRIX) {
5561     /* If the matrix was already created, we simply update the values using the SF objects
5562      * that were attached to the matrix earlier.
5563      *  */
5564     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5565     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5566     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5567     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5568     /* Update values in place */
5569     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5570     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5571     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5572     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5573   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5574   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5575   PetscFunctionReturn(0);
5576 }
5577 
5578 /*@C
5579     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5580 
5581     Collective on Mat
5582 
5583    Input Parameters:
5584 +    A,B - the matrices in mpiaij format
5585 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5586 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5587 
5588    Output Parameter:
5589 +    rowb, colb - index sets of rows and columns of B to extract
5590 -    B_seq - the sequential matrix generated
5591 
5592     Level: developer
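
   Example Usage (a minimal sketch; A, B, Bseq, rowb, colb, and ierr are assumed locals, with A and B of type MATMPIAIJ and compatible local dimensions):
.vb
   Mat Bseq;
   IS  rowb = NULL,colb = NULL;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
   /* ... use Bseq; the same rowb and colb must be passed again for MAT_REUSE_MATRIX ... */
   ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
.ve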
5593 
5594 @*/
5595 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5596 {
5597   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5598   PetscErrorCode ierr;
5599   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5600   IS             isrowb,iscolb;
5601   Mat            *bseq=NULL;
5602 
5603   PetscFunctionBegin;
5604   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5605     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5606   }
5607   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5608 
5609   if (scall == MAT_INITIAL_MATRIX) {
5610     start = A->cmap->rstart;
5611     cmap  = a->garray;
5612     nzA   = a->A->cmap->n;
5613     nzB   = a->B->cmap->n;
5614     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5615     ncols = 0;
5616     for (i=0; i<nzB; i++) {  /* row < local row index */
5617       if (cmap[i] < start) idx[ncols++] = cmap[i];
5618       else break;
5619     }
5620     imark = i;
5621     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5622     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5623     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5624     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5625   } else {
5626     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5627     isrowb  = *rowb; iscolb = *colb;
5628     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5629     bseq[0] = *B_seq;
5630   }
5631   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5632   *B_seq = bseq[0];
5633   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5634   if (!rowb) {
5635     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5636   } else {
5637     *rowb = isrowb;
5638   }
5639   if (!colb) {
5640     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5641   } else {
5642     *colb = iscolb;
5643   }
5644   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5645   PetscFunctionReturn(0);
5646 }
5647 
5648 /*
5649     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5650     of the OFF-DIAGONAL portion of local A
5651 
5652     Collective on Mat
5653 
5654    Input Parameters:
5655 +    A,B - the matrices in mpiaij format
5656 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5657 
5658    Output Parameter:
5659 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5660 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5661 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5662 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5663 
5664     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5665      for this matrix. This is not desirable.
5666 
5667     Level: developer
5668 
5669 */
5670 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5671 {
5672   PetscErrorCode         ierr;
5673   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5674   Mat_SeqAIJ             *b_oth;
5675   VecScatter             ctx;
5676   MPI_Comm               comm;
5677   const PetscMPIInt      *rprocs,*sprocs;
5678   const PetscInt         *srow,*rstarts,*sstarts;
5679   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5680   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5681   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5682   MPI_Request            *rwaits = NULL,*swaits = NULL;
5683   MPI_Status             rstatus;
5684   PetscMPIInt            size,tag,rank,nsends_mpi,nrecvs_mpi;
5685   PETSC_UNUSED PetscMPIInt jj;
5686 
5687   PetscFunctionBegin;
5688   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5689   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5690 
5691   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5692     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5693   }
5694   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5695   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
5696 
5697   if (size == 1) {
5698     startsj_s = NULL;
5699     bufa_ptr  = NULL;
5700     *B_oth    = NULL;
5701     PetscFunctionReturn(0);
5702   }
5703 
5704   ctx = a->Mvctx;
5705   tag = ((PetscObject)ctx)->tag;
5706 
5707   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5708   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5709   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5710   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5711   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5712   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5713 
5714   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5715   if (scall == MAT_INITIAL_MATRIX) {
5716     /* i-array */
5717     /*---------*/
5718     /*  post receives */
5719     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5720     for (i=0; i<nrecvs; i++) {
5721       rowlen = rvalues + rstarts[i]*rbs;
5722       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5723       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5724     }
5725 
5726     /* pack the outgoing message */
5727     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5728 
5729     sstartsj[0] = 0;
5730     rstartsj[0] = 0;
5731     len         = 0; /* total length of j or a array to be sent */
5732     if (nsends) {
5733       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5734       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5735     }
5736     for (i=0; i<nsends; i++) {
5737       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5738       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5739       for (j=0; j<nrows; j++) {
5740         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5741         for (l=0; l<sbs; l++) {
5742           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5743 
5744           rowlen[j*sbs+l] = ncols;
5745 
5746           len += ncols;
5747           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5748         }
5749         k++;
5750       }
5751       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5752 
5753       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5754     }
5755     /* recvs and sends of i-array are completed */
5756     i = nrecvs;
5757     while (i--) {
5758       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5759     }
5760     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5761     ierr = PetscFree(svalues);CHKERRQ(ierr);
5762 
5763     /* allocate buffers for sending j and a arrays */
5764     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5765     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5766 
5767     /* create i-array of B_oth */
5768     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5769 
5770     b_othi[0] = 0;
5771     len       = 0; /* total length of j or a array to be received */
5772     k         = 0;
5773     for (i=0; i<nrecvs; i++) {
5774       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5775       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5776       for (j=0; j<nrows; j++) {
5777         b_othi[k+1] = b_othi[k] + rowlen[j];
5778         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5779         k++;
5780       }
5781       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5782     }
5783     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5784 
5785     /* allocate space for the j and a arrays of B_oth */
5786     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5787     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5788 
5789     /* j-array */
5790     /*---------*/
5791     /*  post receives of j-array */
5792     for (i=0; i<nrecvs; i++) {
5793       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5794       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5795     }
5796 
5797     /* pack the outgoing message j-array */
5798     if (nsends) k = sstarts[0];
5799     for (i=0; i<nsends; i++) {
5800       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5801       bufJ  = bufj+sstartsj[i];
5802       for (j=0; j<nrows; j++) {
5803         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5804         for (ll=0; ll<sbs; ll++) {
5805           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5806           for (l=0; l<ncols; l++) {
5807             *bufJ++ = cols[l];
5808           }
5809           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5810         }
5811       }
5812       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5813     }
5814 
5815     /* recvs and sends of j-array are completed */
5816     i = nrecvs;
5817     while (i--) {
5818       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5819     }
5820     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5821   } else if (scall == MAT_REUSE_MATRIX) {
5822     sstartsj = *startsj_s;
5823     rstartsj = *startsj_r;
5824     bufa     = *bufa_ptr;
5825     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5826     b_otha   = b_oth->a;
5827 #if defined(PETSC_HAVE_DEVICE)
5828     (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU;
5829 #endif
5830   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5831 
5832   /* a-array */
5833   /*---------*/
5834   /*  post receives of a-array */
5835   for (i=0; i<nrecvs; i++) {
5836     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5837     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5838   }
5839 
5840   /* pack the outgoing message a-array */
5841   if (nsends) k = sstarts[0];
5842   for (i=0; i<nsends; i++) {
5843     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5844     bufA  = bufa+sstartsj[i];
5845     for (j=0; j<nrows; j++) {
5846       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5847       for (ll=0; ll<sbs; ll++) {
5848         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5849         for (l=0; l<ncols; l++) {
5850           *bufA++ = vals[l];
5851         }
5852         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5853       }
5854     }
5855     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5856   }
5857   /* recvs and sends of a-array are completed */
5858   i = nrecvs;
5859   while (i--) {
5860     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5861   }
5862   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5863   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5864 
5865   if (scall == MAT_INITIAL_MATRIX) {
5866     /* put together the new matrix */
5867     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5868 
5869     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5870     /* Since these are PETSc arrays, change flags to free them as necessary. */
5871     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5872     b_oth->free_a  = PETSC_TRUE;
5873     b_oth->free_ij = PETSC_TRUE;
5874     b_oth->nonew   = 0;
5875 
5876     ierr = PetscFree(bufj);CHKERRQ(ierr);
5877     if (!startsj_s || !bufa_ptr) {
5878       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5879       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5880     } else {
5881       *startsj_s = sstartsj;
5882       *startsj_r = rstartsj;
5883       *bufa_ptr  = bufa;
5884     }
5885   }
5886 
5887   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5888   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5889   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5890   PetscFunctionReturn(0);
5891 }
5892 
5893 /*@C
5894   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5895 
5896   Not Collective
5897 
5898   Input Parameters:
5899 . A - The matrix in mpiaij format
5900 
5901   Output Parameter:
5902 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5903 . colmap - A map from global column index to local index into lvec
5904 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5905 
5906   Level: developer
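
  Example Usage (a minimal sketch; A, lvec, colmap, Mvctx, and ierr are assumed locals; the returned objects are owned by the matrix and must not be destroyed by the caller):
.vb
   Vec        lvec;
   VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
   PetscTable colmap;
#else
   PetscInt   *colmap;
#endif
   ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve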
5907 
5908 @*/
5909 #if defined(PETSC_USE_CTABLE)
5910 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5911 #else
5912 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5913 #endif
5914 {
5915   Mat_MPIAIJ *a;
5916 
5917   PetscFunctionBegin;
5918   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5919   PetscValidPointer(lvec, 2);
5920   PetscValidPointer(colmap, 3);
5921   PetscValidPointer(multScatter, 4);
5922   a = (Mat_MPIAIJ*) A->data;
5923   if (lvec) *lvec = a->lvec;
5924   if (colmap) *colmap = a->colmap;
5925   if (multScatter) *multScatter = a->Mvctx;
5926   PetscFunctionReturn(0);
5927 }
5928 
5929 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5930 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5932 #if defined(PETSC_HAVE_MKL_SPARSE)
5933 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5934 #endif
5935 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5937 #if defined(PETSC_HAVE_ELEMENTAL)
5938 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5939 #endif
5940 #if defined(PETSC_HAVE_SCALAPACK)
5941 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5942 #endif
5943 #if defined(PETSC_HAVE_HYPRE)
5944 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5945 #endif
5946 #if defined(PETSC_HAVE_CUDA)
5947 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5948 #endif
5949 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5950 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5951 #endif
5952 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5953 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5954 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5955 
5956 /*
5957     Computes (B'*A')' since computing A*B directly is untenable
5958 
5959                n                       p                          p
5960         [             ]       [             ]         [                 ]
5961       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5962         [             ]       [             ]         [                 ]
5963 
5964 */
5965 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5966 {
5967   PetscErrorCode ierr;
5968   Mat            At,Bt,Ct;
5969 
5970   PetscFunctionBegin;
5971   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5972   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5973   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5974   ierr = MatDestroy(&At);CHKERRQ(ierr);
5975   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5976   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5977   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5978   PetscFunctionReturn(0);
5979 }
5980 
5981 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5982 {
5983   PetscErrorCode ierr;
5984   PetscBool      cisdense;
5985 
5986   PetscFunctionBegin;
5987   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5988   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5989   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5990   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5991   if (!cisdense) {
5992     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5993   }
5994   ierr = MatSetUp(C);CHKERRQ(ierr);
5995 
5996   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5997   PetscFunctionReturn(0);
5998 }
5999 
6000 /* ----------------------------------------------------------------*/
6001 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6002 {
6003   Mat_Product *product = C->product;
6004   Mat         A = product->A,B=product->B;
6005 
6006   PetscFunctionBegin;
6007   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6008     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6009 
6010   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6011   C->ops->productsymbolic = MatProductSymbolic_AB;
6012   PetscFunctionReturn(0);
6013 }
6014 
6015 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6016 {
6017   PetscErrorCode ierr;
6018   Mat_Product    *product = C->product;
6019 
6020   PetscFunctionBegin;
6021   if (product->type == MATPRODUCT_AB) {
6022     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6023   }
6024   PetscFunctionReturn(0);
6025 }
6026 /* ----------------------------------------------------------------*/
6027 
6028 /*MC
6029    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6030 
6031    Options Database Keys:
6032 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6033 
6034    Level: beginner
6035 
6036    Notes:
6037     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6038     in this case the values associated with the rows and columns one passes in are set to zero
6039     in the matrix.
6040 
6041     MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6042     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
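
    Example Usage (a minimal sketch; M and N are assumed global sizes and ierr an assumed local):
.vb
   Mat A;
   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
.ve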
6043 
6044 .seealso: MatCreateAIJ()
6045 M*/
6046 
6047 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6048 {
6049   Mat_MPIAIJ     *b;
6050   PetscErrorCode ierr;
6051   PetscMPIInt    size;
6052 
6053   PetscFunctionBegin;
6054   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
6055 
6056   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6057   B->data       = (void*)b;
6058   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6059   B->assembled  = PETSC_FALSE;
6060   B->insertmode = NOT_SET_VALUES;
6061   b->size       = size;
6062 
6063   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
6064 
6065   /* build cache for off array entries formed */
6066   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6067 
6068   b->donotstash  = PETSC_FALSE;
6069   b->colmap      = NULL;
6070   b->garray      = NULL;
6071   b->roworiented = PETSC_TRUE;
6072 
6073   /* stuff used for matrix vector multiply */
6074   b->lvec  = NULL;
6075   b->Mvctx = NULL;
6076 
6077   /* stuff for MatGetRow() */
6078   b->rowindices   = NULL;
6079   b->rowvalues    = NULL;
6080   b->getrowactive = PETSC_FALSE;
6081 
6082   /* flexible pointer used in CUSPARSE classes */
6083   b->spptr = NULL;
6084 
6085   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6086   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6087   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6088   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6090   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6092   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6093   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6094   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6095 #if defined(PETSC_HAVE_CUDA)
6096   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
6097 #endif
6098 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6099   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
6100 #endif
6101 #if defined(PETSC_HAVE_MKL_SPARSE)
6102   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6103 #endif
6104   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6105   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6106   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6107   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
6108 #if defined(PETSC_HAVE_ELEMENTAL)
6109   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6110 #endif
6111 #if defined(PETSC_HAVE_SCALAPACK)
6112   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6113 #endif
6114   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6115   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6116 #if defined(PETSC_HAVE_HYPRE)
6117   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6118   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6119 #endif
6120   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6121   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6122   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6123   PetscFunctionReturn(0);
6124 }
6125 
6126 /*@C
6127      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
6128          and "off-diagonal" part of the matrix in CSR format.
6129 
6130    Collective
6131 
6132    Input Parameters:
6133 +  comm - MPI communicator
6134 .  m - number of local rows (Cannot be PETSC_DECIDE)
6135 .  n - This value should be the same as the local size used in creating the
6136        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6137        calculated if N is given) For square matrices n is almost always m.
6138 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6139 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6140 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6141 .   j - column indices
6142 .   a - matrix values
6143 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6144 .   oj - column indices
6145 -   oa - matrix values
6146 
6147    Output Parameter:
6148 .   mat - the matrix
6149 
6150    Level: advanced
6151 
6152    Notes:
6153        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6154        must free the arrays once the matrix has been destroyed and not before.
6155 
6156        The i and j indices are 0 based
6157 
6158        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6159 
6160        This sets local rows and cannot be used to set off-processor values.
6161 
6162        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6163        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6164        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6165        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6166        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6167        communication if it is known that only local entries will be set.
6168 
6169 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6170           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6171 @*/
6172 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6173 {
6174   PetscErrorCode ierr;
6175   Mat_MPIAIJ     *maij;
6176 
6177   PetscFunctionBegin;
6178   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6179   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6180   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6181   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6182   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6183   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6184   maij = (Mat_MPIAIJ*) (*mat)->data;
6185 
6186   (*mat)->preallocated = PETSC_TRUE;
6187 
6188   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6189   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6190 
6191   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6192   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6193 
6194   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6195   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6196   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6197   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6198 
6199   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6200   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6201   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6202   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6203   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6204   PetscFunctionReturn(0);
6205 }
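
/*
   Illustrative usage of MatCreateMPIAIJWithSplitArrays() (a minimal sketch, not taken from the PETSc test suite):
   two processes, each owning 2 rows and 2 columns of the 4x4 matrix

        1 2 | 0 3
        0 4 | 5 0
        ----+----
        6 0 | 7 0
        0 8 | 0 9

   Rank 0 passes local column indices for the diagonal block and global column indices for the off-diagonal
   block; the arrays must remain valid until the matrix is destroyed:

     PetscInt    i[]  = {0,2,3}, j[]  = {0,1,1}, oi[] = {0,1,2}, oj[] = {3,2};
     PetscScalar a[]  = {1.0,2.0,4.0}, oa[] = {3.0,5.0};
     Mat         A;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   Rank 1 would analogously pass i[] = {0,1,2}, j[] = {0,1}, a[] = {7,9} and oi[] = {0,1,2}, oj[] = {0,1},
   oa[] = {6,8} for its rows 2 and 3.
*/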
6206 
6207 /*
6208     Special version for direct calls from Fortran
6209 */
6210 #include <petsc/private/fortranimpl.h>
6211 
6212 /* Change these macros so they can be used in a void function */
6213 #undef CHKERRQ
6214 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6215 #undef SETERRQ2
6216 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6217 #undef SETERRQ3
6218 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6219 #undef SETERRQ
6220 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6221 
6222 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6223 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6224 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6225 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6226 #else
6227 #endif
6228 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6229 {
6230   Mat            mat  = *mmat;
6231   PetscInt       m    = *mm, n = *mn;
6232   InsertMode     addv = *maddv;
6233   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6234   PetscScalar    value;
6235   PetscErrorCode ierr;
6236 
6237   MatCheckPreallocated(mat,1);
6238   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6239   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6240   {
6241     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6242     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6243     PetscBool roworiented = aij->roworiented;
6244 
6245     /* Some variables required by the macros used below */
6246     Mat        A                    = aij->A;
6247     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6248     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6249     MatScalar  *aa                  = a->a;
6250     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6251     Mat        B                    = aij->B;
6252     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6253     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6254     MatScalar  *ba                  = b->a;
6255     /* The variable below is only needed for the PETSC_HAVE_DEVICE case, but we define it in all cases because we
6256      * cannot use "#if defined" inside a macro. */
6257     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6258 
6259     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6260     PetscInt  nonew = a->nonew;
6261     MatScalar *ap1,*ap2;
6262 
6263     PetscFunctionBegin;
6264     for (i=0; i<m; i++) {
6265       if (im[i] < 0) continue;
6266       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6267       if (im[i] >= rstart && im[i] < rend) {
6268         row      = im[i] - rstart;
6269         lastcol1 = -1;
6270         rp1      = aj + ai[row];
6271         ap1      = aa + ai[row];
6272         rmax1    = aimax[row];
6273         nrow1    = ailen[row];
6274         low1     = 0;
6275         high1    = nrow1;
6276         lastcol2 = -1;
6277         rp2      = bj + bi[row];
6278         ap2      = ba + bi[row];
6279         rmax2    = bimax[row];
6280         nrow2    = bilen[row];
6281         low2     = 0;
6282         high2    = nrow2;
6283 
6284         for (j=0; j<n; j++) {
6285           if (roworiented) value = v[i*n+j];
6286           else value = v[i+j*m];
6287           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6288           if (in[j] >= cstart && in[j] < cend) {
6289             col = in[j] - cstart;
6290             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6291 #if defined(PETSC_HAVE_DEVICE)
6292             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6293 #endif
6294           } else if (in[j] < 0) continue;
6295           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6296             /* the extra braces around SETERRQ2() are required for --with-errorchecking=0, due to the next 'else' clause */
6297             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6298           } else {
6299             if (mat->was_assembled) {
6300               if (!aij->colmap) {
6301                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6302               }
6303 #if defined(PETSC_USE_CTABLE)
6304               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6305               col--;
6306 #else
6307               col = aij->colmap[in[j]] - 1;
6308 #endif
6309               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6310                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6311                 col  =  in[j];
6312                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6313                 B        = aij->B;
6314                 b        = (Mat_SeqAIJ*)B->data;
6315                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6316                 rp2      = bj + bi[row];
6317                 ap2      = ba + bi[row];
6318                 rmax2    = bimax[row];
6319                 nrow2    = bilen[row];
6320                 low2     = 0;
6321                 high2    = nrow2;
6322                 bm       = aij->B->rmap->n;
6323                 ba       = b->a;
6324                 inserted = PETSC_FALSE;
6325               }
6326             } else col = in[j];
6327             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6328 #if defined(PETSC_HAVE_DEVICE)
6329             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6330 #endif
6331           }
6332         }
6333       } else if (!aij->donotstash) {
6334         if (roworiented) {
6335           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6336         } else {
6337           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6338         }
6339       }
6340     }
6341   }
6342   PetscFunctionReturnVoid();
6343 }
6344 
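/*
   Data used by the backend-agnostic MatProduct support below: the product C is computed from a small number of
   sequential intermediate products (mp[]), whose entries are then scattered into C through COO assembly with
   MatSetPreallocationCOO()/MatSetValuesCOO(); see MatProductSymbolic_MPIAIJBACKEND() and
   MatProductNumeric_MPIAIJBACKEND() below.
*/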
6345 typedef struct {
6346   Mat       *mp;    /* intermediate products */
6347   PetscBool *mptmp; /* is the intermediate product temporary? */
6348   PetscInt  cp;     /* number of intermediate products */
6349 
6350   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6351   PetscInt    *startsj_s,*startsj_r;
6352   PetscScalar *bufa;
6353   Mat         P_oth;
6354 
6355   /* may take advantage of merging product->B */
6356   Mat Bloc;
6357 
6358   /* cusparse does not support splitting between the symbolic and numeric phases.
6359      When api_user is true, we do not need to update the numerical values
6360      of the temporary storage */
6361   PetscBool reusesym;
6362 
6363   /* support for COO values insertion */
6364   PetscScalar  *coo_v,*coo_w;
6365   PetscInt     **own;
6366   PetscInt     **off;
6367   PetscBool    hasoffproc; /* if true, non-local values insertion is needed (i.e. for AtB or PtAP) */
6368   PetscSF      sf; /* used for non-local values insertion and memory allocation */
6369   PetscMemType mtype;
6370 
6371   /* customization */
6372   PetscBool abmerge;
6373   PetscBool P_oth_bind;
6374 } MatMatMPIAIJBACKEND;
6375 
6376 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6377 {
6378   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6379   PetscInt            i;
6380   PetscErrorCode      ierr;
6381 
6382   PetscFunctionBegin;
6383   ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
6384   ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
6385   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
6386   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
6387   ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
6388   ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
6389   ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
6390   for (i = 0; i < mmdata->cp; i++) {
6391     ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
6392   }
6393   ierr = PetscFree(mmdata->mp);CHKERRQ(ierr);
6394   ierr = PetscFree(mmdata->mptmp);CHKERRQ(ierr);
6395   ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
6396   ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
6397   ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
6398   ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
6399   ierr = PetscFree(mmdata);CHKERRQ(ierr);
6400   PetscFunctionReturn(0);
6401 }
6402 
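/* Copy into v[] the values of the sequential AIJ matrix A located at the positions idx[] of its value array
   (v[k] = a[idx[k]]); when idx is NULL the first n values are copied. A type-specific implementation registered
   as "MatSeqAIJCopySubArray_C" is used when available, otherwise the copy is performed on the host. */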
6403 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6404 {
6405   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6406   PetscErrorCode ierr;
6407 
6408   PetscFunctionBegin;
6409   ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
6410   if (f) {
6411     ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
6412   } else {
6413     const PetscScalar *vv;
6414 
6415     ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
6416     if (n && idx) {
6417       PetscScalar    *w = v;
6418       const PetscInt *oi = idx;
6419       PetscInt       j;
6420 
6421       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6422     } else {
6423       ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
6424     }
6425     ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
6426   }
6427   PetscFunctionReturn(0);
6428 }
6429 
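/* Numeric phase: refresh the temporary matrices (P_oth, Bloc) when needed, run the numeric phase of each
   intermediate product, copy their values into COO order (splitting locally owned and off-process entries),
   communicate the off-process part through mmdata->sf, and finally insert everything with MatSetValuesCOO() */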
6430 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6431 {
6432   MatMatMPIAIJBACKEND *mmdata;
6433   PetscInt            i,n_d,n_o;
6434   PetscErrorCode      ierr;
6435 
6436   PetscFunctionBegin;
6437   MatCheckProduct(C,1);
6438   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6439   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6440   if (!mmdata->reusesym) { /* update temporary matrices */
6441     if (mmdata->P_oth) {
6442       ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6443     }
6444     if (mmdata->Bloc) {
6445       ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
6446     }
6447   }
6448   mmdata->reusesym = PETSC_FALSE;
6449 
6450   for (i = 0; i < mmdata->cp; i++) {
6451     if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6452     ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
6453   }
6454   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6455     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6456 
6457     if (mmdata->mptmp[i]) continue;
6458     if (noff) {
6459       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6460 
6461       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
6462       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
6463       n_o += noff;
6464       n_d += nown;
6465     } else {
6466       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6467 
6468       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
6469       n_d += mm->nz;
6470     }
6471   }
6472   if (mmdata->hasoffproc) { /* offprocess insertion */
6473     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6474     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6475   }
6476   ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
6477   PetscFunctionReturn(0);
6478 }
6479 
6480 /* Support for Pt * A, A * P, or Pt * A * P */
6481 #define MAX_NUMBER_INTERMEDIATE 4
6482 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6483 {
6484   Mat_Product            *product = C->product;
6485   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE];
6486   Mat_MPIAIJ             *a,*p;
6487   MatMatMPIAIJBACKEND    *mmdata;
6488   ISLocalToGlobalMapping P_oth_l2g = NULL;
6489   IS                     glob = NULL;
6490   const char             *prefix;
6491   char                   pprefix[256];
6492   const PetscInt         *globidx,*P_oth_idx;
6493   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE];
6494   PetscInt               cp = 0,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j,cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE],i,j;
6495   MatProductType         ptype;
6496   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6497   PetscMPIInt            size;
6498   PetscErrorCode         ierr;
6499 
6500   PetscFunctionBegin;
6501   MatCheckProduct(C,1);
6502   if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6503   ptype = product->type;
6504   if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = MATPRODUCT_AB;
6505   switch (ptype) {
6506   case MATPRODUCT_AB:
6507     A = product->A;
6508     P = product->B;
6509     m = A->rmap->n;
6510     n = P->cmap->n;
6511     M = A->rmap->N;
6512     N = P->cmap->N;
6513     break;
6514   case MATPRODUCT_AtB:
6515     P = product->A;
6516     A = product->B;
6517     m = P->cmap->n;
6518     n = A->cmap->n;
6519     M = P->cmap->N;
6520     N = A->cmap->N;
6521     hasoffproc = PETSC_TRUE;
6522     break;
6523   case MATPRODUCT_PtAP:
6524     A = product->A;
6525     P = product->B;
6526     m = P->cmap->n;
6527     n = P->cmap->n;
6528     M = P->cmap->N;
6529     N = P->cmap->N;
6530     hasoffproc = PETSC_TRUE;
6531     break;
6532   default:
6533     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6534   }
6535   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
6536   if (size == 1) hasoffproc = PETSC_FALSE;
6537 
6538   /* defaults */
6539   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6540     mp[i]    = NULL;
6541     mptmp[i] = PETSC_FALSE;
6542     rmapt[i] = -1;
6543     cmapt[i] = -1;
6544     rmapa[i] = NULL;
6545     cmapa[i] = NULL;
6546   }
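  /* For each non-temporary intermediate product, rmapt[i]/cmapt[i] record how its local row/column indices map to
     global indices of C: 1 means a contiguous local-to-global map (offset by the row/column start of C), 2 means
     an indirect map through the arrays rmapa[i]/cmapa[i] (e.g. the garray of an off-diagonal block) */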
6547 
6548   /* customization */
6549   ierr = PetscNew(&mmdata);CHKERRQ(ierr);
6550   mmdata->reusesym = product->api_user;
6551   if (ptype == MATPRODUCT_AB) {
6552     if (product->api_user) {
6553       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6554       ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6555       ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6556       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6557     } else {
6558       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6559       ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6560       ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6561       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6562     }
6563   } else if (ptype == MATPRODUCT_PtAP) {
6564     if (product->api_user) {
6565       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6566       ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6567       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6568     } else {
6569       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6570       ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6571       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6572     }
6573   }
6574   a = (Mat_MPIAIJ*)A->data;
6575   p = (Mat_MPIAIJ*)P->data;
6576   ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
6577   ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
6578   ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
6579   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6580   ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);
6581   switch (ptype) {
6582   case MATPRODUCT_AB: /* A * P */
6583     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6584 
6585     if (mmdata->abmerge) { /* A_diag * P_loc and A_off * P_oth */
6586       /* P is product->B */
6587       ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6588       ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6589       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6590       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6591       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6592       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6593       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6594       mp[cp]->product->api_user = product->api_user;
6595       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6596       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6597       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6598       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6599       rmapt[cp] = 1;
6600       cmapt[cp] = 2;
6601       cmapa[cp] = globidx;
6602       mptmp[cp] = PETSC_FALSE;
6603       cp++;
6604     } else { /* A_diag * P_diag and A_diag * P_off and A_off * P_oth */
6605       ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
6606       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6607       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6608       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6609       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6610       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6611       mp[cp]->product->api_user = product->api_user;
6612       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6613       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6614       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6615       rmapt[cp] = 1;
6616       cmapt[cp] = 1;
6617       mptmp[cp] = PETSC_FALSE;
6618       cp++;
6619       ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
6620       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6621       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6622       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6623       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6624       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6625       mp[cp]->product->api_user = product->api_user;
6626       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6627       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6628       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6629       rmapt[cp] = 1;
6630       cmapt[cp] = 2;
6631       cmapa[cp] = p->garray;
6632       mptmp[cp] = PETSC_FALSE;
6633       cp++;
6634     }
6635     if (mmdata->P_oth) {
6636       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6637       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6638       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6639       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6640       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6641       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6642       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6643       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6644       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6645       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6646       mp[cp]->product->api_user = product->api_user;
6647       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6648       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6649       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6650       rmapt[cp] = 1;
6651       cmapt[cp] = 2;
6652       cmapa[cp] = P_oth_idx;
6653       mptmp[cp] = PETSC_FALSE;
6654       cp++;
6655     }
6656     break;
6657   case MATPRODUCT_AtB: /* P^t * A: P_diag^t * A_loc and P_off^t * A_loc */
6658     /* A is product->B */
6659     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6660     if (A == P) {
6661       ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6662       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6663       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6664       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6665       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6666       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6667       mp[cp]->product->api_user = product->api_user;
6668       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6669       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6670       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6671       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6672       rmapt[cp] = 2;
6673       rmapa[cp] = globidx;
6674       cmapt[cp] = 2;
6675       cmapa[cp] = globidx;
6676       mptmp[cp] = PETSC_FALSE;
6677       cp++;
6678     } else {
6679       ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6680       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6681       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6682       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6683       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6684       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6685       mp[cp]->product->api_user = product->api_user;
6686       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6687       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6688       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6689       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6690       rmapt[cp] = 1;
6691       cmapt[cp] = 2;
6692       cmapa[cp] = globidx;
6693       mptmp[cp] = PETSC_FALSE;
6694       cp++;
6695       ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6696       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6697       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6698       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6699       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6700       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6701       mp[cp]->product->api_user = product->api_user;
6702       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6703       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6704       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6705       rmapt[cp] = 2;
6706       rmapa[cp] = p->garray;
6707       cmapt[cp] = 2;
6708       cmapa[cp] = globidx;
6709       mptmp[cp] = PETSC_FALSE;
6710       cp++;
6711     }
6712     break;
6713   case MATPRODUCT_PtAP:
6714     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6715     /* P is product->B */
6716     ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6717     ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6718     ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
6719     ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6720     ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6721     ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6722     ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6723     mp[cp]->product->api_user = product->api_user;
6724     ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6725     if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6726     ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6727     ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6728     rmapt[cp] = 2;
6729     rmapa[cp] = globidx;
6730     cmapt[cp] = 2;
6731     cmapa[cp] = globidx;
6732     mptmp[cp] = PETSC_FALSE;
6733     cp++;
6734     if (mmdata->P_oth) {
6735       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6736       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6737       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6738       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6739       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6740       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6741       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6742       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6743       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6744       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6745       mp[cp]->product->api_user = product->api_user;
6746       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6747       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6748       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6749       mptmp[cp] = PETSC_TRUE;
6750       cp++;
6751       ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
6752       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6753       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6754       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6755       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6756       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6757       mp[cp]->product->api_user = product->api_user;
6758       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6759       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6760       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6761       rmapt[cp] = 2;
6762       rmapa[cp] = globidx;
6763       cmapt[cp] = 2;
6764       cmapa[cp] = P_oth_idx;
6765       mptmp[cp] = PETSC_FALSE;
6766       cp++;
6767     }
6768     break;
6769   default:
6770     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6771   }
6772   /* sanity check */
6773   if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);
6774 
6775   ierr = PetscMalloc1(cp,&mmdata->mp);CHKERRQ(ierr);
6776   for (i = 0; i < cp; i++) mmdata->mp[i] = mp[i];
6777   ierr = PetscMalloc1(cp,&mmdata->mptmp);CHKERRQ(ierr);
6778   for (i = 0; i < cp; i++) mmdata->mptmp[i] = mptmp[i];
6779   mmdata->cp = cp;
6780   C->product->data       = mmdata;
6781   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
6782   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
6783 
6784   /* memory type */
6785   mmdata->mtype = PETSC_MEMTYPE_HOST;
6786   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
6787   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
6788   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
6789   // enable the line below once MatSeqAIJCopySubArray_SeqAIJKokkos is implemented
6790   //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;
6791 
6792   /* prepare coo coordinates for values insertion */
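  /* ncoo_d counts entries that go directly into locally owned rows of C, ncoo_oown entries from indirectly
     mapped (rmapt[] == 2) products that still land on locally owned rows, and ncoo_o entries destined for rows
     owned by other processes (communicated through mmdata->sf below) */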
6793   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
6794     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6795     if (mptmp[cp]) continue;
6796     if (rmapt[cp] == 2 && hasoffproc) {
6797       const PetscInt *rmap = rmapa[cp];
6798       const PetscInt mr = mp[cp]->rmap->n;
6799       const PetscInt rs = C->rmap->rstart;
6800       const PetscInt re = C->rmap->rend;
6801       const PetscInt *ii  = mm->i;
6802       for (i = 0; i < mr; i++) {
6803         const PetscInt gr = rmap[i];
6804         const PetscInt nz = ii[i+1] - ii[i];
6805         if (gr < rs || gr >= re) ncoo_o += nz;
6806         else ncoo_oown += nz;
6807       }
6808     } else ncoo_d += mm->nz;
6809   }
6810   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr);
6811   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);
6812   if (hasoffproc) { /* handle offproc values insertion */
6813     PetscSF  msf;
6814     PetscInt ncoo2,*coo_i2,*coo_j2;
6815 
6816     ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
6817     ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
6818     ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr);
6819     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
6820       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6821       PetscInt   *idxoff = mmdata->off[cp];
6822       PetscInt   *idxown = mmdata->own[cp];
6823       if (!mptmp[cp] && rmapt[cp] == 2) {
6824         const PetscInt *rmap = rmapa[cp];
6825         const PetscInt *cmap = cmapa[cp];
6826         const PetscInt *ii  = mm->i;
6827         PetscInt       *coi = coo_i + ncoo_o;
6828         PetscInt       *coj = coo_j + ncoo_o;
6829         const PetscInt mr = mp[cp]->rmap->n;
6830         const PetscInt rs = C->rmap->rstart;
6831         const PetscInt re = C->rmap->rend;
6832         const PetscInt cs = C->cmap->rstart;
6833         for (i = 0; i < mr; i++) {
6834           const PetscInt *jj = mm->j + ii[i];
6835           const PetscInt gr  = rmap[i];
6836           const PetscInt nz  = ii[i+1] - ii[i];
6837           if (gr < rs || gr >= re) {
6838             for (j = ii[i]; j < ii[i+1]; j++) {
6839               *coi++ = gr;
6840               *idxoff++ = j;
6841             }
6842             if (!cmapt[cp]) { /* already global */
6843               for (j = 0; j < nz; j++) *coj++ = jj[j];
6844             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6845               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6846             } else { /* offdiag */
6847               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6848             }
6849             ncoo_o += nz;
6850           } else {
6851             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
6852           }
6853         }
6854       }
6855       mmdata->off[cp + 1] = idxoff;
6856       mmdata->own[cp + 1] = idxown;
6857     }
6858 
6859     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6860     ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o,NULL,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
6861     ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
6862     ierr = PetscSFGetGraph(msf,&ncoo2,NULL,NULL,NULL);CHKERRQ(ierr);
6863     ncoo = ncoo_d + ncoo_oown + ncoo2;
6864     ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
6865     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6866     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6867     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6868     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6869     ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6870     ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
6871     coo_i = coo_i2;
6872     coo_j = coo_j2;
6873   } else { /* no offproc values insertion */
6874     ncoo = ncoo_d;
6875     ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);
6876 
6877     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6878     ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
6879     ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
6880   }
6881   mmdata->hasoffproc = hasoffproc;
6882 
6883   /* on-process indices */
6884   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
6885     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
6886     PetscInt       *coi = coo_i + ncoo_d;
6887     PetscInt       *coj = coo_j + ncoo_d;
6888     const PetscInt *jj  = mm->j;
6889     const PetscInt *ii  = mm->i;
6890     const PetscInt *cmap = cmapa[cp];
6891     const PetscInt *rmap = rmapa[cp];
6892     const PetscInt mr = mp[cp]->rmap->n;
6893     const PetscInt rs = C->rmap->rstart;
6894     const PetscInt re = C->rmap->rend;
6895     const PetscInt cs = C->cmap->rstart;
6896 
6897     if (mptmp[cp]) continue;
6898     if (rmapt[cp] == 1) {
6899       for (i = 0; i < mr; i++) {
6900         const PetscInt gr = i + rs;
6901         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
6902       }
6903       /* columns coo */
6904       if (!cmapt[cp]) {
6905         ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
6906       } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6907         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs;
6908       } else { /* offdiag */
6909         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
6910       }
6911       ncoo_d += mm->nz;
6912     } else if (rmapt[cp] == 2) {
6913       for (i = 0; i < mr; i++) {
6914         const PetscInt *jj = mm->j + ii[i];
6915         const PetscInt gr  = rmap[i];
6916         const PetscInt nz  = ii[i+1] - ii[i];
6917         if (gr >= rs && gr < re) {
6918           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
6919           if (!cmapt[cp]) { /* already global */
6920             for (j = 0; j < nz; j++) *coj++ = jj[j];
6921           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6922             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6923           } else { /* offdiag */
6924             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6925           }
6926           ncoo_d += nz;
6927         }
6928       }
6929     }
6930   }
6931   if (glob) {
6932     ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
6933   }
6934   ierr = ISDestroy(&glob);CHKERRQ(ierr);
6935   if (P_oth_l2g) {
6936     ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6937   }
6938   ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
6939   ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);
6940 
6941   /* preallocate with COO data */
6942   ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
6943   ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6944   PetscFunctionReturn(0);
6945 }
6946 
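/* Select the COO-based backend product routines for AB, AtB, and PtAP. With device support, they are used only
   when A and B have matching types, are not bound to the CPU, and the user has not requested the CPU path
   (e.g. via -matmatmult_backend_cpu); otherwise the standard MPIAIJ product routines are used. */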
6947 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
6948 {
6949   Mat_Product    *product = mat->product;
6950   PetscErrorCode ierr;
6951 #if defined(PETSC_HAVE_DEVICE)
6952   PetscBool      match = PETSC_FALSE;
6953   PetscBool      usecpu = PETSC_FALSE;
6954 #else
6955   PetscBool      match = PETSC_TRUE;
6956 #endif
6957 
6958   PetscFunctionBegin;
6959   MatCheckProduct(mat,1);
6960 #if defined(PETSC_HAVE_DEVICE)
6961   if (!product->A->boundtocpu && !product->B->boundtocpu) {
6962     ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
6963   }
6964   if (match) { /* we can always fall back to the CPU in case an operation does not perform well on the device */
6965     switch (product->type) {
6966     case MATPRODUCT_AB:
6967       if (product->api_user) {
6968         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6969         ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6970         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6971       } else {
6972         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6973         ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6974         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6975       }
6976       break;
6977     case MATPRODUCT_AtB:
6978       if (product->api_user) {
6979         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
6980         ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6981         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6982       } else {
6983         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
6984         ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6985         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6986       }
6987       break;
6988     case MATPRODUCT_PtAP:
6989       if (product->api_user) {
6990         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6991         ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6992         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6993       } else {
6994         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6995         ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6996         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6997       }
6998       break;
6999     default:
7000       break;
7001     }
7002     match = (PetscBool)!usecpu;
7003   }
7004 #endif
7005   if (match) {
7006     switch (product->type) {
7007     case MATPRODUCT_AB:
7008     case MATPRODUCT_AtB:
7009     case MATPRODUCT_PtAP:
7010       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7011       break;
7012     default:
7013       break;
7014     }
7015   }
7016   /* fall back to the MPIAIJ ops */
7017   if (!mat->ops->productsymbolic) {
7018     ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
7019   }
7020   PetscFunctionReturn(0);
7021 }
7022