xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision dde8958d96c09bc677f093c6b71afab400f91c19)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
66 {
67   PetscErrorCode ierr;
68   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
69 
70   PetscFunctionBegin;
71   if (mat->A) {
72     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
73     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
74   }
75   PetscFunctionReturn(0);
76 }
77 
78 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
79 {
80   PetscErrorCode  ierr;
81   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
82   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
83   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
84   const PetscInt  *ia,*ib;
85   const MatScalar *aa,*bb,*aav,*bav;
86   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
87   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
88 
89   PetscFunctionBegin;
90   *keptrows = NULL;
91 
92   ia   = a->i;
93   ib   = b->i;
94   ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
95   ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) {
100       cnt++;
101       goto ok1;
102     }
103     aa = aav + ia[i];
104     for (j=0; j<na; j++) {
105       if (aa[j] != 0.0) goto ok1;
106     }
107     bb = bav + ib[i];
108     for (j=0; j <nb; j++) {
109       if (bb[j] != 0.0) goto ok1;
110     }
111     cnt++;
112 ok1:;
113   }
114   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr);
115   if (!n0rows) {
116     ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
117     ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
118     PetscFunctionReturn(0);
119   }
120   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
121   cnt  = 0;
122   for (i=0; i<m; i++) {
123     na = ia[i+1] - ia[i];
124     nb = ib[i+1] - ib[i];
125     if (!na && !nb) continue;
126     aa = aav + ia[i];
127     for (j=0; j<na;j++) {
128       if (aa[j] != 0.0) {
129         rows[cnt++] = rstart + i;
130         goto ok2;
131       }
132     }
133     bb = bav + ib[i];
134     for (j=0; j<nb; j++) {
135       if (bb[j] != 0.0) {
136         rows[cnt++] = rstart + i;
137         goto ok2;
138       }
139     }
140 ok2:;
141   }
142   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
143   ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
144   ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
145   PetscFunctionReturn(0);
146 }
147 
148 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
149 {
150   PetscErrorCode    ierr;
151   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
152   PetscBool         cong;
153 
154   PetscFunctionBegin;
155   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
156   if (Y->assembled && cong) {
157     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
158   } else {
159     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
160   }
161   PetscFunctionReturn(0);
162 }
163 
164 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
165 {
166   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
167   PetscErrorCode ierr;
168   PetscInt       i,rstart,nrows,*rows;
169 
170   PetscFunctionBegin;
171   *zrows = NULL;
172   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
173   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
174   for (i=0; i<nrows; i++) rows[i] += rstart;
175   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
176   PetscFunctionReturn(0);
177 }
178 
/*
  MatGetColumnReductions_MPIAIJ - computes a per-column reduction (1/2/infinity norm,
  or sum/mean of the real or imaginary parts) over all rows of the parallel matrix.

  Each process accumulates its local rows into a dense work array of global length n,
  then the arrays are combined with an MPI reduction (max for the infinity norm, sum
  otherwise).  Note: not scalable in memory, every process allocates O(n) reals.
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  /* NOTE(review): the get/restore pairs appear to exist only to ensure the host copies
     of the value arrays are up to date before a_aij->a / b_aij->a are read directly
     below -- confirm against the device backends */
  ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
  /* Diagonal-block columns are shifted by cmap->rstart to global numbering;
     off-diagonal-block columns are translated through garray */
  if (type == NORM_2) {
    /* accumulate |a_ij|^2; square roots are taken after the reduction */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  /* combine the per-process contributions: max for infinity norm, sum otherwise */
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  } else {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* turn the summed squares into 2-norms */
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* divide by the global number of rows to turn sums into means */
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
245 
246 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
247 {
248   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
249   IS              sis,gis;
250   PetscErrorCode  ierr;
251   const PetscInt  *isis,*igis;
252   PetscInt        n,*iis,nsis,ngis,rstart,i;
253 
254   PetscFunctionBegin;
255   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
256   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
257   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
258   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
259   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
260   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
261 
262   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
263   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
264   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
265   n    = ngis + nsis;
266   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
267   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
268   for (i=0; i<n; i++) iis[i] += rstart;
269   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
270 
271   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
272   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
273   ierr = ISDestroy(&sis);CHKERRQ(ierr);
274   ierr = ISDestroy(&gis);CHKERRQ(ierr);
275   PetscFunctionReturn(0);
276 }
277 
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
/*
  MatCreateColmap_MPIAIJ_Private - builds aij->colmap, mapping a global column number
  to (local off-diagonal column + 1); a lookup result of 0 (i.e. col-1 < 0 at the call
  sites) means the column is not present in the off-diagonal block B.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;   /* number of columns actually used by B */

  PetscFunctionBegin;
  if (n && !aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* scalable path: hash table with one entry per off-diagonal column present;
     both key and value are shifted by one */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* non-scalable path: dense zero-initialized array over all global columns */
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
305 
/*
  MatSetValues_SeqAIJ_A_Private - inlined insertion of a single (row,col,value) into the
  diagonal block A.  Narrows the search window with a short binary search, then scans
  linearly; if the column already exists the value is added or overwritten, otherwise
  the row is shifted up to make room for a new nonzero (reallocating through
  MatSeqXAIJReallocateAIJ when the row is full).  Relies on locals of
  MatSetValues_MPIAIJ (rp1,ap1,low1,high1,nrow1,lastcol1,nonew,ignorezeroentries,
  inserted,...); (orow,ocol) are the global indices, used only for error reporting.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* unclear whether the LogFlops call slows down the code */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          inserted = PETSC_TRUE; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
      ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
343 
/*
  MatSetValues_SeqAIJ_B_Private - same insertion scheme as MatSetValues_SeqAIJ_A_Private,
  but for the off-diagonal block B, using the corresponding "2"-suffixed locals of
  MatSetValues_MPIAIJ (rp2,ap2,low2,high2,nrow2,lastcol2,...).  Note: unlike the A
  variant, a zero value is dropped (when ignorezeroentries) regardless of row == col,
  since the off-diagonal block never holds the matrix diagonal.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        inserted = PETSC_TRUE;                            \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
380 
/*
  MatSetValuesRow_MPIAIJ - overwrites the stored values of one (globally numbered) local
  row with v, which must contain exactly one value per existing nonzero of the row,
  ordered by increasing global column: off-diagonal entries left of the owned column
  range, then the diagonal block, then off-diagonal entries to the right.  Only values
  change; the nonzero pattern must already exist.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
  row  = row - diag;  /* convert to local row number; for square matrices diag is also the first owned column */
  /* l = number of off-diagonal entries whose global column precedes the owned range */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  /* host arrays were modified directly: mark the CPU copy as the valid one if anything was written */
  if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
409 
/*
  MatSetValues_MPIAIJ - inserts or adds an m-by-n logically dense block of values at the
  given global row/column indices.  Negative indices are silently skipped.  Values in
  locally owned rows go directly into the diagonal block A (owned columns) or the
  off-diagonal block B (other columns) via the MatSetValues_SeqAIJ_{A,B}_Private macros;
  values for off-process rows are stashed and communicated during assembly.
  v is read row-major when aij->roworiented, column-major otherwise.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                    = aij->A;
  Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscBool  ignorezeroentries    = a->ignorezeroentries;
  Mat        B                    = aij->B;
  Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *aa,*ba;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_DEVICE)
  /* If the values currently live on the device, the get/restore pair pulls them back
     to the host before we write into a->a / b->a directly */
  if (A->offloadmask == PETSC_OFFLOAD_GPU) {
    const PetscScalar *dummy;
    ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr);
  }
  if (B->offloadmask == PETSC_OFFLOAD_GPU) {
    const PetscScalar *dummy;
    ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr);
  }
#endif
  aa = a->a;
  ba = b->a;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the search state the insertion macros expect */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* owned column: goes into the diagonal block A with a shifted local column */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          /* off-diagonal column: goes into B; after assembly B uses compacted local
             column numbers, so translate through colmap */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            /* col < 0 means the column is not yet in B's pattern; if new nonzeros are
               allowed, disassemble so B goes back to global column numbering */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B        = aij->B;
              b        = (Mat_SeqAIJ*)B->data;
              bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba       = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      /* off-process row: stash the values to be sent to the owner during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
537 
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
543 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
544 {
545   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
546   Mat            A           = aij->A; /* diagonal part of the matrix */
547   Mat            B           = aij->B; /* offdiagonal part of the matrix */
548   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
549   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
550   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
551   PetscInt       *ailen      = a->ilen,*aj = a->j;
552   PetscInt       *bilen      = b->ilen,*bj = b->j;
553   PetscInt       am          = aij->A->rmap->n,j;
554   PetscInt       diag_so_far = 0,dnz;
555   PetscInt       offd_so_far = 0,onz;
556 
557   PetscFunctionBegin;
558   /* Iterate over all rows of the matrix */
559   for (j=0; j<am; j++) {
560     dnz = onz = 0;
561     /*  Iterate over all non-zero columns of the current row */
562     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
563       /* If column is in the diagonal */
564       if (mat_j[col] >= cstart && mat_j[col] < cend) {
565         aj[diag_so_far++] = mat_j[col] - cstart;
566         dnz++;
567       } else { /* off-diagonal entries */
568         bj[offd_so_far++] = mat_j[col];
569         onz++;
570       }
571     }
572     ailen[j] = dnz;
573     bilen[j] = onz;
574   }
575   PetscFunctionReturn(0);
576 }
577 
/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
585 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
586 {
587   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
588   Mat            A      = aij->A; /* diagonal part of the matrix */
589   Mat            B      = aij->B; /* offdiagonal part of the matrix */
590   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
591   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
592   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
593   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
594   PetscInt       *ailen = a->ilen,*aj = a->j;
595   PetscInt       *bilen = b->ilen,*bj = b->j;
596   PetscInt       am     = aij->A->rmap->n,j;
597   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
598   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
599   PetscScalar    *aa = a->a,*ba = b->a;
600 
601   PetscFunctionBegin;
602   /* Iterate over all rows of the matrix */
603   for (j=0; j<am; j++) {
604     dnz_row = onz_row = 0;
605     rowstart_offd = full_offd_i[j];
606     rowstart_diag = full_diag_i[j];
607     /*  Iterate over all non-zero columns of the current row */
608     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
609       /* If column is in the diagonal */
610       if (mat_j[col] >= cstart && mat_j[col] < cend) {
611         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
612         aa[rowstart_diag+dnz_row] = mat_a[col];
613         dnz_row++;
614       } else { /* off-diagonal entries */
615         bj[rowstart_offd+onz_row] = mat_j[col];
616         ba[rowstart_offd+onz_row] = mat_a[col];
617         onz_row++;
618       }
619     }
620     ailen[j] = dnz_row;
621     bilen[j] = onz_row;
622   }
623   PetscFunctionReturn(0);
624 }
625 
/*
  MatGetValues_MPIAIJ - retrieves values at the given global (row,column) locations into
  the row-major array v.  Only locally owned rows may be requested; negative indices are
  skipped, and locations not stored in either local block are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* owned column: read from the diagonal block with the shifted local index */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: translate global column -> local column of B via colmap */
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column absent from B's pattern (or stale colmap entry): the value is 0 */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
665 
666 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
667 {
668   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
669   PetscErrorCode ierr;
670   PetscInt       nstash,reallocs;
671 
672   PetscFunctionBegin;
673   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
674 
675   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
676   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
677   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
678   PetscFunctionReturn(0);
679 }
680 
/*
   Completes assembly of a MATMPIAIJ matrix: drains the off-process stash into
   local entries, assembles the diagonal (aij->A) and off-diagonal (aij->B)
   sequential blocks, synchronizes disassembly state across ranks, and on the
   first final assembly builds the scatter needed for matrix-vector products.

   Collective on mat; must be paired with MatAssemblyBegin_MPIAIJ().
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* Receive stashed (row,col,val) triples from other ranks until no messages remain (flg == 0) */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is false iff at least one rank has disassembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_DEVICE)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build lvec/Mvctx and compact B's column space */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* invalidate cached row workspace and cached diagonal; they are rebuilt on demand */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
765 
766 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
767 {
768   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
769   PetscErrorCode ierr;
770 
771   PetscFunctionBegin;
772   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
773   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
774   PetscFunctionReturn(0);
775 }
776 
/*
   Zeros the given global rows of a MATMPIAIJ matrix and optionally places
   diag on the diagonal; when x and b are provided, b is fixed up so that
   b[row] = diag * x[row] for each zeroed, locally owned row.

   Collective on A. Requires congruent row/column layouts when x and b are
   given, or when diag != 0 with congruent layouts the fast path is used;
   otherwise the diagonal entries are inserted with MatSetValues (possibly
   changing the nonzero pattern).
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  /* remember block nonzero states so we can detect a pattern change below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* fast path: the local diagonal block holds the diagonal entries, so let it set them */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    /* nnw* save the blocks' nonew setting; nnz* are the keepnonzeropattern flags */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      /* temporarily permit new nonzeros so the diagonal insertion below cannot error */
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      /* rows beyond the column dimension have no diagonal entry to set */
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    /* restore the original nonew settings */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
851 
/*
   Zeros the given global rows AND the corresponding columns of a MATMPIAIJ
   matrix, optionally setting diag on the diagonal; when x and b are given,
   b is adjusted (b -= A_offdiag * x over zeroed columns) before those
   columns are removed. Collective on A.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  /* lrows[r] becomes >= 0 exactly when local row r was requested by some rank
     (MPI_LOR of nonnegative global row indices against the -1 initializer) */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  /* build a 0/1 mask over ghost columns: 1 marks a column whose owning row is zeroed */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* bring ghost values of x local so b can be corrected below */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  /* NOTE: from here on 'n' is reused as a per-row entry count, no longer A->rmap->n */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* move the known contribution of the zeroed column to the right-hand side */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}
969 
970 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
971 {
972   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
973   PetscErrorCode ierr;
974   PetscInt       nt;
975   VecScatter     Mvctx = a->Mvctx;
976 
977   PetscFunctionBegin;
978   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
979   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
980   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
981   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
982   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
983   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
984   PetscFunctionReturn(0);
985 }
986 
987 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
988 {
989   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
990   PetscErrorCode ierr;
991 
992   PetscFunctionBegin;
993   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
994   PetscFunctionReturn(0);
995 }
996 
997 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
998 {
999   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1000   PetscErrorCode ierr;
1001   VecScatter     Mvctx = a->Mvctx;
1002 
1003   PetscFunctionBegin;
1004   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1012 {
1013   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1014   PetscErrorCode ierr;
1015 
1016   PetscFunctionBegin;
1017   /* do nondiagonal part */
1018   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1019   /* do local part */
1020   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1021   /* add partial results together */
1022   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1023   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
/*
   Tests whether Bmat equals the transpose of Amat to within tol.
   Cheap test first: the diagonal blocks must be mutual transposes on every
   rank; only if that passes (and more than one rank exists) are the
   off-diagonal parts gathered via MatCreateSubMatrices and compared.
   Collective on Amat.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  /* all ranks must agree before doing the expensive part */
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  /* notme lists the global indices outside this rank's row range [first,last) */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  /* NOTE(review): the allocation is sized with N but this loop is bounded by M;
     the two only agree when Amat is square (M == N) — confirm the rectangular case */
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* Aoff = Amat(Me,Notme) must equal Boff^T = Bmat(Notme,Me)^T */
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1068 
1069 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1070 {
1071   PetscErrorCode ierr;
1072 
1073   PetscFunctionBegin;
1074   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1079 {
1080   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1081   PetscErrorCode ierr;
1082 
1083   PetscFunctionBegin;
1084   /* do nondiagonal part */
1085   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1086   /* do local part */
1087   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1088   /* add partial results together */
1089   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1090   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 /*
1095   This only works correctly for square matrices where the subblock A->A is the
1096    diagonal block
1097 */
1098 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1099 {
1100   PetscErrorCode ierr;
1101   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1102 
1103   PetscFunctionBegin;
1104   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1105   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1106   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1111 {
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1117   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1122 {
1123   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127 #if defined(PETSC_USE_LOG)
1128   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1129 #endif
1130   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1131   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1132   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1133   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1134 #if defined(PETSC_USE_CTABLE)
1135   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1136 #else
1137   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1138 #endif
1139   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1140   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1141   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1142   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1143   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1144   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1145 
1146   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1147   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1148 
1149   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1153   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1154   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1155   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1156   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1157   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1158   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1159 #if defined(PETSC_HAVE_CUDA)
1160   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1161 #endif
1162 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1164 #endif
1165   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
1166 #if defined(PETSC_HAVE_ELEMENTAL)
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1168 #endif
1169 #if defined(PETSC_HAVE_SCALAPACK)
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1171 #endif
1172 #if defined(PETSC_HAVE_HYPRE)
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1174   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1175 #endif
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1182 #if defined(PETSC_HAVE_MKL_SPARSE)
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1184 #endif
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1192 {
1193   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1194   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1195   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1196   const PetscInt    *garray = aij->garray;
1197   const PetscScalar *aa,*ba;
1198   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1199   PetscInt          *rowlens;
1200   PetscInt          *colidxs;
1201   PetscScalar       *matvals;
1202   PetscErrorCode    ierr;
1203 
1204   PetscFunctionBegin;
1205   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1206 
1207   M  = mat->rmap->N;
1208   N  = mat->cmap->N;
1209   m  = mat->rmap->n;
1210   rs = mat->rmap->rstart;
1211   cs = mat->cmap->rstart;
1212   nz = A->nz + B->nz;
1213 
1214   /* write matrix header */
1215   header[0] = MAT_FILE_CLASSID;
1216   header[1] = M; header[2] = N; header[3] = nz;
1217   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1218   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1219 
1220   /* fill in and store row lengths  */
1221   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1222   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1223   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1224   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1225 
1226   /* fill in and store column indices */
1227   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1228   for (cnt=0, i=0; i<m; i++) {
1229     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1230       if (garray[B->j[jb]] > cs) break;
1231       colidxs[cnt++] = garray[B->j[jb]];
1232     }
1233     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1234       colidxs[cnt++] = A->j[ja] + cs;
1235     for (; jb<B->i[i+1]; jb++)
1236       colidxs[cnt++] = garray[B->j[jb]];
1237   }
1238   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1239   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1240   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1241 
1242   /* fill in and store nonzero values */
1243   ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
1244   ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
1245   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1246   for (cnt=0, i=0; i<m; i++) {
1247     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1248       if (garray[B->j[jb]] > cs) break;
1249       matvals[cnt++] = ba[jb];
1250     }
1251     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1252       matvals[cnt++] = aa[ja];
1253     for (; jb<B->i[i+1]; jb++)
1254       matvals[cnt++] = ba[jb];
1255   }
1256   ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
1257   ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
1258   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1259   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1260   ierr = PetscFree(matvals);CHKERRQ(ierr);
1261 
1262   /* write block size option to the viewer's .info file */
1263   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #include <petscdraw.h>
/*
   Views a MATMPIAIJ matrix for ASCII, draw, binary, and socket viewers.
   Special ASCII formats (load balance, info, info-detail, factor-info) are
   handled per-rank and return early; everything else falls through to
   gathering the entire matrix onto rank 0 and viewing it there.
   Collective on mat.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max nonzeros per rank */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    /* format was already fetched above; this second query is redundant but harmless */
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank local sizes, nonzero counts, and I-node usage */
      MatInfo   info;
      PetscInt *inodes=NULL;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 asks for all rows/cols, all other ranks ask for none */
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
    ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
    if (rank == 0) {
       ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
       A    = AA[0];
       Av   = AA[0];
    }
    ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
*/
    ierr = ISDestroy(&iscol);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
      }
      ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1396 
1397 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1398 {
1399   PetscErrorCode ierr;
1400   PetscBool      iascii,isdraw,issocket,isbinary;
1401 
1402   PetscFunctionBegin;
1403   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1404   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1405   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1406   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1407   if (iascii || isdraw || isbinary || issocket) {
1408     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1409   }
1410   PetscFunctionReturn(0);
1411 }
1412 
/*
   MatSOR_MPIAIJ - SOR/Gauss-Seidel relaxation for MPIAIJ matrices.

   Only "local" relaxation is supported in parallel: each process sweeps its
   own diagonal block A while the off-process coupling B*x is folded into the
   right-hand side between outer iterations (a block-Jacobi iteration wrapped
   around local SOR).  A globally ordered parallel SOR is not implemented and
   is reported as unsupported.

   Input Parameters:
     matin  - the matrix
     bb     - right-hand side
     omega  - relaxation factor
     flag   - sweep type bits, see MatSORType
     fshift - diagonal shift
     its    - number of outer (parallel) iterations
     lits   - number of local sweeps per outer iteration

   Output Parameter:
     xx - solution (also the initial guess unless SOR_ZERO_INITIAL_GUESS is set)
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = NULL;  /* work rhs with off-process contributions folded in */
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* pure local triangular application; no ghost data needed */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is needed whenever a sweep will see a nonzero iterate:
     more than one outer iteration, a nonzero initial guess
     (~flag & SOR_ZERO_INITIAL_GUESS tests that the zero-guess bit is NOT set),
     or the Eisenstat variant */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess B*x = 0, so the first iteration can work on bb directly */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* gather ghost values of the current iterate */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    /* first half: backward sweep from a zero guess */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* diagonal is cached on first use for the Eisenstat update */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    /* bb1 = bb + (omega-2)/omega * D*x, then add the off-process coupling */
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  /* propagate any zero-pivot/factorization error detected by the local sweeps */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1512 
/*
   MatPermute_MPIAIJ - forms B = P*A*Q for the permutations described by the
   index sets rowp and colp.

   The permutations are inverted with star forests: each process reduces its
   own global row/column indices onto the positions named by rowp/colp,
   producing rdest[] / cdest[] (and gcdest[] for the compressed off-diagonal
   column map garray).  Per-row diagonal/off-diagonal counts are computed on
   the source rows and broadcast to the destination rows for preallocation,
   after which values are inserted with MatSetValues().
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;  /* NOTE(review): never assigned in this routine, so the ISDestroy(&colp) at the end appears dead -- confirm against history */
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  /* pull the already-computed destination of each owned column to the ghosts */
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* Count, for each source row, how many entries land in the destination
     owner's diagonal block (dnnz) vs off-diagonal block (onnz) */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* move the counts from the source rows to the destination rows */
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}
1619 
1620 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1621 {
1622   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1623   PetscErrorCode ierr;
1624 
1625   PetscFunctionBegin;
1626   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1627   if (ghosts) *ghosts = aij->garray;
1628   PetscFunctionReturn(0);
1629 }
1630 
1631 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1632 {
1633   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1634   Mat            A    = mat->A,B = mat->B;
1635   PetscErrorCode ierr;
1636   PetscLogDouble isend[5],irecv[5];
1637 
1638   PetscFunctionBegin;
1639   info->block_size = 1.0;
1640   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1641 
1642   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1643   isend[3] = info->memory;  isend[4] = info->mallocs;
1644 
1645   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1646 
1647   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1648   isend[3] += info->memory;  isend[4] += info->mallocs;
1649   if (flag == MAT_LOCAL) {
1650     info->nz_used      = isend[0];
1651     info->nz_allocated = isend[1];
1652     info->nz_unneeded  = isend[2];
1653     info->memory       = isend[3];
1654     info->mallocs      = isend[4];
1655   } else if (flag == MAT_GLOBAL_MAX) {
1656     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1657 
1658     info->nz_used      = irecv[0];
1659     info->nz_allocated = irecv[1];
1660     info->nz_unneeded  = irecv[2];
1661     info->memory       = irecv[3];
1662     info->mallocs      = irecv[4];
1663   } else if (flag == MAT_GLOBAL_SUM) {
1664     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1665 
1666     info->nz_used      = irecv[0];
1667     info->nz_allocated = irecv[1];
1668     info->nz_unneeded  = irecv[2];
1669     info->memory       = irecv[3];
1670     info->mallocs      = irecv[4];
1671   }
1672   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1673   info->fill_ratio_needed = 0;
1674   info->factor_mallocs    = 0;
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1679 {
1680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1681   PetscErrorCode ierr;
1682 
1683   PetscFunctionBegin;
1684   switch (op) {
1685   case MAT_NEW_NONZERO_LOCATIONS:
1686   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1687   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1688   case MAT_KEEP_NONZERO_PATTERN:
1689   case MAT_NEW_NONZERO_LOCATION_ERR:
1690   case MAT_USE_INODES:
1691   case MAT_IGNORE_ZERO_ENTRIES:
1692   case MAT_FORM_EXPLICIT_TRANSPOSE:
1693     MatCheckPreallocated(A,1);
1694     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1695     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1696     break;
1697   case MAT_ROW_ORIENTED:
1698     MatCheckPreallocated(A,1);
1699     a->roworiented = flg;
1700 
1701     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1702     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1703     break;
1704   case MAT_FORCE_DIAGONAL_ENTRIES:
1705   case MAT_SORTED_FULL:
1706     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1707     break;
1708   case MAT_IGNORE_OFF_PROC_ENTRIES:
1709     a->donotstash = flg;
1710     break;
1711   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1712   case MAT_SPD:
1713   case MAT_SYMMETRIC:
1714   case MAT_STRUCTURALLY_SYMMETRIC:
1715   case MAT_HERMITIAN:
1716   case MAT_SYMMETRY_ETERNAL:
1717     break;
1718   case MAT_SUBMAT_SINGLEIS:
1719     A->submat_singleis = flg;
1720     break;
1721   case MAT_STRUCTURE_ONLY:
1722     /* The option is handled directly by MatSetOption() */
1723     break;
1724   default:
1725     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1726   }
1727   PetscFunctionReturn(0);
1728 }
1729 
/*
   MatGetRow_MPIAIJ - returns one locally owned row with GLOBAL column
   indices, merging the diagonal (A) and off-diagonal (B) sequential blocks.

   The returned idx/v arrays point at buffers owned by the matrix (allocated
   once, sized for the longest local row) and are valid only until
   MatRestoreRow().  Requesting a row not owned by this process is an error.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  /* only one row may be "checked out" at a time */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* pass NULL sub-requests for whatever the caller did not ask for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;  /* maps B's compressed column indices to global columns */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;  /* number of B entries whose global column lies before the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global column < cstart come first ... */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* ... then A's entries, then B's remaining (right-of-diagonal) entries */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark was already found while copying the values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A's local columns are shifted by cstart to become global */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1807 
1808 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1809 {
1810   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1811 
1812   PetscFunctionBegin;
1813   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1814   aij->getrowactive = PETSC_FALSE;
1815   PetscFunctionReturn(0);
1816 }
1817 
/*
   MatNorm_MPIAIJ - computes a matrix norm by combining the diagonal (A) and
   off-diagonal (B) sequential blocks and reducing across processes.

   Supported: NORM_FROBENIUS, NORM_1 (max column sum; note the two work
   arrays of length equal to the GLOBAL column count allocated on every
   process) and NORM_INFINITY (max row sum).  The two-norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: defer entirely to the sequential implementation */
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both local blocks, then reduce and take the root */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      /* accumulate |a_ij| per GLOBAL column: A's columns are offset by cstart,
         B's compressed columns are translated through garray */
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      /* each local row is split between the A and B blocks; sum both parts */
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
1884 
/*
   MatTranspose_MPIAIJ - forms the transpose of an MPIAIJ matrix.

   reuse == MAT_INITIAL_MATRIX : a new matrix with exact preallocation is
   created (diagonal counts from A's per-column tallies, off-diagonal counts
   communicated to the owning processes with a PetscSF).
   reuse == MAT_REUSE_MATRIX   : *matout, previously produced by this routine,
   is refilled; *matout == A selects the in-place path, which is completed
   below with MatHeaderMerge().

   The diagonal block is transposed locally with MatTranspose(); the
   off-diagonal block is inserted via MatSetValues() one source row at a time
   as a column of the result, which performs the needed communication.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  PetscErrorCode  ierr;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* build a new matrix with exact preallocation (also used for in-place) */
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    /* row/column sizes and block sizes are swapped relative to A */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    /* reuse requires the nonzero pattern to match exactly */
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);

  /* copy over the B part */
  ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
  pbv  = bv;
  row  = A->rmap->rstart;
  /* translate B's compressed column indices to global column numbers */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    /* insert source row i of B as column `row` of the transpose */
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: fold B's data into A's header */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1975 
1976 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1977 {
1978   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1979   Mat            a    = aij->A,b = aij->B;
1980   PetscErrorCode ierr;
1981   PetscInt       s1,s2,s3;
1982 
1983   PetscFunctionBegin;
1984   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1985   if (rr) {
1986     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1987     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1988     /* Overlap communication with computation. */
1989     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1990   }
1991   if (ll) {
1992     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1993     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1994     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1995   }
1996   /* scale  the diagonal block */
1997   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1998 
1999   if (rr) {
2000     /* Do a scatter end and then right scale the off-diagonal block */
2001     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2002     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2003   }
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2008 {
2009   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2010   PetscErrorCode ierr;
2011 
2012   PetscFunctionBegin;
2013   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2014   PetscFunctionReturn(0);
2015 }
2016 
2017 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2018 {
2019   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2020   Mat            a,b,c,d;
2021   PetscBool      flg;
2022   PetscErrorCode ierr;
2023 
2024   PetscFunctionBegin;
2025   a = matA->A; b = matA->B;
2026   c = matB->A; d = matB->B;
2027 
2028   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2029   if (flg) {
2030     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2031   }
2032   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2033   PetscFunctionReturn(0);
2034 }
2035 
2036 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2037 {
2038   PetscErrorCode ierr;
2039   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2040   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2041 
2042   PetscFunctionBegin;
2043   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2044   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2045     /* because of the column compression in the off-processor part of the matrix a->B,
2046        the number of columns in a->B and b->B may be different, hence we cannot call
2047        the MatCopy() directly on the two parts. If need be, we can provide a more
2048        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2049        then copying the submatrices */
2050     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2051   } else {
2052     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2053     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2054   }
2055   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2056   PetscFunctionReturn(0);
2057 }
2058 
/* Default MatSetUp(): preallocate with PETSc's default nonzero estimates. */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2067 
2068 /*
2069    Computes the number of nonzeros per row needed for preallocation when X and Y
2070    have different nonzero structure.
2071 */
2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2073 {
2074   PetscInt       i,j,k,nzx,nzy;
2075 
2076   PetscFunctionBegin;
2077   /* Set the number of nonzeros in the new matrix */
2078   for (i=0; i<m; i++) {
2079     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2080     nzx = xi[i+1] - xi[i];
2081     nzy = yi[i+1] - yi[i];
2082     nnz[i] = 0;
2083     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2084       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2085       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2086       nnz[i]++;
2087     }
2088     for (; k<nzy; k++) nnz[i]++;
2089   }
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2095 {
2096   PetscErrorCode ierr;
2097   PetscInt       m = Y->rmap->N;
2098   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2099   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2100 
2101   PetscFunctionBegin;
2102   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2103   PetscFunctionReturn(0);
2104 }
2105 
/*
   MatAXPY_MPIAIJ - computes Y = a*X + Y.

   SAME_NONZERO_PATTERN   : performed blockwise on the diagonal and
                            off-diagonal sequential parts.
   SUBSET_NONZERO_PATTERN : delegated to MatAXPY_Basic().
   otherwise              : a new matrix preallocated for the union nonzero
                            pattern is built, filled via
                            MatAXPY_BasicWithPreallocation(), and swapped into
                            Y with MatHeaderReplace().
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
    ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;

    /* yy->A / yy->B are the sequential local blocks, so rmap->N here is the
       local row count (global == local for sequential matrices) */
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
    /* union counts: diagonal parts share column numbering; off-diagonal parts
       are compared through their local-to-global maps (garray) */
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2137 
PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);

/* Complex-conjugate every stored entry of both blocks; a no-op in real builds */
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}
2154 
2155 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2156 {
2157   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2158   PetscErrorCode ierr;
2159 
2160   PetscFunctionBegin;
2161   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2162   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2167 {
2168   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2169   PetscErrorCode ierr;
2170 
2171   PetscFunctionBegin;
2172   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2173   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2178 {
2179   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2180   PetscErrorCode    ierr;
2181   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2182   PetscScalar       *va,*vv;
2183   Vec               vB,vA;
2184   const PetscScalar *vb;
2185 
2186   PetscFunctionBegin;
2187   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2188   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2189 
2190   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2191   if (idx) {
2192     for (i=0; i<m; i++) {
2193       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2194     }
2195   }
2196 
2197   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2198   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2199   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2200 
2201   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2202   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2203   for (i=0; i<m; i++) {
2204     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2205       vv[i] = vb[i];
2206       if (idx) idx[i] = a->garray[idxb[i]];
2207     } else {
2208       vv[i] = va[i];
2209       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2210         idx[i] = a->garray[idxb[i]];
2211     }
2212   }
2213   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
2214   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2215   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2216   ierr = PetscFree(idxb);CHKERRQ(ierr);
2217   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2218   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2219   PetscFunctionReturn(0);
2220 }
2221 
/* Row-wise minimum in absolute value over the locally owned rows; if idx[] is non-NULL
   it receives the global column index at which the minimum is attained.  Entries not
   stored in the sparse structure count as implicit zeros. */
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;   /* compressed off-diagonal column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; compute directly into v's array */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no locally owned columns: report 0.0 with idx = -1
       (NOTE(review): the off-diagonal block is not examined in this branch — confirm intended) */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros, seed with the first stored entry */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so it has at least one implicit 0.0, which already attains the minimum magnitude */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this row of B, keeping the smallest magnitude seen */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* combine the per-block results; on equal magnitude prefer the smaller global column index */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagonal-block index is local; shift to global */
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2330 
/* Row-wise minimum (compared on the real part) over the locally owned rows; if idx[] is
   non-NULL it receives the global column index at which the minimum is attained.  Entries
   not stored in the sparse structure count as implicit zeros. */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;   /* compressed off-diagonal column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; compute directly into v's array */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no locally owned columns: report PETSC_MAX_REAL with idx = -1
       (NOTE(review): the off-diagonal block is not examined in this branch — confirm intended) */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros, seed with the first stored entry */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so it has at least one implicit 0.0; the row minimum is therefore at most 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this row of B, keeping the smallest value seen */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* combine the per-block results; on ties prefer the smaller global column index */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagonal-block index is local; shift to global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2439 
/* Row-wise maximum (compared on the real part) over the locally owned rows; if idx[] is
   non-NULL it receives the global column index at which the maximum is attained.  Entries
   not stored in the sparse structure count as implicit zeros. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;   /* compressed off-diagonal column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; compute directly into v's array */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no locally owned columns: report PETSC_MIN_REAL with idx = -1
       (NOTE(review): the off-diagonal block is not examined in this branch — confirm intended) */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros, seed with the first stored entry */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher (there is at least one implicit 0.0) */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this row of B, keeping the largest value seen */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* combine the per-block results; on ties prefer the smaller global column index */
  ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagonal-block index is local; shift to global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2548 
2549 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2550 {
2551   PetscErrorCode ierr;
2552   Mat            *dummy;
2553 
2554   PetscFunctionBegin;
2555   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2556   *newmat = *dummy;
2557   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2562 {
2563   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2564   PetscErrorCode ierr;
2565 
2566   PetscFunctionBegin;
2567   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2568   A->factorerrortype = a->A->factorerrortype;
2569   PetscFunctionReturn(0);
2570 }
2571 
2572 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2573 {
2574   PetscErrorCode ierr;
2575   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2576 
2577   PetscFunctionBegin;
2578   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2579   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2580   if (x->assembled) {
2581     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2582   } else {
2583     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2584   }
2585   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2586   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2591 {
2592   PetscFunctionBegin;
2593   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2594   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2595   PetscFunctionReturn(0);
2596 }
2597 
2598 /*@
2599    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2600 
2601    Collective on Mat
2602 
   Input Parameters:
+    A - the matrix
-    sc - PETSC_TRUE indicates that the scalable algorithm should be used (the default is not to use it)

   Level: advanced
2608 
2609 @*/
2610 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2611 {
2612   PetscErrorCode       ierr;
2613 
2614   PetscFunctionBegin;
2615   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2616   PetscFunctionReturn(0);
2617 }
2618 
2619 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2620 {
2621   PetscErrorCode       ierr;
2622   PetscBool            sc = PETSC_FALSE,flg;
2623 
2624   PetscFunctionBegin;
2625   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2626   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2627   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2628   if (flg) {
2629     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2630   }
2631   ierr = PetscOptionsTail();CHKERRQ(ierr);
2632   PetscFunctionReturn(0);
2633 }
2634 
/* Y = Y + a*I; ensures the diagonal block is preallocated before shifting */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* not preallocated at all: reserve one diagonal entry per row */
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* diagonal block has no stored nonzeros: re-preallocate it, preserving the user's
       nonew setting (presumably MatSeqAIJSetPreallocation resets it — confirm) */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2652 
2653 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2654 {
2655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2656   PetscErrorCode ierr;
2657 
2658   PetscFunctionBegin;
2659   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2660   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2661   if (d) {
2662     PetscInt rstart;
2663     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2664     *d += rstart;
2665 
2666   }
2667   PetscFunctionReturn(0);
2668 }
2669 
2670 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2671 {
2672   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2673   PetscErrorCode ierr;
2674 
2675   PetscFunctionBegin;
2676   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2677   PetscFunctionReturn(0);
2678 }
2679 
/* -------------------------------------------------------------------*/
/*
   Virtual method table for MATMPIAIJ: the k-th initializer implements the k-th
   slot of struct _MatOps (the numeric comments mark slot indices); NULL marks
   an operation this type does not provide.  Slot order must match the _MatOps
   declaration exactly.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};
2830 
2831 /* ----------------------------------------------------------------------------------------*/
2832 
2833 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2834 {
2835   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2836   PetscErrorCode ierr;
2837 
2838   PetscFunctionBegin;
2839   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2840   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2841   PetscFunctionReturn(0);
2842 }
2843 
2844 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2845 {
2846   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2847   PetscErrorCode ierr;
2848 
2849   PetscFunctionBegin;
2850   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2851   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2852   PetscFunctionReturn(0);
2853 }
2854 
2855 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2856 {
2857   Mat_MPIAIJ     *b;
2858   PetscErrorCode ierr;
2859   PetscMPIInt    size;
2860 
2861   PetscFunctionBegin;
2862   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2863   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2864   b = (Mat_MPIAIJ*)B->data;
2865 
2866 #if defined(PETSC_USE_CTABLE)
2867   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2868 #else
2869   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2870 #endif
2871   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2872   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2873   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2874 
2875   /* Because the B will have been resized we simply destroy it and create a new one each time */
2876   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2877   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2878   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2879   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2880   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2881   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2882   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2883 
2884   if (!B->preallocated) {
2885     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2886     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2887     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2888     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2889     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2890   }
2891 
2892   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2893   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2894   B->preallocated  = PETSC_TRUE;
2895   B->was_assembled = PETSC_FALSE;
2896   B->assembled     = PETSC_FALSE;
2897   PetscFunctionReturn(0);
2898 }
2899 
2900 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2901 {
2902   Mat_MPIAIJ     *b;
2903   PetscErrorCode ierr;
2904 
2905   PetscFunctionBegin;
2906   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2907   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2908   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2909   b = (Mat_MPIAIJ*)B->data;
2910 
2911 #if defined(PETSC_USE_CTABLE)
2912   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2913 #else
2914   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2915 #endif
2916   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2917   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2918   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2919 
2920   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2921   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2922   B->preallocated  = PETSC_TRUE;
2923   B->was_assembled = PETSC_FALSE;
2924   B->assembled = PETSC_FALSE;
2925   PetscFunctionReturn(0);
2926 }
2927 
/* Duplicate an MPIAIJ matrix: creates a new matrix with the same layout and
   (depending on cpvalues) copies the numerical values.  Deep-copies the
   diagonal and off-diagonal sequential blocks and the supporting column-map,
   garray, local vector, and scatter structures. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  /* Create the shell of the new matrix with identical sizes, block sizes and type */
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  /* Copy the top-level state flags; any in-flight insertion mode is not carried over */
  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* Per-call MatGetRow() scratch state starts empty in the duplicate */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* Layouts are reference-counted, not copied */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* Deep-copy the global-to-local column map used for off-diagonal entries */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
#endif
  } else a->colmap = NULL;
  /* Deep-copy garray (global column indices of the off-diagonal block) */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  }
  if (oldmat->Mvctx) {
    ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  }
  /* Duplicate the two sequential blocks, honoring cpvalues (copy vs pattern only) */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  /* Carry over composed functions (e.g. MatConvert_xxx entries) */
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2994 
2995 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2996 {
2997   PetscBool      isbinary, ishdf5;
2998   PetscErrorCode ierr;
2999 
3000   PetscFunctionBegin;
3001   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3002   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3003   /* force binary viewer to load .info file if it has not yet done so */
3004   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3005   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3006   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3007   if (isbinary) {
3008     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3009   } else if (ishdf5) {
3010 #if defined(PETSC_HAVE_HDF5)
3011     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3012 #else
3013     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3014 #endif
3015   } else {
3016     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3017   }
3018   PetscFunctionReturn(0);
3019 }
3020 
/* Load an MPIAIJ matrix from a PETSc binary viewer.  The on-disk format is
   the MAT_FILE_CLASSID header (classid, M, N, nz), followed by the per-row
   nonzero counts, the column indices, and the values. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt       *rowidxs,*colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  /* read in matrix header */
  ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
  if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M  = header[1]; N = header[2]; nz = header[3];
  if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
  if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
  /* a negative nz marks a special (e.g. dense) storage format on disk */
  if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);

  /* check if the matrix sizes are correct */
  ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
  if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);

  /* read in row lengths and build row indices (prefix-sum the counts into CSR row offsets) */
  ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* sanity check: the global sum of row lengths must match the header's nonzero count */
  ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
  if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
  /* read in column indices and matrix values */
  ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  /* store matrix indices and values */
  ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
  ierr = PetscFree(rowidxs);CHKERRQ(ierr);
  ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3068 
3069 /* Not scalable because of ISAllGather() unless getting all columns. */
3070 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3071 {
3072   PetscErrorCode ierr;
3073   IS             iscol_local;
3074   PetscBool      isstride;
3075   PetscMPIInt    lisstride=0,gisstride;
3076 
3077   PetscFunctionBegin;
3078   /* check if we are grabbing all columns*/
3079   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3080 
3081   if (isstride) {
3082     PetscInt  start,len,mstart,mlen;
3083     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3084     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3085     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3086     if (mstart == start && mlen-mstart == len) lisstride = 1;
3087   }
3088 
3089   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3090   if (gisstride) {
3091     PetscInt N;
3092     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3093     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3094     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3095     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3096   } else {
3097     PetscInt cbs;
3098     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3099     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3100     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3101   }
3102 
3103   *isseq = iscol_local;
3104   PetscFunctionReturn(0);
3105 }
3106 
/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameters:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol;
            allocated here, ownership passes to the caller
 */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x
     (entries still at -1 after scatter mark columns NOT selected by iscol) */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices: isstart = global offset of this process's first iscol entry */
  ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
  isstart -= ncols;
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  /* Encode iscol into x (selected global column index) and cmap (position
     of the column within the submatrix) */
  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d (local column indices into mat->A); idx ownership transfers to the IS */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d (local row indices into mat->A) */
  ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries > -1 after the scatter were selected by iscol */
  count = 0;
  ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has same blocksize as iscol! */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  /* cmap1 is returned to the caller as garray; caller must PetscFree() it */
  *garray = cmap1;

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3219 
3220 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3221 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3222 {
3223   PetscErrorCode ierr;
3224   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3225   Mat            M = NULL;
3226   MPI_Comm       comm;
3227   IS             iscol_d,isrow_d,iscol_o;
3228   Mat            Asub = NULL,Bsub = NULL;
3229   PetscInt       n;
3230 
3231   PetscFunctionBegin;
3232   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3233 
3234   if (call == MAT_REUSE_MATRIX) {
3235     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3236     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3237     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3238 
3239     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3240     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3241 
3242     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3243     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3244 
3245     /* Update diagonal and off-diagonal portions of submat */
3246     asub = (Mat_MPIAIJ*)(*submat)->data;
3247     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3248     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3249     if (n) {
3250       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3251     }
3252     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3253     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3254 
3255   } else { /* call == MAT_INITIAL_MATRIX) */
3256     const PetscInt *garray;
3257     PetscInt        BsubN;
3258 
3259     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3260     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3261 
3262     /* Create local submatrices Asub and Bsub */
3263     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3264     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3265 
3266     /* Create submatrix M */
3267     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3268 
3269     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3270     asub = (Mat_MPIAIJ*)M->data;
3271 
3272     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3273     n = asub->B->cmap->N;
3274     if (BsubN > n) {
3275       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3276       const PetscInt *idx;
3277       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3278       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3279 
3280       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3281       j = 0;
3282       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3283       for (i=0; i<n; i++) {
3284         if (j >= BsubN) break;
3285         while (subgarray[i] > garray[j]) j++;
3286 
3287         if (subgarray[i] == garray[j]) {
3288           idx_new[i] = idx[j++];
3289         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3290       }
3291       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3292 
3293       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3294       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3295 
3296     } else if (BsubN < n) {
3297       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3298     }
3299 
3300     ierr = PetscFree(garray);CHKERRQ(ierr);
3301     *submat = M;
3302 
3303     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3304     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3305     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3306 
3307     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3308     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3309 
3310     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3311     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3312   }
3313   PetscFunctionReturn(0);
3314 }
3315 
3316 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3317 {
3318   PetscErrorCode ierr;
3319   IS             iscol_local=NULL,isrow_d;
3320   PetscInt       csize;
3321   PetscInt       n,i,j,start,end;
3322   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3323   MPI_Comm       comm;
3324 
3325   PetscFunctionBegin;
3326   /* If isrow has same processor distribution as mat,
3327      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3328   if (call == MAT_REUSE_MATRIX) {
3329     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3330     if (isrow_d) {
3331       sameRowDist  = PETSC_TRUE;
3332       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3333     } else {
3334       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3335       if (iscol_local) {
3336         sameRowDist  = PETSC_TRUE;
3337         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3338       }
3339     }
3340   } else {
3341     /* Check if isrow has same processor distribution as mat */
3342     sameDist[0] = PETSC_FALSE;
3343     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3344     if (!n) {
3345       sameDist[0] = PETSC_TRUE;
3346     } else {
3347       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3348       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3349       if (i >= start && j < end) {
3350         sameDist[0] = PETSC_TRUE;
3351       }
3352     }
3353 
3354     /* Check if iscol has same processor distribution as mat */
3355     sameDist[1] = PETSC_FALSE;
3356     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3357     if (!n) {
3358       sameDist[1] = PETSC_TRUE;
3359     } else {
3360       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3361       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3362       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3363     }
3364 
3365     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3366     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr);
3367     sameRowDist = tsameDist[0];
3368   }
3369 
3370   if (sameRowDist) {
3371     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3372       /* isrow and iscol have same processor distribution as mat */
3373       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3374       PetscFunctionReturn(0);
3375     } else { /* sameRowDist */
3376       /* isrow has same processor distribution as mat */
3377       if (call == MAT_INITIAL_MATRIX) {
3378         PetscBool sorted;
3379         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3380         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3381         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3382         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3383 
3384         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3385         if (sorted) {
3386           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3387           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3388           PetscFunctionReturn(0);
3389         }
3390       } else { /* call == MAT_REUSE_MATRIX */
3391         IS iscol_sub;
3392         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3393         if (iscol_sub) {
3394           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3395           PetscFunctionReturn(0);
3396         }
3397       }
3398     }
3399   }
3400 
3401   /* General case: iscol -> iscol_local which has global size of iscol */
3402   if (call == MAT_REUSE_MATRIX) {
3403     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3404     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3405   } else {
3406     if (!iscol_local) {
3407       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3408     }
3409   }
3410 
3411   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3412   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3413 
3414   if (call == MAT_INITIAL_MATRIX) {
3415     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3416     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3417   }
3418   PetscFunctionReturn(0);
3419 }
3420 
3421 /*@C
3422      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3423          and "off-diagonal" part of the matrix in CSR format.
3424 
3425    Collective
3426 
3427    Input Parameters:
3428 +  comm - MPI communicator
3429 .  A - "diagonal" portion of matrix
3430 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3431 -  garray - global index of B columns
3432 
3433    Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3436 
3437    Notes:
3438        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3439        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3440 
3441 .seealso: MatCreateMPIAIJWithSplitArrays()
3442 @*/
3443 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3444 {
3445   PetscErrorCode    ierr;
3446   Mat_MPIAIJ        *maij;
3447   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3448   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3449   const PetscScalar *oa;
3450   Mat               Bnew;
3451   PetscInt          m,n,N;
3452 
3453   PetscFunctionBegin;
3454   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3455   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3456   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3457   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3458   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3459   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3460 
3461   /* Get global columns of mat */
3462   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3463 
3464   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3465   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3466   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3467   maij = (Mat_MPIAIJ*)(*mat)->data;
3468 
3469   (*mat)->preallocated = PETSC_TRUE;
3470 
3471   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3472   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3473 
3474   /* Set A as diagonal portion of *mat */
3475   maij->A = A;
3476 
3477   nz = oi[m];
3478   for (i=0; i<nz; i++) {
3479     col   = oj[i];
3480     oj[i] = garray[col];
3481   }
3482 
3483   /* Set Bnew as off-diagonal portion of *mat */
3484   ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr);
3485   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr);
3486   ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr);
3487   bnew        = (Mat_SeqAIJ*)Bnew->data;
3488   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3489   maij->B     = Bnew;
3490 
3491   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3492 
3493   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3494   b->free_a       = PETSC_FALSE;
3495   b->free_ij      = PETSC_FALSE;
3496   ierr = MatDestroy(&B);CHKERRQ(ierr);
3497 
3498   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3499   bnew->free_a       = PETSC_TRUE;
3500   bnew->free_ij      = PETSC_TRUE;
3501 
3502   /* condense columns of maij->B */
3503   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3504   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3505   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3506   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3507   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3508   PetscFunctionReturn(0);
3509 }
3510 
3511 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3512 
/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - Extracts a parallel submatrix of an MPIAIJ matrix
   for the case where the submatrix keeps the same row distribution as the original matrix.

   The algorithm (MAT_INITIAL_MATRIX path):
     (1)-(2) build iscol_sub (the requested columns restricted to those present in this
             process's diagonal block or off-diagonal garray) and iscmap (the position of
             each kept column within the submatrix);
     (3) extract a sequential matrix Msub via MatCreateSubMatrices_MPIAIJ_SingleIS_Local();
     (4) preallocate the parallel result from Msub's row lengths split into
         diagonal/off-diagonal counts;
     (5) insert Msub's values into the parallel matrix with MatSetValues_MPIAIJ().
   Msub, iscol_sub and iscmap are composed onto *newmat ("SubMatrix", "SubIScol",
   "Subcmap") so a later MAT_REUSE_MATRIX call can retrieve them instead of rebuilding.

   Note: iscol_local must be sorted; it may contain duplicate indices (see step (2)).
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the objects cached on *newmat by a previous MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* allcolumns must agree on all ranks; MPI_LAND makes the flag collective */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (allcolumns) {
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat: merge against sorted garray */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i;  /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      /* idx and cmap1 ownership is transferred to the index sets (PETSC_OWN_POINTER) */
      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt    rank,size;
    PetscInt       csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this rank's [rstart,rend) column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* single allocation: olens aliases the second half of dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  jj   = aij->j;
  ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    /* translate Msub's local column indices back to submatrix global columns */
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
3722 
3723 /*
3724     Not great since it makes two copies of the submatrix, first an SeqAIJ
3725   in local and then by concatenating the local matrices the end result.
3726   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3727 
3728   Note: This requires a sequential iscol with all indices.
3729 */
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.

  Implementation: extracts a sequential submatrix Mreuse via
  MatCreateSubMatrices_MPIAIJ_SingleIS_Local(), then preallocates and fills a
  parallel matrix M from it. On MAT_INITIAL_MATRIX, Mreuse is composed onto the
  result under the key "SubMatrix" so a later MAT_REUSE_MATRIX call can reuse it.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the flag must be identical on all ranks; MPI_LAND makes it collective */
  ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix cached by a previous MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this rank's [rstart,rend) column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* single allocation: olens aliases the second half of dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;

  /* trigger copy to CPU if needed */
  ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
3855 
3856 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3857 {
3858   PetscInt       m,cstart, cend,j,nnz,i,d;
3859   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3860   const PetscInt *JJ;
3861   PetscErrorCode ierr;
3862   PetscBool      nooffprocentries;
3863 
3864   PetscFunctionBegin;
3865   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3866 
3867   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3868   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3869   m      = B->rmap->n;
3870   cstart = B->cmap->rstart;
3871   cend   = B->cmap->rend;
3872   rstart = B->rmap->rstart;
3873 
3874   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3875 
3876   if (PetscDefined(USE_DEBUG)) {
3877     for (i=0; i<m; i++) {
3878       nnz = Ii[i+1]- Ii[i];
3879       JJ  = J + Ii[i];
3880       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3881       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3882       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3883     }
3884   }
3885 
3886   for (i=0; i<m; i++) {
3887     nnz     = Ii[i+1]- Ii[i];
3888     JJ      = J + Ii[i];
3889     nnz_max = PetscMax(nnz_max,nnz);
3890     d       = 0;
3891     for (j=0; j<nnz; j++) {
3892       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3893     }
3894     d_nnz[i] = d;
3895     o_nnz[i] = nnz - d;
3896   }
3897   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3898   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3899 
3900   for (i=0; i<m; i++) {
3901     ii   = i + rstart;
3902     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3903   }
3904   nooffprocentries    = B->nooffprocentries;
3905   B->nooffprocentries = PETSC_TRUE;
3906   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3907   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3908   B->nooffprocentries = nooffprocentries;
3909 
3910   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3911   PetscFunctionReturn(0);
3912 }
3913 
3914 /*@
3915    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3916    (the default parallel PETSc format).
3917 
3918    Collective
3919 
3920    Input Parameters:
3921 +  B - the matrix
3922 .  i - the indices into j for the start of each local row (starts with zero)
3923 .  j - the column indices for each local row (starts with zero)
3924 -  v - optional values in the matrix
3925 
3926    Level: developer
3927 
3928    Notes:
3929        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3930      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3931      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3932 
3933        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3934 
3935        The format which is used for the sparse matrix input, is equivalent to a
3936     row-major ordering.. i.e for the following matrix, the input data expected is
3937     as shown
3938 
3939 $        1 0 0
3940 $        2 0 3     P0
3941 $       -------
3942 $        4 5 6     P1
3943 $
3944 $     Process0 [P0]: rows_owned=[0,1]
3945 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3946 $        j =  {0,0,2}  [size = 3]
3947 $        v =  {1,2,3}  [size = 3]
3948 $
3949 $     Process1 [P1]: rows_owned=[2]
3950 $        i =  {0,3}    [size = nrow+1  = 1+1]
3951 $        j =  {0,1,2}  [size = 3]
3952 $        v =  {4,5,6}  [size = 3]
3953 
3954 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3955           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3956 @*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ)
     registered under "MatMPIAIJSetPreallocationCSR_C"; silently a no-op for matrix types
     that do not provide one (PetscTryMethod semantics). */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3965 
3966 /*@C
3967    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3968    (the default parallel PETSc format).  For good matrix assembly performance
3969    the user should preallocate the matrix storage by setting the parameters
3970    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3971    performance can be increased by more than a factor of 50.
3972 
3973    Collective
3974 
3975    Input Parameters:
3976 +  B - the matrix
3977 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3978            (same value is used for all local rows)
3979 .  d_nnz - array containing the number of nonzeros in the various rows of the
3980            DIAGONAL portion of the local submatrix (possibly different for each row)
3981            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3982            The size of this array is equal to the number of local rows, i.e 'm'.
3983            For matrices that will be factored, you must leave room for (and set)
3984            the diagonal entry even if it is zero.
3985 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3986            submatrix (same value is used for all local rows).
3987 -  o_nnz - array containing the number of nonzeros in the various rows of the
3988            OFF-DIAGONAL portion of the local submatrix (possibly different for
3989            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3990            structure. The size of this array is equal to the number
3991            of local rows, i.e 'm'.
3992 
3993    If the *_nnz parameter is given then the *_nz parameter is ignored
3994 
3995    The AIJ format (also called the Yale sparse matrix format or
3996    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3997    storage.  The stored row and column indices begin with zero.
3998    See Users-Manual: ch_mat for details.
3999 
4000    The parallel matrix is partitioned such that the first m0 rows belong to
4001    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4002    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4003 
4004    The DIAGONAL portion of the local submatrix of a processor can be defined
4005    as the submatrix which is obtained by extraction the part corresponding to
4006    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4007    first row that belongs to the processor, r2 is the last row belonging to
4008    the this processor, and c1-c2 is range of indices of the local part of a
4009    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4010    common case of a square matrix, the row and column ranges are the same and
4011    the DIAGONAL part is also square. The remaining portion of the local
4012    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4013 
4014    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4015 
4016    You can call MatGetInfo() to get information on how effective the preallocation was;
4017    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4018    You can also run with the option -info and look for messages with the string
4019    malloc in them to see if additional memory allocation was needed.
4020 
4021    Example usage:
4022 
4023    Consider the following 8x8 matrix with 34 non-zero values, that is
4024    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4025    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4026    as follows:
4027 
4028 .vb
4029             1  2  0  |  0  3  0  |  0  4
4030     Proc0   0  5  6  |  7  0  0  |  8  0
4031             9  0 10  | 11  0  0  | 12  0
4032     -------------------------------------
4033            13  0 14  | 15 16 17  |  0  0
4034     Proc1   0 18  0  | 19 20 21  |  0  0
4035             0  0  0  | 22 23  0  | 24  0
4036     -------------------------------------
4037     Proc2  25 26 27  |  0  0 28  | 29  0
4038            30  0  0  | 31 32 33  |  0 34
4039 .ve
4040 
4041    This can be represented as a collection of submatrices as:
4042 
4043 .vb
4044       A B C
4045       D E F
4046       G H I
4047 .ve
4048 
4049    Where the submatrices A,B,C are owned by proc0, D,E,F are
4050    owned by proc1, G,H,I are owned by proc2.
4051 
4052    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4053    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4054    The 'M','N' parameters are 8,8, and have the same values on all procs.
4055 
4056    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4057    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4058    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4059    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4060    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4062 
4063    When d_nz, o_nz parameters are specified, d_nz storage elements are
4064    allocated for every row of the local diagonal submatrix, and o_nz
4065    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4068    In this case, the values of d_nz,o_nz are:
4069 .vb
4070      proc0 : dnz = 2, o_nz = 2
4071      proc1 : dnz = 3, o_nz = 2
4072      proc2 : dnz = 1, o_nz = 4
4073 .ve
4074    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4075    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4076    for proc3. i.e we are using 12+15+10=37 storage locations to store
4077    34 values.
4078 
4079    When d_nnz, o_nnz parameters are specified, the storage is specified
4080    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4081    In the above case the values for d_nnz,o_nnz are:
4082 .vb
4083      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4084      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4085      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4086 .ve
4087    Here the space allocated is sum of all the above values i.e 34, and
4088    hence pre-allocation is perfect.
4089 
4090    Level: intermediate
4091 
4092 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4093           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4094 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* Dispatch to the type-specific implementation registered under
     "MatMPIAIJSetPreallocation_C"; a no-op for types without one (PetscTryMethod). */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4105 
4106 /*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
         the local rows in standard CSR format.
4109 
4110    Collective
4111 
4112    Input Parameters:
4113 +  comm - MPI communicator
4114 .  m - number of local rows (Cannot be PETSC_DECIDE)
4115 .  n - This value should be the same as the local size used in creating the
4116        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4117        calculated if N is given) For square matrices n is almost always m.
4118 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4119 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4120 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4121 .   j - column indices
4122 -   a - matrix values
4123 
4124    Output Parameter:
4125 .   mat - the matrix
4126 
4127    Level: intermediate
4128 
4129    Notes:
4130        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4131      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4132      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4133 
4134        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4135 
4136        The format which is used for the sparse matrix input, is equivalent to a
4137     row-major ordering.. i.e for the following matrix, the input data expected is
4138     as shown
4139 
4140        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4141 
4142 $        1 0 0
4143 $        2 0 3     P0
4144 $       -------
4145 $        4 5 6     P1
4146 $
4147 $     Process0 [P0]: rows_owned=[0,1]
4148 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4149 $        j =  {0,0,2}  [size = 3]
4150 $        v =  {1,2,3}  [size = 3]
4151 $
4152 $     Process1 [P1]: rows_owned=[2]
4153 $        i =  {0,3}    [size = nrow+1  = 1+1]
4154 $        j =  {0,1,2}  [size = 3]
4155 $        v =  {4,5,6}  [size = 3]
4156 
4157 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4158           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4159 @*/
4160 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4161 {
4162   PetscErrorCode ierr;
4163 
4164   PetscFunctionBegin;
4165   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4166   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4167   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4168   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4169   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4170   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4171   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4172   PetscFunctionReturn(0);
4173 }
4174 
4175 /*@
     MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain
         the local rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical
4178 
4179    Collective
4180 
4181    Input Parameters:
4182 +  mat - the matrix
4183 .  m - number of local rows (Cannot be PETSC_DECIDE)
4184 .  n - This value should be the same as the local size used in creating the
4185        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4186        calculated if N is given) For square matrices n is almost always m.
4187 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4188 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4189 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4190 .  J - column indices
4191 -  v - matrix values
4192 
4193    Level: intermediate
4194 
4195 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4196           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4197 @*/
4198 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4199 {
4200   PetscErrorCode ierr;
4201   PetscInt       cstart,nnz,i,j;
4202   PetscInt       *ld;
4203   PetscBool      nooffprocentries;
4204   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4205   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4206   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4207   const PetscInt *Adi = Ad->i;
4208   PetscInt       ldi,Iii,md;
4209 
4210   PetscFunctionBegin;
4211   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4212   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4213   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4214   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4215 
4216   cstart = mat->cmap->rstart;
4217   if (!Aij->ld) {
4218     /* count number of entries below block diagonal */
4219     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4220     Aij->ld = ld;
4221     for (i=0; i<m; i++) {
4222       nnz  = Ii[i+1]- Ii[i];
4223       j     = 0;
4224       while  (J[j] < cstart && j < nnz) {j++;}
4225       J    += nnz;
4226       ld[i] = j;
4227     }
4228   } else {
4229     ld = Aij->ld;
4230   }
4231 
4232   for (i=0; i<m; i++) {
4233     nnz  = Ii[i+1]- Ii[i];
4234     Iii  = Ii[i];
4235     ldi  = ld[i];
4236     md   = Adi[i+1]-Adi[i];
4237     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4238     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4239     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4240     ad  += md;
4241     ao  += nnz - md;
4242   }
4243   nooffprocentries      = mat->nooffprocentries;
4244   mat->nooffprocentries = PETSC_TRUE;
4245   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4246   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4247   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4248   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4249   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4250   mat->nooffprocentries = nooffprocentries;
4251   PetscFunctionReturn(0);
4252 }
4253 
4254 /*@C
4255    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4256    (the default parallel PETSc format).  For good matrix assembly performance
4257    the user should preallocate the matrix storage by setting the parameters
4258    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4259    performance can be increased by more than a factor of 50.
4260 
4261    Collective
4262 
4263    Input Parameters:
4264 +  comm - MPI communicator
4265 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4266            This value should be the same as the local size used in creating the
4267            y vector for the matrix-vector product y = Ax.
4268 .  n - This value should be the same as the local size used in creating the
4269        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4270        calculated if N is given) For square matrices n is almost always m.
4271 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4272 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4273 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4274            (same value is used for all local rows)
4275 .  d_nnz - array containing the number of nonzeros in the various rows of the
4276            DIAGONAL portion of the local submatrix (possibly different for each row)
4277            or NULL, if d_nz is used to specify the nonzero structure.
4278            The size of this array is equal to the number of local rows, i.e 'm'.
4279 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4280            submatrix (same value is used for all local rows).
4281 -  o_nnz - array containing the number of nonzeros in the various rows of the
4282            OFF-DIAGONAL portion of the local submatrix (possibly different for
4283            each row) or NULL, if o_nz is used to specify the nonzero
4284            structure. The size of this array is equal to the number
4285            of local rows, i.e 'm'.
4286 
4287    Output Parameter:
4288 .  A - the matrix
4289 
4290    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4291    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4292    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4293 
4294    Notes:
4295    If the *_nnz parameter is given then the *_nz parameter is ignored
4296 
4297    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4298    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4299    storage requirements for this matrix.
4300 
4301    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4302    processor than it must be used on all processors that share the object for
4303    that argument.
4304 
4305    The user MUST specify either the local or global matrix dimensions
4306    (possibly both).
4307 
4308    The parallel matrix is partitioned across processors such that the
4309    first m0 rows belong to process 0, the next m1 rows belong to
4310    process 1, the next m2 rows belong to process 2 etc.. where
4311    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4312    values corresponding to [m x N] submatrix.
4313 
4314    The columns are logically partitioned with the n0 columns belonging
4315    to 0th partition, the next n1 columns belonging to the next
4316    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4317 
4318    The DIAGONAL portion of the local submatrix on any given processor
4319    is the submatrix corresponding to the rows and columns m,n
4320    corresponding to the given processor. i.e diagonal matrix on
4321    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4322    etc. The remaining portion of the local submatrix [m x (N-n)]
4323    constitute the OFF-DIAGONAL portion. The example below better
4324    illustrates this concept.
4325 
4326    For a square global matrix we define each processor's diagonal portion
4327    to be its local rows and the corresponding columns (a square submatrix);
4328    each processor's off-diagonal portion encompasses the remainder of the
4329    local matrix (a rectangular submatrix).
4330 
4331    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4332 
4333    When calling this routine with a single process communicator, a matrix of
4334    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4335    type of communicator, use the construction mechanism
4336 .vb
4337      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4338 .ve
4339 
4340 $     MatCreate(...,&A);
4341 $     MatSetType(A,MATMPIAIJ);
4342 $     MatSetSizes(A, m,n,M,N);
4343 $     MatMPIAIJSetPreallocation(A,...);
4344 
4345    By default, this format uses inodes (identical nodes) when possible.
4346    We search for consecutive rows with the same nonzero structure, thereby
4347    reusing matrix information to achieve increased efficiency.
4348 
4349    Options Database Keys:
4350 +  -mat_no_inode  - Do not use inodes
4351 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4352 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4353         See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4354         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4355 
4356    Example usage:
4357 
4358    Consider the following 8x8 matrix with 34 non-zero values, that is
4359    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4360    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4361    as follows
4362 
4363 .vb
4364             1  2  0  |  0  3  0  |  0  4
4365     Proc0   0  5  6  |  7  0  0  |  8  0
4366             9  0 10  | 11  0  0  | 12  0
4367     -------------------------------------
4368            13  0 14  | 15 16 17  |  0  0
4369     Proc1   0 18  0  | 19 20 21  |  0  0
4370             0  0  0  | 22 23  0  | 24  0
4371     -------------------------------------
4372     Proc2  25 26 27  |  0  0 28  | 29  0
4373            30  0  0  | 31 32 33  |  0 34
4374 .ve
4375 
4376    This can be represented as a collection of submatrices as
4377 
4378 .vb
4379       A B C
4380       D E F
4381       G H I
4382 .ve
4383 
4384    Where the submatrices A,B,C are owned by proc0, D,E,F are
4385    owned by proc1, G,H,I are owned by proc2.
4386 
4387    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4388    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4389    The 'M','N' parameters are 8,8, and have the same values on all procs.
4390 
4391    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4392    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4393    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4394    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4397 
4398    When d_nz, o_nz parameters are specified, d_nz storage elements are
4399    allocated for every row of the local diagonal submatrix, and o_nz
4400    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4402    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4403    In this case, the values of d_nz,o_nz are
4404 .vb
4405      proc0 : dnz = 2, o_nz = 2
4406      proc1 : dnz = 3, o_nz = 2
4407      proc2 : dnz = 1, o_nz = 4
4408 .ve
4409    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4410    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
4412    34 values.
4413 
4414    When d_nnz, o_nnz parameters are specified, the storage is specified
4415    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4416    In the above case the values for d_nnz,o_nnz are
4417 .vb
4418      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4419      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4420      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4421 .ve
4422    Here the space allocated is sum of all the above values i.e 34, and
4423    hence pre-allocation is perfect.
4424 
4425    Level: intermediate
4426 
4427 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4428           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4429 @*/
4430 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4431 {
4432   PetscErrorCode ierr;
4433   PetscMPIInt    size;
4434 
4435   PetscFunctionBegin;
4436   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4437   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4438   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4439   if (size > 1) {
4440     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4441     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4442   } else {
4443     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4444     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4445   }
4446   PetscFunctionReturn(0);
4447 }
4448 
4449 /*@C
4450   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4451 
4452   Not collective
4453 
4454   Input Parameter:
4455 . A - The MPIAIJ matrix
4456 
4457   Output Parameters:
4458 + Ad - The local diagonal block as a SeqAIJ matrix
4459 . Ao - The local off-diagonal block as a SeqAIJ matrix
4460 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4461 
4462   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4464   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4465   local column numbers to global column numbers in the original matrix.
4466 
4467   Level: intermediate
4468 
4469 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4470 @*/
4471 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4472 {
4473   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4474   PetscBool      flg;
4475   PetscErrorCode ierr;
4476 
4477   PetscFunctionBegin;
4478   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4479   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4480   if (Ad)     *Ad     = a->A;
4481   if (Ao)     *Ao     = a->B;
4482   if (colmap) *colmap = a->garray;
4483   PetscFunctionReturn(0);
4484 }
4485 
/*
  MatCreateMPIMatConcatenateSeqMat_MPIAIJ - Concatenates the rows of the sequential matrices
  inmat held by the processes of comm into one parallel AIJ matrix; each process contributes
  its entire inmat as a block of consecutive global rows.

  Input:
+ comm  - communicator of the output matrix
. inmat - this process's sequential (SeqAIJ) matrix
. n     - local column count of the output, or PETSC_DECIDE to split the global columns
- scall - MAT_INITIAL_MATRIX to create *outmat (symbolic + numeric), MAT_REUSE_MATRIX to
          refill values only (the nonzero pattern of inmat must then be unchanged)

  Output:
. outmat - the assembled parallel matrix
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    /* prefix-sum of local row counts gives this process's first global row */
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row (MatPreallocateInitialize/Finalize
       is a macro pair that declares and frees the dnz/onz work arrays) */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
    /* both preallocation calls are issued; only the one matching the actual
       type (SeqAIJ on one process, MPIAIJ otherwise) takes effect */
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* numeric phase: each process inserts only its own rows, so no communication occurs */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4538 
4539 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4540 {
4541   PetscErrorCode    ierr;
4542   PetscMPIInt       rank;
4543   PetscInt          m,N,i,rstart,nnz;
4544   size_t            len;
4545   const PetscInt    *indx;
4546   PetscViewer       out;
4547   char              *name;
4548   Mat               B;
4549   const PetscScalar *values;
4550 
4551   PetscFunctionBegin;
4552   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4553   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4554   /* Should this be the type of the diagonal block of A? */
4555   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4556   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4557   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4558   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4559   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4560   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4561   for (i=0; i<m; i++) {
4562     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4563     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4564     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4565   }
4566   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4567   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4568 
4569   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
4570   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4571   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4572   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4573   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4574   ierr = PetscFree(name);CHKERRQ(ierr);
4575   ierr = MatView(B,out);CHKERRQ(ierr);
4576   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4577   ierr = MatDestroy(&B);CHKERRQ(ierr);
4578   PetscFunctionReturn(0);
4579 }
4580 
4581 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4582 {
4583   PetscErrorCode      ierr;
4584   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4585 
4586   PetscFunctionBegin;
4587   if (!merge) PetscFunctionReturn(0);
4588   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4589   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4590   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4591   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4592   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4593   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4594   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4595   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4596   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4597   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4598   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4599   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4600   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4601   ierr = PetscFree(merge);CHKERRQ(ierr);
4602   PetscFunctionReturn(0);
4603 }
4604 
4605 #include <../src/mat/utils/freespace.h>
4606 #include <petscbt.h>
4607 
/*
  MatCreateMPIAIJSumSeqAIJNumeric - Numeric phase of summing overlapping sequential AIJ
  matrices into a parallel matrix previously set up by MatCreateMPIAIJSumSeqAIJSymbolic().

  Each process sends the values of the seqmat rows owned by other processes and receives
  the values of its own rows contributed by others; local and received contributions are
  added into mpimat using the ij-structure cached in the Mat_Merge_SeqsToMPI container.

  Input:
+ seqmat - this process's sequential matrix (same nonzero pattern as in the symbolic phase)
- mpimat - the parallel matrix created by MatCreateMPIAIJSumSeqAIJSymbolic()
*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

  /* retrieve the merge structure cached on mpimat by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* rows owned by [proc] are stored contiguously in seqmat; send their values in one message */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    /* forward scan: aj appears to be a (sorted) subsequence of bj_i, so a single
       pass aligns each contribution with its slot -- relies on the symbolic phase */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* abuf_r[0] holds the contiguous receive storage allocated by PetscPostIrecvScalar() */
  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4725 
4726 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4727 {
4728   PetscErrorCode      ierr;
4729   Mat                 B_mpi;
4730   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4731   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4732   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4733   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4734   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4735   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4736   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4737   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4738   MPI_Status          *status;
4739   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4740   PetscBT             lnkbt;
4741   Mat_Merge_SeqsToMPI *merge;
4742   PetscContainer      container;
4743 
4744   PetscFunctionBegin;
4745   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4746 
4747   /* make sure it is a PETSc comm */
4748   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4749   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4750   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4751 
4752   ierr = PetscNew(&merge);CHKERRQ(ierr);
4753   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4754 
4755   /* determine row ownership */
4756   /*---------------------------------------------------------*/
4757   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4758   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4759   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4760   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4761   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4762   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4763   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4764 
4765   m      = merge->rowmap->n;
4766   owners = merge->rowmap->range;
4767 
4768   /* determine the number of messages to send, their lengths */
4769   /*---------------------------------------------------------*/
4770   len_s = merge->len_s;
4771 
4772   len          = 0; /* length of buf_si[] */
4773   merge->nsend = 0;
4774   for (proc=0; proc<size; proc++) {
4775     len_si[proc] = 0;
4776     if (proc == rank) {
4777       len_s[proc] = 0;
4778     } else {
4779       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4780       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4781     }
4782     if (len_s[proc]) {
4783       merge->nsend++;
4784       nrows = 0;
4785       for (i=owners[proc]; i<owners[proc+1]; i++) {
4786         if (ai[i+1] > ai[i]) nrows++;
4787       }
4788       len_si[proc] = 2*(nrows+1);
4789       len         += len_si[proc];
4790     }
4791   }
4792 
4793   /* determine the number and length of messages to receive for ij-structure */
4794   /*-------------------------------------------------------------------------*/
4795   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4796   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4797 
4798   /* post the Irecv of j-structure */
4799   /*-------------------------------*/
4800   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4801   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4802 
4803   /* post the Isend of j-structure */
4804   /*--------------------------------*/
4805   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4806 
4807   for (proc=0, k=0; proc<size; proc++) {
4808     if (!len_s[proc]) continue;
4809     i    = owners[proc];
4810     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
4811     k++;
4812   }
4813 
4814   /* receives and sends of j-structure are complete */
4815   /*------------------------------------------------*/
4816   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
4817   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}
4818 
4819   /* send and recv i-structure */
4820   /*---------------------------*/
4821   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4822   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4823 
4824   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4825   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4826   for (proc=0,k=0; proc<size; proc++) {
4827     if (!len_s[proc]) continue;
4828     /* form outgoing message for i-structure:
4829          buf_si[0]:                 nrows to be sent
4830                [1:nrows]:           row index (global)
4831                [nrows+1:2*nrows+1]: i-structure index
4832     */
4833     /*-------------------------------------------*/
4834     nrows       = len_si[proc]/2 - 1;
4835     buf_si_i    = buf_si + nrows+1;
4836     buf_si[0]   = nrows;
4837     buf_si_i[0] = 0;
4838     nrows       = 0;
4839     for (i=owners[proc]; i<owners[proc+1]; i++) {
4840       anzi = ai[i+1] - ai[i];
4841       if (anzi) {
4842         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4843         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4844         nrows++;
4845       }
4846     }
4847     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
4848     k++;
4849     buf_si += len_si[proc];
4850   }
4851 
4852   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
4853   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}
4854 
4855   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4856   for (i=0; i<merge->nrecv; i++) {
4857     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4858   }
4859 
4860   ierr = PetscFree(len_si);CHKERRQ(ierr);
4861   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4862   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4863   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4864   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4865   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4866   ierr = PetscFree(status);CHKERRQ(ierr);
4867 
4868   /* compute a local seq matrix in each processor */
4869   /*----------------------------------------------*/
4870   /* allocate bi array and free space for accumulating nonzero column info */
4871   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4872   bi[0] = 0;
4873 
4874   /* create and initialize a linked list */
4875   nlnk = N+1;
4876   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4877 
4878   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4879   len  = ai[owners[rank+1]] - ai[owners[rank]];
4880   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4881 
4882   current_space = free_space;
4883 
4884   /* determine symbolic info for each local row */
4885   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4886 
4887   for (k=0; k<merge->nrecv; k++) {
4888     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4889     nrows       = *buf_ri_k[k];
4890     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4891     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4892   }
4893 
4894   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4895   len  = 0;
4896   for (i=0; i<m; i++) {
4897     bnzi = 0;
4898     /* add local non-zero cols of this proc's seqmat into lnk */
4899     arow  = owners[rank] + i;
4900     anzi  = ai[arow+1] - ai[arow];
4901     aj    = a->j + ai[arow];
4902     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4903     bnzi += nlnk;
4904     /* add received col data into lnk */
4905     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4906       if (i == *nextrow[k]) { /* i-th row */
4907         anzi  = *(nextai[k]+1) - *nextai[k];
4908         aj    = buf_rj[k] + *nextai[k];
4909         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4910         bnzi += nlnk;
4911         nextrow[k]++; nextai[k]++;
4912       }
4913     }
4914     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4915 
4916     /* if free space is not available, make more free space */
4917     if (current_space->local_remaining<bnzi) {
4918       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4919       nspacedouble++;
4920     }
4921     /* copy data into free space, then initialize lnk */
4922     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4923     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4924 
4925     current_space->array           += bnzi;
4926     current_space->local_used      += bnzi;
4927     current_space->local_remaining -= bnzi;
4928 
4929     bi[i+1] = bi[i] + bnzi;
4930   }
4931 
4932   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4933 
4934   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4935   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4936   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4937 
4938   /* create symbolic parallel matrix B_mpi */
4939   /*---------------------------------------*/
4940   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4941   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4942   if (n==PETSC_DECIDE) {
4943     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4944   } else {
4945     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4946   }
4947   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4948   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4949   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4950   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4951   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4952 
4953   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4954   B_mpi->assembled  = PETSC_FALSE;
4955   merge->bi         = bi;
4956   merge->bj         = bj;
4957   merge->buf_ri     = buf_ri;
4958   merge->buf_rj     = buf_rj;
4959   merge->coi        = NULL;
4960   merge->coj        = NULL;
4961   merge->owners_co  = NULL;
4962 
4963   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4964 
4965   /* attach the supporting struct to B_mpi for reuse */
4966   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4967   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4968   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4969   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4970   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4971   *mpimat = B_mpi;
4972 
4973   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4974   PetscFunctionReturn(0);
4975 }
4976 
4977 /*@C
4978       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4979                  matrices from each processor
4980 
4981     Collective
4982 
4983    Input Parameters:
+    comm - the communicator the parallel matrix will live on
4985 .    seqmat - the input sequential matrices
4986 .    m - number of local rows (or PETSC_DECIDE)
4987 .    n - number of local columns (or PETSC_DECIDE)
4988 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4989 
4990    Output Parameter:
4991 .    mpimat - the parallel matrix generated
4992 
4993     Level: advanced
4994 
4995    Notes:
4996      The dimensions of the sequential matrix in each processor MUST be the same.
4997      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4998      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4999 @*/
5000 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5001 {
5002   PetscErrorCode ierr;
5003   PetscMPIInt    size;
5004 
5005   PetscFunctionBegin;
5006   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5007   if (size == 1) {
5008     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5009     if (scall == MAT_INITIAL_MATRIX) {
5010       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5011     } else {
5012       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5013     }
5014     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5015     PetscFunctionReturn(0);
5016   }
5017   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5018   if (scall == MAT_INITIAL_MATRIX) {
5019     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5020   }
5021   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5022   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5023   PetscFunctionReturn(0);
5024 }
5025 
5026 /*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()
5030 
5031     Not Collective
5032 
5033    Input Parameters:
5034 +    A - the matrix
5035 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5036 
5037    Output Parameter:
5038 .    A_loc - the local sequential matrix generated
5039 
5040     Level: developer
5041 
5042    Notes:
5043      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5044      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5045      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5046      modify the values of the returned A_loc.
5047 
5048 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5049 @*/
5050 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5051 {
5052   PetscErrorCode    ierr;
5053   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5054   Mat_SeqAIJ        *mat,*a,*b;
5055   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5056   const PetscScalar *aa,*ba,*aav,*bav;
5057   PetscScalar       *ca,*cam;
5058   PetscMPIInt       size;
5059   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5060   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5061   PetscBool         match;
5062 
5063   PetscFunctionBegin;
5064   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5065   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5066   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5067   if (size == 1) {
5068     if (scall == MAT_INITIAL_MATRIX) {
5069       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5070       *A_loc = mpimat->A;
5071     } else if (scall == MAT_REUSE_MATRIX) {
5072       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5073     }
5074     PetscFunctionReturn(0);
5075   }
5076 
5077   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5078   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5079   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5080   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5081   ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5082   ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5083   aa   = aav;
5084   ba   = bav;
5085   if (scall == MAT_INITIAL_MATRIX) {
5086     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5087     ci[0] = 0;
5088     for (i=0; i<am; i++) {
5089       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5090     }
5091     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5092     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5093     k    = 0;
5094     for (i=0; i<am; i++) {
5095       ncols_o = bi[i+1] - bi[i];
5096       ncols_d = ai[i+1] - ai[i];
5097       /* off-diagonal portion of A */
5098       for (jo=0; jo<ncols_o; jo++) {
5099         col = cmap[*bj];
5100         if (col >= cstart) break;
5101         cj[k]   = col; bj++;
5102         ca[k++] = *ba++;
5103       }
5104       /* diagonal portion of A */
5105       for (j=0; j<ncols_d; j++) {
5106         cj[k]   = cstart + *aj++;
5107         ca[k++] = *aa++;
5108       }
5109       /* off-diagonal portion of A */
5110       for (j=jo; j<ncols_o; j++) {
5111         cj[k]   = cmap[*bj++];
5112         ca[k++] = *ba++;
5113       }
5114     }
5115     /* put together the new matrix */
5116     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5117     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5118     /* Since these are PETSc arrays, change flags to free them as necessary. */
5119     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5120     mat->free_a  = PETSC_TRUE;
5121     mat->free_ij = PETSC_TRUE;
5122     mat->nonew   = 0;
5123   } else if (scall == MAT_REUSE_MATRIX) {
5124     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5125 #if defined(PETSC_USE_DEVICE)
5126     (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5127 #endif
5128     ci = mat->i; cj = mat->j; cam = mat->a;
5129     for (i=0; i<am; i++) {
5130       /* off-diagonal portion of A */
5131       ncols_o = bi[i+1] - bi[i];
5132       for (jo=0; jo<ncols_o; jo++) {
5133         col = cmap[*bj];
5134         if (col >= cstart) break;
5135         *cam++ = *ba++; bj++;
5136       }
5137       /* diagonal portion of A */
5138       ncols_d = ai[i+1] - ai[i];
5139       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5140       /* off-diagonal portion of A */
5141       for (j=jo; j<ncols_o; j++) {
5142         *cam++ = *ba++; bj++;
5143       }
5144     }
5145   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5146   ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5147   ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5148   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5149   PetscFunctionReturn(0);
5150 }
5151 
5152 /*@
     MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5155 
5156     Not Collective
5157 
5158    Input Parameters:
5159 +    A - the matrix
5160 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5161 
5162    Output Parameters:
5163 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5164 -    A_loc - the local sequential matrix generated
5165 
5166     Level: developer
5167 
5168    Notes:
5169      This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering)
5170 
5171 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5172 
5173 @*/
5174 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5175 {
5176   PetscErrorCode ierr;
5177   Mat            Ao,Ad;
5178   const PetscInt *cmap;
5179   PetscMPIInt    size;
5180   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5181 
5182   PetscFunctionBegin;
5183   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5184   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5185   if (size == 1) {
5186     if (scall == MAT_INITIAL_MATRIX) {
5187       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5188       *A_loc = Ad;
5189     } else if (scall == MAT_REUSE_MATRIX) {
5190       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5191     }
5192     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5193     PetscFunctionReturn(0);
5194   }
5195   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5196   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5197   if (f) {
5198     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5199   } else {
5200     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5201     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5202     Mat_SeqAIJ        *c;
5203     PetscInt          *ai = a->i, *aj = a->j;
5204     PetscInt          *bi = b->i, *bj = b->j;
5205     PetscInt          *ci,*cj;
5206     const PetscScalar *aa,*ba;
5207     PetscScalar       *ca;
5208     PetscInt          i,j,am,dn,on;
5209 
5210     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5211     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5212     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5213     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5214     if (scall == MAT_INITIAL_MATRIX) {
5215       PetscInt k;
5216       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5217       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5218       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5219       ci[0] = 0;
5220       for (i=0,k=0; i<am; i++) {
5221         const PetscInt ncols_o = bi[i+1] - bi[i];
5222         const PetscInt ncols_d = ai[i+1] - ai[i];
5223         ci[i+1] = ci[i] + ncols_o + ncols_d;
5224         /* diagonal portion of A */
5225         for (j=0; j<ncols_d; j++,k++) {
5226           cj[k] = *aj++;
5227           ca[k] = *aa++;
5228         }
5229         /* off-diagonal portion of A */
5230         for (j=0; j<ncols_o; j++,k++) {
5231           cj[k] = dn + *bj++;
5232           ca[k] = *ba++;
5233         }
5234       }
5235       /* put together the new matrix */
5236       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5237       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5238       /* Since these are PETSc arrays, change flags to free them as necessary. */
5239       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5240       c->free_a  = PETSC_TRUE;
5241       c->free_ij = PETSC_TRUE;
5242       c->nonew   = 0;
5243       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5244     } else if (scall == MAT_REUSE_MATRIX) {
5245 #if defined(PETSC_HAVE_DEVICE)
5246       (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5247 #endif
5248       c  = (Mat_SeqAIJ*)(*A_loc)->data;
5249       ca = c->a;
5250       for (i=0; i<am; i++) {
5251         const PetscInt ncols_d = ai[i+1] - ai[i];
5252         const PetscInt ncols_o = bi[i+1] - bi[i];
5253         /* diagonal portion of A */
5254         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5255         /* off-diagonal portion of A */
5256         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5257       }
5258     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5259     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5260     ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr);
5261     if (glob) {
5262       PetscInt cst, *gidx;
5263 
5264       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5265       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5266       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5267       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5268       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5269     }
5270   }
5271   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5272   PetscFunctionReturn(0);
5273 }
5274 
5275 /*@C
5276      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5277 
5278     Not Collective
5279 
5280    Input Parameters:
5281 +    A - the matrix
5282 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5283 -    row, col - index sets of rows and columns to extract (or NULL)
5284 
5285    Output Parameter:
5286 .    A_loc - the local sequential matrix generated
5287 
5288     Level: developer
5289 
5290 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5291 
5292 @*/
5293 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5294 {
5295   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5296   PetscErrorCode ierr;
5297   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5298   IS             isrowa,iscola;
5299   Mat            *aloc;
5300   PetscBool      match;
5301 
5302   PetscFunctionBegin;
5303   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5304   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5305   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5306   if (!row) {
5307     start = A->rmap->rstart; end = A->rmap->rend;
5308     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5309   } else {
5310     isrowa = *row;
5311   }
5312   if (!col) {
5313     start = A->cmap->rstart;
5314     cmap  = a->garray;
5315     nzA   = a->A->cmap->n;
5316     nzB   = a->B->cmap->n;
5317     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5318     ncols = 0;
5319     for (i=0; i<nzB; i++) {
5320       if (cmap[i] < start) idx[ncols++] = cmap[i];
5321       else break;
5322     }
5323     imark = i;
5324     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5325     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5326     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5327   } else {
5328     iscola = *col;
5329   }
5330   if (scall != MAT_INITIAL_MATRIX) {
5331     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5332     aloc[0] = *A_loc;
5333   }
5334   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5335   if (!col) { /* attach global id of condensed columns */
5336     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5337   }
5338   *A_loc = aloc[0];
5339   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5340   if (!row) {
5341     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5342   }
5343   if (!col) {
5344     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5345   }
5346   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5347   PetscFunctionReturn(0);
5348 }
5349 
/*
 * Create a sequential AIJ matrix based on row indices: a whole row is extracted once its index is matched.
 * A row may be local or remote. The routine is designed to be scalable in memory, so that nothing is
 * sized by a global dimension.
 */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt              owner;
  PetscSFNode              *iremote,*oiremote;
  const PetscInt           *lrowindices;
  PetscErrorCode           ierr;
  PetscSF                  sf,osf;
  PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm                 comm;
  ISLocalToGlobalMapping   mapping;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
  /* plocalsize (locally owned rows of P) is the number of SF roots;
   * nrows (requested rows, local or remote) is the number of SF leaves
   * */
  ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
  ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
  ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
  for (i=0;i<nrows;i++) {
    /* For each requested row find its owning rank and the local index on that rank.
     * The row could be local or remote.
     * */
    owner = 0;
    lidx  = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  /* arrays are interleaved: [i*2+0] holds the diagonal-block datum, [i*2+1] the off-diagonal one */
  ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* prefix sums give the relative location of each row's data within pd->a / po->a */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
  /* 'r' means root, and 'l' means leaf */
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFree(roffsets);CHKERRQ(ierr);
  ierr = PetscFree(nrcols);CHKERRQ(ierr);
  /* pnnz[i] = total nonzeros of requested row i; ncol = max row length, used as the column bound below */
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure out the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
  ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
  ierr = PetscFree(pnnz);CHKERRQ(ierr);
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* build a second pair of SFs whose leaves are the individual nonzero entries */
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index   = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank    = owner;
      /* P_oth is seqAIJ, so ilocal needs to point into its single contiguous value array */
      ilocal[dntotalcols++]        = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank    = owner;
      oilocal[ontotalcols++]        = ntotalcols++;
    }
  }
  ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
  ierr = PetscFree(loffsets);CHKERRQ(ierr);
  ierr = PetscFree(nlcols);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
  /* Off diag */
  ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
  /* We operate on the matrix internal data for saving memory */
  ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
  /* Convert to global indices for diag matrix.
   * NOTE(review): pd->j and po->j are mutated in place and restored below; this presumably
   * assumes no concurrent access to P while this routine runs -- TODO confirm */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* We want P_oth to store global indices */
  ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
  /* Use memory scalable approach */
  ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  nout = 0;
  /* restore po->j to local numbering; every global index must map back (IS_GTOLM_DROP drops none here) */
  ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
  if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
  ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
  /* Exchange values */
  ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* Attach PetscSF objects to P_oth so that we can reuse them later */
  ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5519 
/*
 * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of the local A.
 * This supports MPIAIJ and MAIJ matrices.
 */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ            *p_oth;
  Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
  IS                    rows,map;
  PetscHMapI            hamp;
  PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm              comm;
  PetscSF               sf,osf;
  PetscBool             has;
  PetscErrorCode        ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys (each key is a garray entry collapsed by dof) */
    ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
    ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
    ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof;
      ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
      if (!has) {
        mapping[i] = count;
        ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
      } else {
        /* Current 'i' has the same key as the previous step (sortedness makes it the last one inserted) */
        mapping[i] = count-1;
      }
    }
    ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
    ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
    if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);
    /* extract and sort the unique keys; these are the rows of P to fetch */
    ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
    off = 0;
    ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
    ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
    ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
    /* In case the matrix was already created but users want to recreate the matrix */
    ierr = MatDestroy(P_oth);CHKERRQ(ierr);
    ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
    ierr = ISDestroy(&map);CHKERRQ(ierr);
    ierr = ISDestroy(&rows);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If the matrix was already created, we simply update values using the SF objects
     * that were attached to the matrix earlier.
     *  */
    ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
    ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
    if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5593 
5594 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5596 
5597     Collective on Mat
5598 
5599    Input Parameters:
5600 +    A - the first matrix in mpiaij format
5601 .    B - the second matrix in mpiaij format
5602 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5603 
5604    Input/Output Parameters:
+    rowb - index set of rows of B to extract (or NULL), modified on output
-    colb - index set of columns of B to extract (or NULL), modified on output
5607 
5608    Output Parameter:
5609 .    B_seq - the sequential matrix generated
5610 
5611     Level: developer
5612 
5613 @*/
5614 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5615 {
5616   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5617   PetscErrorCode ierr;
5618   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5619   IS             isrowb,iscolb;
5620   Mat            *bseq=NULL;
5621 
5622   PetscFunctionBegin;
5623   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5624     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5625   }
5626   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5627 
5628   if (scall == MAT_INITIAL_MATRIX) {
5629     start = A->cmap->rstart;
5630     cmap  = a->garray;
5631     nzA   = a->A->cmap->n;
5632     nzB   = a->B->cmap->n;
5633     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5634     ncols = 0;
5635     for (i=0; i<nzB; i++) {  /* row < local row index */
5636       if (cmap[i] < start) idx[ncols++] = cmap[i];
5637       else break;
5638     }
5639     imark = i;
5640     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5641     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5642     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5643     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5644   } else {
5645     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5646     isrowb  = *rowb; iscolb = *colb;
5647     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5648     bseq[0] = *B_seq;
5649   }
5650   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5651   *B_seq = bseq[0];
5652   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5653   if (!rowb) {
5654     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5655   } else {
5656     *rowb = isrowb;
5657   }
5658   if (!colb) {
5659     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5660   } else {
5661     *colb = iscolb;
5662   }
5663   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5664   PetscFunctionReturn(0);
5665 }
5666 
5667 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5669     of the OFF-DIAGONAL portion of local A
5670 
5671     Collective on Mat
5672 
5673    Input Parameters:
5674 +    A,B - the matrices in mpiaij format
5675 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5676 
   Output Parameters:
5678 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5679 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5680 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5681 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5682 
    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5685 
5686     Level: developer
5687 
5688 */
5689 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5690 {
5691   PetscErrorCode         ierr;
5692   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5693   Mat_SeqAIJ             *b_oth;
5694   VecScatter             ctx;
5695   MPI_Comm               comm;
5696   const PetscMPIInt      *rprocs,*sprocs;
5697   const PetscInt         *srow,*rstarts,*sstarts;
5698   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5699   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5700   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5701   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5702   PetscMPIInt            size,tag,rank,nreqs;
5703 
5704   PetscFunctionBegin;
5705   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5706   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5707 
5708   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5709     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5710   }
5711   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5712   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
5713 
5714   if (size == 1) {
5715     startsj_s = NULL;
5716     bufa_ptr  = NULL;
5717     *B_oth    = NULL;
5718     PetscFunctionReturn(0);
5719   }
5720 
5721   ctx = a->Mvctx;
5722   tag = ((PetscObject)ctx)->tag;
5723 
5724   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5725   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5726   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5727   ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr);
5728   ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr);
5729   rwaits = reqs;
5730   swaits = reqs + nrecvs;
5731 
5732   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5733   if (scall == MAT_INITIAL_MATRIX) {
5734     /* i-array */
5735     /*---------*/
5736     /*  post receives */
5737     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5738     for (i=0; i<nrecvs; i++) {
5739       rowlen = rvalues + rstarts[i]*rbs;
5740       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5741       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5742     }
5743 
5744     /* pack the outgoing message */
5745     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5746 
5747     sstartsj[0] = 0;
5748     rstartsj[0] = 0;
5749     len         = 0; /* total length of j or a array to be sent */
5750     if (nsends) {
5751       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5752       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5753     }
5754     for (i=0; i<nsends; i++) {
5755       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5756       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5757       for (j=0; j<nrows; j++) {
5758         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5759         for (l=0; l<sbs; l++) {
5760           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5761 
5762           rowlen[j*sbs+l] = ncols;
5763 
5764           len += ncols;
5765           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5766         }
5767         k++;
5768       }
5769       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5770 
5771       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5772     }
5773     /* recvs and sends of i-array are completed */
5774     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5775     ierr = PetscFree(svalues);CHKERRQ(ierr);
5776 
5777     /* allocate buffers for sending j and a arrays */
5778     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5779     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5780 
5781     /* create i-array of B_oth */
5782     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5783 
5784     b_othi[0] = 0;
5785     len       = 0; /* total length of j or a array to be received */
5786     k         = 0;
5787     for (i=0; i<nrecvs; i++) {
5788       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5789       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5790       for (j=0; j<nrows; j++) {
5791         b_othi[k+1] = b_othi[k] + rowlen[j];
5792         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5793         k++;
5794       }
5795       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5796     }
5797     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5798 
5799     /* allocate space for j and a arrrays of B_oth */
5800     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5801     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5802 
5803     /* j-array */
5804     /*---------*/
5805     /*  post receives of j-array */
5806     for (i=0; i<nrecvs; i++) {
5807       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5808       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5809     }
5810 
5811     /* pack the outgoing message j-array */
5812     if (nsends) k = sstarts[0];
5813     for (i=0; i<nsends; i++) {
5814       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5815       bufJ  = bufj+sstartsj[i];
5816       for (j=0; j<nrows; j++) {
5817         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5818         for (ll=0; ll<sbs; ll++) {
5819           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5820           for (l=0; l<ncols; l++) {
5821             *bufJ++ = cols[l];
5822           }
5823           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5824         }
5825       }
5826       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5827     }
5828 
5829     /* recvs and sends of j-array are completed */
5830     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5831   } else if (scall == MAT_REUSE_MATRIX) {
5832     sstartsj = *startsj_s;
5833     rstartsj = *startsj_r;
5834     bufa     = *bufa_ptr;
5835     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5836     b_otha   = b_oth->a;
5837 #if defined(PETSC_HAVE_DEVICE)
5838     (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU;
5839 #endif
5840   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5841 
5842   /* a-array */
5843   /*---------*/
5844   /*  post receives of a-array */
5845   for (i=0; i<nrecvs; i++) {
5846     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5847     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5848   }
5849 
5850   /* pack the outgoing message a-array */
5851   if (nsends) k = sstarts[0];
5852   for (i=0; i<nsends; i++) {
5853     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5854     bufA  = bufa+sstartsj[i];
5855     for (j=0; j<nrows; j++) {
5856       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5857       for (ll=0; ll<sbs; ll++) {
5858         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5859         for (l=0; l<ncols; l++) {
5860           *bufA++ = vals[l];
5861         }
5862         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5863       }
5864     }
5865     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5866   }
5867   /* recvs and sends of a-array are completed */
5868   if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5869   ierr = PetscFree(reqs);CHKERRQ(ierr);
5870 
5871   if (scall == MAT_INITIAL_MATRIX) {
5872     /* put together the new matrix */
5873     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5874 
5875     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5876     /* Since these are PETSc arrays, change flags to free them as necessary. */
5877     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5878     b_oth->free_a  = PETSC_TRUE;
5879     b_oth->free_ij = PETSC_TRUE;
5880     b_oth->nonew   = 0;
5881 
5882     ierr = PetscFree(bufj);CHKERRQ(ierr);
5883     if (!startsj_s || !bufa_ptr) {
5884       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5885       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5886     } else {
5887       *startsj_s = sstartsj;
5888       *startsj_r = rstartsj;
5889       *bufa_ptr  = bufa;
5890     }
5891   }
5892 
5893   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5894   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5895   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5896   PetscFunctionReturn(0);
5897 }
5898 
5899 /*@C
5900   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5901 
5902   Not Collective
5903 
5904   Input Parameter:
5905 . A - The matrix in mpiaij format
5906 
5907   Output Parameters:
5908 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5909 . colmap - A map from global column index to local index into lvec
5910 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5911 
5912   Level: developer
5913 
5914 @*/
5915 #if defined(PETSC_USE_CTABLE)
5916 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5917 #else
5918 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5919 #endif
5920 {
5921   Mat_MPIAIJ *a;
5922 
5923   PetscFunctionBegin;
5924   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5925   PetscValidPointer(lvec, 2);
5926   PetscValidPointer(colmap, 3);
5927   PetscValidPointer(multScatter, 4);
5928   a = (Mat_MPIAIJ*) A->data;
5929   if (lvec) *lvec = a->lvec;
5930   if (colmap) *colmap = a->colmap;
5931   if (multScatter) *multScatter = a->Mvctx;
5932   PetscFunctionReturn(0);
5933 }
5934 
5935 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5937 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5938 #if defined(PETSC_HAVE_MKL_SPARSE)
5939 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5940 #endif
5941 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5942 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5943 #if defined(PETSC_HAVE_ELEMENTAL)
5944 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5945 #endif
5946 #if defined(PETSC_HAVE_SCALAPACK)
5947 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5948 #endif
5949 #if defined(PETSC_HAVE_HYPRE)
5950 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5951 #endif
5952 #if defined(PETSC_HAVE_CUDA)
5953 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5954 #endif
5955 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5956 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5957 #endif
5958 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5959 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5960 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5961 
5962 /*
5963     Computes (B'*A')' since computing B*A directly is untenable
5964 
5965                n                       p                          p
5966         [             ]       [             ]         [                 ]
5967       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5968         [             ]       [             ]         [                 ]
5969 
5970 */
5971 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5972 {
5973   PetscErrorCode ierr;
5974   Mat            At,Bt,Ct;
5975 
5976   PetscFunctionBegin;
5977   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5978   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5979   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5980   ierr = MatDestroy(&At);CHKERRQ(ierr);
5981   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5982   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5983   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5984   PetscFunctionReturn(0);
5985 }
5986 
5987 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5988 {
5989   PetscErrorCode ierr;
5990   PetscBool      cisdense;
5991 
5992   PetscFunctionBegin;
5993   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5994   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5995   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5996   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5997   if (!cisdense) {
5998     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5999   }
6000   ierr = MatSetUp(C);CHKERRQ(ierr);
6001 
6002   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6003   PetscFunctionReturn(0);
6004 }
6005 
6006 /* ----------------------------------------------------------------*/
6007 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6008 {
6009   Mat_Product *product = C->product;
6010   Mat         A = product->A,B=product->B;
6011 
6012   PetscFunctionBegin;
6013   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6014     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6015 
6016   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6017   C->ops->productsymbolic = MatProductSymbolic_AB;
6018   PetscFunctionReturn(0);
6019 }
6020 
6021 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6022 {
6023   PetscErrorCode ierr;
6024   Mat_Product    *product = C->product;
6025 
6026   PetscFunctionBegin;
6027   if (product->type == MATPRODUCT_AB) {
6028     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6029   }
6030   PetscFunctionReturn(0);
6031 }
6032 /* ----------------------------------------------------------------*/
6033 
6034 /*MC
6035    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6036 
6037    Options Database Keys:
6038 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6039 
6040    Level: beginner
6041 
6042    Notes:
6043     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6044     in this case the values associated with the rows and columns one passes in are set to zero
6045     in the matrix
6046 
    MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6048     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6049 
6050 .seealso: MatCreateAIJ()
6051 M*/
6052 
/*
   MatCreate_MPIAIJ - Constructor for the MATMPIAIJ matrix type.

   Installs the MPIAIJ method table, creates the Mat_MPIAIJ context, sets up the
   stash used to buffer off-process entries set via MatSetValues(), and registers
   the conversion and matrix-product routines available for this type.  No storage
   for matrix entries is allocated here; that happens at preallocation/assembly time.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);

  /* attach the MPIAIJ-specific context and copy in the MPIAIJ function table */
  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map; built at assembly time */
  b->garray      = NULL; /* global indices of off-diagonal columns; built at assembly time */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* register the type-specific operations and the conversions/products this type supports */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  /* NOTE(review): the transpose(aij)*aij product registration below is compiled only when hypre
     is available — confirm it is really hypre-specific rather than meant to be unconditional */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
6131 
6132 /*@C
6133      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6134          and "off-diagonal" part of the matrix in CSR format.
6135 
6136    Collective
6137 
6138    Input Parameters:
6139 +  comm - MPI communicator
6140 .  m - number of local rows (Cannot be PETSC_DECIDE)
6141 .  n - This value should be the same as the local size used in creating the
6142        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6143        calculated if N is given) For square matrices n is almost always m.
6144 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6145 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6146 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6147 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6148 .   a - matrix values
6149 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6150 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6151 -   oa - matrix values
6152 
6153    Output Parameter:
6154 .   mat - the matrix
6155 
6156    Level: advanced
6157 
6158    Notes:
6159        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6160        must free the arrays once the matrix has been destroyed and not before.
6161 
6162        The i and j indices are 0 based
6163 
6164        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6165 
6166        This sets local rows and cannot be used to set off-processor values.
6167 
6168        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6169        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6170        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6171        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6172        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6173        communication if it is known that only local entries will be set.
6174 
6175 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6176           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6177 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  /* both CSR row arrays must start at 0; the arrays are used in place, not copied */
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  /* the split arrays take the place of a preallocation */
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* wrap the diagonal block (local column indices) and the off-diagonal block
     (global column indices, hence width cmap->N) around the caller's arrays */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  /* only local entries exist, so assembly needs no communication; afterwards lock
     the nonzero pattern since the caller owns the (fixed-size) arrays */
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
6207 
6208 /*
6209     Special version for direct calls from Fortran
6210 */
6211 #include <petsc/private/fortranimpl.h>
6212 
6213 /* Change these macros so can be used in void function */
6214 #undef CHKERRQ
6215 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6216 #undef SETERRQ2
6217 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6218 #undef SETERRQ3
6219 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6220 #undef SETERRQ
6221 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6222 
6223 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6224 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6225 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6226 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6227 #else
6228 #endif
6229 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6230 {
6231   Mat            mat  = *mmat;
6232   PetscInt       m    = *mm, n = *mn;
6233   InsertMode     addv = *maddv;
6234   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6235   PetscScalar    value;
6236   PetscErrorCode ierr;
6237 
6238   MatCheckPreallocated(mat,1);
6239   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6240   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6241   {
6242     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6243     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6244     PetscBool roworiented = aij->roworiented;
6245 
6246     /* Some Variables required in the macro */
6247     Mat        A                    = aij->A;
6248     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6249     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6250     MatScalar  *aa                  = a->a;
6251     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6252     Mat        B                    = aij->B;
6253     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6254     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6255     MatScalar  *ba                  = b->a;
6256     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6257      * cannot use "#if defined" inside a macro. */
6258     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6259 
6260     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6261     PetscInt  nonew = a->nonew;
6262     MatScalar *ap1,*ap2;
6263 
6264     PetscFunctionBegin;
6265     for (i=0; i<m; i++) {
6266       if (im[i] < 0) continue;
6267       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6268       if (im[i] >= rstart && im[i] < rend) {
6269         row      = im[i] - rstart;
6270         lastcol1 = -1;
6271         rp1      = aj + ai[row];
6272         ap1      = aa + ai[row];
6273         rmax1    = aimax[row];
6274         nrow1    = ailen[row];
6275         low1     = 0;
6276         high1    = nrow1;
6277         lastcol2 = -1;
6278         rp2      = bj + bi[row];
6279         ap2      = ba + bi[row];
6280         rmax2    = bimax[row];
6281         nrow2    = bilen[row];
6282         low2     = 0;
6283         high2    = nrow2;
6284 
6285         for (j=0; j<n; j++) {
6286           if (roworiented) value = v[i*n+j];
6287           else value = v[i+j*m];
6288           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6289           if (in[j] >= cstart && in[j] < cend) {
6290             col = in[j] - cstart;
6291             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6292 #if defined(PETSC_HAVE_DEVICE)
6293             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6294 #endif
6295           } else if (in[j] < 0) continue;
6296           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6297             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6298             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6299           } else {
6300             if (mat->was_assembled) {
6301               if (!aij->colmap) {
6302                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6303               }
6304 #if defined(PETSC_USE_CTABLE)
6305               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6306               col--;
6307 #else
6308               col = aij->colmap[in[j]] - 1;
6309 #endif
6310               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6311                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6312                 col  =  in[j];
6313                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6314                 B        = aij->B;
6315                 b        = (Mat_SeqAIJ*)B->data;
6316                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6317                 rp2      = bj + bi[row];
6318                 ap2      = ba + bi[row];
6319                 rmax2    = bimax[row];
6320                 nrow2    = bilen[row];
6321                 low2     = 0;
6322                 high2    = nrow2;
6323                 bm       = aij->B->rmap->n;
6324                 ba       = b->a;
6325                 inserted = PETSC_FALSE;
6326               }
6327             } else col = in[j];
6328             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6329 #if defined(PETSC_HAVE_DEVICE)
6330             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6331 #endif
6332           }
6333         }
6334       } else if (!aij->donotstash) {
6335         if (roworiented) {
6336           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6337         } else {
6338           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6339         }
6340       }
6341     }
6342   }
6343   PetscFunctionReturnVoid();
6344 }
6345 
/* Data attached to C->product->data by MatProductSymbolic_MPIAIJBACKEND():
   the MPIAIJ product C (AB, AtB, or PtAP) is assembled from a series of local
   (sequential) intermediate products, whose values are scattered into C via
   COO (coordinate format) insertion in MatProductNumeric_MPIAIJBACKEND(). */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt  cp;     /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r;
  PetscScalar *bufa;
  Mat         P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i]; own[0] owns the single backing array */
  PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i]; off[0] owns the single backing array */
  PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;    /* merge B's diag/off-diag blocks before multiplying (AB only) */
  PetscBool P_oth_bind; /* bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;
6376 
6377 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6378 {
6379   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6380   PetscInt            i;
6381   PetscErrorCode      ierr;
6382 
6383   PetscFunctionBegin;
6384   ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
6385   ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
6386   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
6387   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
6388   ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
6389   ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
6390   ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
6391   for (i = 0; i < mmdata->cp; i++) {
6392     ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
6393   }
6394   ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr);
6395   ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
6396   ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
6397   ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
6398   ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
6399   ierr = PetscFree(mmdata);CHKERRQ(ierr);
6400   PetscFunctionReturn(0);
6401 }
6402 
6403 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6404 {
6405   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6406   PetscErrorCode ierr;
6407 
6408   PetscFunctionBegin;
6409   ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
6410   if (f) {
6411     ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
6412   } else {
6413     const PetscScalar *vv;
6414 
6415     ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
6416     if (n && idx) {
6417       PetscScalar    *w = v;
6418       const PetscInt *oi = idx;
6419       PetscInt       j;
6420 
6421       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6422     } else {
6423       ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
6424     }
6425     ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
6426   }
6427   PetscFunctionReturn(0);
6428 }
6429 
/*
  MatProductNumeric_MPIAIJBACKEND - numeric phase of the backend MatProduct:
  recompute the intermediate local products, gather their values into the COO
  buffers laid out by the symbolic phase, exchange off-process contributions,
  and insert everything into C with MatSetValuesCOO().

  Input/Output Parameter:
.  C - the product matrix, with a MatMatMPIAIJBACKEND attached to C->product->data

  The accumulation order of n_d/n_o below must match the index layout built in
  MatProductSymbolic_MPIAIJBACKEND() (off[]/own[] segments per intermediate Mat).
*/
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o; /* running offsets into coo_v (on-proc) and coo_w (off-proc) */
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
    }
    if (mmdata->Bloc) {
      ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
    }
  }
  /* reusesym only skips the first numeric call right after the symbolic phase */
  mmdata->reusesym = PETSC_FALSE;

  /* recompute each intermediate local product */
  for (i = 0; i < mmdata->cp; i++) {
    if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
  }
  /* gather values: off[i+1]-off[i] entries go to other processes (via coo_w),
     the rest stay local (coo_v); temporary products contribute nothing directly */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i];

      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      /* all nonzeros of this product are local: copy its full value array */
      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* received off-process values are appended after the n_d local ones in coo_v */
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
  }
  ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
6479 
6480 /* Support for Pt * A, A * P, or Pt * A * P */
6481 #define MAX_NUMBER_INTERMEDIATE 4
6482 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6483 {
6484   Mat_Product            *product = C->product;
6485   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6486   Mat_MPIAIJ             *a,*p;
6487   MatMatMPIAIJBACKEND    *mmdata;
6488   ISLocalToGlobalMapping P_oth_l2g = NULL;
6489   IS                     glob = NULL;
6490   const char             *prefix;
6491   char                   pprefix[256];
6492   const PetscInt         *globidx,*P_oth_idx;
6493   PetscInt               i,j,cp,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j;
6494   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6495                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6496                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6497   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6498 
6499   MatProductType         ptype;
6500   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6501   PetscMPIInt            size;
6502   PetscErrorCode         ierr;
6503 
6504   PetscFunctionBegin;
6505   MatCheckProduct(C,1);
6506   if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6507   ptype = product->type;
6508   if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = MATPRODUCT_AB;
6509   switch (ptype) {
6510   case MATPRODUCT_AB:
6511     A = product->A;
6512     P = product->B;
6513     m = A->rmap->n;
6514     n = P->cmap->n;
6515     M = A->rmap->N;
6516     N = P->cmap->N;
6517     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6518     break;
6519   case MATPRODUCT_AtB:
6520     P = product->A;
6521     A = product->B;
6522     m = P->cmap->n;
6523     n = A->cmap->n;
6524     M = P->cmap->N;
6525     N = A->cmap->N;
6526     hasoffproc = PETSC_TRUE;
6527     break;
6528   case MATPRODUCT_PtAP:
6529     A = product->A;
6530     P = product->B;
6531     m = P->cmap->n;
6532     n = P->cmap->n;
6533     M = P->cmap->N;
6534     N = P->cmap->N;
6535     hasoffproc = PETSC_TRUE;
6536     break;
6537   default:
6538     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6539   }
6540   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
6541   if (size == 1) hasoffproc = PETSC_FALSE;
6542 
6543   /* defaults */
6544   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6545     mp[i]    = NULL;
6546     mptmp[i] = PETSC_FALSE;
6547     rmapt[i] = -1;
6548     cmapt[i] = -1;
6549     rmapa[i] = NULL;
6550     cmapa[i] = NULL;
6551   }
6552 
6553   /* customization */
6554   ierr = PetscNew(&mmdata);CHKERRQ(ierr);
6555   mmdata->reusesym = product->api_user;
6556   if (ptype == MATPRODUCT_AB) {
6557     if (product->api_user) {
6558       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6559       ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6560       ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6561       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6562     } else {
6563       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6564       ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6565       ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6566       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6567     }
6568   } else if (ptype == MATPRODUCT_PtAP) {
6569     if (product->api_user) {
6570       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6571       ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6572       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6573     } else {
6574       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6575       ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6576       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6577     }
6578   }
6579   a = (Mat_MPIAIJ*)A->data;
6580   p = (Mat_MPIAIJ*)P->data;
6581   ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
6582   ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
6583   ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
6584   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6585   ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);
6586 
6587   cp   = 0;
6588   switch (ptype) {
6589   case MATPRODUCT_AB: /* A * P */
6590     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6591 
6592     /* A_diag * P_local (merged or not) */
6593     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
6594       /* P is product->B */
6595       ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6596       ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6597       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6598       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6599       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6600       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6601       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6602       mp[cp]->product->api_user = product->api_user;
6603       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6604       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6605       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6606       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6607       rmapt[cp] = 1;
6608       cmapt[cp] = 2;
6609       cmapa[cp] = globidx;
6610       mptmp[cp] = PETSC_FALSE;
6611       cp++;
6612     } else { /* A_diag * P_diag and A_diag * P_off */
6613       ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
6614       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6615       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6616       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6617       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6618       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6619       mp[cp]->product->api_user = product->api_user;
6620       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6621       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6622       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6623       rmapt[cp] = 1;
6624       cmapt[cp] = 1;
6625       mptmp[cp] = PETSC_FALSE;
6626       cp++;
6627       ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
6628       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6629       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6630       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6631       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6632       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6633       mp[cp]->product->api_user = product->api_user;
6634       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6635       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6636       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6637       rmapt[cp] = 1;
6638       cmapt[cp] = 2;
6639       cmapa[cp] = p->garray;
6640       mptmp[cp] = PETSC_FALSE;
6641       cp++;
6642     }
6643 
6644     /* A_off * P_other */
6645     if (mmdata->P_oth) {
6646       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */
6647       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6648       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6649       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6650       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6651       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6652       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6653       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6654       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6655       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6656       mp[cp]->product->api_user = product->api_user;
6657       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6658       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6659       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6660       rmapt[cp] = 1;
6661       cmapt[cp] = 2;
6662       cmapa[cp] = P_oth_idx;
6663       mptmp[cp] = PETSC_FALSE;
6664       cp++;
6665     }
6666     break;
6667 
6668   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
6669     /* A is product->B */
6670     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6671     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
6672       ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6673       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6674       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6675       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6676       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6677       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6678       mp[cp]->product->api_user = product->api_user;
6679       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6680       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6681       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6682       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6683       rmapt[cp] = 2;
6684       rmapa[cp] = globidx;
6685       cmapt[cp] = 2;
6686       cmapa[cp] = globidx;
6687       mptmp[cp] = PETSC_FALSE;
6688       cp++;
6689     } else {
6690       ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6691       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6692       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6693       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6694       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6695       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6696       mp[cp]->product->api_user = product->api_user;
6697       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6698       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6699       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6700       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6701       rmapt[cp] = 1;
6702       cmapt[cp] = 2;
6703       cmapa[cp] = globidx;
6704       mptmp[cp] = PETSC_FALSE;
6705       cp++;
6706       ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6707       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6708       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6709       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6710       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6711       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6712       mp[cp]->product->api_user = product->api_user;
6713       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6714       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6715       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6716       rmapt[cp] = 2;
6717       rmapa[cp] = p->garray;
6718       cmapt[cp] = 2;
6719       cmapa[cp] = globidx;
6720       mptmp[cp] = PETSC_FALSE;
6721       cp++;
6722     }
6723     break;
6724   case MATPRODUCT_PtAP:
6725     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6726     /* P is product->B */
6727     ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6728     ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6729     ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
6730     ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6731     ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6732     ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6733     ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6734     mp[cp]->product->api_user = product->api_user;
6735     ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6736     if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6737     ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6738     ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6739     rmapt[cp] = 2;
6740     rmapa[cp] = globidx;
6741     cmapt[cp] = 2;
6742     cmapa[cp] = globidx;
6743     mptmp[cp] = PETSC_FALSE;
6744     cp++;
6745     if (mmdata->P_oth) {
6746       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6747       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6748       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6749       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6750       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6751       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6752       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6753       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6754       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6755       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6756       mp[cp]->product->api_user = product->api_user;
6757       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6758       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6759       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6760       mptmp[cp] = PETSC_TRUE;
6761       cp++;
6762       ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
6763       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6764       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6765       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6766       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6767       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6768       mp[cp]->product->api_user = product->api_user;
6769       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6770       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6771       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6772       rmapt[cp] = 2;
6773       rmapa[cp] = globidx;
6774       cmapt[cp] = 2;
6775       cmapa[cp] = P_oth_idx;
6776       mptmp[cp] = PETSC_FALSE;
6777       cp++;
6778     }
6779     break;
6780   default:
6781     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6782   }
6783   /* sanity check */
6784   if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);
6785 
6786   ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr);
6787   for (i = 0; i < cp; i++) {
6788     mmdata->mp[i]    = mp[i];
6789     mmdata->mptmp[i] = mptmp[i];
6790   }
6791   mmdata->cp = cp;
6792   C->product->data       = mmdata;
6793   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
6794   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
6795 
6796   /* memory type */
6797   mmdata->mtype = PETSC_MEMTYPE_HOST;
6798   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
6799   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
6800   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  // enable the line below when MatSeqAIJCopySubArray_SeqAIJKokkos is implemented
6802   //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;
6803 
6804   /* prepare coo coordinates for values insertion */
6805 
6806   /* count total nonzeros of those intermediate seqaij Mats
6807     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
6808     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
6809     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
6810   */
6811   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
6812     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6813     if (mptmp[cp]) continue;
6814     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
6815       const PetscInt *rmap = rmapa[cp];
6816       const PetscInt mr = mp[cp]->rmap->n;
6817       const PetscInt rs = C->rmap->rstart;
6818       const PetscInt re = C->rmap->rend;
6819       const PetscInt *ii  = mm->i;
6820       for (i = 0; i < mr; i++) {
6821         const PetscInt gr = rmap[i];
6822         const PetscInt nz = ii[i+1] - ii[i];
6823         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
6824         else ncoo_oown += nz; /* this row is local */
6825       }
6826     } else ncoo_d += mm->nz;
6827   }
6828 
6829   /*
6830     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
6831 
    ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
6833 
    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
6835 
6836     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
6837     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
6838     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
6839 
6840     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
6842   */
6843   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */
6844   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);
6845 
6846   /* gather (i,j) of nonzeros inserted by remote procs */
6847   if (hasoffproc) {
6848     PetscSF  msf;
6849     PetscInt ncoo2,*coo_i2,*coo_j2;
6850 
6851     ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
6852     ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
6853     ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */
6854 
6855     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
6856       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6857       PetscInt   *idxoff = mmdata->off[cp];
6858       PetscInt   *idxown = mmdata->own[cp];
6859       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
6860         const PetscInt *rmap = rmapa[cp];
6861         const PetscInt *cmap = cmapa[cp];
6862         const PetscInt *ii  = mm->i;
6863         PetscInt       *coi = coo_i + ncoo_o;
6864         PetscInt       *coj = coo_j + ncoo_o;
6865         const PetscInt mr = mp[cp]->rmap->n;
6866         const PetscInt rs = C->rmap->rstart;
6867         const PetscInt re = C->rmap->rend;
6868         const PetscInt cs = C->cmap->rstart;
6869         for (i = 0; i < mr; i++) {
6870           const PetscInt *jj = mm->j + ii[i];
6871           const PetscInt gr  = rmap[i];
6872           const PetscInt nz  = ii[i+1] - ii[i];
6873           if (gr < rs || gr >= re) { /* this is an offproc row */
6874             for (j = ii[i]; j < ii[i+1]; j++) {
6875               *coi++ = gr;
6876               *idxoff++ = j;
6877             }
6878             if (!cmapt[cp]) { /* already global */
6879               for (j = 0; j < nz; j++) *coj++ = jj[j];
6880             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6881               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6882             } else { /* offdiag */
6883               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6884             }
6885             ncoo_o += nz;
6886           } else { /* this is a local row */
6887             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
6888           }
6889         }
6890       }
6891       mmdata->off[cp + 1] = idxoff;
6892       mmdata->own[cp + 1] = idxown;
6893     }
6894 
6895     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6896     ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
6897     ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
6898     ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr);
6899     ncoo = ncoo_d + ncoo_oown + ncoo2;
6900     ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
6901     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */
6902     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6903     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6904     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6905     ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6906     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
6907     ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
6908     coo_i = coo_i2;
6909     coo_j = coo_j2;
6910   } else { /* no offproc values insertion */
6911     ncoo = ncoo_d;
6912     ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);
6913 
6914     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6915     ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
6916     ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
6917   }
6918   mmdata->hasoffproc = hasoffproc;
6919 
6920    /* gather (i,j) of nonzeros inserted locally */
6921   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
6922     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
6923     PetscInt       *coi = coo_i + ncoo_d;
6924     PetscInt       *coj = coo_j + ncoo_d;
6925     const PetscInt *jj  = mm->j;
6926     const PetscInt *ii  = mm->i;
6927     const PetscInt *cmap = cmapa[cp];
6928     const PetscInt *rmap = rmapa[cp];
6929     const PetscInt mr = mp[cp]->rmap->n;
6930     const PetscInt rs = C->rmap->rstart;
6931     const PetscInt re = C->rmap->rend;
6932     const PetscInt cs = C->cmap->rstart;
6933 
6934     if (mptmp[cp]) continue;
6935     if (rmapt[cp] == 1) { /* consecutive rows */
6936       /* fill coo_i */
6937       for (i = 0; i < mr; i++) {
6938         const PetscInt gr = i + rs;
6939         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
6940       }
6941       /* fill coo_j */
6942       if (!cmapt[cp]) { /* type-0, already global */
6943         ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
6944       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
6945         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
6946       } else { /* type-2, local to global for sparse columns */
6947         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
6948       }
6949       ncoo_d += mm->nz;
6950     } else if (rmapt[cp] == 2) { /* sparse rows */
6951       for (i = 0; i < mr; i++) {
6952         const PetscInt *jj = mm->j + ii[i];
6953         const PetscInt gr  = rmap[i];
6954         const PetscInt nz  = ii[i+1] - ii[i];
6955         if (gr >= rs && gr < re) { /* local rows */
6956           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
6957           if (!cmapt[cp]) { /* type-0, already global */
6958             for (j = 0; j < nz; j++) *coj++ = jj[j];
6959           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6960             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6961           } else { /* type-2, local to global for sparse columns */
6962             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6963           }
6964           ncoo_d += nz;
6965         }
6966       }
6967     }
6968   }
6969   if (glob) {
6970     ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
6971   }
6972   ierr = ISDestroy(&glob);CHKERRQ(ierr);
6973   if (P_oth_l2g) {
6974     ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6975   }
6976   ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
6977   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
6978   ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);
6979 
6980   /* preallocate with COO data */
6981   ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
6982   ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6983   PetscFunctionReturn(0);
6984 }
6985 
6986 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
6987 {
6988   Mat_Product    *product = mat->product;
6989   PetscErrorCode ierr;
6990 #if defined(PETSC_HAVE_DEVICE)
6991   PetscBool      match = PETSC_FALSE;
6992   PetscBool      usecpu = PETSC_FALSE;
6993 #else
6994   PetscBool      match = PETSC_TRUE;
6995 #endif
6996 
6997   PetscFunctionBegin;
6998   MatCheckProduct(mat,1);
6999 #if defined(PETSC_HAVE_DEVICE)
7000   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7001     ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
7002   }
7003   if (match) { /* we can always fallback to the CPU if requested */
7004     switch (product->type) {
7005     case MATPRODUCT_AB:
7006       if (product->api_user) {
7007         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
7008         ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7009         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7010       } else {
7011         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
7012         ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7013         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7014       }
7015       break;
7016     case MATPRODUCT_AtB:
7017       if (product->api_user) {
7018         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
7019         ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7020         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7021       } else {
7022         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
7023         ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7024         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7025       }
7026       break;
7027     case MATPRODUCT_PtAP:
7028       if (product->api_user) {
7029         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
7030         ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7031         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7032       } else {
7033         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
7034         ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7035         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7036       }
7037       break;
7038     default:
7039       break;
7040     }
7041     match = (PetscBool)!usecpu;
7042   }
7043 #endif
7044   if (match) {
7045     switch (product->type) {
7046     case MATPRODUCT_AB:
7047     case MATPRODUCT_AtB:
7048     case MATPRODUCT_PtAP:
7049       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7050       break;
7051     default:
7052       break;
7053     }
7054   }
7055   /* fallback to MPIAIJ ops */
7056   if (!mat->ops->productsymbolic) {
7057     ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
7058   }
7059   PetscFunctionReturn(0);
7060 }
7061