xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 65f8aed5f7eaa1e2ef2ddeffe666264e0669c876)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to using inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
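/*
   Example (an illustrative sketch, not part of this source; the sizes and the per-row
   nonzero estimates below are assumptions): creating an AIJ matrix and calling both
   preallocation routines as recommended above.  The routine that does not match the
   communicator size is ignored, so calling both is safe.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(A,MATAIJ);
     MatSetFromOptions(A);
     MatSeqAIJSetPreallocation(A,5,NULL);            (used on a one-process communicator)
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);     (used on a multi-process communicator)

   Values are then inserted with MatSetValues() and the matrix is assembled with
   MatAssemblyBegin()/MatAssemblyEnd().
*/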
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
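/*
   Example (an illustrative sketch): the CRL variant is usually selected at run time with
   -mat_type aijcrl via MatSetFromOptions(); equivalently, in code,

     MatSetType(A,MATAIJCRL);

   Preallocation and assembly are then done exactly as for MATAIJ above.
*/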
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = 0;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
233     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
234 
235     Only for square matrices
236 
237     Used by a preconditioner, hence PETSC_EXTERN
238 */
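/*
   Usage sketch (illustrative, with assumed variable names): rank 0 holds a square SeqAIJ
   matrix gmat; every rank passes the number of rows it is to own and receives its share
   in a parallel AIJ matrix.

     Mat      dist;
     PetscInt mlocal = ...;                           (local row count; must sum to the global size)
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_INITIAL_MATRIX,&dist);
     ...
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_REUSE_MATRIX,&dist);

   With MAT_REUSE_MATRIX only the numerical values are moved from rank 0; the nonzero
   pattern of dist must be unchanged from the MAT_INITIAL_MATRIX call.
*/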
239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
240 {
241   PetscMPIInt    rank,size;
242   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
243   PetscErrorCode ierr;
244   Mat            mat;
245   Mat_SeqAIJ     *gmata;
246   PetscMPIInt    tag;
247   MPI_Status     status;
248   PetscBool      aij;
249   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
254   if (!rank) {
255     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
256     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
257   }
258   if (reuse == MAT_INITIAL_MATRIX) {
259     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
260     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
261     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
262     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
263     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
264     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
265     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
266     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
267     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
268 
269     rowners[0] = 0;
270     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
271     rstart = rowners[rank];
272     rend   = rowners[rank+1];
273     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
274     if (!rank) {
275       gmata = (Mat_SeqAIJ*) gmat->data;
276       /* send row lengths to all processors */
277       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
278       for (i=1; i<size; i++) {
279         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
280       }
281       /* determine the diagonal and off-diagonal counts for each row */
282       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
283       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
284       jj   = 0;
285       for (i=0; i<m; i++) {
286         for (j=0; j<dlens[i]; j++) {
287           if (gmata->j[jj] < rstart) ld[i]++;
288           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
289           jj++;
290         }
291       }
292       /* send column indices to other processes */
293       for (i=1; i<size; i++) {
294         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
295         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
296         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297       }
298 
299       /* send numerical values to other processes */
300       for (i=1; i<size; i++) {
301         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
302         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
303       }
304       gmataa = gmata->a;
305       gmataj = gmata->j;
306 
307     } else {
308       /* receive row lengths */
309       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* receive column indices */
311       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
313       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
314       /* determine the diagonal and off-diagonal counts for each row */
315       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
316       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
317       jj   = 0;
318       for (i=0; i<m; i++) {
319         for (j=0; j<dlens[i]; j++) {
320           if (gmataj[jj] < rstart) ld[i]++;
321           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
322           jj++;
323         }
324       }
325       /* receive numerical values */
326       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
327       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
328     }
329     /* set preallocation */
330     for (i=0; i<m; i++) {
331       dlens[i] -= olens[i];
332     }
333     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
334     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
335 
336     for (i=0; i<m; i++) {
337       dlens[i] += olens[i];
338     }
339     cnt = 0;
340     for (i=0; i<m; i++) {
341       row  = rstart + i;
342       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
343       cnt += dlens[i];
344     }
345     if (rank) {
346       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
347     }
348     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
349     ierr = PetscFree(rowners);CHKERRQ(ierr);
350 
351     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
352 
353     *inmat = mat;
354   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
355     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
356     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
357     mat  = *inmat;
358     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
359     if (!rank) {
360       /* send numerical values to other processes */
361       gmata  = (Mat_SeqAIJ*) gmat->data;
362       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
363       gmataa = gmata->a;
364       for (i=1; i<size; i++) {
365         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
366         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
367       }
368       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
369     } else {
370       /* receive numerical values from process 0 */
371       nz   = Ad->nz + Ao->nz;
372       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
373       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
374     }
375     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
376     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
377     ad = Ad->a;
378     ao = Ao->a;
379     if (mat->rmap->n) {
380       i  = 0;
381       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     for (i=1; i<mat->rmap->n; i++) {
385       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
386       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
387     }
388     i--;
389     if (mat->rmap->n) {
390       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
391     }
392     if (rank) {
393       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
394     }
395   }
396   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   PetscFunctionReturn(0);
399 }
400 
401 /*
402   Local utility routine that creates a mapping from the global column
403 number to the local number in the off-diagonal part of the local
404 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
405 a slightly higher hash table cost; without it, it is not scalable (each processor
406 has an order-N integer array, but access is fast).
407 */
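/*
   Lookup sketch: how the colmap built below is consulted elsewhere in this file to map a
   global column index gcol to a local column index lcol of the off-diagonal block B
   (stored values are shifted by +1 so that 0 can mean "not present"):

   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&lcol);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
     if (lcol < 0) ...                                (the column has no entry in B on this process)
*/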
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
428 
429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
430 { \
431     if (col <= lastcol1)  low1 = 0;     \
432     else                 high1 = nrow1; \
433     lastcol1 = col;\
434     while (high1-low1 > 5) { \
435       t = (low1+high1)/2; \
436       if (rp1[t] > col) high1 = t; \
437       else              low1  = t; \
438     } \
439       for (_i=low1; _i<high1; _i++) { \
440         if (rp1[_i] > col) break; \
441         if (rp1[_i] == col) { \
442           if (addv == ADD_VALUES) ap1[_i] += value;   \
443           else                    ap1[_i] = value; \
444           goto a_noinsert; \
445         } \
446       }  \
447       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
448       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
449       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
450       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
451       N = nrow1++ - 1; a->nz++; high1++; \
452       /* shift up all the later entries in this row */ \
453       for (ii=N; ii>=_i; ii--) { \
454         rp1[ii+1] = rp1[ii]; \
455         ap1[ii+1] = ap1[ii]; \
456       } \
457       rp1[_i] = col;  \
458       ap1[_i] = value;  \
459       A->nonzerostate++;\
460       a_noinsert: ; \
461       ailen[row] = nrow1; \
462 }
463 
464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
465   { \
466     if (col <= lastcol2) low2 = 0;                        \
467     else high2 = nrow2;                                   \
468     lastcol2 = col;                                       \
469     while (high2-low2 > 5) {                              \
470       t = (low2+high2)/2;                                 \
471       if (rp2[t] > col) high2 = t;                        \
472       else             low2  = t;                         \
473     }                                                     \
474     for (_i=low2; _i<high2; _i++) {                       \
475       if (rp2[_i] > col) break;                           \
476       if (rp2[_i] == col) {                               \
477         if (addv == ADD_VALUES) ap2[_i] += value;         \
478         else                    ap2[_i] = value;          \
479         goto b_noinsert;                                  \
480       }                                                   \
481     }                                                     \
482     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
483     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
484     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
485     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
486     N = nrow2++ - 1; b->nz++; high2++;                    \
487     /* shift up all the later entries in this row */      \
488     for (ii=N; ii>=_i; ii--) {                            \
489       rp2[ii+1] = rp2[ii];                                \
490       ap2[ii+1] = ap2[ii];                                \
491     }                                                     \
492     rp2[_i] = col;                                        \
493     ap2[_i] = value;                                      \
494     B->nonzerostate++;                                    \
495     b_noinsert: ;                                         \
496     bilen[row] = nrow2;                                   \
497   }
498 
499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
500 {
501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
502   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
503   PetscErrorCode ierr;
504   PetscInt       l,*garray = mat->garray,diag;
505 
506   PetscFunctionBegin;
507   /* code only works for square matrices A */
508 
509   /* find size of row to the left of the diagonal part */
510   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
511   row  = row - diag;
512   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
513     if (garray[b->j[b->i[row]+l]] > diag) break;
514   }
515   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* diagonal part */
518   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
519 
520   /* right of diagonal part */
521   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
524 
525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
526 {
527   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
528   PetscScalar    value;
529   PetscErrorCode ierr;
530   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
531   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
532   PetscBool      roworiented = aij->roworiented;
533 
534   /* Some Variables required in the macro */
535   Mat        A                 = aij->A;
536   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
537   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
538   MatScalar  *aa               = a->a;
539   PetscBool  ignorezeroentries = a->ignorezeroentries;
540   Mat        B                 = aij->B;
541   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
542   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
543   MatScalar  *ba               = b->a;
544 
545   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
546   PetscInt  nonew;
547   MatScalar *ap1,*ap2;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552 #if defined(PETSC_USE_DEBUG)
553     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
554 #endif
555     if (im[i] >= rstart && im[i] < rend) {
556       row      = im[i] - rstart;
557       lastcol1 = -1;
558       rp1      = aj + ai[row];
559       ap1      = aa + ai[row];
560       rmax1    = aimax[row];
561       nrow1    = ailen[row];
562       low1     = 0;
563       high1    = nrow1;
564       lastcol2 = -1;
565       rp2      = bj + bi[row];
566       ap2      = ba + bi[row];
567       rmax2    = bimax[row];
568       nrow2    = bilen[row];
569       low2     = 0;
570       high2    = nrow2;
571 
572       for (j=0; j<n; j++) {
573         if (roworiented) value = v[i*n+j];
574         else             value = v[i+j*m];
575         if (in[j] >= cstart && in[j] < cend) {
576           col   = in[j] - cstart;
577           nonew = a->nonew;
578           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
579           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
580         } else if (in[j] < 0) continue;
581 #if defined(PETSC_USE_DEBUG)
582         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
583 #endif
584         else {
585           if (mat->was_assembled) {
586             if (!aij->colmap) {
587               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 #if defined(PETSC_USE_CTABLE)
590             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
591             col--;
592 #else
593             col = aij->colmap[in[j]] - 1;
594 #endif
595             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
596               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
597               col  =  in[j];
598               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
599               B     = aij->B;
600               b     = (Mat_SeqAIJ*)B->data;
601               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
602               rp2   = bj + bi[row];
603               ap2   = ba + bi[row];
604               rmax2 = bimax[row];
605               nrow2 = bilen[row];
606               low2  = 0;
607               high2 = nrow2;
608               bm    = aij->B->rmap->n;
609               ba    = b->a;
610             } else if (col < 0) {
611               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
612                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
613               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614             }
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
618         }
619       }
620     } else {
621       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
622       if (!aij->donotstash) {
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
626         } else {
627           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
628         }
629       }
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 /*
636     This function sets the j-arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
637     The row offsets in mat_i must be monotone and the column indices in mat_j must be sorted within each row (CSR-like).
638 */
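/*
   Illustrative example (assumed data, not from this file): on a process owning two rows
   with column ownership range [cstart,cend) = [0,2), the CSR input

     mat_i = {0,3,5}                  row offsets
     mat_j = {0,1,5,1,7}              column indices, sorted within each row
     dnz   = {2,1},  onz = {1,1}      per-row diagonal / off-diagonal counts

   fills aj = {0,1,1} (shifted by -cstart) and bj = {5,7} (still global indices at this
   stage), with ailen = dnz and bilen = onz.
*/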
639 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscInt *dnz, const PetscInt *onz)
640 {
641   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
642   Mat            A      = aij->A; /* diagonal part of the matrix */
643   Mat            B      = aij->B; /* offdiagonal part of the matrix */
644   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
645   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
646   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
647   PetscInt       *ailen = a->ilen,*aj = a->j;
648   PetscInt       *bilen = b->ilen,*bj = b->j;
649   PetscInt       am     = aij->A->rmap->n,i,j;
650   PetscInt       col, diag_so_far=0, offd_so_far=0;
651 
652   PetscFunctionBegin;
653   /* Iterate over all rows of the matrix */
654   for (j=0; j<am; j++) {
655     for (i=0; i<dnz[j]+onz[j]; i++) {
656       col = i + mat_i[j];
657       /* If column is in the diagonal */
658       if (mat_j[col] >= cstart && mat_j[col] < cend) {
659         aj[diag_so_far++] = mat_j[col] - cstart;
660       } else { /* off-diagonal entries */
661         bj[offd_so_far++] = mat_j[col];
662       }
663     }
664     ailen[j] = dnz[j];
665     bilen[j] = onz[j];
666   }
667   PetscFunctionReturn(0);
668 }
669 
670 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
671 {
672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
675   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
676 
677   PetscFunctionBegin;
678   for (i=0; i<m; i++) {
679     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
680     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
681     if (idxm[i] >= rstart && idxm[i] < rend) {
682       row = idxm[i] - rstart;
683       for (j=0; j<n; j++) {
684         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
685         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
686         if (idxn[j] >= cstart && idxn[j] < cend) {
687           col  = idxn[j] - cstart;
688           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
689         } else {
690           if (!aij->colmap) {
691             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
692           }
693 #if defined(PETSC_USE_CTABLE)
694           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
695           col--;
696 #else
697           col = aij->colmap[idxn[j]] - 1;
698 #endif
699           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
700           else {
701             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
702           }
703         }
704       }
705     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
706   }
707   PetscFunctionReturn(0);
708 }
709 
710 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
711 
712 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
713 {
714   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
715   PetscErrorCode ierr;
716   PetscInt       nstash,reallocs;
717 
718   PetscFunctionBegin;
719   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
720 
721   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
722   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
723   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
724   PetscFunctionReturn(0);
725 }
726 
727 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
728 {
729   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
730   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
731   PetscErrorCode ierr;
732   PetscMPIInt    n;
733   PetscInt       i,j,rstart,ncols,flg;
734   PetscInt       *row,*col;
735   PetscBool      other_disassembled;
736   PetscScalar    *val;
737 
738   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
739 
740   PetscFunctionBegin;
741   if (!aij->donotstash && !mat->nooffprocentries) {
742     while (1) {
743       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
744       if (!flg) break;
745 
746       for (i=0; i<n; ) {
747         /* Now identify the consecutive vals belonging to the same row */
748         for (j=i,rstart=row[j]; j<n; j++) {
749           if (row[j] != rstart) break;
750         }
751         if (j < n) ncols = j-i;
752         else       ncols = n-i;
753         /* Now assemble all these values with a single function call */
754         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
755 
756         i = j;
757       }
758     }
759     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
760   }
761   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
762   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
763 
764   /* determine if any processor has disassembled; if so we must
765      also disassemble ourselves, so that we may reassemble. */
766   /*
767      if nonzero structure of submatrix B cannot change then we know that
768      no processor disassembled thus we can skip this stuff
769   */
770   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
771     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
772     if (mat->was_assembled && !other_disassembled) {
773       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
774     }
775   }
776   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
777     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
778   }
779   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
780   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
781   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
782 
783   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
784 
785   aij->rowvalues = 0;
786 
787   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
788   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
789 
790   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
791   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
792     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
793     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
794   }
795   PetscFunctionReturn(0);
796 }
797 
798 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
799 {
800   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
801   PetscErrorCode ierr;
802 
803   PetscFunctionBegin;
804   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
805   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
806   PetscFunctionReturn(0);
807 }
808 
809 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
810 {
811   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
812   PetscInt      *lrows;
813   PetscInt       r, len;
814   PetscBool      cong;
815   PetscErrorCode ierr;
816 
817   PetscFunctionBegin;
818   /* get locally owned rows */
819   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
820   /* fix right hand side if needed */
821   if (x && b) {
822     const PetscScalar *xx;
823     PetscScalar       *bb;
824 
825     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
826     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
827     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
828     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
829     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
830   }
831   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
832   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
833   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
834   if ((diag != 0.0) && cong) {
835     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
836   } else if (diag != 0.0) {
837     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
838     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
839     for (r = 0; r < len; ++r) {
840       const PetscInt row = lrows[r] + A->rmap->rstart;
841       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
842     }
843     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
844     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
845   } else {
846     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
847   }
848   ierr = PetscFree(lrows);CHKERRQ(ierr);
849 
850   /* only change matrix nonzero state if pattern was allowed to be changed */
851   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
852     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
853     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
854   }
855   PetscFunctionReturn(0);
856 }
857 
858 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
859 {
860   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
861   PetscErrorCode    ierr;
862   PetscMPIInt       n = A->rmap->n;
863   PetscInt          i,j,r,m,p = 0,len = 0;
864   PetscInt          *lrows,*owners = A->rmap->range;
865   PetscSFNode       *rrows;
866   PetscSF           sf;
867   const PetscScalar *xx;
868   PetscScalar       *bb,*mask;
869   Vec               xmask,lmask;
870   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
871   const PetscInt    *aj, *ii,*ridx;
872   PetscScalar       *aa;
873 
874   PetscFunctionBegin;
875   /* Create SF where leaves are input rows and roots are owned rows */
876   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
877   for (r = 0; r < n; ++r) lrows[r] = -1;
878   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
879   for (r = 0; r < N; ++r) {
880     const PetscInt idx   = rows[r];
881     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
882     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
883       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
884     }
885     rrows[r].rank  = p;
886     rrows[r].index = rows[r] - owners[p];
887   }
888   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
889   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
890   /* Collect flags for rows to be zeroed */
891   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
892   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
893   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
894   /* Compress and put in row numbers */
895   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
896   /* zero diagonal part of matrix */
897   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
898   /* handle off diagonal part of matrix */
899   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
900   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
901   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
902   for (i=0; i<len; i++) bb[lrows[i]] = 1;
903   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
904   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
905   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
906   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
907   if (x) {
908     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
909     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
910     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
911     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
912   }
913   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
914   /* remove zeroed rows of off diagonal matrix */
915   ii = aij->i;
916   for (i=0; i<len; i++) {
917     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
918   }
919   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
920   if (aij->compressedrow.use) {
921     m    = aij->compressedrow.nrows;
922     ii   = aij->compressedrow.i;
923     ridx = aij->compressedrow.rindex;
924     for (i=0; i<m; i++) {
925       n  = ii[i+1] - ii[i];
926       aj = aij->j + ii[i];
927       aa = aij->a + ii[i];
928 
929       for (j=0; j<n; j++) {
930         if (PetscAbsScalar(mask[*aj])) {
931           if (b) bb[*ridx] -= *aa*xx[*aj];
932           *aa = 0.0;
933         }
934         aa++;
935         aj++;
936       }
937       ridx++;
938     }
939   } else { /* do not use compressed row format */
940     m = l->B->rmap->n;
941     for (i=0; i<m; i++) {
942       n  = ii[i+1] - ii[i];
943       aj = aij->j + ii[i];
944       aa = aij->a + ii[i];
945       for (j=0; j<n; j++) {
946         if (PetscAbsScalar(mask[*aj])) {
947           if (b) bb[i] -= *aa*xx[*aj];
948           *aa = 0.0;
949         }
950         aa++;
951         aj++;
952       }
953     }
954   }
955   if (x) {
956     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
957     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
958   }
959   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
960   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
961   ierr = PetscFree(lrows);CHKERRQ(ierr);
962 
963   /* only change matrix nonzero state if pattern was allowed to be changed */
964   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
965     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
966     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
967   }
968   PetscFunctionReturn(0);
969 }
970 
971 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
972 {
973   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
974   PetscErrorCode ierr;
975   PetscInt       nt;
976   VecScatter     Mvctx = a->Mvctx;
977 
978   PetscFunctionBegin;
979   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
980   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
981 
982   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
983   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
984   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
985   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
986   PetscFunctionReturn(0);
987 }
988 
989 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
990 {
991   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
992   PetscErrorCode ierr;
993 
994   PetscFunctionBegin;
995   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
996   PetscFunctionReturn(0);
997 }
998 
999 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1000 {
1001   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1002   PetscErrorCode ierr;
1003   VecScatter     Mvctx = a->Mvctx;
1004 
1005   PetscFunctionBegin;
1006   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1007   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1008   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1009   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1015 {
1016   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1017   PetscErrorCode ierr;
1018   PetscBool      merged;
1019 
1020   PetscFunctionBegin;
1021   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1022   /* do nondiagonal part */
1023   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1024   if (!merged) {
1025     /* send it on its way */
1026     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1027     /* do local part */
1028     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1029     /* receive remote parts: note this assumes the values are not actually */
1030     /* added into yy until the next line */
1031     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1032   } else {
1033     /* do local part */
1034     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1035     /* send it on its way */
1036     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1037     /* values actually were received in the Begin() but we need to call this nop */
1038     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1039   }
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1044 {
1045   MPI_Comm       comm;
1046   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1047   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1048   IS             Me,Notme;
1049   PetscErrorCode ierr;
1050   PetscInt       M,N,first,last,*notme,i;
1051   PetscMPIInt    size;
1052 
1053   PetscFunctionBegin;
1054   /* Easy test: symmetric diagonal block */
1055   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1056   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1057   if (!*f) PetscFunctionReturn(0);
1058   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1059   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1060   if (size == 1) PetscFunctionReturn(0);
1061 
1062   /* Hard test: off-diagonal block. This requires MatCreateSubMatrices(). */
1063   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1064   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1065   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1066   for (i=0; i<first; i++) notme[i] = i;
1067   for (i=last; i<M; i++) notme[i-last+first] = i;
1068   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1069   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1070   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1071   Aoff = Aoffs[0];
1072   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1073   Boff = Boffs[0];
1074   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1075   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1076   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1077   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1078   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1079   ierr = PetscFree(notme);CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1084 {
1085   PetscErrorCode ierr;
1086 
1087   PetscFunctionBegin;
1088   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1093 {
1094   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1095   PetscErrorCode ierr;
1096 
1097   PetscFunctionBegin;
1098   /* do nondiagonal part */
1099   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1100   /* send it on its way */
1101   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1102   /* do local part */
1103   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1104   /* receive remote parts */
1105   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 /*
1110   This only works correctly for square matrices where the subblock A->A is the
1111    diagonal block
1112 */
1113 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1114 {
1115   PetscErrorCode ierr;
1116   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1117 
1118   PetscFunctionBegin;
1119   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1120   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1121   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1126 {
1127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1128   PetscErrorCode ierr;
1129 
1130   PetscFunctionBegin;
1131   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1132   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1137 {
1138   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1139   PetscErrorCode ierr;
1140 
1141   PetscFunctionBegin;
1142 #if defined(PETSC_USE_LOG)
1143   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1144 #endif
1145   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1146   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1147   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1148   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1149 #if defined(PETSC_USE_CTABLE)
1150   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1151 #else
1152   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1153 #endif
1154   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1155   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1156   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1157   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1158   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1159   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1160   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1161 
1162   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1164   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1165   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1168   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1169   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1171 #if defined(PETSC_HAVE_ELEMENTAL)
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1173 #endif
1174 #if defined(PETSC_HAVE_HYPRE)
1175   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1177 #endif
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1184 {
1185   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1186   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1187   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1188   PetscErrorCode ierr;
1189   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1190   int            fd;
1191   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1192   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1193   PetscScalar    *column_values;
1194   PetscInt       message_count,flowcontrolcount;
1195   FILE           *file;
1196 
1197   PetscFunctionBegin;
1198   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1199   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1200   nz   = A->nz + B->nz;
1201   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1202   if (!rank) {
1203     header[0] = MAT_FILE_CLASSID;
1204     header[1] = mat->rmap->N;
1205     header[2] = mat->cmap->N;
1206 
1207     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1208     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1209     /* get largest number of rows any processor has */
1210     rlen  = mat->rmap->n;
1211     range = mat->rmap->range;
1212     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1213   } else {
1214     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1215     rlen = mat->rmap->n;
1216   }
1217 
1218   /* load up the local row counts */
1219   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1220   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1221 
1222   /* store the row lengths to the file */
1223   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1224   if (!rank) {
1225     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1226     for (i=1; i<size; i++) {
1227       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1228       rlen = range[i+1] - range[i];
1229       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1230       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1231     }
1232     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1233   } else {
1234     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1235     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1236     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1237   }
1238   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1239 
1240   /* load up the local column indices */
1241   nzmax = nz; /* process 0 needs space as large as the largest process needs */
1242   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1243   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1244   cnt   = 0;
1245   for (i=0; i<mat->rmap->n; i++) {
1246     for (j=B->i[i]; j<B->i[i+1]; j++) {
1247       if ((col = garray[B->j[j]]) > cstart) break;
1248       column_indices[cnt++] = col;
1249     }
1250     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1251     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1252   }
1253   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1254 
1255   /* store the column indices to the file */
1256   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1257   if (!rank) {
1258     MPI_Status status;
1259     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1260     for (i=1; i<size; i++) {
1261       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1262       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1263       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1264       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1265       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1266     }
1267     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1268   } else {
1269     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1270     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1271     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1272     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1273   }
1274   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1275 
1276   /* load up the local column values */
1277   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1278   cnt  = 0;
1279   for (i=0; i<mat->rmap->n; i++) {
1280     for (j=B->i[i]; j<B->i[i+1]; j++) {
1281       if (garray[B->j[j]] > cstart) break;
1282       column_values[cnt++] = B->a[j];
1283     }
1284     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1285     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1286   }
1287   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1288 
1289   /* store the column values to the file */
1290   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1291   if (!rank) {
1292     MPI_Status status;
1293     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1294     for (i=1; i<size; i++) {
1295       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1296       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1297       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1298       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1299       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1300     }
1301     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1302   } else {
1303     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1304     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1305     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1306     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1307   }
1308   ierr = PetscFree(column_values);CHKERRQ(ierr);
1309 
1310   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1311   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1312   PetscFunctionReturn(0);
1313 }
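
/*
   For reference, the binary stream assembled above is laid out as

     header[4]      = {MAT_FILE_CLASSID, global rows, global columns, total nonzeros}
     row lengths    - one PetscInt per global row, in row order
     column indices - global PetscInt indices, rows concatenated in order
     values         - the PetscScalar entries, in the same ordering as the column indices

   and the block size hint "-matload_block_size <bs>" goes to the companion info file.
   A minimal sketch (not part of the library) of reading the header back, assuming a
   descriptor fd obtained from PetscViewerBinaryGetDescriptor():

     PetscInt header[4];
     ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix in this file");
*/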
1314 
1315 #include <petscdraw.h>
1316 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1317 {
1318   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1319   PetscErrorCode    ierr;
1320   PetscMPIInt       rank = aij->rank,size = aij->size;
1321   PetscBool         isdraw,iascii,isbinary;
1322   PetscViewer       sviewer;
1323   PetscViewerFormat format;
1324 
1325   PetscFunctionBegin;
1326   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1327   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1328   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1329   if (iascii) {
1330     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1331     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1332       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1333       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1334       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1335       for (i=0; i<(PetscInt)size; i++) {
1336         nmax = PetscMax(nmax,nz[i]);
1337         nmin = PetscMin(nmin,nz[i]);
1338         navg += nz[i];
1339       }
1340       ierr = PetscFree(nz);CHKERRQ(ierr);
1341       navg = navg/size;
1342       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1343       PetscFunctionReturn(0);
1344     }
1345     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1346     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1347       MatInfo   info;
1348       PetscBool inodes;
1349 
1350       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1351       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1352       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1353       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1354       if (!inodes) {
1355         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1356                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1357       } else {
1358         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1359                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1360       }
1361       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1362       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1363       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1364       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1365       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1366       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1367       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1368       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1369       PetscFunctionReturn(0);
1370     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1371       PetscInt inodecount,inodelimit,*inodes;
1372       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1373       if (inodes) {
1374         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1375       } else {
1376         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1377       }
1378       PetscFunctionReturn(0);
1379     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1380       PetscFunctionReturn(0);
1381     }
1382   } else if (isbinary) {
1383     if (size == 1) {
1384       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1385       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1386     } else {
1387       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1388     }
1389     PetscFunctionReturn(0);
1390   } else if (isdraw) {
1391     PetscDraw draw;
1392     PetscBool isnull;
1393     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1394     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1395     if (isnull) PetscFunctionReturn(0);
1396   }
1397 
1398   {
1399     /* assemble the entire matrix onto first processor. */
1400     Mat        A;
1401     Mat_SeqAIJ *Aloc;
1402     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1403     MatScalar  *a;
1404 
1405     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1406     if (!rank) {
1407       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1408     } else {
1409       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1410     }
1411     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1412     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1413     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1414     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1415     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1416 
1417     /* copy over the A part */
1418     Aloc = (Mat_SeqAIJ*)aij->A->data;
1419     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1420     row  = mat->rmap->rstart;
1421     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1422     for (i=0; i<m; i++) {
1423       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1424       row++;
1425       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1426     }
1427     aj = Aloc->j;
1428     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1429 
1430     /* copy over the B part */
1431     Aloc = (Mat_SeqAIJ*)aij->B->data;
1432     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1433     row  = mat->rmap->rstart;
1434     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1435     ct   = cols;
1436     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1437     for (i=0; i<m; i++) {
1438       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1439       row++;
1440       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1441     }
1442     ierr = PetscFree(ct);CHKERRQ(ierr);
1443     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1444     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1445     /*
1446        Everyone has to participate in drawing the matrix since the graphics waits are
1447        synchronized across all processes that share the PetscDraw object
1448     */
1449     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1450     if (!rank) {
1451       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1452       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1453     }
1454     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1455     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1456     ierr = MatDestroy(&A);CHKERRQ(ierr);
1457   }
1458   PetscFunctionReturn(0);
1459 }
1460 
1461 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1462 {
1463   PetscErrorCode ierr;
1464   PetscBool      iascii,isdraw,issocket,isbinary;
1465 
1466   PetscFunctionBegin;
1467   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1468   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1469   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1470   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1471   if (iascii || isdraw || isbinary || issocket) {
1472     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1473   }
1474   PetscFunctionReturn(0);
1475 }
1476 
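/*
   A sketch of the splitting used below: with the MPIAIJ storage the global matrix acts as

       y = A_diag*x_local + B_offdiag*x_ghost,

   so each outer iteration scatters the ghost values of xx into mat->lvec, forms the modified
   right-hand side bb1 = bb - B*lvec, and then applies the sequential SOR kernel of mat->A
   (the diagonal block) with that right-hand side.
*/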
1477 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1478 {
1479   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1480   PetscErrorCode ierr;
1481   Vec            bb1 = 0;
1482   PetscBool      hasop;
1483 
1484   PetscFunctionBegin;
1485   if (flag == SOR_APPLY_UPPER) {
1486     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1487     PetscFunctionReturn(0);
1488   }
1489 
1490   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1491     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1492   }
1493 
1494   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1495     if (flag & SOR_ZERO_INITIAL_GUESS) {
1496       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1497       its--;
1498     }
1499 
1500     while (its--) {
1501       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1502       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503 
1504       /* update rhs: bb1 = bb - B*x */
1505       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1506       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1507 
1508       /* local sweep */
1509       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1510     }
1511   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1512     if (flag & SOR_ZERO_INITIAL_GUESS) {
1513       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1514       its--;
1515     }
1516     while (its--) {
1517       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1518       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519 
1520       /* update rhs: bb1 = bb - B*x */
1521       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1522       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1523 
1524       /* local sweep */
1525       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1526     }
1527   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1528     if (flag & SOR_ZERO_INITIAL_GUESS) {
1529       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1530       its--;
1531     }
1532     while (its--) {
1533       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1534       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535 
1536       /* update rhs: bb1 = bb - B*x */
1537       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1538       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1539 
1540       /* local sweep */
1541       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1542     }
1543   } else if (flag & SOR_EISENSTAT) {
1544     Vec xx1;
1545 
1546     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1547     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1548 
1549     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1550     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1551     if (!mat->diag) {
1552       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1553       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1554     }
1555     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1556     if (hasop) {
1557       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1558     } else {
1559       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1560     }
1561     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1562 
1563     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1564 
1565     /* local sweep */
1566     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1567     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1568     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1569   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1570 
1571   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1572 
1573   matin->factorerrortype = mat->A->factorerrortype;
1574   PetscFunctionReturn(0);
1575 }
1576 
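/*
   The PetscSF reduces below invert the requested permutations: for every position i a process
   wants filled, it sends that position's global index to the owner of the original row (or
   column), so after the reduce rdest[j] holds the destination of local row j, and cdest/gcdest
   hold the destinations of the local and ghost columns.  A single-process sketch (illustrative
   values, not taken from the code): with rowp = {2,0,1} the reduce turns work = {0,1,2} into
   rdest = {1,2,0}, i.e. original row 0 is destined for row 1 of the permuted matrix.
*/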
1577 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1578 {
1579   Mat            aA,aB,Aperm;
1580   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1581   PetscScalar    *aa,*ba;
1582   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1583   PetscSF        rowsf,sf;
1584   IS             parcolp = NULL;
1585   PetscBool      done;
1586   PetscErrorCode ierr;
1587 
1588   PetscFunctionBegin;
1589   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1590   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1591   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1592   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1593 
1594   /* Invert row permutation to find out where my rows should go */
1595   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1596   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1597   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1598   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1599   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1600   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1601 
1602   /* Invert column permutation to find out where my columns should go */
1603   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1604   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1605   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1606   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1607   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1608   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1609   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1610 
1611   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1612   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1613   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1614 
1615   /* Find out where my gcols should go */
1616   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1617   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1618   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1619   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1620   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1621   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1622   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1623   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1624 
1625   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1626   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1627   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1628   for (i=0; i<m; i++) {
1629     PetscInt row = rdest[i],rowner;
1630     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1631     for (j=ai[i]; j<ai[i+1]; j++) {
1632       PetscInt cowner,col = cdest[aj[j]];
1633       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1634       if (rowner == cowner) dnnz[i]++;
1635       else onnz[i]++;
1636     }
1637     for (j=bi[i]; j<bi[i+1]; j++) {
1638       PetscInt cowner,col = gcdest[bj[j]];
1639       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1640       if (rowner == cowner) dnnz[i]++;
1641       else onnz[i]++;
1642     }
1643   }
1644   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1645   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1646   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1647   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1648   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1649 
1650   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1651   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1652   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1653   for (i=0; i<m; i++) {
1654     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1655     PetscInt j0,rowlen;
1656     rowlen = ai[i+1] - ai[i];
1657     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the repurposed scratch arrays, so insert in batches of at most m */
1658       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1659       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1660     }
1661     rowlen = bi[i+1] - bi[i];
1662     for (j0=j=0; j<rowlen; j0=j) {
1663       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1664       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1665     }
1666   }
1667   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1668   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1670   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1671   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1672   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1673   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1674   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1675   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1676   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1677   *B = Aperm;
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1682 {
1683   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1684   PetscErrorCode ierr;
1685 
1686   PetscFunctionBegin;
1687   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1688   if (ghosts) *ghosts = aij->garray;
1689   PetscFunctionReturn(0);
1690 }
1691 
1692 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1693 {
1694   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1695   Mat            A    = mat->A,B = mat->B;
1696   PetscErrorCode ierr;
1697   PetscReal      isend[5],irecv[5];
1698 
1699   PetscFunctionBegin;
1700   info->block_size = 1.0;
1701   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1702 
1703   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1704   isend[3] = info->memory;  isend[4] = info->mallocs;
1705 
1706   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1707 
1708   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1709   isend[3] += info->memory;  isend[4] += info->mallocs;
1710   if (flag == MAT_LOCAL) {
1711     info->nz_used      = isend[0];
1712     info->nz_allocated = isend[1];
1713     info->nz_unneeded  = isend[2];
1714     info->memory       = isend[3];
1715     info->mallocs      = isend[4];
1716   } else if (flag == MAT_GLOBAL_MAX) {
1717     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1718 
1719     info->nz_used      = irecv[0];
1720     info->nz_allocated = irecv[1];
1721     info->nz_unneeded  = irecv[2];
1722     info->memory       = irecv[3];
1723     info->mallocs      = irecv[4];
1724   } else if (flag == MAT_GLOBAL_SUM) {
1725     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1726 
1727     info->nz_used      = irecv[0];
1728     info->nz_allocated = irecv[1];
1729     info->nz_unneeded  = irecv[2];
1730     info->memory       = irecv[3];
1731     info->mallocs      = irecv[4];
1732   }
1733   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1734   info->fill_ratio_needed = 0;
1735   info->factor_mallocs    = 0;
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1740 {
1741   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1742   PetscErrorCode ierr;
1743 
1744   PetscFunctionBegin;
1745   switch (op) {
1746   case MAT_NEW_NONZERO_LOCATIONS:
1747   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1748   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1749   case MAT_KEEP_NONZERO_PATTERN:
1750   case MAT_NEW_NONZERO_LOCATION_ERR:
1751   case MAT_USE_INODES:
1752   case MAT_IGNORE_ZERO_ENTRIES:
1753     MatCheckPreallocated(A,1);
1754     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1755     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1756     break;
1757   case MAT_ROW_ORIENTED:
1758     MatCheckPreallocated(A,1);
1759     a->roworiented = flg;
1760 
1761     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1762     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1763     break;
1764   case MAT_NEW_DIAGONALS:
1765     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1766     break;
1767   case MAT_IGNORE_OFF_PROC_ENTRIES:
1768     a->donotstash = flg;
1769     break;
1770   case MAT_SPD:
1771     A->spd_set = PETSC_TRUE;
1772     A->spd     = flg;
1773     if (flg) {
1774       A->symmetric                  = PETSC_TRUE;
1775       A->structurally_symmetric     = PETSC_TRUE;
1776       A->symmetric_set              = PETSC_TRUE;
1777       A->structurally_symmetric_set = PETSC_TRUE;
1778     }
1779     break;
1780   case MAT_SYMMETRIC:
1781     MatCheckPreallocated(A,1);
1782     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1783     break;
1784   case MAT_STRUCTURALLY_SYMMETRIC:
1785     MatCheckPreallocated(A,1);
1786     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1787     break;
1788   case MAT_HERMITIAN:
1789     MatCheckPreallocated(A,1);
1790     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1791     break;
1792   case MAT_SYMMETRY_ETERNAL:
1793     MatCheckPreallocated(A,1);
1794     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1795     break;
1796   case MAT_SUBMAT_SINGLEIS:
1797     A->submat_singleis = flg;
1798     break;
1799   case MAT_STRUCTURE_ONLY:
1800     /* The option is handled directly by MatSetOption() */
1801     break;
1802   default:
1803     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1804   }
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1809 {
1810   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1811   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1812   PetscErrorCode ierr;
1813   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1814   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1815   PetscInt       *cmap,*idx_p;
1816 
1817   PetscFunctionBegin;
1818   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1819   mat->getrowactive = PETSC_TRUE;
1820 
1821   if (!mat->rowvalues && (idx || v)) {
1822     /*
1823         allocate enough space to hold information from the longest row.
1824     */
1825     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1826     PetscInt   max = 1,tmp;
1827     for (i=0; i<matin->rmap->n; i++) {
1828       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1829       if (max < tmp) max = tmp;
1830     }
1831     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1832   }
1833 
1834   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1835   lrow = row - rstart;
1836 
1837   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1838   if (!v)   {pvA = 0; pvB = 0;}
1839   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1840   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1841   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1842   nztot = nzA + nzB;
1843 
1844   cmap = mat->garray;
1845   if (v  || idx) {
1846     if (nztot) {
1847       /* Sort by increasing column numbers, assuming A and B already sorted */
1848       PetscInt imark = -1;
1849       if (v) {
1850         *v = v_p = mat->rowvalues;
1851         for (i=0; i<nzB; i++) {
1852           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1853           else break;
1854         }
1855         imark = i;
1856         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1857         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1858       }
1859       if (idx) {
1860         *idx = idx_p = mat->rowindices;
1861         if (imark > -1) {
1862           for (i=0; i<imark; i++) {
1863             idx_p[i] = cmap[cworkB[i]];
1864           }
1865         } else {
1866           for (i=0; i<nzB; i++) {
1867             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1868             else break;
1869           }
1870           imark = i;
1871         }
1872         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1873         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1874       }
1875     } else {
1876       if (idx) *idx = 0;
1877       if (v)   *v   = 0;
1878     }
1879   }
1880   *nz  = nztot;
1881   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1882   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1887 {
1888   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1889 
1890   PetscFunctionBegin;
1891   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1892   aij->getrowactive = PETSC_FALSE;
1893   PetscFunctionReturn(0);
1894 }
1895 
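/*
   The norms computed below are, for a matrix with entries a_ij,

     NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2)   - local sums of squares, then a global MPI sum
     NORM_1:         max_j sum_i |a_ij|      - per-column sums assembled with a global MPI sum
     NORM_INFINITY:  max_i sum_j |a_ij|      - per-row sums reduced with a global MPI max

   each accumulated over the diagonal (aij->A) and off-diagonal (aij->B) blocks of every process.
*/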
1896 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1897 {
1898   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1899   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1900   PetscErrorCode ierr;
1901   PetscInt       i,j,cstart = mat->cmap->rstart;
1902   PetscReal      sum = 0.0;
1903   MatScalar      *v;
1904 
1905   PetscFunctionBegin;
1906   if (aij->size == 1) {
1907     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1908   } else {
1909     if (type == NORM_FROBENIUS) {
1910       v = amat->a;
1911       for (i=0; i<amat->nz; i++) {
1912         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1913       }
1914       v = bmat->a;
1915       for (i=0; i<bmat->nz; i++) {
1916         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1917       }
1918       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1919       *norm = PetscSqrtReal(*norm);
1920       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1921     } else if (type == NORM_1) { /* max column norm */
1922       PetscReal *tmp,*tmp2;
1923       PetscInt  *jj,*garray = aij->garray;
1924       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1925       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1926       *norm = 0.0;
1927       v     = amat->a; jj = amat->j;
1928       for (j=0; j<amat->nz; j++) {
1929         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1930       }
1931       v = bmat->a; jj = bmat->j;
1932       for (j=0; j<bmat->nz; j++) {
1933         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1934       }
1935       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1936       for (j=0; j<mat->cmap->N; j++) {
1937         if (tmp2[j] > *norm) *norm = tmp2[j];
1938       }
1939       ierr = PetscFree(tmp);CHKERRQ(ierr);
1940       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1941       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1942     } else if (type == NORM_INFINITY) { /* max row norm */
1943       PetscReal ntemp = 0.0;
1944       for (j=0; j<aij->A->rmap->n; j++) {
1945         v   = amat->a + amat->i[j];
1946         sum = 0.0;
1947         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1948           sum += PetscAbsScalar(*v); v++;
1949         }
1950         v = bmat->a + bmat->i[j];
1951         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1952           sum += PetscAbsScalar(*v); v++;
1953         }
1954         if (sum > ntemp) ntemp = sum;
1955       }
1956       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1957       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1958     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1959   }
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1964 {
1965   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1966   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1967   PetscErrorCode ierr;
1968   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1969   PetscInt       cstart = A->cmap->rstart,ncol;
1970   Mat            B;
1971   MatScalar      *array;
1972 
1973   PetscFunctionBegin;
1974   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1975   ai = Aloc->i; aj = Aloc->j;
1976   bi = Bloc->i; bj = Bloc->j;
1977   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1978     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1979     PetscSFNode          *oloc;
1980     PETSC_UNUSED PetscSF sf;
1981 
1982     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1983     /* compute d_nnz for preallocation */
1984     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1985     for (i=0; i<ai[ma]; i++) {
1986       d_nnz[aj[i]]++;
1987       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1988     }
1989     /* compute local off-diagonal contributions */
1990     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1991     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1992     /* map those to global */
1993     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1994     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1995     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1996     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1997     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1998     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1999     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2000 
2001     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2002     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2003     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2004     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2005     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2006     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2007   } else {
2008     B    = *matout;
2009     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2010     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2011   }
2012 
2013   /* copy over the A part */
2014   array = Aloc->a;
2015   row   = A->rmap->rstart;
2016   for (i=0; i<ma; i++) {
2017     ncol = ai[i+1]-ai[i];
2018     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2019     row++;
2020     array += ncol; aj += ncol;
2021   }
2022   aj = Aloc->j;
2023   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col index */
2024 
2025   /* copy over the B part */
2026   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2027   array = Bloc->a;
2028   row   = A->rmap->rstart;
2029   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2030   cols_tmp = cols;
2031   for (i=0; i<mb; i++) {
2032     ncol = bi[i+1]-bi[i];
2033     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2034     row++;
2035     array += ncol; cols_tmp += ncol;
2036   }
2037   ierr = PetscFree(cols);CHKERRQ(ierr);
2038 
2039   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2040   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2041   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2042     *matout = B;
2043   } else {
2044     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2045   }
2046   PetscFunctionReturn(0);
2047 }
2048 
2049 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2050 {
2051   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2052   Mat            a    = aij->A,b = aij->B;
2053   PetscErrorCode ierr;
2054   PetscInt       s1,s2,s3;
2055 
2056   PetscFunctionBegin;
2057   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2058   if (rr) {
2059     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2060     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2061     /* Overlap communication with computation. */
2062     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2063   }
2064   if (ll) {
2065     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2066     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2067     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2068   }
2069   /* scale  the diagonal block */
2070   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2071 
2072   if (rr) {
2073     /* Do a scatter end and then right scale the off-diagonal block */
2074     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2075     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2076   }
2077   PetscFunctionReturn(0);
2078 }
2079 
2080 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2081 {
2082   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2083   PetscErrorCode ierr;
2084 
2085   PetscFunctionBegin;
2086   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2091 {
2092   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2093   Mat            a,b,c,d;
2094   PetscBool      flg;
2095   PetscErrorCode ierr;
2096 
2097   PetscFunctionBegin;
2098   a = matA->A; b = matA->B;
2099   c = matB->A; d = matB->B;
2100 
2101   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2102   if (flg) {
2103     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2104   }
2105   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2106   PetscFunctionReturn(0);
2107 }
2108 
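/*
   The off-diagonal block a->B stores its column indices in compressed (local) form: for example,
   if a process references the off-process global columns {10,42,97}, then B uses local columns
   {0,1,2} and a->garray = {10,42,97} maps them back to global indices.  When the nonzero
   patterns differ, the two B blocks may compress to different numbers of columns, so the
   blockwise fast path below is taken only for SAME_NONZERO_PATTERN with matching copy
   implementations; otherwise MatCopy_Basic() is used.
*/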
2109 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2110 {
2111   PetscErrorCode ierr;
2112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2113   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2114 
2115   PetscFunctionBegin;
2116   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2117   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2118     /* because of the column compression in the off-processor part of the matrix a->B,
2119        the number of columns in a->B and b->B may be different, hence we cannot call
2120        the MatCopy() directly on the two parts. If need be, we can provide a more
2121        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2122        then copying the submatrices */
2123     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2124   } else {
2125     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2126     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2127   }
2128   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2133 {
2134   PetscErrorCode ierr;
2135 
2136   PetscFunctionBegin;
2137   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2138   PetscFunctionReturn(0);
2139 }
2140 
2141 /*
2142    Computes the number of nonzeros per row needed for preallocation when X and Y
2143    have different nonzero structure.
2144 */
2145 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2146 {
2147   PetscInt       i,j,k,nzx,nzy;
2148 
2149   PetscFunctionBegin;
2150   /* Set the number of nonzeros in the new matrix */
2151   for (i=0; i<m; i++) {
2152     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2153     nzx = xi[i+1] - xi[i];
2154     nzy = yi[i+1] - yi[i];
2155     nnz[i] = 0;
2156     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2157       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2158       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2159       nnz[i]++;
2160     }
2161     for (; k<nzy; k++) nnz[i]++;
2162   }
2163   PetscFunctionReturn(0);
2164 }
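
/*
   Illustration of the merge count above (example values, not taken from the code): if row i of X
   has global columns {0,3,7} and row i of Y has global columns {3,5}, the two sorted lists merge
   to the union {0,3,5,7}, so nnz[i] = 4.  With identity local-to-global maps this corresponds to

     const PetscInt xi[] = {0,3}, xj[] = {0,3,7}, yi[] = {0,2}, yj[] = {3,5};
     PetscInt       ltog[] = {0,1,2,3,4,5,6,7}, nnz[1];
     ierr = MatAXPYGetPreallocation_MPIX_private(1,xi,xj,ltog,yi,yj,ltog,nnz);CHKERRQ(ierr); /* nnz[0] == 4 */
*/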
2165 
2166 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2167 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2168 {
2169   PetscErrorCode ierr;
2170   PetscInt       m = Y->rmap->N;
2171   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2172   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2173 
2174   PetscFunctionBegin;
2175   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2180 {
2181   PetscErrorCode ierr;
2182   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2183   PetscBLASInt   bnz,one=1;
2184   Mat_SeqAIJ     *x,*y;
2185 
2186   PetscFunctionBegin;
2187   if (str == SAME_NONZERO_PATTERN) {
2188     PetscScalar alpha = a;
2189     x    = (Mat_SeqAIJ*)xx->A->data;
2190     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2191     y    = (Mat_SeqAIJ*)yy->A->data;
2192     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2193     x    = (Mat_SeqAIJ*)xx->B->data;
2194     y    = (Mat_SeqAIJ*)yy->B->data;
2195     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2196     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2197     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2198   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2199     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2200   } else {
2201     Mat      B;
2202     PetscInt *nnz_d,*nnz_o;
2203     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2204     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2205     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2206     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2207     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2208     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2209     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2210     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2211     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2212     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2213     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2214     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2215     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2216     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2217   }
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2222 
2223 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2224 {
2225 #if defined(PETSC_USE_COMPLEX)
2226   PetscErrorCode ierr;
2227   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2228 
2229   PetscFunctionBegin;
2230   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2231   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2232 #else
2233   PetscFunctionBegin;
2234 #endif
2235   PetscFunctionReturn(0);
2236 }
2237 
2238 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2239 {
2240   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2241   PetscErrorCode ierr;
2242 
2243   PetscFunctionBegin;
2244   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2245   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2246   PetscFunctionReturn(0);
2247 }
2248 
2249 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2250 {
2251   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2252   PetscErrorCode ierr;
2253 
2254   PetscFunctionBegin;
2255   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2256   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2261 {
2262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2263   PetscErrorCode ierr;
2264   PetscInt       i,*idxb = 0;
2265   PetscScalar    *va,*vb;
2266   Vec            vtmp;
2267 
2268   PetscFunctionBegin;
2269   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2270   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2271   if (idx) {
2272     for (i=0; i<A->rmap->n; i++) {
2273       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2274     }
2275   }
2276 
2277   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2278   if (idx) {
2279     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2280   }
2281   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2282   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2283 
2284   for (i=0; i<A->rmap->n; i++) {
2285     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2286       va[i] = vb[i];
2287       if (idx) idx[i] = a->garray[idxb[i]];
2288     }
2289   }
2290 
2291   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2292   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2293   ierr = PetscFree(idxb);CHKERRQ(ierr);
2294   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2295   PetscFunctionReturn(0);
2296 }
2297 
2298 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2299 {
2300   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2301   PetscErrorCode ierr;
2302   PetscInt       i,*idxb = 0;
2303   PetscScalar    *va,*vb;
2304   Vec            vtmp;
2305 
2306   PetscFunctionBegin;
2307   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2308   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2309   if (idx) {
2310     for (i=0; i<A->rmap->n; i++) {
2311       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2312     }
2313   }
2314 
2315   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2316   if (idx) {
2317     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2318   }
2319   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2320   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2321 
2322   for (i=0; i<A->rmap->n; i++) {
2323     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2324       va[i] = vb[i];
2325       if (idx) idx[i] = a->garray[idxb[i]];
2326     }
2327   }
2328 
2329   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2330   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2331   ierr = PetscFree(idxb);CHKERRQ(ierr);
2332   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2333   PetscFunctionReturn(0);
2334 }
2335 
2336 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2337 {
2338   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2339   PetscInt       n      = A->rmap->n;
2340   PetscInt       cstart = A->cmap->rstart;
2341   PetscInt       *cmap  = mat->garray;
2342   PetscInt       *diagIdx, *offdiagIdx;
2343   Vec            diagV, offdiagV;
2344   PetscScalar    *a, *diagA, *offdiagA;
2345   PetscInt       r;
2346   PetscErrorCode ierr;
2347 
2348   PetscFunctionBegin;
2349   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2350   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2351   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2352   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2353   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2354   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2355   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2356   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2357   for (r = 0; r < n; ++r) {
2358     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2359       a[r]   = diagA[r];
2360       idx[r] = cstart + diagIdx[r];
2361     } else {
2362       a[r]   = offdiagA[r];
2363       idx[r] = cmap[offdiagIdx[r]];
2364     }
2365   }
2366   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2367   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2368   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2369   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2370   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2371   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2372   PetscFunctionReturn(0);
2373 }
2374 
2375 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2376 {
2377   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2378   PetscInt       n      = A->rmap->n;
2379   PetscInt       cstart = A->cmap->rstart;
2380   PetscInt       *cmap  = mat->garray;
2381   PetscInt       *diagIdx, *offdiagIdx;
2382   Vec            diagV, offdiagV;
2383   PetscScalar    *a, *diagA, *offdiagA;
2384   PetscInt       r;
2385   PetscErrorCode ierr;
2386 
2387   PetscFunctionBegin;
2388   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2389   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2390   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2391   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2392   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2393   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2394   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2395   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2396   for (r = 0; r < n; ++r) {
2397     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2398       a[r]   = diagA[r];
2399       idx[r] = cstart + diagIdx[r];
2400     } else {
2401       a[r]   = offdiagA[r];
2402       idx[r] = cmap[offdiagIdx[r]];
2403     }
2404   }
2405   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2406   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2407   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2408   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2409   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2410   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2411   PetscFunctionReturn(0);
2412 }
2413 
2414 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2415 {
2416   PetscErrorCode ierr;
2417   Mat            *dummy;
2418 
2419   PetscFunctionBegin;
2420   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2421   *newmat = *dummy;
2422   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2427 {
2428   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2429   PetscErrorCode ierr;
2430 
2431   PetscFunctionBegin;
2432   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2433   A->factorerrortype = a->A->factorerrortype;
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2438 {
2439   PetscErrorCode ierr;
2440   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2441 
2442   PetscFunctionBegin;
2443   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2444   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2445   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2446   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447   PetscFunctionReturn(0);
2448 }
2449 
2450 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2451 {
2452   PetscFunctionBegin;
2453   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2454   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2455   PetscFunctionReturn(0);
2456 }
2457 
2458 /*@
2459    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2460 
2461    Collective on Mat
2462 
2463    Input Parameters:
2464 +    A - the matrix
2465 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (the default is PETSC_FALSE)
2466 
2467  Level: advanced
2468 
2469 @*/
2470 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2471 {
2472   PetscErrorCode       ierr;
2473 
2474   PetscFunctionBegin;
2475   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2476   PetscFunctionReturn(0);
2477 }
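
/*
   Typical usage sketch (assumes an assembled MATMPIAIJ matrix A and index sets is[0..n-1] to be
   enlarged; not taken verbatim from any example):

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,n,is,1);CHKERRQ(ierr);

   The same switch is available from the options database as -mat_increase_overlap_scalable
   (see MatSetFromOptions_MPIAIJ() below).
*/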
2478 
2479 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2480 {
2481   PetscErrorCode       ierr;
2482   PetscBool            sc = PETSC_FALSE,flg;
2483 
2484   PetscFunctionBegin;
2485   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2487   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2488   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2489   if (flg) {
2490     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2491   }
2492   ierr = PetscOptionsTail();CHKERRQ(ierr);
2493   PetscFunctionReturn(0);
2494 }
2495 
2496 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2497 {
2498   PetscErrorCode ierr;
2499   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2500   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2501 
2502   PetscFunctionBegin;
2503   if (!Y->preallocated) {
2504     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2505   } else if (!aij->nz) {
2506     PetscInt nonew = aij->nonew;
2507     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2508     aij->nonew = nonew;
2509   }
2510   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2511   PetscFunctionReturn(0);
2512 }
2513 
2514 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2515 {
2516   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2517   PetscErrorCode ierr;
2518 
2519   PetscFunctionBegin;
2520   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2521   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2522   if (d) {
2523     PetscInt rstart;
2524     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2525     *d += rstart;
2526 
2527   }
2528   PetscFunctionReturn(0);
2529 }
2530 
2531 
2532 /* -------------------------------------------------------------------*/
2533 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2534                                        MatGetRow_MPIAIJ,
2535                                        MatRestoreRow_MPIAIJ,
2536                                        MatMult_MPIAIJ,
2537                                 /* 4*/ MatMultAdd_MPIAIJ,
2538                                        MatMultTranspose_MPIAIJ,
2539                                        MatMultTransposeAdd_MPIAIJ,
2540                                        0,
2541                                        0,
2542                                        0,
2543                                 /*10*/ 0,
2544                                        0,
2545                                        0,
2546                                        MatSOR_MPIAIJ,
2547                                        MatTranspose_MPIAIJ,
2548                                 /*15*/ MatGetInfo_MPIAIJ,
2549                                        MatEqual_MPIAIJ,
2550                                        MatGetDiagonal_MPIAIJ,
2551                                        MatDiagonalScale_MPIAIJ,
2552                                        MatNorm_MPIAIJ,
2553                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2554                                        MatAssemblyEnd_MPIAIJ,
2555                                        MatSetOption_MPIAIJ,
2556                                        MatZeroEntries_MPIAIJ,
2557                                 /*24*/ MatZeroRows_MPIAIJ,
2558                                        0,
2559                                        0,
2560                                        0,
2561                                        0,
2562                                 /*29*/ MatSetUp_MPIAIJ,
2563                                        0,
2564                                        0,
2565                                        MatGetDiagonalBlock_MPIAIJ,
2566                                        0,
2567                                 /*34*/ MatDuplicate_MPIAIJ,
2568                                        0,
2569                                        0,
2570                                        0,
2571                                        0,
2572                                 /*39*/ MatAXPY_MPIAIJ,
2573                                        MatCreateSubMatrices_MPIAIJ,
2574                                        MatIncreaseOverlap_MPIAIJ,
2575                                        MatGetValues_MPIAIJ,
2576                                        MatCopy_MPIAIJ,
2577                                 /*44*/ MatGetRowMax_MPIAIJ,
2578                                        MatScale_MPIAIJ,
2579                                        MatShift_MPIAIJ,
2580                                        MatDiagonalSet_MPIAIJ,
2581                                        MatZeroRowsColumns_MPIAIJ,
2582                                 /*49*/ MatSetRandom_MPIAIJ,
2583                                        0,
2584                                        0,
2585                                        0,
2586                                        0,
2587                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2588                                        0,
2589                                        MatSetUnfactored_MPIAIJ,
2590                                        MatPermute_MPIAIJ,
2591                                        0,
2592                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2593                                        MatDestroy_MPIAIJ,
2594                                        MatView_MPIAIJ,
2595                                        0,
2596                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2597                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2598                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2599                                        0,
2600                                        0,
2601                                        0,
2602                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2603                                        MatGetRowMinAbs_MPIAIJ,
2604                                        0,
2605                                        0,
2606                                        0,
2607                                        0,
2608                                 /*75*/ MatFDColoringApply_AIJ,
2609                                        MatSetFromOptions_MPIAIJ,
2610                                        0,
2611                                        0,
2612                                        MatFindZeroDiagonals_MPIAIJ,
2613                                 /*80*/ 0,
2614                                        0,
2615                                        0,
2616                                 /*83*/ MatLoad_MPIAIJ,
2617                                        MatIsSymmetric_MPIAIJ,
2618                                        0,
2619                                        0,
2620                                        0,
2621                                        0,
2622                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2623                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2624                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2625                                        MatPtAP_MPIAIJ_MPIAIJ,
2626                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2627                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2628                                        0,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                 /*99*/ 0,
2633                                        0,
2634                                        0,
2635                                        MatConjugate_MPIAIJ,
2636                                        0,
2637                                 /*104*/MatSetValuesRow_MPIAIJ,
2638                                        MatRealPart_MPIAIJ,
2639                                        MatImaginaryPart_MPIAIJ,
2640                                        0,
2641                                        0,
2642                                 /*109*/0,
2643                                        0,
2644                                        MatGetRowMin_MPIAIJ,
2645                                        0,
2646                                        MatMissingDiagonal_MPIAIJ,
2647                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2648                                        0,
2649                                        MatGetGhosts_MPIAIJ,
2650                                        0,
2651                                        0,
2652                                 /*119*/0,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                        MatGetMultiProcBlock_MPIAIJ,
2657                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2658                                        MatGetColumnNorms_MPIAIJ,
2659                                        MatInvertBlockDiagonal_MPIAIJ,
2660                                        0,
2661                                        MatCreateSubMatricesMPI_MPIAIJ,
2662                                 /*129*/0,
2663                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2664                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2665                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2666                                        0,
2667                                 /*134*/0,
2668                                        0,
2669                                        MatRARt_MPIAIJ_MPIAIJ,
2670                                        0,
2671                                        0,
2672                                 /*139*/MatSetBlockSizes_MPIAIJ,
2673                                        0,
2674                                        0,
2675                                        MatFDColoringSetUp_MPIXAIJ,
2676                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2677                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2678 };
2679 
2680 /* ----------------------------------------------------------------------------------------*/
2681 
2682 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2683 {
2684   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2685   PetscErrorCode ierr;
2686 
2687   PetscFunctionBegin;
2688   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2689   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2690   PetscFunctionReturn(0);
2691 }
2692 
2693 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2694 {
2695   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2696   PetscErrorCode ierr;
2697 
2698   PetscFunctionBegin;
2699   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2700   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2701   PetscFunctionReturn(0);
2702 }
2703 
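/* A minimal usage sketch (not part of this source): MatStoreValues()/MatRetrieveValues() let an
   application stash the numerical values of an assembled matrix and restore them later, for example
   to reuse a constant part of a Jacobian; mat is assumed to be an assembled MATMPIAIJ matrix whose
   nonzero pattern does not change.

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
        ... modify the matrix values ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/
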
2704 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2705 {
2706   Mat_MPIAIJ     *b;
2707   PetscErrorCode ierr;
2708 
2709   PetscFunctionBegin;
2710   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2711   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2712   b = (Mat_MPIAIJ*)B->data;
2713 
2714 #if defined(PETSC_USE_CTABLE)
2715   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2716 #else
2717   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2718 #endif
2719   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2720   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2721   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2722 
2723   /* Because B may have been resized we simply destroy it and create a new one each time */
2724   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2725   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2726   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2727   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2728   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2729   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2730 
2731   if (!B->preallocated) {
2732     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2733     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2734     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2735     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2736     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2737   }
2738 
2739   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2740   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2741   B->preallocated  = PETSC_TRUE;
2742   B->was_assembled = PETSC_FALSE;
2743   B->assembled     = PETSC_FALSE;
2744   PetscFunctionReturn(0);
2745 }
2746 
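/* A minimal usage sketch (not part of this source) of the user-level MatMPIAIJSetPreallocation()
   that dispatches to the routine above; C is assumed to be a MATMPIAIJ matrix whose local sizes
   have been set, and d_nnz/o_nnz hold per-row nonzero counts for the diagonal and off-diagonal blocks.

     ierr = MatMPIAIJSetPreallocation(C,0,d_nnz,0,o_nnz);CHKERRQ(ierr);      (exact per-row counts)
   or, with a uniform upper bound per row,
     ierr = MatMPIAIJSetPreallocation(C,5,NULL,2,NULL);CHKERRQ(ierr);
*/
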
2747 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2748 {
2749   Mat_MPIAIJ     *b;
2750   PetscErrorCode ierr;
2751 
2752   PetscFunctionBegin;
2753   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2754   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2755   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2756   b = (Mat_MPIAIJ*)B->data;
2757 
2758 #if defined(PETSC_USE_CTABLE)
2759   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2760 #else
2761   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2762 #endif
2763   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2764   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2765   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2766 
2767   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2768   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2769   B->preallocated  = PETSC_TRUE;
2770   B->was_assembled = PETSC_FALSE;
2771   B->assembled = PETSC_FALSE;
2772   PetscFunctionReturn(0);
2773 }
2774 
2775 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2776 {
2777   Mat            mat;
2778   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2779   PetscErrorCode ierr;
2780 
2781   PetscFunctionBegin;
2782   *newmat = 0;
2783   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2784   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2785   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2786   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2787   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2788   a       = (Mat_MPIAIJ*)mat->data;
2789 
2790   mat->factortype   = matin->factortype;
2791   mat->assembled    = PETSC_TRUE;
2792   mat->insertmode   = NOT_SET_VALUES;
2793   mat->preallocated = PETSC_TRUE;
2794 
2795   a->size         = oldmat->size;
2796   a->rank         = oldmat->rank;
2797   a->donotstash   = oldmat->donotstash;
2798   a->roworiented  = oldmat->roworiented;
2799   a->rowindices   = 0;
2800   a->rowvalues    = 0;
2801   a->getrowactive = PETSC_FALSE;
2802 
2803   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2804   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2805 
2806   if (oldmat->colmap) {
2807 #if defined(PETSC_USE_CTABLE)
2808     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2809 #else
2810     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2811     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2812     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2813 #endif
2814   } else a->colmap = 0;
2815   if (oldmat->garray) {
2816     PetscInt len;
2817     len  = oldmat->B->cmap->n;
2818     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2819     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2820     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2821   } else a->garray = 0;
2822 
2823   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2824   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2825   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2826   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2827 
2828   if (oldmat->Mvctx_mpi1) {
2829     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2830     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2831   }
2832 
2833   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2834   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2835   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2836   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2837   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2838   *newmat = mat;
2839   PetscFunctionReturn(0);
2840 }
2841 
2842 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2843 {
2844   PetscScalar    *vals,*svals;
2845   MPI_Comm       comm;
2846   PetscErrorCode ierr;
2847   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2848   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2849   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2850   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2851   PetscInt       cend,cstart,n,*rowners;
2852   int            fd;
2853   PetscInt       bs = newMat->rmap->bs;
2854 
2855   PetscFunctionBegin;
2856   /* force binary viewer to load .info file if it has not yet done so */
2857   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2858   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2859   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2860   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2861   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2862   if (!rank) {
2863     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2864     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2865     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2866   }
2867 
2868   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2869   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2870   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2871   if (bs < 0) bs = 1;
2872 
2873   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2874   M    = header[1]; N = header[2];
2875 
2876   /* If global sizes are set, check if they are consistent with that given in the file */
2877   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2878   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2879 
2880   /* determine ownership of all (block) rows */
2881   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2882   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2883   else m = newMat->rmap->n; /* Set by user */
2884 
2885   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2886   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2887 
2888   /* First process needs enough room for process with most rows */
2889   if (!rank) {
2890     mmax = rowners[1];
2891     for (i=2; i<=size; i++) {
2892       mmax = PetscMax(mmax, rowners[i]);
2893     }
2894   } else mmax = -1;             /* unused, but compilers complain */
2895 
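  /* convert the gathered per-rank row counts into cumulative ownership ranges: after this loop
     rank r owns global rows rowners[r] .. rowners[r+1]-1 */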
2896   rowners[0] = 0;
2897   for (i=2; i<=size; i++) {
2898     rowners[i] += rowners[i-1];
2899   }
2900   rstart = rowners[rank];
2901   rend   = rowners[rank+1];
2902 
2903   /* distribute row lengths to all processors */
2904   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2905   if (!rank) {
2906     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2907     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2908     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2909     for (j=0; j<m; j++) {
2910       procsnz[0] += ourlens[j];
2911     }
2912     for (i=1; i<size; i++) {
2913       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2914       /* calculate the number of nonzeros on each processor */
2915       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2916         procsnz[i] += rowlengths[j];
2917       }
2918       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2919     }
2920     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2921   } else {
2922     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2923   }
2924 
2925   if (!rank) {
2926     /* determine max buffer needed and allocate it */
2927     maxnz = 0;
2928     for (i=0; i<size; i++) {
2929       maxnz = PetscMax(maxnz,procsnz[i]);
2930     }
2931     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2932 
2933     /* read in my part of the matrix column indices  */
2934     nz   = procsnz[0];
2935     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2936     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2937 
2938     /* read in everyone else's part and ship it off */
2939     for (i=1; i<size; i++) {
2940       nz   = procsnz[i];
2941       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2942       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2943     }
2944     ierr = PetscFree(cols);CHKERRQ(ierr);
2945   } else {
2946     /* determine buffer space needed for message */
2947     nz = 0;
2948     for (i=0; i<m; i++) {
2949       nz += ourlens[i];
2950     }
2951     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2952 
2953     /* receive message of column indices*/
2954     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2955   }
2956 
2957   /* determine column ownership if matrix is not square */
2958   if (N != M) {
2959     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2960     else n = newMat->cmap->n;
2961     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2962     cstart = cend - n;
2963   } else {
2964     cstart = rstart;
2965     cend   = rend;
2966     n      = cend - cstart;
2967   }
2968 
2969   /* loop over local rows, determining number of off diagonal entries */
2970   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2971   jj   = 0;
2972   for (i=0; i<m; i++) {
2973     for (j=0; j<ourlens[i]; j++) {
2974       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2975       jj++;
2976     }
2977   }
2978 
2979   for (i=0; i<m; i++) {
2980     ourlens[i] -= offlens[i];
2981   }
2982   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2983 
2984   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2985 
2986   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2987 
2988   for (i=0; i<m; i++) {
2989     ourlens[i] += offlens[i];
2990   }
2991 
2992   if (!rank) {
2993     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2994 
2995     /* read in my part of the matrix numerical values  */
2996     nz   = procsnz[0];
2997     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2998 
2999     /* insert into matrix */
3000     jj      = rstart;
3001     smycols = mycols;
3002     svals   = vals;
3003     for (i=0; i<m; i++) {
3004       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3005       smycols += ourlens[i];
3006       svals   += ourlens[i];
3007       jj++;
3008     }
3009 
3010     /* read in other processors and ship out */
3011     for (i=1; i<size; i++) {
3012       nz   = procsnz[i];
3013       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3014       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3015     }
3016     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3017   } else {
3018     /* receive numeric values */
3019     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3020 
3021     /* receive message of values*/
3022     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3023 
3024     /* insert into matrix */
3025     jj      = rstart;
3026     smycols = mycols;
3027     svals   = vals;
3028     for (i=0; i<m; i++) {
3029       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3030       smycols += ourlens[i];
3031       svals   += ourlens[i];
3032       jj++;
3033     }
3034   }
3035   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3036   ierr = PetscFree(vals);CHKERRQ(ierr);
3037   ierr = PetscFree(mycols);CHKERRQ(ierr);
3038   ierr = PetscFree(rowners);CHKERRQ(ierr);
3039   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3040   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3041   PetscFunctionReturn(0);
3042 }
3043 
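/* A minimal usage sketch (not part of this source): MatLoad_MPIAIJ() above is normally reached
   through the generic MatLoad() interface with a binary viewer; the file name is illustrative.

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
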
3044 /* Not scalable because of ISAllGather() unless getting all columns. */
3045 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3046 {
3047   PetscErrorCode ierr;
3048   IS             iscol_local;
3049   PetscBool      isstride;
3050   PetscMPIInt    lisstride=0,gisstride;
3051 
3052   PetscFunctionBegin;
3053   /* check if we are grabbing all columns */
3054   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3055 
3056   if (isstride) {
3057     PetscInt  start,len,mstart,mlen;
3058     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3059     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3060     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3061     if (mstart == start && mlen-mstart == len) lisstride = 1;
3062   }
3063 
3064   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3065   if (gisstride) {
3066     PetscInt N;
3067     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3068     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3069     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3070     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3071   } else {
3072     PetscInt cbs;
3073     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3074     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3075     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3076   }
3077 
3078   *isseq = iscol_local;
3079   PetscFunctionReturn(0);
3080 }
3081 
3082 /*
3083  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3084  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3085 
3086  Input Parameters:
3087    mat - matrix
3088    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3089            i.e., mat->rstart <= isrow[i] < mat->rend
3090    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3091            i.e., mat->cstart <= iscol[i] < mat->cend
3092  Output Parameters:
3093    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3094    iscol_o - sequential column index set for retrieving mat->B
3095    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3096  */
3097 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3098 {
3099   PetscErrorCode ierr;
3100   Vec            x,cmap;
3101   const PetscInt *is_idx;
3102   PetscScalar    *xarray,*cmaparray;
3103   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3104   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3105   Mat            B=a->B;
3106   Vec            lvec=a->lvec,lcmap;
3107   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3108   MPI_Comm       comm;
3109   VecScatter     Mvctx=a->Mvctx;
3110 
3111   PetscFunctionBegin;
3112   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3113   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3114 
3115   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3116   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3117   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3118   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3119   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3120 
3121   /* Get start indices */
3122   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3123   isstart -= ncols;
3124   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3125 
3126   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3127   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3128   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3129   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3130   for (i=0; i<ncols; i++) {
3131     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3132     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3133     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3134   }
3135   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3136   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3137   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3138 
3139   /* Get iscol_d */
3140   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3141   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3142   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3143 
3144   /* Get isrow_d */
3145   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3146   rstart = mat->rmap->rstart;
3147   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3148   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3149   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3150   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3151 
3152   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3153   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3154   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3155 
3156   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3157   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3158   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3159 
3160   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3161 
3162   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3163   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3164 
3165   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3166   /* off-process column indices */
3167   count = 0;
3168   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3169   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3170 
3171   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3172   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3173   for (i=0; i<Bn; i++) {
3174     if (PetscRealPart(xarray[i]) > -1.0) {
3175       idx[count]     = i;                   /* local column index in off-diagonal part B */
3176       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3177       count++;
3178     }
3179   }
3180   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3181   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3182 
3183   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3184   /* cannot ensure iscol_o has same blocksize as iscol! */
3185 
3186   ierr = PetscFree(idx);CHKERRQ(ierr);
3187   *garray = cmap1;
3188 
3189   ierr = VecDestroy(&x);CHKERRQ(ierr);
3190   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3191   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3192   PetscFunctionReturn(0);
3193 }
3194 
3195 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3196 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3197 {
3198   PetscErrorCode ierr;
3199   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3200   Mat            M = NULL;
3201   MPI_Comm       comm;
3202   IS             iscol_d,isrow_d,iscol_o;
3203   Mat            Asub = NULL,Bsub = NULL;
3204   PetscInt       n;
3205 
3206   PetscFunctionBegin;
3207   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3208 
3209   if (call == MAT_REUSE_MATRIX) {
3210     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3211     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3212     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3213 
3214     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3215     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3216 
3217     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3218     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3219 
3220     /* Update diagonal and off-diagonal portions of submat */
3221     asub = (Mat_MPIAIJ*)(*submat)->data;
3222     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3223     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3224     if (n) {
3225       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3226     }
3227     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3228     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3229 
3230   } else { /* call == MAT_INITIAL_MATRIX */
3231     const PetscInt *garray;
3232     PetscInt        BsubN;
3233 
3234     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3235     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3236 
3237     /* Create local submatrices Asub and Bsub */
3238     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3239     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3240 
3241     /* Create submatrix M */
3242     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3243 
3244     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3245     asub = (Mat_MPIAIJ*)M->data;
3246 
3247     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3248     n = asub->B->cmap->N;
3249     if (BsubN > n) {
3250       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3251       const PetscInt *idx;
3252       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3253       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3254 
3255       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3256       j = 0;
3257       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3258       for (i=0; i<n; i++) {
3259         if (j >= BsubN) break;
3260         while (subgarray[i] > garray[j]) j++;
3261 
3262         if (subgarray[i] == garray[j]) {
3263           idx_new[i] = idx[j++];
3264         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3265       }
3266       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3267 
3268       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3269       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3270 
3271     } else if (BsubN < n) {
3272       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than B's %D",BsubN,asub->B->cmap->N);
3273     }
3274 
3275     ierr = PetscFree(garray);CHKERRQ(ierr);
3276     *submat = M;
3277 
3278     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3279     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3280     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3281 
3282     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3283     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3284 
3285     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3286     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3287   }
3288   PetscFunctionReturn(0);
3289 }
3290 
3291 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3292 {
3293   PetscErrorCode ierr;
3294   IS             iscol_local=NULL,isrow_d;
3295   PetscInt       csize;
3296   PetscInt       n,i,j,start,end;
3297   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3298   MPI_Comm       comm;
3299 
3300   PetscFunctionBegin;
3301   /* If isrow has same processor distribution as mat,
3302      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3303   if (call == MAT_REUSE_MATRIX) {
3304     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3305     if (isrow_d) {
3306       sameRowDist  = PETSC_TRUE;
3307       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3308     } else {
3309       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3310       if (iscol_local) {
3311         sameRowDist  = PETSC_TRUE;
3312         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3313       }
3314     }
3315   } else {
3316     /* Check if isrow has same processor distribution as mat */
3317     sameDist[0] = PETSC_FALSE;
3318     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3319     if (!n) {
3320       sameDist[0] = PETSC_TRUE;
3321     } else {
3322       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3323       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3324       if (i >= start && j < end) {
3325         sameDist[0] = PETSC_TRUE;
3326       }
3327     }
3328 
3329     /* Check if iscol has same processor distribution as mat */
3330     sameDist[1] = PETSC_FALSE;
3331     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3332     if (!n) {
3333       sameDist[1] = PETSC_TRUE;
3334     } else {
3335       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3336       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3337       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3338     }
3339 
3340     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3341     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3342     sameRowDist = tsameDist[0];
3343   }
3344 
3345   if (sameRowDist) {
3346     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3347       /* isrow and iscol have same processor distribution as mat */
3348       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3349       PetscFunctionReturn(0);
3350     } else { /* sameRowDist */
3351       /* isrow has same processor distribution as mat */
3352       if (call == MAT_INITIAL_MATRIX) {
3353         PetscBool sorted;
3354         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3355         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3356         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3357         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3358 
3359         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3360         if (sorted) {
3361           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it may contain duplicate indices */
3362           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3363           PetscFunctionReturn(0);
3364         }
3365       } else { /* call == MAT_REUSE_MATRIX */
3366         IS    iscol_sub;
3367         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3368         if (iscol_sub) {
3369           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3370           PetscFunctionReturn(0);
3371         }
3372       }
3373     }
3374   }
3375 
3376   /* General case: iscol -> iscol_local which has global size of iscol */
3377   if (call == MAT_REUSE_MATRIX) {
3378     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3379     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3380   } else {
3381     if (!iscol_local) {
3382       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3383     }
3384   }
3385 
3386   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3387   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3388 
3389   if (call == MAT_INITIAL_MATRIX) {
3390     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3391     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3392   }
3393   PetscFunctionReturn(0);
3394 }
3395 
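/* A minimal usage sketch (not part of this source): MatCreateSubMatrix_MPIAIJ() above is reached
   through the generic MatCreateSubMatrix(); the first call builds the submatrix and caches the
   index data on it, and a later call with MAT_REUSE_MATRIX refills the same submatrix. isrow and
   iscol are assumed to be index sets defined on the same communicator as mat.

     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
        ... change values in mat, keeping the same nonzero pattern ...
     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/
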
3396 /*@C
3397      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3398          and "off-diagonal" part of the matrix in CSR format.
3399 
3400    Collective on MPI_Comm
3401 
3402    Input Parameters:
3403 +  comm - MPI communicator
3404 .  A - "diagonal" portion of matrix
3405 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3406 -  garray - global index of B columns
3407 
3408    Output Parameter:
3409 .   mat - the matrix, with input A as its local diagonal matrix
3410    Level: advanced
3411 
3412    Notes:
3413        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3414        A becomes part of the output mat and B is destroyed by this routine; the user must not use A or B afterwards.
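
       An illustrative call (a sketch, not taken from the PETSc manual; assumes Adiag and Boff are MATSEQAIJ
       matrices laid out as described above and garr[] holds the global column indices of Boff's columns):
         ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Adiag,Boff,garr,&C);CHKERRQ(ierr);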
3415 
3416 .seealso: MatCreateMPIAIJWithSplitArrays()
3417 @*/
3418 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3419 {
3420   PetscErrorCode ierr;
3421   Mat_MPIAIJ     *maij;
3422   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3423   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3424   PetscScalar    *oa=b->a;
3425   Mat            Bnew;
3426   PetscInt       m,n,N;
3427 
3428   PetscFunctionBegin;
3429   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3430   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3431   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3432   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3433   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3434   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3435 
3436   /* Get global columns of mat */
3437   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3438 
3439   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3440   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3441   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3442   maij = (Mat_MPIAIJ*)(*mat)->data;
3443 
3444   (*mat)->preallocated = PETSC_TRUE;
3445 
3446   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3447   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3448 
3449   /* Set A as diagonal portion of *mat */
3450   maij->A = A;
3451 
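  /* translate B's compact local column indices into global column indices via garray so that Bnew
     below can be created with the full global column dimension N; the assembly at the end of this
     routine compresses the columns of maij->B back into a local numbering */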
3452   nz = oi[m];
3453   for (i=0; i<nz; i++) {
3454     col   = oj[i];
3455     oj[i] = garray[col];
3456   }
3457 
3458    /* Set Bnew as off-diagonal portion of *mat */
3459   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3460   bnew        = (Mat_SeqAIJ*)Bnew->data;
3461   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3462   maij->B     = Bnew;
3463 
3464   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3465 
3466   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3467   b->free_a       = PETSC_FALSE;
3468   b->free_ij      = PETSC_FALSE;
3469   ierr = MatDestroy(&B);CHKERRQ(ierr);
3470 
3471   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3472   bnew->free_a       = PETSC_TRUE;
3473   bnew->free_ij      = PETSC_TRUE;
3474 
3475   /* condense columns of maij->B */
3476   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3477   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3478   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3479   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3480   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3481   PetscFunctionReturn(0);
3482 }
3483 
3484 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3485 
3486 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3487 {
3488   PetscErrorCode ierr;
3489   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3490   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3491   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3492   Mat            M,Msub,B=a->B;
3493   MatScalar      *aa;
3494   Mat_SeqAIJ     *aij;
3495   PetscInt       *garray = a->garray,*colsub,Ncols;
3496   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3497   IS             iscol_sub,iscmap;
3498   const PetscInt *is_idx,*cmap;
3499   PetscBool      allcolumns=PETSC_FALSE;
3500   MPI_Comm       comm;
3501 
3502   PetscFunctionBegin;
3503   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3504 
3505   if (call == MAT_REUSE_MATRIX) {
3506     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3507     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3508     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3509 
3510     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3511     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3512 
3513     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3514     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3515 
3516     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3517 
3518   } else { /* call == MAT_INITIAL_MATRIX */
3519     PetscBool flg;
3520 
3521     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3522     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3523 
3524     /* (1) iscol -> nonscalable iscol_local */
3525     /* Check for special case: each processor gets entire matrix columns */
3526     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3527     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3528     if (allcolumns) {
3529       iscol_sub = iscol_local;
3530       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3531       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3532 
3533     } else {
3534       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3535       PetscInt *idx,*cmap1,k;
3536       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3537       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3538       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3539       count = 0;
3540       k     = 0;
3541       for (i=0; i<Ncols; i++) {
3542         j = is_idx[i];
3543         if (j >= cstart && j < cend) {
3544           /* diagonal part of mat */
3545           idx[count]     = j;
3546           cmap1[count++] = i; /* column index in submat */
3547         } else if (Bn) {
3548           /* off-diagonal part of mat */
3549           if (j == garray[k]) {
3550             idx[count]     = j;
3551             cmap1[count++] = i;  /* column index in submat */
3552           } else if (j > garray[k]) {
3553             while (j > garray[k] && k < Bn-1) k++;
3554             if (j == garray[k]) {
3555               idx[count]     = j;
3556               cmap1[count++] = i; /* column index in submat */
3557             }
3558           }
3559         }
3560       }
3561       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3562 
3563       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3564       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3565       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3566 
3567       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3568     }
3569 
3570     /* (3) Create sequential Msub */
3571     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3572   }
3573 
3574   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3575   aij  = (Mat_SeqAIJ*)(Msub)->data;
3576   ii   = aij->i;
3577   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3578 
3579   /*
3580       m - number of local rows
3581       Ncols - number of columns (same on all processors)
3582       rstart - first row in new global matrix generated
3583   */
3584   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3585 
3586   if (call == MAT_INITIAL_MATRIX) {
3587     /* (4) Create parallel newmat */
3588     PetscMPIInt    rank,size;
3589     PetscInt       csize;
3590 
3591     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3592     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3593 
3594     /*
3595         Determine the number of non-zeros in the diagonal and off-diagonal
3596         portions of the matrix in order to do correct preallocation
3597     */
3598 
3599     /* first get start and end of "diagonal" columns */
3600     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3601     if (csize == PETSC_DECIDE) {
3602       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3603       if (mglobal == Ncols) { /* square matrix */
3604         nlocal = m;
3605       } else {
3606         nlocal = Ncols/size + ((Ncols % size) > rank);
3607       }
3608     } else {
3609       nlocal = csize;
3610     }
3611     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3612     rstart = rend - nlocal;
3613     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3614 
3615     /* next, compute all the lengths */
3616     jj    = aij->j;
3617     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3618     olens = dlens + m;
3619     for (i=0; i<m; i++) {
3620       jend = ii[i+1] - ii[i];
3621       olen = 0;
3622       dlen = 0;
3623       for (j=0; j<jend; j++) {
3624         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3625         else dlen++;
3626         jj++;
3627       }
3628       olens[i] = olen;
3629       dlens[i] = dlen;
3630     }
3631 
3632     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3633     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3634 
3635     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3636     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3637     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3638     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3639     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3640     ierr = PetscFree(dlens);CHKERRQ(ierr);
3641 
3642   } else { /* call == MAT_REUSE_MATRIX */
3643     M    = *newmat;
3644     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3645     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3646     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3647     /*
3648          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3649        rather than the slower MatSetValues().
3650     */
3651     M->was_assembled = PETSC_TRUE;
3652     M->assembled     = PETSC_FALSE;
3653   }
3654 
3655   /* (5) Set values of Msub to *newmat */
3656   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3657   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3658 
3659   jj   = aij->j;
3660   aa   = aij->a;
3661   for (i=0; i<m; i++) {
3662     row = rstart + i;
3663     nz  = ii[i+1] - ii[i];
3664     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3665     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3666     jj += nz; aa += nz;
3667   }
3668   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3669 
3670   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3671   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3672 
3673   ierr = PetscFree(colsub);CHKERRQ(ierr);
3674 
3675   /* save Msub, iscol_sub and iscmap used in processor for next request */
3676   if (call ==  MAT_INITIAL_MATRIX) {
3677     *newmat = M;
3678     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3679     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3680 
3681     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3682     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3683 
3684     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3685     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3686 
3687     if (iscol_local) {
3688       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3689       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3690     }
3691   }
3692   PetscFunctionReturn(0);
3693 }
3694 
3695 /*
3696     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3697   process, and then the end result is formed by concatenating the local matrices.
3698   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3699 
3700   Note: This requires a sequential iscol with all indices.
3701 */
3702 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3703 {
3704   PetscErrorCode ierr;
3705   PetscMPIInt    rank,size;
3706   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3707   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3708   Mat            M,Mreuse;
3709   MatScalar      *aa,*vwork;
3710   MPI_Comm       comm;
3711   Mat_SeqAIJ     *aij;
3712   PetscBool      colflag,allcolumns=PETSC_FALSE;
3713 
3714   PetscFunctionBegin;
3715   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3716   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3717   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3718 
3719   /* Check for special case: each processor gets entire matrix columns */
3720   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3721   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3722   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3723 
3724   if (call ==  MAT_REUSE_MATRIX) {
3725     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3726     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3727     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3728   } else {
3729     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3730   }
3731 
3732   /*
3733       m - number of local rows
3734       n - number of columns (same on all processors)
3735       rstart - first row in new global matrix generated
3736   */
3737   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3738   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3739   if (call == MAT_INITIAL_MATRIX) {
3740     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3741     ii  = aij->i;
3742     jj  = aij->j;
3743 
3744     /*
3745         Determine the number of non-zeros in the diagonal and off-diagonal
3746         portions of the matrix in order to do correct preallocation
3747     */
3748 
3749     /* first get start and end of "diagonal" columns */
3750     if (csize == PETSC_DECIDE) {
3751       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3752       if (mglobal == n) { /* square matrix */
3753         nlocal = m;
3754       } else {
3755         nlocal = n/size + ((n % size) > rank);
3756       }
3757     } else {
3758       nlocal = csize;
3759     }
3760     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3761     rstart = rend - nlocal;
3762     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3763 
3764     /* next, compute all the lengths */
3765     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3766     olens = dlens + m;
3767     for (i=0; i<m; i++) {
3768       jend = ii[i+1] - ii[i];
3769       olen = 0;
3770       dlen = 0;
3771       for (j=0; j<jend; j++) {
3772         if (*jj < rstart || *jj >= rend) olen++;
3773         else dlen++;
3774         jj++;
3775       }
3776       olens[i] = olen;
3777       dlens[i] = dlen;
3778     }
3779     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3780     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3781     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3782     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3783     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3784     ierr = PetscFree(dlens);CHKERRQ(ierr);
3785   } else {
3786     PetscInt ml,nl;
3787 
3788     M    = *newmat;
3789     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3790     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3791     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3792     /*
3793          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3794        rather than the slower MatSetValues().
3795     */
3796     M->was_assembled = PETSC_TRUE;
3797     M->assembled     = PETSC_FALSE;
3798   }
3799   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3800   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3801   ii   = aij->i;
3802   jj   = aij->j;
3803   aa   = aij->a;
3804   for (i=0; i<m; i++) {
3805     row   = rstart + i;
3806     nz    = ii[i+1] - ii[i];
3807     cwork = jj;     jj += nz;
3808     vwork = aa;     aa += nz;
3809     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3810   }
3811 
3812   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3813   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3814   *newmat = M;
3815 
3816   /* save submatrix used in processor for next request */
3817   if (call ==  MAT_INITIAL_MATRIX) {
3818     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3819     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3820   }
3821   PetscFunctionReturn(0);
3822 }
3823 
3824 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3825 {
3826   PetscInt       m,cstart, cend,j,nnz,i,d;
3827   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3828   const PetscInt *JJ;
3829   PetscScalar    *values;
3830   PetscErrorCode ierr;
3831   PetscBool      nooffprocentries;
3832 
3833   PetscFunctionBegin;
3834   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3835 
3836   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3837   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3838   m      = B->rmap->n;
3839   cstart = B->cmap->rstart;
3840   cend   = B->cmap->rend;
3841   rstart = B->rmap->rstart;
3842 
3843   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3844 
3845 #if defined(PETSC_USE_DEBUG)
3846   for (i=0; i<m; i++) {
3847     nnz = Ii[i+1]- Ii[i];
3848     JJ  = J + Ii[i];
3849     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3850     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3851     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3852   }
3853 #endif
3854 
3855   for (i=0; i<m; i++) {
3856     nnz     = Ii[i+1]- Ii[i];
3857     JJ      = J + Ii[i];
3858     nnz_max = PetscMax(nnz_max,nnz);
3859     d       = 0;
3860     for (j=0; j<nnz; j++) {
3861       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3862     }
3863     d_nnz[i] = d;
3864     o_nnz[i] = nnz - d;
3865   }
3866   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3867   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3868 
3869   if (v) values = (PetscScalar*)v;
3870   else {
3871     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3872   }
3873 
3874   for (i=0; i<m; i++) {
3875     ii   = i + rstart;
3876     nnz  = Ii[i+1]- Ii[i];
3877     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3878   }
3879   nooffprocentries    = B->nooffprocentries;
3880   B->nooffprocentries = PETSC_TRUE;
3881   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3882   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3883   B->nooffprocentries = nooffprocentries;
3884 
3885   if (!v) {
3886     ierr = PetscFree(values);CHKERRQ(ierr);
3887   }
3888   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3889   PetscFunctionReturn(0);
3890 }
3891 
3892 /*@
3893    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3894    (the default parallel PETSc format).
3895 
3896    Collective on MPI_Comm
3897 
3898    Input Parameters:
3899 +  B - the matrix
3900 .  i - the indices into j for the start of each local row (starts with zero)
3901 .  j - the column indices for each local row (starts with zero)
3902 -  v - optional values in the matrix
3903 
3904    Level: developer
3905 
3906    Notes:
3907        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3908      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3909      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3910 
3911       The i and j indices are 0 based; the entries of i are offsets into the local j (and v) arrays.
3912 
3913       The format used for the sparse matrix input is equivalent to a
3914     row-major ordering, i.e. for the following matrix, the input data expected is
3915     as shown:
3916 
3917 $        1 0 0
3918 $        2 0 3     P0
3919 $       -------
3920 $        4 5 6     P1
3921 $
3922 $     Process0 [P0]: rows_owned=[0,1]
3923 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3924 $        j =  {0,0,2}  [size = 3]
3925 $        v =  {1,2,3}  [size = 3]
3926 $
3927 $     Process1 [P1]: rows_owned=[2]
3928 $        i =  {0,3}    [size = nrow+1  = 1+1]
3929 $        j =  {0,1,2}  [size = 3]
3930 $        v =  {4,5,6}  [size = 3]
3931 
3932 .keywords: matrix, aij, compressed row, sparse, parallel
3933 
3934 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3935           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3936 @*/
3937 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3938 {
3939   PetscErrorCode ierr;
3940 
3941   PetscFunctionBegin;
3942   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3943   PetscFunctionReturn(0);
3944 }
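/*
   Illustrative sketch, not part of the library: how the i, j, v arrays from the
   MatMPIAIJSetPreallocationCSR() man page above map onto local rows.  The helper
   name ExampleSetPreallocationCSR and the hard-coded arrays are hypothetical, and
   the sketch assumes the communicator has exactly two processes (rank 0 owns rows
   0-1, rank 1 owns row 2 of the global 3x3 matrix).
*/
static PetscErrorCode ExampleSetPreallocationCSR(MPI_Comm comm)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  Mat               A;
  const PetscInt    i0[] = {0,1,3},  j0[] = {0,0,2};    /* rank 0: rows 0 and 1 */
  const PetscInt    i1[] = {0,3},    j1[] = {0,1,2};    /* rank 1: row 2        */
  const PetscScalar v0[] = {1,2,3},  v1[] = {4,5,6};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  if (!rank) {ierr = MatMPIAIJSetPreallocationCSR(A,i0,j0,v0);CHKERRQ(ierr);}
  else       {ierr = MatMPIAIJSetPreallocationCSR(A,i1,j1,v1);CHKERRQ(ierr);}
  /* the routine preallocates, inserts the values, and assembles; A is ready for use */
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}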
3945 
3946 /*@C
3947    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3948    (the default parallel PETSc format).  For good matrix assembly performance
3949    the user should preallocate the matrix storage by setting the parameters
3950    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3951    performance can be increased by more than a factor of 50.
3952 
3953    Collective on MPI_Comm
3954 
3955    Input Parameters:
3956 +  B - the matrix
3957 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3958            (same value is used for all local rows)
3959 .  d_nnz - array containing the number of nonzeros in the various rows of the
3960            DIAGONAL portion of the local submatrix (possibly different for each row)
3961            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3962            The size of this array is equal to the number of local rows, i.e 'm'.
3963            For matrices that will be factored, you must leave room for (and set)
3964            the diagonal entry even if it is zero.
3965 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3966            submatrix (same value is used for all local rows).
3967 -  o_nnz - array containing the number of nonzeros in the various rows of the
3968            OFF-DIAGONAL portion of the local submatrix (possibly different for
3969            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3970            structure. The size of this array is equal to the number
3971            of local rows, i.e 'm'.
3972 
3973    If the *_nnz parameter is given then the *_nz parameter is ignored
3974 
3975    The AIJ format (also called the Yale sparse matrix format or
3976    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3977    storage.  The stored row and column indices begin with zero.
3978    See Users-Manual: ch_mat for details.
3979 
3980    The parallel matrix is partitioned such that the first m0 rows belong to
3981    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3982    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3983 
3984    The DIAGONAL portion of the local submatrix of a processor can be defined
3985    as the submatrix obtained by extracting the part corresponding to
3986    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3987    first row that belongs to the processor, r2 is the last row belonging to
3988    this processor, and c1-c2 is the range of indices of the local part of a
3989    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3990    common case of a square matrix, the row and column ranges are the same and
3991    the DIAGONAL part is also square. The remaining portion of the local
3992    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3993 
3994    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3995 
3996    You can call MatGetInfo() to get information on how effective the preallocation was;
3997    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3998    You can also run with the option -info and look for messages with the string
3999    malloc in them to see if additional memory allocation was needed.
4000 
4001    Example usage:
4002 
4003    Consider the following 8x8 matrix with 34 non-zero values, that is
4004    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4005    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4006    as follows:
4007 
4008 .vb
4009             1  2  0  |  0  3  0  |  0  4
4010     Proc0   0  5  6  |  7  0  0  |  8  0
4011             9  0 10  | 11  0  0  | 12  0
4012     -------------------------------------
4013            13  0 14  | 15 16 17  |  0  0
4014     Proc1   0 18  0  | 19 20 21  |  0  0
4015             0  0  0  | 22 23  0  | 24  0
4016     -------------------------------------
4017     Proc2  25 26 27  |  0  0 28  | 29  0
4018            30  0  0  | 31 32 33  |  0 34
4019 .ve
4020 
4021    This can be represented as a collection of submatrices as:
4022 
4023 .vb
4024       A B C
4025       D E F
4026       G H I
4027 .ve
4028 
4029    Where the submatrices A,B,C are owned by proc0, D,E,F are
4030    owned by proc1, G,H,I are owned by proc2.
4031 
4032    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4033    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4034    The 'M','N' parameters are 8,8, and have the same values on all procs.
4035 
4036    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4037    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4038    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4039    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4040    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4041    matrix, and [DF] as another SeqAIJ matrix.
4042 
4043    When d_nz, o_nz parameters are specified, d_nz storage elements are
4044    allocated for every row of the local diagonal submatrix, and o_nz
4045    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4046    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4047    row in the local DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4048    In this case, the values of d_nz,o_nz are:
4049 .vb
4050      proc0 : dnz = 2, o_nz = 2
4051      proc1 : dnz = 3, o_nz = 2
4052      proc2 : dnz = 1, o_nz = 4
4053 .ve
4054    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4055    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4056    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4057    34 values.
4058 
4059    When d_nnz, o_nnz parameters are specified, the storage is specified
4060    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4061    In the above case the values for d_nnz,o_nnz are:
4062 .vb
4063      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4064      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4065      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4066 .ve
4067    Here the space allocated is the sum of all the above values, i.e. 34, and
4068    hence pre-allocation is perfect.
4069 
4070    Level: intermediate
4071 
4072 .keywords: matrix, aij, compressed row, sparse, parallel
4073 
4074 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4075           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4076 @*/
4077 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4078 {
4079   PetscErrorCode ierr;
4080 
4081   PetscFunctionBegin;
4082   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4083   PetscValidType(B,1);
4084   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4085   PetscFunctionReturn(0);
4086 }
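/*
   Illustrative sketch, not part of the library: the typical calling sequence around
   MatMPIAIJSetPreallocation() described above.  The uniform per-row counts used here
   (2 diagonal and 1 off-diagonal nonzeros in every local row) are placeholders; a real
   application would compute d_nnz/o_nnz from its stencil or mesh connectivity.
*/
static PetscErrorCode ExamplePreallocateMPIAIJ(MPI_Comm comm,PetscInt mlocal,PetscInt Nglobal,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       i,*d_nnz,*o_nnz;

  PetscFunctionBegin;
  ierr = PetscMalloc2(mlocal,&d_nnz,mlocal,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<mlocal; i++) {d_nnz[i] = 2; o_nnz[i] = 1;}  /* placeholder counts */
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mlocal,PETSC_DECIDE,PETSC_DETERMINE,Nglobal);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,0,d_nnz,0,o_nnz);CHKERRQ(ierr); /* d_nz/o_nz ignored since the arrays are given */
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
  /* ... insert entries with MatSetValues() and call MatAssemblyBegin()/MatAssemblyEnd() ... */
  PetscFunctionReturn(0);
}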
4087 
4088 /*@
4089      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4090          CSR format the local rows.
4091 
4092    Collective on MPI_Comm
4093 
4094    Input Parameters:
4095 +  comm - MPI communicator
4096 .  m - number of local rows (Cannot be PETSC_DECIDE)
4097 .  n - This value should be the same as the local size used in creating the
4098        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4099        calculated if N is given). For square matrices n is almost always m.
4100 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4101 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4102 .   i - row indices
4103 .   j - column indices
4104 -   a - matrix values
4105 
4106    Output Parameter:
4107 .   mat - the matrix
4108 
4109    Level: intermediate
4110 
4111    Notes:
4112        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4113      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4114      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4115 
4116       The i and j indices are 0 based; the entries of i are offsets into the local j (and a) arrays.
4117 
4118       The format used for the sparse matrix input is equivalent to a
4119     row-major ordering, i.e. for the following matrix, the input data expected is
4120     as shown:
4121 
4122 $        1 0 0
4123 $        2 0 3     P0
4124 $       -------
4125 $        4 5 6     P1
4126 $
4127 $     Process0 [P0]: rows_owned=[0,1]
4128 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4129 $        j =  {0,0,2}  [size = 3]
4130 $        v =  {1,2,3}  [size = 3]
4131 $
4132 $     Process1 [P1]: rows_owned=[2]
4133 $        i =  {0,3}    [size = nrow+1  = 1+1]
4134 $        j =  {0,1,2}  [size = 3]
4135 $        v =  {4,5,6}  [size = 3]
4136 
4137 .keywords: matrix, aij, compressed row, sparse, parallel
4138 
4139 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4140           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4141 @*/
4142 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4143 {
4144   PetscErrorCode ierr;
4145 
4146   PetscFunctionBegin;
4147   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4148   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4149   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4150   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4151   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4152   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4153   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4154   PetscFunctionReturn(0);
4155 }
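/*
   Illustrative sketch, not part of the library: the single-call variant documented
   above.  mlocal, Nglobal and the ilocal/jlocal/alocal CSR arrays are placeholders
   for data the application already owns; since the arrays are copied, they can be
   freed or reused immediately after the call.
*/
static PetscErrorCode ExampleCreateFromLocalCSR(MPI_Comm comm,PetscInt mlocal,PetscInt Nglobal,const PetscInt ilocal[],const PetscInt jlocal[],const PetscScalar alocal[],Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateMPIAIJWithArrays(comm,mlocal,PETSC_DECIDE,PETSC_DETERMINE,Nglobal,ilocal,jlocal,alocal,A);CHKERRQ(ierr);
  /* *A is assembled and ready; ilocal/jlocal/alocal may now be freed by the caller */
  PetscFunctionReturn(0);
}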
4156 
4157 /*@C
4158    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4159    (the default parallel PETSc format).  For good matrix assembly performance
4160    the user should preallocate the matrix storage by setting the parameters
4161    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4162    performance can be increased by more than a factor of 50.
4163 
4164    Collective on MPI_Comm
4165 
4166    Input Parameters:
4167 +  comm - MPI communicator
4168 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4169            This value should be the same as the local size used in creating the
4170            y vector for the matrix-vector product y = Ax.
4171 .  n - This value should be the same as the local size used in creating the
4172        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4173        calculated if N is given). For square matrices n is almost always m.
4174 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4175 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4176 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4177            (same value is used for all local rows)
4178 .  d_nnz - array containing the number of nonzeros in the various rows of the
4179            DIAGONAL portion of the local submatrix (possibly different for each row)
4180            or NULL, if d_nz is used to specify the nonzero structure.
4181            The size of this array is equal to the number of local rows, i.e 'm'.
4182 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4183            submatrix (same value is used for all local rows).
4184 -  o_nnz - array containing the number of nonzeros in the various rows of the
4185            OFF-DIAGONAL portion of the local submatrix (possibly different for
4186            each row) or NULL, if o_nz is used to specify the nonzero
4187            structure. The size of this array is equal to the number
4188            of local rows, i.e 'm'.
4189 
4190    Output Parameter:
4191 .  A - the matrix
4192 
4193    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4194    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4195    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4196 
4197    Notes:
4198    If the *_nnz parameter is given then the *_nz parameter is ignored
4199 
4200    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4201    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4202    storage requirements for this matrix.
4203 
4204    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4205    processor then it must be used on all processors that share the object for
4206    that argument.
4207 
4208    The user MUST specify either the local or global matrix dimensions
4209    (possibly both).
4210 
4211    The parallel matrix is partitioned across processors such that the
4212    first m0 rows belong to process 0, the next m1 rows belong to
4213    process 1, the next m2 rows belong to process 2, etc., where
4214    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4215    values corresponding to an [m x N] submatrix.
4216 
4217    The columns are logically partitioned with the n0 columns belonging
4218    to the 0th partition, the next n1 columns belonging to the next
4219    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4220 
4221    The DIAGONAL portion of the local submatrix on any given processor
4222    is the submatrix spanning the rows and columns m,n owned by
4223    the given processor, i.e. the diagonal matrix on
4224    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4225    etc. The remaining portion of the local submatrix [m x (N-n)]
4226    constitutes the OFF-DIAGONAL portion. The example below better
4227    illustrates this concept.
4228 
4229    For a square global matrix we define each processor's diagonal portion
4230    to be its local rows and the corresponding columns (a square submatrix);
4231    each processor's off-diagonal portion encompasses the remainder of the
4232    local matrix (a rectangular submatrix).
4233 
4234    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4235 
4236    When calling this routine with a single process communicator, a matrix of
4237    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4238    type of communicator, use the construction mechanism
4239 .vb
4240      MatCreate(...,&A);
4241      MatSetType(A,MATMPIAIJ);
4242      MatSetSizes(A, m,n,M,N);
4243      MatMPIAIJSetPreallocation(A,...);
4244 .ve
4247 
4248    By default, this format uses inodes (identical nodes) when possible.
4249    We search for consecutive rows with the same nonzero structure, thereby
4250    reusing matrix information to achieve increased efficiency.
4251 
4252    Options Database Keys:
4253 +  -mat_no_inode  - Do not use inodes
4254 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4255 
4258    Example usage:
4259 
4260    Consider the following 8x8 matrix with 34 non-zero values, that is
4261    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4262    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4263    as follows
4264 
4265 .vb
4266             1  2  0  |  0  3  0  |  0  4
4267     Proc0   0  5  6  |  7  0  0  |  8  0
4268             9  0 10  | 11  0  0  | 12  0
4269     -------------------------------------
4270            13  0 14  | 15 16 17  |  0  0
4271     Proc1   0 18  0  | 19 20 21  |  0  0
4272             0  0  0  | 22 23  0  | 24  0
4273     -------------------------------------
4274     Proc2  25 26 27  |  0  0 28  | 29  0
4275            30  0  0  | 31 32 33  |  0 34
4276 .ve
4277 
4278    This can be represented as a collection of submatrices as
4279 
4280 .vb
4281       A B C
4282       D E F
4283       G H I
4284 .ve
4285 
4286    Where the submatrices A,B,C are owned by proc0, D,E,F are
4287    owned by proc1, G,H,I are owned by proc2.
4288 
4289    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4290    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4291    The 'M','N' parameters are 8,8, and have the same values on all procs.
4292 
4293    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4294    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4295    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4296    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4297    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4298    matrix, and [DF] as another SeqAIJ matrix.
4299 
4300    When d_nz, o_nz parameters are specified, d_nz storage elements are
4301    allocated for every row of the local diagonal submatrix, and o_nz
4302    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4303    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4304    row in the local DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4305    In this case, the values of d_nz,o_nz are
4306 .vb
4307      proc0 : dnz = 2, o_nz = 2
4308      proc1 : dnz = 3, o_nz = 2
4309      proc2 : dnz = 1, o_nz = 4
4310 .ve
4311    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4312    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4313    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4314    34 values.
4315 
4316    When d_nnz, o_nnz parameters are specified, the storage is specified
4317    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4318    In the above case the values for d_nnz,o_nnz are
4319 .vb
4320      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4321      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4322      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4323 .ve
4324    Here the space allocated is the sum of all the above values, i.e. 34, and
4325    hence pre-allocation is perfect.
4326 
4327    Level: intermediate
4328 
4329 .keywords: matrix, aij, compressed row, sparse, parallel
4330 
4331 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4332           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4333 @*/
4334 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4335 {
4336   PetscErrorCode ierr;
4337   PetscMPIInt    size;
4338 
4339   PetscFunctionBegin;
4340   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4341   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4342   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4343   if (size > 1) {
4344     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4345     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4346   } else {
4347     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4348     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4349   }
4350   PetscFunctionReturn(0);
4351 }
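/*
   Illustrative sketch, not part of the library: creating the 8x8 matrix from the
   example in the MatCreateAIJ() man page above, using the per-process d_nz/o_nz
   values listed there.  The helper assumes exactly three processes; the entries
   would then be inserted with MatSetValues() and the matrix assembled as usual.
*/
static PetscErrorCode ExampleCreateAIJ8x8(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  const PetscInt mloc[3] = {3,3,2},nloc[3] = {3,3,2},dnz[3] = {2,3,1},onz[3] = {2,2,4};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreateAIJ(comm,mloc[rank],nloc[rank],8,8,dnz[rank],NULL,onz[rank],NULL,A);CHKERRQ(ierr);
  /* ... MatSetValues() / MatAssemblyBegin() / MatAssemblyEnd() ... */
  PetscFunctionReturn(0);
}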
4352 
4353 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4354 {
4355   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4356   PetscBool      flg;
4357   PetscErrorCode ierr;
4358 
4359   PetscFunctionBegin;
4360   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4361   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4362   if (Ad)     *Ad     = a->A;
4363   if (Ao)     *Ao     = a->B;
4364   if (colmap) *colmap = a->garray;
4365   PetscFunctionReturn(0);
4366 }
4367 
4368 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4369 {
4370   PetscErrorCode ierr;
4371   PetscInt       m,N,i,rstart,nnz,Ii;
4372   PetscInt       *indx;
4373   PetscScalar    *values;
4374 
4375   PetscFunctionBegin;
4376   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4377   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4378     PetscInt       *dnz,*onz,sum,bs,cbs;
4379 
4380     if (n == PETSC_DECIDE) {
4381       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4382     }
4383     /* Check sum(n) = N */
4384     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4385     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4386 
4387     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4388     rstart -= m;
4389 
4390     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4391     for (i=0; i<m; i++) {
4392       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4393       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4394       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4395     }
4396 
4397     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4398     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4399     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4400     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4401     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4402     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4403     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4404     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4405   }
4406 
4407   /* numeric phase */
4408   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4409   for (i=0; i<m; i++) {
4410     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4411     Ii   = i + rstart;
4412     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4413     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4414   }
4415   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4416   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4417   PetscFunctionReturn(0);
4418 }
4419 
4420 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4421 {
4422   PetscErrorCode    ierr;
4423   PetscMPIInt       rank;
4424   PetscInt          m,N,i,rstart,nnz;
4425   size_t            len;
4426   const PetscInt    *indx;
4427   PetscViewer       out;
4428   char              *name;
4429   Mat               B;
4430   const PetscScalar *values;
4431 
4432   PetscFunctionBegin;
4433   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4434   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4435   /* Should this be the type of the diagonal block of A? */
4436   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4437   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4438   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4439   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4440   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4441   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4442   for (i=0; i<m; i++) {
4443     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4444     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4445     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4446   }
4447   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4448   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4449 
4450   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4451   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4452   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4453   sprintf(name,"%s.%d",outfile,rank);
4454   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4455   ierr = PetscFree(name);CHKERRQ(ierr);
4456   ierr = MatView(B,out);CHKERRQ(ierr);
4457   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4458   ierr = MatDestroy(&B);CHKERRQ(ierr);
4459   PetscFunctionReturn(0);
4460 }
4461 
4462 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4463 {
4464   PetscErrorCode      ierr;
4465   Mat_Merge_SeqsToMPI *merge;
4466   PetscContainer      container;
4467 
4468   PetscFunctionBegin;
4469   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4470   if (container) {
4471     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4472     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4473     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4474     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4475     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4476     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4477     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4478     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4479     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4480     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4481     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4482     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4483     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4484     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4485     ierr = PetscFree(merge);CHKERRQ(ierr);
4486     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4487   }
4488   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4489   PetscFunctionReturn(0);
4490 }
4491 
4492 #include <../src/mat/utils/freespace.h>
4493 #include <petscbt.h>
4494 
4495 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4496 {
4497   PetscErrorCode      ierr;
4498   MPI_Comm            comm;
4499   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4500   PetscMPIInt         size,rank,taga,*len_s;
4501   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4502   PetscInt            proc,m;
4503   PetscInt            **buf_ri,**buf_rj;
4504   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4505   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4506   MPI_Request         *s_waits,*r_waits;
4507   MPI_Status          *status;
4508   MatScalar           *aa=a->a;
4509   MatScalar           **abuf_r,*ba_i;
4510   Mat_Merge_SeqsToMPI *merge;
4511   PetscContainer      container;
4512 
4513   PetscFunctionBegin;
4514   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4515   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4516 
4517   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4518   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4519 
4520   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4521   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4522 
4523   bi     = merge->bi;
4524   bj     = merge->bj;
4525   buf_ri = merge->buf_ri;
4526   buf_rj = merge->buf_rj;
4527 
4528   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4529   owners = merge->rowmap->range;
4530   len_s  = merge->len_s;
4531 
4532   /* send and recv matrix values */
4533   /*-----------------------------*/
4534   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4535   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4536 
4537   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4538   for (proc=0,k=0; proc<size; proc++) {
4539     if (!len_s[proc]) continue;
4540     i    = owners[proc];
4541     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4542     k++;
4543   }
4544 
4545   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4546   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4547   ierr = PetscFree(status);CHKERRQ(ierr);
4548 
4549   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4550   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4551 
4552   /* insert mat values of mpimat */
4553   /*----------------------------*/
4554   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4555   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4556 
4557   for (k=0; k<merge->nrecv; k++) {
4558     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4559     nrows       = *(buf_ri_k[k]);
4560     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4561     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4562   }
4563 
4564   /* set values of ba */
4565   m = merge->rowmap->n;
4566   for (i=0; i<m; i++) {
4567     arow = owners[rank] + i;
4568     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4569     bnzi = bi[i+1] - bi[i];
4570     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4571 
4572     /* add local non-zero vals of this proc's seqmat into ba */
4573     anzi   = ai[arow+1] - ai[arow];
4574     aj     = a->j + ai[arow];
4575     aa     = a->a + ai[arow];
4576     nextaj = 0;
4577     for (j=0; nextaj<anzi; j++) {
4578       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4579         ba_i[j] += aa[nextaj++];
4580       }
4581     }
4582 
4583     /* add received vals into ba */
4584     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4585       /* i-th row */
4586       if (i == *nextrow[k]) {
4587         anzi   = *(nextai[k]+1) - *nextai[k];
4588         aj     = buf_rj[k] + *(nextai[k]);
4589         aa     = abuf_r[k] + *(nextai[k]);
4590         nextaj = 0;
4591         for (j=0; nextaj<anzi; j++) {
4592           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4593             ba_i[j] += aa[nextaj++];
4594           }
4595         }
4596         nextrow[k]++; nextai[k]++;
4597       }
4598     }
4599     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4600   }
4601   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4602   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4603 
4604   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4605   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4606   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4607   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4608   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4609   PetscFunctionReturn(0);
4610 }
4611 
4612 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4613 {
4614   PetscErrorCode      ierr;
4615   Mat                 B_mpi;
4616   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4617   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4618   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4619   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4620   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4621   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4622   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4623   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4624   MPI_Status          *status;
4625   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4626   PetscBT             lnkbt;
4627   Mat_Merge_SeqsToMPI *merge;
4628   PetscContainer      container;
4629 
4630   PetscFunctionBegin;
4631   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4632 
4633   /* make sure it is a PETSc comm */
4634   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4635   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4636   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4637 
4638   ierr = PetscNew(&merge);CHKERRQ(ierr);
4639   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4640 
4641   /* determine row ownership */
4642   /*---------------------------------------------------------*/
4643   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4644   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4645   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4646   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4647   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4648   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4649   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4650 
4651   m      = merge->rowmap->n;
4652   owners = merge->rowmap->range;
4653 
4654   /* determine the number of messages to send, their lengths */
4655   /*---------------------------------------------------------*/
4656   len_s = merge->len_s;
4657 
4658   len          = 0; /* length of buf_si[] */
4659   merge->nsend = 0;
4660   for (proc=0; proc<size; proc++) {
4661     len_si[proc] = 0;
4662     if (proc == rank) {
4663       len_s[proc] = 0;
4664     } else {
4665       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4666       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4667     }
4668     if (len_s[proc]) {
4669       merge->nsend++;
4670       nrows = 0;
4671       for (i=owners[proc]; i<owners[proc+1]; i++) {
4672         if (ai[i+1] > ai[i]) nrows++;
4673       }
4674       len_si[proc] = 2*(nrows+1);
4675       len         += len_si[proc];
4676     }
4677   }
4678 
4679   /* determine the number and length of messages to receive for ij-structure */
4680   /*-------------------------------------------------------------------------*/
4681   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4682   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4683 
4684   /* post the Irecv of j-structure */
4685   /*-------------------------------*/
4686   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4687   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4688 
4689   /* post the Isend of j-structure */
4690   /*--------------------------------*/
4691   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4692 
4693   for (proc=0, k=0; proc<size; proc++) {
4694     if (!len_s[proc]) continue;
4695     i    = owners[proc];
4696     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4697     k++;
4698   }
4699 
4700   /* receives and sends of j-structure are complete */
4701   /*------------------------------------------------*/
4702   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4703   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4704 
4705   /* send and recv i-structure */
4706   /*---------------------------*/
4707   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4708   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4709 
4710   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4711   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4712   for (proc=0,k=0; proc<size; proc++) {
4713     if (!len_s[proc]) continue;
4714     /* form outgoing message for i-structure:
4715          buf_si[0]:                 nrows to be sent
4716                [1:nrows]:           row index (global)
4717                [nrows+1:2*nrows+1]: i-structure index
4718     */
4719     /*-------------------------------------------*/
4720     nrows       = len_si[proc]/2 - 1;
4721     buf_si_i    = buf_si + nrows+1;
4722     buf_si[0]   = nrows;
4723     buf_si_i[0] = 0;
4724     nrows       = 0;
4725     for (i=owners[proc]; i<owners[proc+1]; i++) {
4726       anzi = ai[i+1] - ai[i];
4727       if (anzi) {
4728         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4729         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4730         nrows++;
4731       }
4732     }
4733     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4734     k++;
4735     buf_si += len_si[proc];
4736   }
4737 
4738   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4739   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4740 
4741   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4742   for (i=0; i<merge->nrecv; i++) {
4743     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4744   }
4745 
4746   ierr = PetscFree(len_si);CHKERRQ(ierr);
4747   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4748   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4749   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4750   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4751   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4752   ierr = PetscFree(status);CHKERRQ(ierr);
4753 
4754   /* compute a local seq matrix in each processor */
4755   /*----------------------------------------------*/
4756   /* allocate bi array and free space for accumulating nonzero column info */
4757   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4758   bi[0] = 0;
4759 
4760   /* create and initialize a linked list */
4761   nlnk = N+1;
4762   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4763 
4764   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4765   len  = ai[owners[rank+1]] - ai[owners[rank]];
4766   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4767 
4768   current_space = free_space;
4769 
4770   /* determine symbolic info for each local row */
4771   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4772 
4773   for (k=0; k<merge->nrecv; k++) {
4774     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4775     nrows       = *buf_ri_k[k];
4776     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4777     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4778   }
4779 
4780   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4781   len  = 0;
4782   for (i=0; i<m; i++) {
4783     bnzi = 0;
4784     /* add local non-zero cols of this proc's seqmat into lnk */
4785     arow  = owners[rank] + i;
4786     anzi  = ai[arow+1] - ai[arow];
4787     aj    = a->j + ai[arow];
4788     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4789     bnzi += nlnk;
4790     /* add received col data into lnk */
4791     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4792       if (i == *nextrow[k]) { /* i-th row */
4793         anzi  = *(nextai[k]+1) - *nextai[k];
4794         aj    = buf_rj[k] + *nextai[k];
4795         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4796         bnzi += nlnk;
4797         nextrow[k]++; nextai[k]++;
4798       }
4799     }
4800     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4801 
4802     /* if free space is not available, make more free space */
4803     if (current_space->local_remaining<bnzi) {
4804       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4805       nspacedouble++;
4806     }
4807     /* copy data into free space, then initialize lnk */
4808     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4809     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4810 
4811     current_space->array           += bnzi;
4812     current_space->local_used      += bnzi;
4813     current_space->local_remaining -= bnzi;
4814 
4815     bi[i+1] = bi[i] + bnzi;
4816   }
4817 
4818   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4819 
4820   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4821   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4822   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4823 
4824   /* create symbolic parallel matrix B_mpi */
4825   /*---------------------------------------*/
4826   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4827   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4828   if (n==PETSC_DECIDE) {
4829     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4830   } else {
4831     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4832   }
4833   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4834   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4835   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4836   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4837   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4838 
4839   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4840   B_mpi->assembled    = PETSC_FALSE;
4841   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4842   merge->bi           = bi;
4843   merge->bj           = bj;
4844   merge->buf_ri       = buf_ri;
4845   merge->buf_rj       = buf_rj;
4846   merge->coi          = NULL;
4847   merge->coj          = NULL;
4848   merge->owners_co    = NULL;
4849 
4850   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4851 
4852   /* attach the supporting struct to B_mpi for reuse */
4853   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4854   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4855   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4856   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4857   *mpimat = B_mpi;
4858 
4859   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4860   PetscFunctionReturn(0);
4861 }
4862 
4863 /*@C
4864       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4865                  matrices from each processor
4866 
4867     Collective on MPI_Comm
4868 
4869    Input Parameters:
4870 +    comm - the communicator the parallel matrix will live on
4871 .    seqmat - the input sequential matrix
4872 .    m - number of local rows (or PETSC_DECIDE)
4873 .    n - number of local columns (or PETSC_DECIDE)
4874 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4875 
4876    Output Parameter:
4877 .    mpimat - the parallel matrix generated
4878 
4879     Level: advanced
4880 
4881    Notes:
4882      The dimensions of the sequential matrix in each processor MUST be the same.
4883      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4884      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4885 @*/
4886 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4887 {
4888   PetscErrorCode ierr;
4889   PetscMPIInt    size;
4890 
4891   PetscFunctionBegin;
4892   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4893   if (size == 1) {
4894     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4895     if (scall == MAT_INITIAL_MATRIX) {
4896       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4897     } else {
4898       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4899     }
4900     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4901     PetscFunctionReturn(0);
4902   }
4903   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4904   if (scall == MAT_INITIAL_MATRIX) {
4905     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4906   }
4907   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4908   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4909   PetscFunctionReturn(0);
4910 }
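/*
   Illustrative sketch, not part of the library: summing per-process sequential
   contribution matrices into one parallel matrix with MatCreateMPIAIJSumSeqAIJ().
   Mglobal/Nglobal and the way seq is filled are placeholders; every process must
   supply a sequential matrix of the same (global) dimensions.
*/
static PetscErrorCode ExampleSumSeqAIJ(MPI_Comm comm,PetscInt Mglobal,PetscInt Nglobal,Mat *C)
{
  PetscErrorCode ierr;
  Mat            seq;

  PetscFunctionBegin;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,Mglobal,Nglobal,1,NULL,&seq);CHKERRQ(ierr);
  /* ... insert this process's contributions with MatSetValues() ... */
  ierr = MatAssemblyBegin(seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatCreateMPIAIJSumSeqAIJ(comm,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
  /* per the Notes above, seq is kept with *C and freed when *C is destroyed */
  PetscFunctionReturn(0);
}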
4911 
4912 /*@
4913      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4914           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4915           with MatGetSize().
4916 
4917     Not Collective
4918 
4919    Input Parameters:
4920 +    A - the matrix
4921 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4922 
4923    Output Parameter:
4924 .    A_loc - the local sequential matrix generated
4925 
4926     Level: developer
4927 
4928 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4929 
4930 @*/
4931 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4932 {
4933   PetscErrorCode ierr;
4934   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4935   Mat_SeqAIJ     *mat,*a,*b;
4936   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4937   MatScalar      *aa,*ba,*cam;
4938   PetscScalar    *ca;
4939   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4940   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4941   PetscBool      match;
4942   MPI_Comm       comm;
4943   PetscMPIInt    size;
4944 
4945   PetscFunctionBegin;
4946   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4947   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4948   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4949   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4950   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4951 
4952   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4953   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4954   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4955   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4956   aa = a->a; ba = b->a;
4957   if (scall == MAT_INITIAL_MATRIX) {
4958     if (size == 1) {
4959       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4960       PetscFunctionReturn(0);
4961     }
4962 
4963     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4964     ci[0] = 0;
4965     for (i=0; i<am; i++) {
4966       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4967     }
4968     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4969     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4970     k    = 0;
4971     for (i=0; i<am; i++) {
4972       ncols_o = bi[i+1] - bi[i];
4973       ncols_d = ai[i+1] - ai[i];
4974       /* off-diagonal portion of A */
4975       for (jo=0; jo<ncols_o; jo++) {
4976         col = cmap[*bj];
4977         if (col >= cstart) break;
4978         cj[k]   = col; bj++;
4979         ca[k++] = *ba++;
4980       }
4981       /* diagonal portion of A */
4982       for (j=0; j<ncols_d; j++) {
4983         cj[k]   = cstart + *aj++;
4984         ca[k++] = *aa++;
4985       }
4986       /* off-diagonal portion of A */
4987       for (j=jo; j<ncols_o; j++) {
4988         cj[k]   = cmap[*bj++];
4989         ca[k++] = *ba++;
4990       }
4991     }
4992     /* put together the new matrix */
4993     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4994     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4995     /* Since these are PETSc arrays, change flags to free them as necessary. */
4996     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4997     mat->free_a  = PETSC_TRUE;
4998     mat->free_ij = PETSC_TRUE;
4999     mat->nonew   = 0;
5000   } else if (scall == MAT_REUSE_MATRIX) {
5001     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5002     ci = mat->i; cj = mat->j; cam = mat->a;
5003     for (i=0; i<am; i++) {
5004       /* off-diagonal portion of A */
5005       ncols_o = bi[i+1] - bi[i];
5006       for (jo=0; jo<ncols_o; jo++) {
5007         col = cmap[*bj];
5008         if (col >= cstart) break;
5009         *cam++ = *ba++; bj++;
5010       }
5011       /* diagonal portion of A */
5012       ncols_d = ai[i+1] - ai[i];
5013       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5014       /* off-diagonal portion of A */
5015       for (j=jo; j<ncols_o; j++) {
5016         *cam++ = *ba++; bj++;
5017       }
5018     }
5019   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5020   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5021   PetscFunctionReturn(0);
5022 }
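/*
   Illustrative sketch, not part of the library: extracting the local rows of a
   parallel AIJ matrix with MatMPIAIJGetLocalMat() and refreshing the result after
   the numerical values of A have changed (same nonzero pattern).
*/
static PetscErrorCode ExampleGetLocalMat(Mat A)
{
  PetscErrorCode ierr;
  Mat            Aloc;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr); /* allocates Aloc */
  /* ... use Aloc; later, after A's values change ... */
  ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);   /* refills the same Aloc */
  ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}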
5023 
5024 /*@C
5025      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5026 
5027     Not Collective
5028 
5029    Input Parameters:
5030 +    A - the matrix
5031 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5032 -    row, col - index sets of rows and columns to extract (or NULL)
5033 
5034    Output Parameter:
5035 .    A_loc - the local sequential matrix generated
5036 
5037     Level: developer
5038 
5039 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5040 
5041 @*/
5042 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5043 {
5044   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5045   PetscErrorCode ierr;
5046   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5047   IS             isrowa,iscola;
5048   Mat            *aloc;
5049   PetscBool      match;
5050 
5051   PetscFunctionBegin;
5052   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5053   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5054   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5055   if (!row) {
5056     start = A->rmap->rstart; end = A->rmap->rend;
5057     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5058   } else {
5059     isrowa = *row;
5060   }
5061   if (!col) {
5062     start = A->cmap->rstart;
5063     cmap  = a->garray;
5064     nzA   = a->A->cmap->n;
5065     nzB   = a->B->cmap->n;
5066     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5067     ncols = 0;
5068     for (i=0; i<nzB; i++) {
5069       if (cmap[i] < start) idx[ncols++] = cmap[i];
5070       else break;
5071     }
5072     imark = i;
5073     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5074     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5075     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5076   } else {
5077     iscola = *col;
5078   }
5079   if (scall != MAT_INITIAL_MATRIX) {
5080     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5081     aloc[0] = *A_loc;
5082   }
5083   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5084   if (!col) { /* attach global id of condensed columns */
5085     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5086   }
5087   *A_loc = aloc[0];
5088   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5089   if (!row) {
5090     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5091   }
5092   if (!col) {
5093     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5094   }
5095   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5096   PetscFunctionReturn(0);
5097 }
5098 
5099 /*@C
5100     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5101 
5102     Collective on Mat
5103 
5104    Input Parameters:
5105 +    A,B - the matrices in mpiaij format
5106 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5107 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5108 
5109    Output Parameters:
5110 +    rowb, colb - index sets of rows and columns of B to extract
5111 -    B_seq - the sequential matrix generated
5112 
5113     Level: developer
5114 
5115 @*/
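/*
   A minimal usage sketch (illustrative; variable names are placeholders).  The index
   sets returned by the MAT_INITIAL_MATRIX call must be passed back unchanged for
   MAT_REUSE_MATRIX and destroyed by the caller when no longer needed:

      Mat B_seq;
      IS  rowb = NULL,colb = NULL;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ... B changes numerically but not structurally ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/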
5116 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5117 {
5118   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5119   PetscErrorCode ierr;
5120   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5121   IS             isrowb,iscolb;
5122   Mat            *bseq=NULL;
5123 
5124   PetscFunctionBegin;
5125   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5126     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5127   }
5128   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5129 
5130   if (scall == MAT_INITIAL_MATRIX) {
5131     start = A->cmap->rstart;
5132     cmap  = a->garray;
5133     nzA   = a->A->cmap->n;
5134     nzB   = a->B->cmap->n;
5135     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5136     ncols = 0;
5137     for (i=0; i<nzB; i++) {  /* row < local row index */
5138       if (cmap[i] < start) idx[ncols++] = cmap[i];
5139       else break;
5140     }
5141     imark = i;
5142     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5143     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5144     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5145     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5146   } else {
5147     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5148     isrowb  = *rowb; iscolb = *colb;
5149     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5150     bseq[0] = *B_seq;
5151   }
5152   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5153   *B_seq = bseq[0];
5154   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5155   if (!rowb) {
5156     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5157   } else {
5158     *rowb = isrowb;
5159   }
5160   if (!colb) {
5161     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5162   } else {
5163     *colb = iscolb;
5164   }
5165   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5166   PetscFunctionReturn(0);
5167 }
5168 
5169 /*
5170     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5171     of the OFF-DIAGONAL portion of local A
5172 
5173     Collective on Mat
5174 
5175    Input Parameters:
5176 +    A,B - the matrices in mpiaij format
5177 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5178 
5179    Output Parameters:
5180 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5181 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5182 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5183 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5184 
5185     Level: developer
5186 
5187 */
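/*
   A minimal calling sketch (illustrative; variable names are placeholders).  The
   offset arrays and value buffer returned by the MAT_INITIAL_MATRIX call must be
   kept and passed back for MAT_REUSE_MATRIX, then freed by the caller:

      Mat       B_oth;
      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ... B changes numerically but not structurally ...
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/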
5188 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5189 {
5190   VecScatter_MPI_General *gen_to,*gen_from;
5191   PetscErrorCode         ierr;
5192   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5193   Mat_SeqAIJ             *b_oth;
5194   VecScatter             ctx;
5195   MPI_Comm               comm;
5196   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5197   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5198   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5199   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5200   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5201   MPI_Request            *rwaits = NULL,*swaits = NULL;
5202   MPI_Status             *sstatus,rstatus;
5203   PetscMPIInt            jj,size;
5204   VecScatterType         type;
5205   PetscBool              mpi1;
5206 
5207   PetscFunctionBegin;
5208   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5209   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5210 
5211   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5212     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5213   }
5214   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5215   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5216 
5217   if (size == 1) {
5218     if (startsj_s) *startsj_s = NULL;
5219     if (bufa_ptr)  *bufa_ptr  = NULL;
5220     *B_oth    = NULL;
5221     PetscFunctionReturn(0);
5222   }
5223 
5224   ctx = a->Mvctx;
5225   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5226   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5227   if (!mpi1) {
5228     /* a->Mvctx is not of type MPI1; Mat-Mat ops are implemented only for MPI1 scatters,
5229      thus create a->Mvctx_mpi1 */
5230     if (!a->Mvctx_mpi1) {
5231       a->Mvctx_mpi1_flg = PETSC_TRUE;
5232       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5233     }
5234     ctx = a->Mvctx_mpi1;
5235   }
5236   tag = ((PetscObject)ctx)->tag;
5237 
5238   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5239   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5240   nrecvs   = gen_from->n;
5241   nsends   = gen_to->n;
5242 
5243   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5244   srow    = gen_to->indices;    /* local row index to be sent */
5245   sstarts = gen_to->starts;
5246   sprocs  = gen_to->procs;
5247   sstatus = gen_to->sstatus;
5248   sbs     = gen_to->bs;
5249   rstarts = gen_from->starts;
5250   rprocs  = gen_from->procs;
5251   rbs     = gen_from->bs;
5252 
5253   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5254   if (scall == MAT_INITIAL_MATRIX) {
5255     /* i-array */
5256     /*---------*/
5257     /*  post receives */
5258     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5259     for (i=0; i<nrecvs; i++) {
5260       rowlen = rvalues + rstarts[i]*rbs;
5261       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5262       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5263     }
5264 
5265     /* pack the outgoing message */
5266     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5267 
5268     sstartsj[0] = 0;
5269     rstartsj[0] = 0;
5270     len         = 0; /* total length of j or a array to be sent */
5271     k           = 0;
5272     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5273     for (i=0; i<nsends; i++) {
5274       rowlen = svalues + sstarts[i]*sbs;
5275       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5276       for (j=0; j<nrows; j++) {
5277         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5278         for (l=0; l<sbs; l++) {
5279           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5280 
5281           rowlen[j*sbs+l] = ncols;
5282 
5283           len += ncols;
5284           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5285         }
5286         k++;
5287       }
5288       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5289 
5290       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5291     }
5292     /* recvs and sends of i-array are completed */
5293     i = nrecvs;
5294     while (i--) {
5295       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5296     }
5297     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5298     ierr = PetscFree(svalues);CHKERRQ(ierr);
5299 
5300     /* allocate buffers for sending j and a arrays */
5301     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5302     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5303 
5304     /* create i-array of B_oth */
5305     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5306 
5307     b_othi[0] = 0;
5308     len       = 0; /* total length of j or a array to be received */
5309     k         = 0;
5310     for (i=0; i<nrecvs; i++) {
5311       rowlen = rvalues + rstarts[i]*rbs;
5312       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5313       for (j=0; j<nrows; j++) {
5314         b_othi[k+1] = b_othi[k] + rowlen[j];
5315         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5316         k++;
5317       }
5318       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5319     }
5320     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5321 
5322     /* allocate space for j and a arrays of B_oth */
5323     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5324     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5325 
5326     /* j-array */
5327     /*---------*/
5328     /*  post receives of j-array */
5329     for (i=0; i<nrecvs; i++) {
5330       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5331       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5332     }
5333 
5334     /* pack the outgoing message j-array */
5335     k = 0;
5336     for (i=0; i<nsends; i++) {
5337       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5338       bufJ  = bufj+sstartsj[i];
5339       for (j=0; j<nrows; j++) {
5340         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5341         for (ll=0; ll<sbs; ll++) {
5342           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5343           for (l=0; l<ncols; l++) {
5344             *bufJ++ = cols[l];
5345           }
5346           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5347         }
5348       }
5349       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5350     }
5351 
5352     /* recvs and sends of j-array are completed */
5353     i = nrecvs;
5354     while (i--) {
5355       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5356     }
5357     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5358   } else if (scall == MAT_REUSE_MATRIX) {
5359     sstartsj = *startsj_s;
5360     rstartsj = *startsj_r;
5361     bufa     = *bufa_ptr;
5362     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5363     b_otha   = b_oth->a;
5364   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5365 
5366   /* a-array */
5367   /*---------*/
5368   /*  post receives of a-array */
5369   for (i=0; i<nrecvs; i++) {
5370     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5371     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5372   }
5373 
5374   /* pack the outgoing message a-array */
5375   k = 0;
5376   for (i=0; i<nsends; i++) {
5377     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5378     bufA  = bufa+sstartsj[i];
5379     for (j=0; j<nrows; j++) {
5380       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5381       for (ll=0; ll<sbs; ll++) {
5382         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5383         for (l=0; l<ncols; l++) {
5384           *bufA++ = vals[l];
5385         }
5386         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5387       }
5388     }
5389     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5390   }
5391   /* recvs and sends of a-array are completed */
5392   i = nrecvs;
5393   while (i--) {
5394     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5395   }
5396   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5397   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5398 
5399   if (scall == MAT_INITIAL_MATRIX) {
5400     /* put together the new matrix */
5401     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5402 
5403     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5404     /* Since these are PETSc arrays, change flags to free them as necessary. */
5405     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5406     b_oth->free_a  = PETSC_TRUE;
5407     b_oth->free_ij = PETSC_TRUE;
5408     b_oth->nonew   = 0;
5409 
5410     ierr = PetscFree(bufj);CHKERRQ(ierr);
5411     if (!startsj_s || !bufa_ptr) {
5412       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5413       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5414     } else {
5415       *startsj_s = sstartsj;
5416       *startsj_r = rstartsj;
5417       *bufa_ptr  = bufa;
5418     }
5419   }
5420   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5421   PetscFunctionReturn(0);
5422 }
5423 
5424 /*@C
5425   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5426 
5427   Not Collective
5428 
5429   Input Parameter:
5430 . A - The matrix in mpiaij format
5431 
5432   Output Parameters:
5433 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5434 . colmap - A map from global column index to local index into lvec
5435 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5436 
5437   Level: developer
5438 
5439 @*/
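/*
   A minimal usage sketch (illustrative only).  The returned objects are owned by the
   matrix and must not be destroyed by the caller:

      Vec        lvec;
      VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
   #else
      PetscInt   *colmap;
   #endif
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/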
5440 #if defined(PETSC_USE_CTABLE)
5441 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5442 #else
5443 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5444 #endif
5445 {
5446   Mat_MPIAIJ *a;
5447 
5448   PetscFunctionBegin;
5449   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5450   PetscValidPointer(lvec, 2);
5451   PetscValidPointer(colmap, 3);
5452   PetscValidPointer(multScatter, 4);
5453   a = (Mat_MPIAIJ*) A->data;
5454   if (lvec) *lvec = a->lvec;
5455   if (colmap) *colmap = a->colmap;
5456   if (multScatter) *multScatter = a->Mvctx;
5457   PetscFunctionReturn(0);
5458 }
5459 
5460 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5461 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5462 #if defined(PETSC_HAVE_MKL_SPARSE)
5463 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5464 #endif
5465 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5466 #if defined(PETSC_HAVE_ELEMENTAL)
5467 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5468 #endif
5469 #if defined(PETSC_HAVE_HYPRE)
5470 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5471 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5472 #endif
5473 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5474 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5475 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5476 
5477 /*
5478     Computes (B'*A')' since computing A*B directly is untenable
5479 
5480                n                       p                          p
5481         (              )       (              )         (                  )
5482       m (      A       )  *  n (       B      )   =   m (         C        )
5483         (              )       (              )         (                  )
5484 
5485 */
5486 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5487 {
5488   PetscErrorCode ierr;
5489   Mat            At,Bt,Ct;
5490 
5491   PetscFunctionBegin;
5492   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5493   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5494   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5495   ierr = MatDestroy(&At);CHKERRQ(ierr);
5496   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5497   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5498   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5499   PetscFunctionReturn(0);
5500 }
5501 
5502 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5503 {
5504   PetscErrorCode ierr;
5505   PetscInt       m=A->rmap->n,n=B->cmap->n;
5506   Mat            Cmat;
5507 
5508   PetscFunctionBegin;
5509   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5510   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5511   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5512   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5513   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5514   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5515   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5516   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5517 
5518   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5519 
5520   *C = Cmat;
5521   PetscFunctionReturn(0);
5522 }
5523 
5524 /* ----------------------------------------------------------------*/
5525 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5526 {
5527   PetscErrorCode ierr;
5528 
5529   PetscFunctionBegin;
5530   if (scall == MAT_INITIAL_MATRIX) {
5531     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5532     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5533     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5534   }
5535   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5536   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5537   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5538   PetscFunctionReturn(0);
5539 }
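/*
   From user code this kernel is reached through MatMatMult() with a MATMPIDENSE first
   argument and a MATMPIAIJ second argument; a minimal sketch (variable names are
   placeholders):

      Mat C;
      ierr = MatMatMult(A_dense,B_aij,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ... later, after the numerical values of A_dense or B_aij change ...
      ierr = MatMatMult(A_dense,B_aij,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/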
5540 
5541 /*MC
5542    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5543 
5544    Options Database Keys:
5545 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5546 
5547   Level: beginner
5548 
5549 .seealso: MatCreateAIJ()
5550 M*/
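/*
   A minimal creation sketch for this type (illustrative; the preallocation counts are
   placeholders that should reflect the actual nonzero pattern):

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
      ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/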
5551 
5552 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5553 {
5554   Mat_MPIAIJ     *b;
5555   PetscErrorCode ierr;
5556   PetscMPIInt    size;
5557 
5558   PetscFunctionBegin;
5559   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5560 
5561   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5562   B->data       = (void*)b;
5563   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5564   B->assembled  = PETSC_FALSE;
5565   B->insertmode = NOT_SET_VALUES;
5566   b->size       = size;
5567 
5568   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5569 
5570   /* build cache (stash) for off-processor entries */
5571   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5572 
5573   b->donotstash  = PETSC_FALSE;
5574   b->colmap      = 0;
5575   b->garray      = 0;
5576   b->roworiented = PETSC_TRUE;
5577 
5578   /* stuff used for matrix vector multiply */
5579   b->lvec  = NULL;
5580   b->Mvctx = NULL;
5581 
5582   /* stuff for MatGetRow() */
5583   b->rowindices   = 0;
5584   b->rowvalues    = 0;
5585   b->getrowactive = PETSC_FALSE;
5586 
5587   /* flexible pointer used in CUSP/CUSPARSE classes */
5588   b->spptr = NULL;
5589 
5590   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5591   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5592   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5593   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5594   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5595   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5596   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5597   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5598   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5599 #if defined(PETSC_HAVE_MKL_SPARSE)
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5601 #endif
5602   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5604 #if defined(PETSC_HAVE_ELEMENTAL)
5605   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5606 #endif
5607 #if defined(PETSC_HAVE_HYPRE)
5608   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5609 #endif
5610   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5613   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5614   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5615 #if defined(PETSC_HAVE_HYPRE)
5616   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5617 #endif
5618   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5619   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5620   PetscFunctionReturn(0);
5621 }
5622 
5623 /*@C
5624      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5625          and "off-diagonal" part of the matrix in CSR format.
5626 
5627    Collective on MPI_Comm
5628 
5629    Input Parameters:
5630 +  comm - MPI communicator
5631 .  m - number of local rows (Cannot be PETSC_DECIDE)
5632 .  n - This value should be the same as the local size used in creating the
5633        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5634        it calculated if N is given) For square matrices n is almost always m.
5635 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5636 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5637 .   i - row indices for "diagonal" portion of matrix
5638 .   j - column indices
5639 .   a - matrix values
5640 .   oi - row indices for "off-diagonal" portion of matrix
5641 .   oj - column indices
5642 -   oa - matrix values
5643 
5644    Output Parameter:
5645 .   mat - the matrix
5646 
5647    Level: advanced
5648 
5649    Notes:
5650        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5651        must free these arrays once the matrix has been destroyed and not before.
5652 
5653        The i and j indices are 0 based
5654 
5655        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5656 
5657        This sets local rows and cannot be used to set off-processor values.
5658 
5659        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5660        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5661        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5662        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5663        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5664        communication if it is known that only local entries will be set.
5665 
5666 .keywords: matrix, aij, compressed row, sparse, parallel
5667 
5668 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5669           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5670 @*/
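/*
   A minimal calling sketch (illustrative; the CSR arrays i,j,a and oi,oj,oa must
   already hold the "diagonal" and "off-diagonal" data described above and must stay
   valid until after the matrix is destroyed):

      Mat mat;
      ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
      ... use mat ...
      ierr = MatDestroy(&mat);CHKERRQ(ierr);
      ... only now may i,j,a,oi,oj,oa be freed ...
*/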
5671 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5672 {
5673   PetscErrorCode ierr;
5674   Mat_MPIAIJ     *maij;
5675 
5676   PetscFunctionBegin;
5677   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5678   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5679   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5680   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5681   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5682   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5683   maij = (Mat_MPIAIJ*) (*mat)->data;
5684 
5685   (*mat)->preallocated = PETSC_TRUE;
5686 
5687   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5688   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5689 
5690   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5691   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5692 
5693   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5694   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5695   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5696   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5697 
5698   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5699   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5700   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5701   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5702   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5703   PetscFunctionReturn(0);
5704 }
5705 
5706 /*
5707     Special version for direct calls from Fortran
5708 */
5709 #include <petsc/private/fortranimpl.h>
5710 
5711 /* Change these macros so they can be used in a void function */
5712 #undef CHKERRQ
5713 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5714 #undef SETERRQ2
5715 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5716 #undef SETERRQ3
5717 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5718 #undef SETERRQ
5719 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5720 
5721 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5722 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5723 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5724 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5725 #else
5726 #endif
5727 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5728 {
5729   Mat            mat  = *mmat;
5730   PetscInt       m    = *mm, n = *mn;
5731   InsertMode     addv = *maddv;
5732   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5733   PetscScalar    value;
5734   PetscErrorCode ierr;
5735 
5736   MatCheckPreallocated(mat,1);
5737   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5738 
5739 #if defined(PETSC_USE_DEBUG)
5740   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5741 #endif
5742   {
5743     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5744     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5745     PetscBool roworiented = aij->roworiented;
5746 
5747     /* Some Variables required in the macro */
5748     Mat        A                 = aij->A;
5749     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5750     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5751     MatScalar  *aa               = a->a;
5752     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5753     Mat        B                 = aij->B;
5754     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5755     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5756     MatScalar  *ba               = b->a;
5757 
5758     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5759     PetscInt  nonew = a->nonew;
5760     MatScalar *ap1,*ap2;
5761 
5762     PetscFunctionBegin;
5763     for (i=0; i<m; i++) {
5764       if (im[i] < 0) continue;
5765 #if defined(PETSC_USE_DEBUG)
5766       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5767 #endif
5768       if (im[i] >= rstart && im[i] < rend) {
5769         row      = im[i] - rstart;
5770         lastcol1 = -1;
5771         rp1      = aj + ai[row];
5772         ap1      = aa + ai[row];
5773         rmax1    = aimax[row];
5774         nrow1    = ailen[row];
5775         low1     = 0;
5776         high1    = nrow1;
5777         lastcol2 = -1;
5778         rp2      = bj + bi[row];
5779         ap2      = ba + bi[row];
5780         rmax2    = bimax[row];
5781         nrow2    = bilen[row];
5782         low2     = 0;
5783         high2    = nrow2;
5784 
5785         for (j=0; j<n; j++) {
5786           if (roworiented) value = v[i*n+j];
5787           else value = v[i+j*m];
5788           if (in[j] >= cstart && in[j] < cend) {
5789             col = in[j] - cstart;
5790             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5791             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5792           } else if (in[j] < 0) continue;
5793 #if defined(PETSC_USE_DEBUG)
5794           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5795           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5796 #endif
5797           else {
5798             if (mat->was_assembled) {
5799               if (!aij->colmap) {
5800                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5801               }
5802 #if defined(PETSC_USE_CTABLE)
5803               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5804               col--;
5805 #else
5806               col = aij->colmap[in[j]] - 1;
5807 #endif
5808               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5809               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5810                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5811                 col  =  in[j];
5812                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5813                 B     = aij->B;
5814                 b     = (Mat_SeqAIJ*)B->data;
5815                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5816                 rp2   = bj + bi[row];
5817                 ap2   = ba + bi[row];
5818                 rmax2 = bimax[row];
5819                 nrow2 = bilen[row];
5820                 low2  = 0;
5821                 high2 = nrow2;
5822                 bm    = aij->B->rmap->n;
5823                 ba    = b->a;
5824               }
5825             } else col = in[j];
5826             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5827           }
5828         }
5829       } else if (!aij->donotstash) {
5830         if (roworiented) {
5831           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5832         } else {
5833           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5834         }
5835       }
5836     }
5837   }
5838   PetscFunctionReturnVoid();
5839 }
5840 
5841